# Intent Classification  &  Entity Name Recognition

## Intent Classification
Intent classification is a part of Natural Language Understanding (NLU) in which a machine-learning model learns to assign a given phrase to one of a fixed set of intents, based on the example phrases it has been trained on.

## Entity Name Recognition
Entity name recognition (more commonly called named entity recognition, NER) is a subtask of information extraction that seeks to locate named entities mentioned in unstructured text and classify them into pre-defined categories such as person names, organizations, locations, medical codes, time expressions, quantities, monetary values, percentages, etc.

In [1]:
import numpy as np
import pickle
import random
import json
import nltk
import spacy
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

# Shared WordNet lemmatizer: used to normalise tokens when the vocabulary is
# built, when the training bags are encoded and when sentences are cleaned
# for prediction.
lemmatizer = WordNetLemmatizer()
# spaCy pipeline whose `.ents` are read by ner() to extract named entities.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Corpus accumulators:
#   words     - every token seen in the patterns (later: sorted unique lemmas)
#   classes   - every intent tag that owns at least one pattern
#   documents - (tokenized pattern, intent tag) pairs
words = []
classes = []
documents = []
ignore_words = ['?', '!']

# Read the intent definitions. The context manager closes the file handle as
# soon as the text has been read instead of leaving it to the garbage collector.
with open('intents.json') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)


for intent in intents['intent']:
    for pattern in intent['patterns']:

        # tokenize each word
        w = word_tokenize(pattern)
        words.extend(w)

        # add documents in the corpus
        documents.append((w, intent['tag']))

        # add to our classes list
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# lemmatize and lower each word and remove duplicates
# (the ignore list is matched against the raw token, before lower-casing)
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))

# sort classes
classes = sorted(set(classes))

# documents = combination between patterns and intents
print(len(documents), "documents")
print("*" * 50)

# classes = intents
print(len(classes), "classes", classes)
print("*" * 50)

# words = all words, vocabulary
print(len(words), "unique lemmatized words", words)

75 documents
**************************************************
12 classes ['alarm', 'date', 'getweather', 'goodbye', 'greeting', 'info', 'options', 'play_music', 'thanks', 'time', 'turnOFFlight', 'turnONlight']
**************************************************
147 unique lemmatized words ["'s", ',', '11', '4', '5', '5:45', '6:15', '6:30', '7', '8:00', '9', '[', ']', 'a', 'about', 'alarm', 'am', 'an', 'and', 'another', 'anyone', 'are', 'ask', 'at', 'awesome', 'be', 'brief', 'bye', 'cairo', 'can', 'clock', 'cookie', 'could', 'create', 'current', 'date', 'day', 'definition', 'dim', 'dinner', 'do', 'doe', 'doing', 'for', 'forecast', 'from', 'gaga', 'garage', 'get', 'give', 'go', 'going', 'good', 'goodbye', 'hear', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'hola', 'hour', 'how', 'i', 'in', 'information', 'is', 'it', 'jazz', 'kitchen', 'know', 'lady', 'later', 'let', 'light', 'like', 'listen', 'looking', 'lunch', 'main', 'make', 'maroon', 'me', 'minute', 'music', 'my', 'need', 'n

In [3]:
# create our training data: one [bag_of_words, one_hot_intent] pair per document
training = []

# template for the one-hot output row: one zero per intent class
output_empty = [0] * len(classes)

# training set, bag of words for each sentence
for doc in documents:

    # list of tokenized words for the pattern, lemmatized and lower-cased the
    # same way as the vocabulary so that related word forms line up
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]

    # bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bag = [1 if w in pattern_words else 0 for w in words]

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# shuffle our features
random.shuffle(training)

# create train and test lists. X - patterns, Y - intents
# Split while `training` is still a plain list, so no ragged array is needed
# to separate the two columns.
train_x = [row[0] for row in training]
train_y = [row[1] for row in training]

# Each row pairs a bag with an output row of a different length, so the array
# is ragged. dtype=object must be requested explicitly: without it NumPy emits
# a VisibleDeprecationWarning on older releases and raises ValueError on newer
# ones.
training = np.array(training, dtype=object)
print("Training data created")

Training data created


  training = np.array(training)


In [4]:
# Feed-forward intent classifier built from three Dense layers:
#   128 ReLU units -> dropout 0.5 -> 64 ReLU units -> dropout 0.5 -> softmax.
# The input width is the length of one bag-of-words vector and the softmax
# layer has one unit per intent, so its output is a distribution over intents.
model = Sequential([
    Dense(128, input_shape=(len(train_x[0]),), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(len(train_y[0]), activation='softmax'),
])

In [5]:
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
# `learning_rate` is the supported keyword; the `lr` alias is deprecated and is
# no longer accepted by newer Keras releases.
# NOTE(review): newer Keras optimizers also reject the `decay` argument -
# confirm against the installed version before upgrading.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])



In [6]:
# Train on every document (no validation split) for 200 epochs in mini-batches
# of 5; `hist` keeps the History object returned by fit().
hist = model.fit(
    x=np.array(train_x),
    y=np.array(train_y),
    epochs=200,
    batch_size=5,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155

Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


In [7]:
# Persist the trained network. Only the file path is passed: the second
# positional parameter of save() is the `overwrite` flag, so handing it the
# History object (as before) merely acted as a truthy flag and saved nothing
# extra.
model.save('Model.h5')

# Persist the label list and vocabulary in the exact order used for training;
# prediction indexes into both of them by position.
with open('classes.pkl', 'wb') as file:
    pickle.dump(classes, file)

with open('words.pkl', 'wb') as file:
    pickle.dump(words, file)


In [8]:
# Reload the artefacts written by the save cell above, so that prediction can
# run from the files on disk rather than from the in-memory training state.
from keras.models import load_model
model = load_model('Model.h5')

# NOTE: pickle.load executes arbitrary code from the file; only load pickles
# produced by this notebook or another trusted source.
with open('classes.pkl','rb') as file:  
    classes = pickle.load(file)

with open('words.pkl','rb') as file:  
    words = pickle.load(file)


def clean_up_sentence(sentence):
    """Tokenize *sentence* and return its lower-cased, lemmatized tokens.

    The same normalisation (``word_tokenize`` followed by
    ``lemmatizer.lemmatize`` on the lower-cased token) is applied to the
    training patterns, so the returned tokens can be compared directly with
    the vocabulary.
    """
    return [
        lemmatizer.lemmatize(token.lower())
        for token in word_tokenize(sentence)
    ]

def bow(sentence, words, show_details=True):
    """Encode *sentence* as a binary bag-of-words vector over *words*.

    Args:
        sentence: raw input text; it is normalised with clean_up_sentence().
        words: vocabulary list; position i of the result corresponds to
            words[i].
        show_details: when true, print a line for every vocabulary hit.

    Returns:
        A NumPy array with 1 at every vocabulary position whose word occurs
        in the sentence and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)

    # start from an all-zero vector, one slot per vocabulary entry
    bag = [0 for _ in words]
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # the sentence token matches the vocabulary entry at this position
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(bag)

def predict_class(sentence, model, error_threshold=0.25):
    """Return the intents predicted for *sentence*, most probable first.

    The sentence is encoded with bow() against the module-level ``words``
    vocabulary, and the class indices are mapped back to tags through the
    module-level ``classes`` list.

    Args:
        sentence: raw input text.
        model: object whose ``predict`` accepts a batch of bag-of-words
            vectors and returns one score per class for each row.
        error_threshold: only classes scoring strictly above this value are
            reported. Defaults to 0.25, the value that used to be hard-coded.

    Returns:
        A list of ``{"Intent": tag, "Probability": str(score)}`` dicts sorted
        by descending score; empty when no class clears the threshold.
    """
    # encode the sentence and score it as a batch of one
    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]

    # filter out predictions at or below the threshold, then sort by strength
    # of probability (sorted() is stable, so ties keep class-index order)
    ranked = sorted(
        ((index, score) for index, score in enumerate(scores)
         if score > error_threshold),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {"Intent": classes[index], "Probability": str(score)}
        for index, score in ranked
    ]

In [9]:
# Smoke test: a direct weather question (recorded run: 'getweather').
predict_class('what is the weather in cairo?',model)

[{'Intent': 'getweather', 'Probability': '0.9999659'}]

In [10]:
# Smoke test: a light-control command (recorded run: 'turnONlight').
predict_class('please turn on the light of room',model)

[{'Intent': 'turnONlight', 'Probability': '0.9999627'}]

In [11]:
def ner(sentence):
    """Return the named entities that the spaCy pipeline finds in *sentence*.

    Each entity is reported as ``{"Name": <entity text>, "Label": <entity
    label>}``; the list is empty when the pipeline detects no entities.
    """
    return [
        {"Name": entity.text, "Label": entity.label_}
        for entity in nlp(sentence).ents
    ]

In [12]:
def nlu(sentence):
    """Print the predicted intents and the named entities for *sentence*.

    Intents come from predict_class() using the module-level ``model`` and
    entities from ner(). Nothing is returned; both lists are printed on one
    line.
    """
    print(predict_class(sentence, model), ner(sentence))

In [13]:
# Intent plus a location entity (recorded run: 'time', 'cairo' tagged GPE).
nlu('What is the time now in cairo?')

[{'Intent': 'time', 'Probability': '0.99999976'}] [{'Name': 'cairo', 'Label': 'GPE'}]


In [14]:
# Sentence that only mentions the weather in passing (recorded run:
# 'getweather', no entities).
nlu('Do you feel different because of weather?')

[{'Intent': 'getweather', 'Probability': '0.95905155'}] []


In [15]:
# Forecast question with a date expression (recorded run: 'getweather',
# 'tomorrow' tagged DATE).
nlu("Do you know what the weather will be like tomorrow?")

[{'Intent': 'getweather', 'Probability': '0.99956137'}] [{'Name': 'tomorrow', 'Label': 'DATE'}]


In [16]:
# Forecast question that never uses the word "weather" (recorded run:
# 'getweather', 'tomorrow' tagged DATE).
nlu( "Do you know if it's going to rain tomorrow?" )

[{'Intent': 'getweather', 'Probability': '0.9300208'}] [{'Name': 'tomorrow', 'Label': 'DATE'}]


In [17]:
# Information request (recorded run: 'info', no entities).
nlu('Tell me some information about History of Ancient Egyptian')

[{'Intent': 'info', 'Probability': '0.9191558'}] []


In [18]:
# Ambiguous case: in the recorded run two intents cleared the 0.25 threshold,
# with 'greeting' ranked ahead of 'info'.
nlu('Get an information about climate changes')

[{'Intent': 'greeting', 'Probability': '0.6700375'}, {'Intent': 'info', 'Probability': '0.3023287'}] []
