### Importing required libraries

In [2]:
import nltk
import json
import pickle
import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense,Dropout
from nltk.stem import WordNetLemmatizer
import random

In [3]:
# Load the intent definitions from intents.json.
# The context manager closes the file handle as soon as the text has been read,
# and the explicit UTF-8 encoding keeps parsing independent of the platform's
# default encoding.
with open('intents.json', encoding='utf-8') as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

### Data preprocessing

In [4]:
# Create the WordNet lemmatizer instance; later cells call lemmatizer.lemmatize()
# on lowercased tokens when building the vocabulary and the training bags.
lemmatizer = WordNetLemmatizer()

In [5]:
# Accumulators filled while walking the intents:
#   words     - every token seen across all patterns (the raw corpus)
#   classes   - the distinct intent tags (class labels)
#   documents - (tokenized pattern, tag) pairs
words, classes, documents = [], [], []

In [None]:
# Punctuation tokens that are filtered out of the vocabulary before lemmatizing
ignore_words = list('?!')

In [6]:
# Walk every pattern of every intent, growing the corpus, the labelled
# documents and the list of distinct tags.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # split the pattern into word tokens and add them to the corpus
        tokens = nltk.word_tokenize(pattern)
        words += tokens

        # keep the tokenized pattern together with the tag it belongs to
        label = intent['tag']
        documents.append((tokens, label))

        # record each tag only once so we know how many class labels exist
        if label in classes:
            continue
        classes.append(label)

In [7]:
# Vocabulary: drop ignored punctuation, lowercase + lemmatize each token,
# then de-duplicate (set) and sort alphabetically.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words})

# Class labels: de-duplicated and sorted
classes = sorted(set(classes))

# documents = (pattern tokens, intent tag) pairs
print(len(documents), "documents", end="\n\n")

# classes = intent tags
print(len(classes), "classes", classes, end="\n\n")

# words = the full lemmatized vocabulary
print(len(words), "unique lemmatized words", words)

43 documents

10 classes ['caring', 'goodbye', 'greeting', 'help', 'hobbies', 'investigating', 'joke', 'personel_info', 'smalltalk', 'thanks']

77 unique lemmatized words ["'ll", "'m", "'s", 'a', 'about', 'afternoon', 'any', 'are', 'back', 'be', 'can', 'care', 'catch', 'confused', 'do', 'doing', 'enjoy', 'evening', 'explain', 'family', 'favorite', 'fine', 'for', 'free', 'from', 'fun', 'good', 'goodbye', 'have', 'hello', 'help', 'helpful', 'hey', 'hi', 'hobby', 'how', 'i', 'in', 'interest', 'joke', 'later', 'like', 'look', 'lost', 'me', 'morning', 'movie', 'music', "n't", 'need', 'of', 'old', 'outside', 'say', 'see', 'so', 'stuck', 'take', 'talk', 'tell', 'thank', 'thanks', 'that', 'there', 'this', 'time', 'to', 'travel', 'understand', 'wa', 'want', 'what', 'where', 'who', 'work', 'you', 'your']


In [None]:
# Persist the vocabulary and the class labels as pickle files for later use.
# Each file is opened in a context manager so the handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

In [42]:
# Build the training set: one [bag-of-words, one-hot label] row per document.
training = []

# template for the one-hot label: one zero per class
output_empty = [0] * len(classes)

for doc in documents:
    # doc[0] is the tokenized pattern; lemmatize it the same way the vocabulary
    # was built so its tokens can be matched against `words`
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in doc[0]]
    # set copy for constant-time membership tests in the comprehension below
    pattern_vocab = set(pattern_words)

    # bag of words: 1 where the vocabulary word occurs in this pattern, else 0
    bag = [1 if w in pattern_vocab else 0 for w in words]

    # one-hot label: '0' for every tag and '1' for this document's tag (doc[1])
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

# shuffle the rows, then convert to an array.
# Each row pairs a len(words)-long bag with a len(classes)-long label, so the
# rows are ragged; dtype=object is required because recent NumPy versions raise
# ValueError for ragged input instead of silently building an object array.
random.shuffle(training)
training = np.array(training, dtype=object)

# split into features and labels. X - patterns (bags), Y - intents (one-hot)
train_x = list(training[:, 0])
train_y = list(training[:, 1])
print("Training data is created")

Training data is created


  training = np.array(training)


### Model building

In [43]:
# Sequential model with 3 dense layers: 128 neurons, then 64 neurons, then an
# output layer with one neuron per intent, using softmax to predict the intent.
model = Sequential()

# 1st layer: input width equals the bag-of-words length
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))

# dropout after layer 1 to reduce overfitting
model.add(Dropout(0.5))

# 2nd layer
model.add(Dense(64, activation='relu'))

# dropout after layer 2
model.add(Dropout(0.5))

# 3rd (output) layer: one unit per class, softmax over the intents
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile with the Adam optimizer; categorical cross-entropy matches the
# one-hot encoded labels
model.compile(loss='categorical_crossentropy', optimizer= tf.keras.optimizers.Adam(learning_rate=0.001), metrics=['accuracy'])

# fit the model (it is saved in a later cell)
hist = model.fit(np.array(train_x), np.array(train_y), epochs=199, batch_size=8, verbose=1)

Epoch 1/199
Epoch 2/199
Epoch 3/199
Epoch 4/199
Epoch 5/199
Epoch 6/199
Epoch 7/199
Epoch 8/199
Epoch 9/199
Epoch 10/199
Epoch 11/199
Epoch 12/199
Epoch 13/199
Epoch 14/199
Epoch 15/199
Epoch 16/199
Epoch 17/199
Epoch 18/199
Epoch 19/199
Epoch 20/199
Epoch 21/199
Epoch 22/199
Epoch 23/199
Epoch 24/199
Epoch 25/199
Epoch 26/199
Epoch 27/199
Epoch 28/199
Epoch 29/199
Epoch 30/199
Epoch 31/199
Epoch 32/199
Epoch 33/199
Epoch 34/199
Epoch 35/199
Epoch 36/199
Epoch 37/199
Epoch 38/199
Epoch 39/199
Epoch 40/199
Epoch 41/199
Epoch 42/199
Epoch 43/199
Epoch 44/199
Epoch 45/199
Epoch 46/199
Epoch 47/199
Epoch 48/199
Epoch 49/199
Epoch 50/199
Epoch 51/199
Epoch 52/199
Epoch 53/199
Epoch 54/199
Epoch 55/199
Epoch 56/199
Epoch 57/199
Epoch 58/199
Epoch 59/199
Epoch 60/199
Epoch 61/199
Epoch 62/199
Epoch 63/199
Epoch 64/199
Epoch 65/199
Epoch 66/199
Epoch 67/199
Epoch 68/199
Epoch 69/199
Epoch 70/199
Epoch 71/199
Epoch 72/199
Epoch 73/199
Epoch 74/199
Epoch 75/199
Epoch 76/199
Epoch 77/199
Epoch 78

Epoch 165/199
Epoch 166/199
Epoch 167/199
Epoch 168/199
Epoch 169/199
Epoch 170/199
Epoch 171/199
Epoch 172/199
Epoch 173/199
Epoch 174/199
Epoch 175/199
Epoch 176/199
Epoch 177/199
Epoch 178/199
Epoch 179/199
Epoch 180/199
Epoch 181/199
Epoch 182/199
Epoch 183/199
Epoch 184/199
Epoch 185/199
Epoch 186/199
Epoch 187/199
Epoch 188/199
Epoch 189/199
Epoch 190/199
Epoch 191/199
Epoch 192/199
Epoch 193/199
Epoch 194/199
Epoch 195/199
Epoch 196/199
Epoch 197/199
Epoch 198/199
Epoch 199/199


### Saving the trained model

In [44]:
# Write the fitted model to 'chatbot_model.h5' (path is relative to the working directory)
model.save('chatbot_model.h5')