In [1]:
import nltk
from nltk.stem import WordNetLemmatizer as wnl
import json
import pickle
import numpy as np
import random
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

In [2]:
# Lemmatizer plus the accumulators that the parsing loop below fills in.
lemmatizer = wnl()
words = []       # every token from every pattern (duplicates kept)
classes = []     # distinct intent tags, in first-seen order
documents = []   # (token list, intent tag) pairs, one per pattern
ignore_words = ['?', '!']

# Read the file explicitly as UTF-8. Without an encoding argument open() falls
# back to the platform's locale encoding, so non-ASCII patterns could be
# mis-decoded (or fail to decode) on some systems.
with open('intents.json', "r", encoding="utf-8") as data_file:
    intents = json.load(data_file)

# Preprocessing the data: tokenize each pattern and record it with its tag.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        w = nltk.word_tokenize(pattern)
        words.extend(w)

        documents.append((w, intent['tag']))
        # A tag is only registered once, the first time one of its patterns is seen.
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

In [None]:
# Sanity check on the parsed data: the first few (tokens, tag) pairs,
# intent tags, and raw tokens, one slice per line.
for preview in (documents[:5], classes[:6], words[:10]):
    print(preview)

In [17]:
# Normalize the vocabulary: skip the ignored punctuation tokens, then
# lower-case and lemmatize everything that is left.
lemmas = []
for token in words:
    if token in ignore_words:
        continue
    lemmas.append(lemmatizer.lemmatize(token.lower()))

# Drop duplicates and put both lists into sorted order.
words = sorted(set(lemmas))
classes = sorted(set(classes))

# Persist the vocabulary and the class list to disk with pickle.
for filename, payload in (('words.pkl', words), ('classes.pkl', classes)):
    with open(filename, 'wb') as handle:
        pickle.dump(payload, handle)

In [20]:
training = []
# Template label row: one zero per class, copied for every document.
output = [0] * len(classes)
for doc in documents:
    # doc is a (tokens, tag) pair. Lemmatize and lower-case the tokens; a set
    # makes the membership checks below constant-time.
    pattern_words = {lemmatizer.lemmatize(word.lower()) for word in doc[0]}

    # Bag of words: 1 for each vocabulary word present in this pattern, else 0.
    bag = [1 if w in pattern_words else 0 for w in words]

    # One-hot label: set the slot belonging to this document's tag.
    output_row = list(output)
    output_row[classes.index(doc[1])] = 1

    # Append exactly one row per document. This used to sit inside the
    # per-word loop, which added len(words) identical rows for every pattern.
    training.append([bag, output_row])

In [None]:
# Show the first three [bag, output_row] training pairs.
sample_rows = training[:3]
print(sample_rows)

In [36]:
# Shuffle the [bag, output_row] pairs in place; shuffling the pairs (rather
# than two separate lists) keeps each bag aligned with its label row.
random.shuffle(training)

# Split by unpacking instead of going through np.array(training)[:, k].
# Each pair holds two lists whose lengths generally differ (vocabulary size
# vs. number of classes), and NumPy >= 1.24 raises ValueError when asked to
# build an array from such ragged nested sequences. Unpacking yields the same
# list-of-lists result on every NumPy version, and `training` stays a plain
# list, so re-running this cell is safe.
trainX = [bag for bag, _ in training]
trainY = [output_row for _, output_row in training]

In [None]:
print(trainX[0:1])
print(trainY[0:1])

## Model

In [41]:
# Layer sizes are taken from the data: one input per bag-of-words entry,
# one output per entry of the label row.
n_features = len(trainX[0])
n_classes = len(trainY[0])

# Feed-forward classifier: two ReLU hidden layers (128 and 64 units), each
# followed by 50% dropout, and a softmax output layer.
model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

In [43]:
# Optimizer settings: SGD with Nesterov momentum and a small learning-rate decay.
# NOTE(review): some newer Keras releases reportedly reject or ignore the
# `decay` argument -- confirm against the installed Keras version.
sgd_settings = dict(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
sgd = SGD(**sgd_settings)

# Categorical cross-entropy pairs with the softmax output and one-hot labels.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Convert the Python lists to arrays once, then train. MOD keeps whatever
# model.fit returns so it can be inspected after training.
features = np.array(trainX)
labels = np.array(trainY)
MOD = model.fit(features, labels, epochs=200, batch_size=5, verbose=1)

In [50]:
# Save the trained model. The second positional parameter of Model.save is
# `overwrite`, not a training history, so passing MOD there only worked by
# accident (a truthy object read as overwrite=True, which is the default).
# The value returned by fit() is not written to the file either way.
model.save("model.h5")