In [None]:
# CODE FOR CREATING AND TRAINING CHATBOT

# import libraries
import random
import json
import pickle
import numpy as np
import nltk
# Fetch the NLTK resources named 'punkt' and 'wordnet' when the notebook starts.
# NOTE(review): presumably these back nltk.word_tokenize and WordNetLemmatizer
# used further down — confirm the resource names for the installed NLTK version.
nltk.download('punkt')
nltk.download('wordnet')

from keras.models import Sequential
from nltk.stem import WordNetLemmatizer
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

In [None]:
# Load the intents definition (tags, patterns & responses) from disk.
# A context manager closes the file handle deterministically instead of
# leaving it open until garbage collection; the JSON text is read as UTF-8
# so the result does not depend on the platform's default encoding.
with open("intents.json", encoding="utf-8") as intents_file:
    intents = json.load(intents_file)

In [None]:
# Accumulators filled while walking the intents file:
#   words  - every token seen in any pattern (duplicates kept for now)
#   labels - each distinct tag, in order of first appearance
#   docs   - (token list, tag) pairs, one per pattern
words = []
labels = []
docs = []
ignore_letters = ["?", "!", ".", ","]

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # tokenize the pattern and grow the raw vocabulary
        tokens = nltk.word_tokenize(pattern)
        words += tokens

        # remember which tag this tokenized pattern belongs to
        docs.append((tokens, tag))

        # register the tag the first time one of its patterns is seen
        if tag not in labels:
            labels.append(tag)

In [None]:
# Reduce the raw tokens to a sorted vocabulary of unique lemmas.
lemmatizer = WordNetLemmatizer()

# Lower-case each token before lemmatizing: the bag-of-words encoding built
# later lower-cases pattern tokens before looking them up in `words`, so a
# vocabulary entry containing an uppercase letter could never match and would
# become an always-zero feature. Punctuation listed in `ignore_letters` is
# dropped. The set removes duplicates; sorting gives a stable feature order.
# NOTE(review): any code that rebuilds bags from the saved vocabulary must
# lower-case its tokens the same way — confirm on the inference side.
words = sorted({
    lemmatizer.lemmatize(word.lower())
    for word in words
    if word not in ignore_letters
})

In [None]:
# Save the words and labels lists to binary (pickle) files.
# Context managers make sure each file is flushed and closed as soon as the
# dump finishes, rather than leaving the handles open.
with open('words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)

with open('labels.pkl', 'wb') as labels_file:
    pickle.dump(labels, labels_file)

In [None]:
# Binarise the data for neural network processing.
# Every pattern becomes one training sample [bag, output_row]:
#   bag        - one entry per vocabulary word (1 = word in pattern, 0 = not)
#   output_row - one-hot vector over `labels` marking the pattern's tag
training = []
output_empty = [0] * len(labels)

for tokens, tag in docs:
    # normalise the pattern tokens; a set makes the membership test below O(1)
    lemmas = {lemmatizer.lemmatize(token.lower()) for token in tokens}
    bag = [1 if word in lemmas else 0 for word in words]

    # copy the all-zero template so each sample owns its own label row
    output_row = list(output_empty)
    output_row[labels.index(tag)] = 1

    training.append([bag, output_row])

# shuffle once so samples are not grouped by intent when batches are drawn
random.shuffle(training)

# separate features and targets, keeping them aligned by position
train_x = [bag for bag, _ in training]
train_y = [output_row for _, output_row in training]

In [None]:
# convert the feature and target lists into NumPy arrays
train_x, train_y = np.array(train_x), np.array(train_y)

In [None]:
# Build the feed-forward classifier: two ReLU hidden layers (128 and 64
# units), each followed by 50% dropout, and a softmax output layer.
n_features = len(train_x[0])   # width of one bag-of-words vector
n_classes = len(train_y[0])    # width of one one-hot label row

model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

In [None]:
# Compile the network with Nesterov-momentum SGD, then train it.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# fit() returns a History object, kept in `hist`
features = np.array(train_x)
targets = np.array(train_y)
hist = model.fit(features, targets, epochs=200, batch_size=5, verbose=1)

print("Training complete")

In [None]:
# Save the trained model to disk.
# Only the file path is passed: the second positional parameter of
# Model.save() is `overwrite`, so handing it the History object merely worked
# because the object is truthy. The training history stays available in
# `hist` and is not written into the model file.
model.save("chatbotmodel.h5")