In [26]:
import random
import json
import pickle
import numpy as np

import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD
import tensorflow

In [27]:
lemmatizer = WordNetLemmatizer()
intents = json.loads(open('intents.json').read())

words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))

        if intent['tag'] not in classes:
            classes.append(intent['tag'])

print (documents)


[(['hello'], 'greetings'), (['hey'], 'greetings'), (['hi'], 'greetings'), (['good', 'day'], 'greetings'), (['greetings'], 'greetings'), (['what', "'s", 'up', '?'], 'greetings'), (['how', 'is', 'it', 'going'], 'greetings'), (['bye'], 'goodbye'), (['goodbye'], 'goodbye'), (['see', 'you', 'later'], 'goodbye'), (['have', 'a', 'good', 'day'], 'goodbye'), (['see', 'you', 'soon'], 'goodbye'), (['how', 'old'], 'age'), (['what', "'s", 'your', 'age'], 'age'), (['when', 'is', 'your', 'birthday'], 'age'), (['when', 'were', 'you', 'born'], 'age'), (['what', "'s", 'your', 'name'], 'name'), (['what', 'should', 'I', 'call', 'you'], 'name'), (['who', 'are', 'you'], 'name'), (['where', 'is', 'the', 'shop'], 'shop'), (['where', 'is', 'the', 'store'], 'shop'), (['where', 'is', 'the', 'supermarket'], 'shop'), (['when', 'do', 'you', 'open'], 'hours'), (['when', 'do', 'you', 'close'], 'hours'), (['what', 'are', 'your', 'hours'], 'hours'), (['thanks'], 'thanks'), (['thank', 'you'], 'thanks'), (['appreciate', 

In [28]:
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
wors = sorted(set(words))
classes = sorted(set(classes))

pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(words, open('classes.pkl', 'wb'))

print(words)

['hello', 'hey', 'hi', 'good', 'day', 'greeting', 'what', "'s", 'up', 'how', 'is', 'it', 'going', 'bye', 'goodbye', 'see', 'you', 'later', 'have', 'a', 'good', 'day', 'see', 'you', 'soon', 'how', 'old', 'what', "'s", 'your', 'age', 'when', 'is', 'your', 'birthday', 'when', 'were', 'you', 'born', 'what', "'s", 'your', 'name', 'what', 'should', 'I', 'call', 'you', 'who', 'are', 'you', 'where', 'is', 'the', 'shop', 'where', 'is', 'the', 'store', 'where', 'is', 'the', 'supermarket', 'when', 'do', 'you', 'open', 'when', 'do', 'you', 'close', 'what', 'are', 'your', 'hour', 'thanks', 'thank', 'you', 'appreciate', 'it', 'what', "'s", 'the', 'weather', 'how', "'s", 'the', 'weather', 'what', 'is', 'the', 'weather', 'what', 'stock', 'do', 'I', 'own', 'how', 'are', 'my', 'share']


In [29]:
training = []
output_empty = [0] * len(classes)

for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)
    
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

In [32]:
random.shuffle(training)
training = np.array(training, dtype=object)

train_x = np.array(list(training[:, 0]), dtype=float)
train_y = np.array(list(training[:, 1]), dtype=float)  # already one-hot encoded

# Model architecture
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Optimizer
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Train
model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)

# Save
model.save('chatbot_model.keras')
print('Done')

Epoch 1/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.1212 - loss: 2.1797      
Epoch 2/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4242 - loss: 1.7821     
Epoch 3/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5758 - loss: 1.4565 
Epoch 4/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6061 - loss: 1.1989 
Epoch 5/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6061 - loss: 1.0852 
Epoch 6/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6970 - loss: 0.8716 
Epoch 7/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6061 - loss: 1.0037 
Epoch 8/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7576 - loss: 0.7407 
Epoch 9/200
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━