In [60]:
import random
import json
import numpy as np
import nltk
import pickle
from nltk.stem import WordNetLemmatizer

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD




In [61]:
lemmatizer = WordNetLemmatizer()

intents = json.loads(open('intents.json').read())

words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])
            
print(documents)

[(['Hi'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Whats', 'up'], 'greeting'), (['Hey'], 'greeting'), (['greetings'], 'greeting'), (['cya'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['I', 'am', 'Leaving'], 'goodbye'), (['Have', 'a', 'Good', 'day'], 'goodbye'), (['bye'], 'goodbye'), (['cao'], 'goodbye'), (['see', 'ya'], 'goodbye'), (['What', 'is', 'progamming', '?'], 'programming'), (['What', 'is', 'coding', '?'], 'programming'), (['Tell', 'me', 'about', 'programming'], 'programming'), (['Tell', 'me', 'about', 'coding'], 'programming'), (['What', 'is', 'software', 'development', '?'], 'programming'), (['How', 'are', 'you', '?'], 'how'), (['How', 'is', 'your', 'day', '?'], 'how'), (['How', 'do', 'you', 'feel', '?'], 'how'), (['Are', 'you', 'good', '?'], 'how'), (['What', 'is', 'Flask', '?'], 'flask'), (['Do', 'you', 'know', 'about', 'Flask'

In [62]:
words = [lemmatizer.lemmatize(word) for word in words if word not in ignore_letters]
words = sorted(set(words))

In [63]:
print (words)
classes = sorted(set(classes))
print (classes)

["'s", 'Are', 'Can', 'Do', 'Explain', 'Flask', 'Give', 'Good', 'Goodbye', 'Have', 'Hello', 'Hey', 'Hi', 'How', 'I', 'Is', 'Leaving', 'See', 'Tell', 'What', 'Whats', 'a', 'about', 'am', 'anyone', 'are', 'bye', 'cao', 'coding', 'current', 'cya', 'day', 'development', 'do', 'feel', 'forecast', 'funny', 'gadget', 'good', 'greeting', 'in', 'is', 'it', 'joke', 'know', 'later', 'latest', 'laugh', 'like', 'make', 'me', 'new', 'news', 'of', 'progamming', 'programming', 'raining', 's', 'see', 'software', 'something', 'tech', 'technology', 'tell', 'temperature', 'the', 'there', 'time', 'to', 'today', 'trending', 'up', 'weather', 'world', 'ya', 'you', 'your', '’']
['flask', 'goodbye', 'greeting', 'how', 'joke', 'programming', 'tech_news', 'time', 'weather']


In [64]:


pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

In [65]:
training = []
output_empty = [0] * len(classes)   

# ...existing code...
for document in documents:
    bag = []
    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]
    for word in words:
        bag.append(1) if word in word_patterns else bag.append(0)
        
    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)

# Ensure all elements in training have the same length
train_x = np.array([np.array(t[0]) for t in training])
train_y = np.array([np.array(t[1]) for t in training])

model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)
model.save('chatbot_model.h5', hist)
print("Done")

Epoch 1/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.1538 - loss: 2.2115  
Epoch 2/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.2384 - loss: 2.1957 
Epoch 3/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.1885 - loss: 2.2010 
Epoch 4/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2403 - loss: 2.0209 
Epoch 5/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3049 - loss: 2.0391 
Epoch 6/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3994 - loss: 1.9047 
Epoch 7/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.3384 - loss: 1.9309 
Epoch 8/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4738 - loss: 1.8251 
Epoch 9/200
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[



Done
