In [80]:
import json
import tensorflow as tf
import numpy as np
import pickle
from sklearn.utils import shuffle
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import pad_sequences, to_categorical
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dropout, Dense

In [29]:
# Load the intent definitions. The encoding is stated explicitly because JSON
# text is UTF-8, while open() otherwise falls back to the platform's locale
# encoding and can mis-decode non-ASCII patterns on some systems.
with open("intents.json", encoding="utf-8") as file:
  data = json.load(file)

In [30]:
# Parallel containers: questions[i] is an example utterance, tags[i] its intent.
questions, tags = [], []

In [31]:
# Flatten the intents into two parallel lists: one entry per example pattern,
# paired with the tag of the intent it belongs to.
# Both lists are rebuilt from `data` rather than appended to, so re-running
# this cell never duplicates the training samples.
pattern_tag_pairs = [
  (pattern, intent['tag'])
  for intent in data['intents']
  for pattern in intent['patterns']
]
questions = [pattern for pattern, _ in pattern_tag_pairs]
tags = [tag for _, tag in pattern_tag_pairs]

In [37]:
# Quick sanity check: first ten utterances, then their tags on the next line.
print(questions[:10], tags[:10], sep="\n")

['Hi', 'Hello', 'How are you?', 'Hey', 'Hi there', 'Good morning', 'Good evening', 'Greetings!', 'Hey there!', "What's up?"]
['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting']


In [34]:
# Build the vocabulary from every training utterance.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(questions)

# Keep the word -> id mapping and the vocabulary size for later cells.
word_index = tokenizer.word_index
word_len = len(word_index)

In [36]:
# Replace each utterance by the list of ids of its words, then preview ten.
sequences = tokenizer.texts_to_sequences(questions)
print(sequences[:10])

[[73], [117], [27, 13, 1], [74], [73, 75], [20, 118], [20, 119], [120], [74, 75], [42, 52]]


In [38]:
# Longest utterance (in tokens) sets the fixed width of the padded matrix.
maxlen = max(map(len, sequences))

# Pad on the right ("post") so every row has exactly `maxlen` entries.
sequences = pad_sequences(sequences, padding="post", maxlen=maxlen)
print(sequences[:10])

[[ 73   0   0   0   0   0   0   0]
 [117   0   0   0   0   0   0   0]
 [ 27  13   1   0   0   0   0   0]
 [ 74   0   0   0   0   0   0   0]
 [ 73  75   0   0   0   0   0   0]
 [ 20 118   0   0   0   0   0   0]
 [ 20 119   0   0   0   0   0   0]
 [120   0   0   0   0   0   0   0]
 [ 74  75   0   0   0   0   0   0]
 [ 42  52   0   0   0   0   0   0]]


In [39]:
# Distinct intent tags in sorted order, so every tag always gets the same id.
# Iterating a plain set of strings can yield a different order on each
# interpreter start (string hashing is randomized), which would silently
# renumber the classes every time the kernel is restarted.
unique_tags = sorted(set(tags))

# Forward (tag -> id) and reverse (id -> tag) lookup tables.
tag_index = {tag: idx for idx, tag in enumerate(unique_tags)}
index_tag = {idx: tag for tag, idx in tag_index.items()}

# Integer class id for every training sample, aligned with `questions`.
labels = [tag_index[tag] for tag in tags]
print(labels[0:10])

[13, 13, 13, 13, 13, 13, 13, 13, 13, 13]


In [40]:
# One-hot encode the class ids.
# The ids are recomputed from `tags` instead of reading the current `labels`,
# so re-running this cell cannot one-hot encode an already encoded matrix.
# `num_classes` is pinned to the number of distinct tags so the matrix width
# always equals the size of the model's softmax output layer.
labels = to_categorical(
    [tag_index[tag] for tag in tags],
    num_classes=len(unique_tags),
)
print(labels[0:10])

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]]


In [41]:
# Shuffle inputs and targets together (fixed seed keeps the order repeatable)
# and materialise each result as a NumPy array for training.
X, y = (
    np.array(part)
    for part in shuffle(sequences, labels, random_state=42)
)

In [44]:
# Intent classifier: token ids -> embeddings -> two stacked LSTMs -> dense head
# with one softmax output per intent tag.
model = Sequential([
    # word_len + 1 rows: one per vocabulary word plus id 0, which is the value
    # used for padding. `input_length` is not passed because current Keras
    # versions deprecate it (they only warn and ignore it) and older versions
    # treat it as optional; the sequence length is taken from the data at fit time.
    Embedding(input_dim=word_len + 1, output_dim=64),
    # The first LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(128, return_sequences=True),
    LSTM(64),
    Dense(64, activation="relu"),
    Dropout(0.5),
    Dense(len(unique_tags), activation="softmax"),
])



In [45]:
# Multi-class setup: the one-hot targets pair with categorical cross-entropy.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [46]:
# Train on the first GPU when TensorFlow reports one, otherwise on the CPU, so
# the cell does not rely on a device that may not exist on this machine.
train_device = "/GPU:0" if tf.config.list_physical_devices("GPU") else "/CPU:0"
with tf.device(train_device):
  # Keep the returned History object so the loss/accuracy curves stay
  # available to later cells.
  history = model.fit(X, y, batch_size=8, epochs=100)

Epoch 1/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - accuracy: 0.0508 - loss: 3.4026
Epoch 2/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.0516 - loss: 3.3907
Epoch 3/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.0262 - loss: 3.2950
Epoch 4/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0808 - loss: 3.1841
Epoch 5/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.1414 - loss: 3.0468
Epoch 6/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1311 - loss: 2.9091
Epoch 7/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.1425 - loss: 2.7208
Epoch 8/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.2567 - loss: 2.4431
Epoch 9/100
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━

In [47]:
# Print the layer-by-layer summary of the model.
model.summary()

In [48]:
# Persist the model to disk. The file name is kept exactly as is because other
# code may load it by this path.
# NOTE(review): newer Keras releases treat the .h5 format as legacy — confirm
# whether the consumers of this file can move to the .keras format.
model.save(filepath="Chatbot.h5")



In [69]:
# Serialise the fitted tokenizer so the same word -> id mapping can be reused
# outside this notebook.
with open('tokenizer.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

In [70]:
# Store the id -> tag lookup next to the model.
# JSON object keys are always strings, so the integer ids are written as
# "0", "1", ... and must be converted back (or looked up as strings) on load.
with open("index_tag.json", "w") as outfile:
    outfile.write(json.dumps(index_tag))