In [2]:
from google.colab import drive

# Folder on the mounted Drive that holds the chatbot assets (e.g. intents.json).
data_root = '/content/drive/My Drive/Chatbot'

# Attach Google Drive to the Colab VM so the path above becomes readable.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import json
import string
import random
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Fetch the NLTK resources this notebook relies on: the 'punkt' tokenizer
# models (presumably what nltk.word_tokenize loads) and the 'wordnet' corpus
# (presumably what WordNetLemmatizer looks words up in) -- TODO confirm against
# the installed NLTK version.
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [5]:
# Read the intents definition from Drive. The context manager guarantees the
# file handle is closed even if reading fails; the encoding is pinned to UTF-8
# so the result does not depend on the platform default.
with open(data_root + '/intents.json', encoding = 'utf-8') as intents_fh:
  data_file = intents_fh.read()

# Parsed intents: the rest of the notebook reads data["intents"], where each
# entry provides "tag", "patterns" and "responses".
data = json.loads(data_file)

In [7]:
# Accumulators filled while walking the intents file:
#   words   - every token seen in any pattern (later reduced to the vocabulary)
#   classes - the distinct intent tags
#   data_X  - raw pattern sentences
#   data_y  - the tag belonging to the pattern at the same index in data_X
words, classes = [], []
data_X, data_y = [], []

for intent in data["intents"]:
  tag = intent["tag"]
  for pattern in intent["patterns"]:
    tokens = nltk.word_tokenize(pattern)
    words += tokens
    data_X.append(pattern)
    data_y.append(tag)

  # Record each tag once, in first-seen order (sorted below).
  if tag not in classes:
    classes.append(tag)

lemmatizer = WordNetLemmatizer()

# Vocabulary: lowercase + lemmatise every token that is not punctuation,
# then de-duplicate and sort so feature indices are deterministic.
words = sorted({
  lemmatizer.lemmatize(token.lower())
  for token in words
  if token not in string.punctuation
})
classes = sorted(set(classes))

In [8]:
# Build (bag-of-words, one-hot label) pairs for every pattern sentence.
training = []
out_empty = [0] * len(classes)

for idx, doc in enumerate(data_X):
  # Tokenise and lemmatise word by word, mirroring how the vocabulary was built
  # and how bag_of_words() encodes user input at inference time. Lemmatising
  # the whole sentence as a single string and testing `word in text` would do
  # substring matching (e.g. "a" matches "what"), so training features would
  # not agree with inference features.
  doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
  bow = [1 if word in doc_tokens else 0 for word in words]

  # One-hot encode the intent tag of this pattern.
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1
  training.append([bow, output_row])

# Shuffle so the sample order does not follow the intents file order.
random.shuffle(training)
training = np.array(training, dtype = object)

# Split the pairs into a feature matrix and a label matrix.
train_X = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))

In [18]:
# Feed-forward intent classifier: bag-of-words vector in, one softmax
# probability per intent class out. Dropout follows each hidden layer.
model = Sequential()
model.add(Dense(128, input_shape = (len(train_X[0]), ), activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation = "relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation = "softmax"))

# NOTE(review): the legacy optimizer namespace is used here together with the
# `decay` argument -- confirm it is still available in the installed TF version.
adam = tf.keras.optimizers.legacy.Adam(learning_rate = 0.001, decay = 1e-6)
model.compile(loss= "categorical_crossentropy",
              optimizer = adam,
              metrics = ["accuracy"])

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that emitted a stray "None" line after the table).
model.summary()
model.fit(x = train_X, y = train_y, epochs = 100, verbose = 1)


Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_22 (Dense)            (None, 128)               7552      
                                                                 
 dropout_14 (Dropout)        (None, 128)               0         
                                                                 
 dense_23 (Dense)            (None, 64)                8256      
                                                                 
 dropout_15 (Dropout)        (None, 64)                0         
                                                                 
 dense_24 (Dense)            (None, 16)                1040      
                                                                 
Total params: 16848 (65.81 KB)
Trainable params: 16848 (65.81 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/100
E

<keras.src.callbacks.History at 0x7bb33e6abdf0>

In [29]:
def clean_text(text):
  """Tokenise `text` and return its lowercased, lemmatised tokens.

  The vocabulary is built from lowercased lemmas, so tokens are lowercased
  here as well; otherwise capitalised input such as "Hello" would never match
  a vocabulary entry.

  Args:
    text: raw user sentence.

  Returns:
    List of lowercased, lemmatised token strings, in sentence order.
  """
  tokens = nltk.word_tokenize(text)
  tokens = [lemmatizer.lemmatize(word.lower()) for word in tokens]
  return tokens

def bag_of_words(text, vocab):
  """Encode `text` as a binary bag-of-words vector over `vocab`.

  Args:
    text: raw user sentence; it is tokenised via clean_text().
    vocab: sequence of vocabulary words; position i of the result corresponds
      to vocab[i].

  Returns:
    NumPy array with one entry per vocabulary word: 1 if that word equals one
    of the cleaned tokens of `text`, otherwise 0.
  """
  # A set gives constant-time membership tests, replacing the nested
  # token-by-vocabulary scan while producing the same vector.
  present = set(clean_text(text))
  return np.array([1 if word in present else 0 for word in vocab])

def pred_class(text, vocab, levels, thresh = 0.5):
  """Predict the intent label(s) for `text` using the global `model`.

  Args:
    text: raw user sentence.
    vocab: vocabulary used to build the bag-of-words input vector.
    levels: sequence of class labels; levels[i] is the label for output i of
      the model.
    thresh: minimum predicted probability a class must exceed to be returned.

  Returns:
    List of labels from `levels` whose probability is above `thresh`, ordered
    from most to least probable. Empty when no class clears the threshold.
  """
  bow = bag_of_words(text, vocab)
  # predict() expects a batch, so wrap the single vector and take row 0.
  # verbose=0 keeps the per-call progress bar out of the chat transcript.
  result = model.predict(np.array([bow]), verbose = 0)[0]
  y_pred = [[indx, res] for indx, res in enumerate(result) if res > thresh]
  y_pred.sort(key = lambda pair: pair[1], reverse = True)
  # Map indices through the `levels` argument rather than the global `classes`,
  # so the function honours whatever label list the caller supplies.
  return [levels[indx] for indx, _ in y_pred]


In [30]:
def get_response(intents_list, intents_json):
  """Pick a reply for the top predicted intent.

  Args:
    intents_list: predicted intent tags, best first (may be empty).
    intents_json: parsed intents data; intents_json["intents"] is a list of
      dicts that each carry a "tag" and a list of "responses".

  Returns:
    A random response of the intent whose tag equals intents_list[0]. The
    fallback message is returned when there is no prediction, when the tag is
    not present in `intents_json`, or when the matching intent has no
    responses (previously the last two cases raised an exception).
  """
  fallback = "Sorry! I do not understand"
  if len(intents_list) == 0:
    return fallback

  tag = intents_list[0]
  for intent in intents_json["intents"]:
    if intent["tag"] == tag:
      responses = intent["responses"]
      # random.choice raises IndexError on an empty sequence.
      return random.choice(responses) if responses else fallback

  # No intent carries the predicted tag.
  return fallback

In [33]:
# Interactive chat: read a line, answer it, and repeat until the user enters "0".
print("Press 0 if you want to stop talking to the chatbot")
message = input(" ")
while message != "0":
  intents = pred_class(message, words, classes)
  result = get_response(intents, data)
  print(result)
  message = input(" ")


Press 0 if you want to stop talking to the chatbot
 0
