In [14]:
import nltk
import numpy as np
import pickle
import random

In [15]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [16]:
#Importing NLTK libraries...
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords 

In [17]:
#Reading file...
import json
with open('intents.json', 'r') as file:
  intents = json.load(file)

In [18]:
#init classes...
words = []
classes = []
documents = []
ignore_words = ["?","!","."]

In [19]:
#Tokenization...
for intent in intents['intents']:
  for pattern in intent['patterns']:
    tokenized_pattern = word_tokenize(pattern)
    #extending words...basically creating corpus...
    words.extend(tokenized_pattern)
    #creating docs like (['how','are','you'],'greeting')
    documents.append((tokenized_pattern, intent['tag']))
    #basically creating a list of tags called classes....
    if(intent['tag'] not in classes):
      classes.append(intent['tag'])

In [20]:
#Lemmatizing and cleaning corpus...
lemmatizer = WordNetLemmatizer()
words = [lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_words]

words=sorted(list(set(words)))

classes = sorted(list(set(classes)))

In [21]:
# creating pickle file for corpus ie words and for tags ie classes
pickle.dump(words, open('words.pkl','wb'))
pickle.dump(classes, open('classes.pkl','wb'))

In [22]:
training = []
output_empty = [0] * len(classes)

for doc in documents:
  #bag of wordss..
  bag = []

  #taking out intent pattern for that particular intent type...
  pattern_of_words = doc[0]

  #lemmatizing those patterns - create base word, in attempt to represent related words
  pattern_of_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_of_words]

  # create our bag of words array with 1, if word match found in current pattern
  for word in words:
    bag.append(1) if word in pattern_of_words else bag.append(0)
  # output is a '0' for each tag and '1' for current tag (for each pattern)
  output_row = list(output_empty)
  output_row[classes.index(doc[1])]=1

  training.append([bag, output_row])

In [23]:
random.shuffle(training)
training = np.array(training)

#create training data..
features = list(training[:,0])
target = list(training[:,1])

  


In [24]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.optimizers import SGD

In [25]:
#Created a 3 layered Sequential Neural Network with input of the array of each bag of word list...
#And output as each target layer 8 tag output to predict....

model = Sequential()

model.add(Dense(128, activation='relu', input_shape=(len(features[0]),)))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(target[0]), activation="softmax"))
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               10752     
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 10)                650       
                                                                 
Total params: 19,658
Trainable params: 19,658
Non-trainable params: 0
_________________________________________________________________


In [26]:
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01,momentum=0.9,nesterov=True,decay=1e-6)
model.compile(optimizer=sgd,loss='categorical_crossentropy',metrics=['accuracy'])

In [27]:
#fitting and saving model...
fittedModel = model.fit(np.array(features),np.array(target), epochs=57, batch_size=5,verbose=2)
model.save('chatbot_model_V1.h5',fittedModel)

Epoch 1/57
12/12 - 1s - loss: 2.3154 - accuracy: 0.1379 - 521ms/epoch - 43ms/step
Epoch 2/57
12/12 - 0s - loss: 2.1776 - accuracy: 0.2069 - 17ms/epoch - 1ms/step
Epoch 3/57
12/12 - 0s - loss: 2.1113 - accuracy: 0.2586 - 15ms/epoch - 1ms/step
Epoch 4/57
12/12 - 0s - loss: 2.0344 - accuracy: 0.3276 - 17ms/epoch - 1ms/step
Epoch 5/57
12/12 - 0s - loss: 1.9199 - accuracy: 0.2931 - 18ms/epoch - 2ms/step
Epoch 6/57
12/12 - 0s - loss: 1.7917 - accuracy: 0.4138 - 15ms/epoch - 1ms/step
Epoch 7/57
12/12 - 0s - loss: 1.7592 - accuracy: 0.3793 - 17ms/epoch - 1ms/step
Epoch 8/57
12/12 - 0s - loss: 1.5681 - accuracy: 0.5172 - 16ms/epoch - 1ms/step
Epoch 9/57
12/12 - 0s - loss: 1.5138 - accuracy: 0.5345 - 18ms/epoch - 2ms/step
Epoch 10/57
12/12 - 0s - loss: 1.4097 - accuracy: 0.5000 - 14ms/epoch - 1ms/step
Epoch 11/57
12/12 - 0s - loss: 1.3118 - accuracy: 0.5517 - 19ms/epoch - 2ms/step
Epoch 12/57
12/12 - 0s - loss: 1.1536 - accuracy: 0.6724 - 20ms/epoch - 2ms/step
Epoch 13/57
12/12 - 0s - loss: 1.24

In [53]:
inputFromClient = 'Hello how are you'
client_corpus = []
for word in words:
    client_corpus.append(1) if word in inputFromClient else client_corpus.append(0)

print(len(client_corpus))
client_corpus = np.array(client_corpus).reshape(1,-1)
client_corpus.shape

[0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]
83


(1, 83)

In [52]:
res = model.predict(client_corpus)[0]
ERROR_THRESHOLD = 0.25
results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
# sort by strength of probability
results.sort(key=lambda x: x[1], reverse=True)
return_list = []
for r in results:
    return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
return_list

[{'intent': 'greeting', 'probability': '0.85362005'}]

In [30]:
!pip install "fastapi[all]"



In [31]:
from fastapi import FastAPI

In [98]:
#app = FastAPI()

#@app.get("/predict")
def processRequest(inputFromClient):
  client_corpus = []
  #client_corpus = bow(inputFromClient, client_corpus)
  for word in words:
    client_corpus.append(1) if word in inputFromClient else client_corpus.append(0)
  client_corpus = np.array(client_corpus).reshape(1,-1)
  intent_list = predictIntent(client_corpus)
  print(getResponse(intent_list,intents))

# processing user input and creating bag of words...
def bow(inputFromClient, client_corpus):
  for word in words:
    client_corpus.append(1) if word in inputFromClient else client_corpus.append(0)
    return client_corpus

# predicting intents here...
#load classes/tags pickle file before executing this function...
def predictIntent(client_corpus):
  #load model here....
  #----------------
  res = model.predict(client_corpus)[0]
  ERROR_THRESHOLD = 0.25
  results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]
  # sort by strength of probability
  results.sort(key=lambda x: x[1], reverse=True)
  return_list = []
  for r in results:
      return_list.append({"intent": classes[r[0]], "probability": str(r[1])})
  print(return_list)
  return return_list

#generating response based on predicted intent...
#load intents json file before running this function...
def getResponse(intent_list,json):
  tag = intent_list[0]['intent']
  list_of_intents = json['intents']
  for intent in list_of_intents:
    if intent['tag'] == tag:
      result = random.choice(intent['responses'])
      return result


In [100]:
while(True):
  userInput = input('Say Something: ')
  processRequest(userInput)

KeyboardInterrupt: ignored