<a href="https://colab.research.google.com/github/SaurabhSRP/04-NLP-based-Projects/blob/main/Chatbot%20using%20NLTK%20and%20Keras/Chatbot_using_NLTK_and_Keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import nltk
nltk.download("punkt") #used for tokenization
nltk.download('wordnet') #used for lemmatization
nltk.download('omw-1.4')
from nltk.stem import WordNetLemmatizer
lemmatizer=WordNetLemmatizer()
import json
import pickle
import numpy as np


from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
from keras.models import load_model
import random

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [2]:
## Load the intents definition (tags, example patterns and canned responses).
# The `with` block closes the file handle as soon as parsing is done instead
# of leaving it open for the lifetime of the kernel.
with open('/content/intents.json') as json_file:
  intents=json.load(json_file)

In [3]:
intents

{'intents': [{'tag': 'greeting',
   'patterns': ['Hi there',
    'How are you',
    'Is anyone there?',
    'Hey',
    'Hola',
    'Hello',
    'Good day'],
   'responses': ['Hello, thanks for asking',
    'Good to see you again',
    'Hi there, how can I help?'],
   'context': ['']},
  {'tag': 'goodbye',
   'patterns': ['Bye',
    'See you later',
    'Goodbye',
    'Nice chatting to you, bye',
    'Till next time'],
   'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'],
   'context': ['']},
  {'tag': 'thanks',
   'patterns': ['Thanks',
    'Thank you',
    "That's helpful",
    'Awesome, thanks',
    'Thanks for helping me'],
   'responses': ['Happy to help!', 'Any time!', 'My pleasure'],
   'context': ['']},
  {'tag': 'noanswer',
   'patterns': [],
   'responses': ["Sorry, can't understand you",
    'Please give me more info',
    'Not sure I understand'],
   'context': ['']},
  {'tag': 'options',
   'patterns': ['How you could help me?',
    'What you can do

#**Preprocess the data**

In [4]:
# words     -> every token found in the example patterns
# classes   -> intent tags that own at least one pattern
# documents -> (token_list, tag) pairs, one per pattern
words,classes,documents=[],[],[]
# punctuation tokens that must not end up in the vocabulary
ignore_punctuation=list('?!,')


In [5]:
for intent in intents['intents']:
  for pattern in intent['patterns']:
    tag=intent['tag']
    # split the example sentence into tokens
    tokens=nltk.word_tokenize(pattern)
    # grow the raw vocabulary and remember which tag this pattern belongs to
    words+=tokens
    documents.append((tokens,tag))

    # a tag is registered only once it has contributed a pattern, so
    # intents with an empty pattern list never become a class
    if tag not in classes:
      classes.append(tag)


In [6]:
print(documents)

[(['Hi', 'there'], 'greeting'), (['How', 'are', 'you'], 'greeting'), (['Is', 'anyone', 'there', '?'], 'greeting'), (['Hey'], 'greeting'), (['Hola'], 'greeting'), (['Hello'], 'greeting'), (['Good', 'day'], 'greeting'), (['Bye'], 'goodbye'), (['See', 'you', 'later'], 'goodbye'), (['Goodbye'], 'goodbye'), (['Nice', 'chatting', 'to', 'you', ',', 'bye'], 'goodbye'), (['Till', 'next', 'time'], 'goodbye'), (['Thanks'], 'thanks'), (['Thank', 'you'], 'thanks'), (['That', "'s", 'helpful'], 'thanks'), (['Awesome', ',', 'thanks'], 'thanks'), (['Thanks', 'for', 'helping', 'me'], 'thanks'), (['How', 'you', 'could', 'help', 'me', '?'], 'options'), (['What', 'you', 'can', 'do', '?'], 'options'), (['What', 'help', 'you', 'provide', '?'], 'options'), (['How', 'you', 'can', 'be', 'helpful', '?'], 'options'), (['What', 'support', 'is', 'offered'], 'options'), (['How', 'to', 'check', 'Adverse', 'drug', 'reaction', '?'], 'adverse_drug'), (['Open', 'adverse', 'drugs', 'module'], 'adverse_drug'), (['Give', 'm

In [7]:
classes 

['greeting',
 'goodbye',
 'thanks',
 'options',
 'adverse_drug',
 'blood_pressure',
 'blood_pressure_search',
 'pharmacy_search',
 'hospital_search']

In [8]:
## Drop punctuation tokens, then lower-case and lemmatize what is left
kept_tokens=(token for token in words if token not in ignore_punctuation)
normalized=[lemmatizer.lemmatize(token.lower()) for token in kept_tokens]
# a set removes duplicates; sorting gives the vocabulary a stable order
words=sorted(set(normalized))

In [9]:
# De-duplicate the tags and put them in alphabetical order
classes=sorted(set(classes))

In [10]:
## Pickle the vocabulary and the tag list for later use.
# Each file is written inside a `with` block so the handle is flushed and
# closed immediately rather than being left open.
with open('words.pkl','wb') as words_file:
  pickle.dump(words,words_file)
with open('classes.pkl','wb') as classes_file:
  pickle.dump(classes,classes_file)

#**Create training data**

In [11]:
train=[]
# template label row: one zero per intent tag
output_empty=[0]*len(classes)

for doc in documents:
  # doc is a (token_list, tag) pair; normalise the tokens the same way the
  # vocabulary in `words` was normalised
  pattern_words=doc[0]
  pattern_words=[lemmatizer.lemmatize(word.lower()) for word in pattern_words]
  # Bag of words: 1 where the vocabulary word occurs in this pattern, else 0.
  # The list is called bag_of_words (not bow) so that re-running this cell
  # cannot overwrite the bow() helper defined further down the notebook.
  bag_of_words=[1 if w in pattern_words else 0 for w in words]

  # one-hot label: copy the zero template and set the slot of the current tag
  output_row=list(output_empty)
  output_row[classes.index(doc[1])]=1

  train.append([bag_of_words,output_row])

In [12]:
# Shuffle the examples so consecutive batches mix intents
random.shuffle(train)
# Split into features and labels. X - bag-of-words vectors, Y - one-hot intents.
# Each row is [bag_of_words, one_hot] and the two lists have different
# lengths, so np.array(train) would build a ragged array (deprecated, and an
# error on recent NumPy). Splitting the Python lists directly avoids that;
# the conversion to arrays happens when the model is fitted.
X_train=[features for features,_ in train]
y_train=[label for _,label in train]

  train = np.array(train)


#**Build Neural Network**

In [13]:
# Feed-forward classifier: bag-of-words vector in, one softmax unit per intent out
model = Sequential()
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(y_train[0]), activation='softmax'))
# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
# NOTE(review): newer Keras optimizers may reject `decay` - confirm against the
# installed version and switch to a learning-rate schedule if needed.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# fit on the full training set and keep the History object for inspection
hist = model.fit(np.array(X_train), np.array(y_train), epochs=200, batch_size=5, verbose=1)
# save() takes the path only; its second positional parameter is `overwrite`,
# so the History object must not be passed there
model.save('chatbot_model.h5')

Epoch 1/200


  super(SGD, self).__init__(name, **kwargs)


Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 7

#**Chatbot**

In [14]:
#you can create a separate file where you load all the pickle dumps, such as
#intents = json.loads(open('intents.json').read())
#words = pickle.load(open('words.pkl','rb'))
#classes = pickle.load(open('classes.pkl','rb'))

In [15]:
##to predict the classes we will need to provide the input the same way we did for training module
def clean_sentence(sentence):
  """Tokenize `sentence` and return its tokens lower-cased and lemmatized.

  Uses the same nltk tokenizer and lemmatizer calls as the training cells.
  """
  normalized=[]
  for token in nltk.word_tokenize(sentence):
    normalized.append(lemmatizer.lemmatize(token.lower()))
  return normalized


In [16]:
#for creating the bow 
def bow(sentence,words,show_details=True):
  """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

  Returns a numpy array with one entry per vocabulary word: 1 where the word
  occurs among the cleaned sentence tokens, 0 otherwise. When `show_details`
  is true, every match is printed.
  """
  vector=[0]*len(words)

  for token in clean_sentence(sentence):
    for position,vocab_word in enumerate(words):
      if vocab_word!=token:
        continue
      vector[position]=1
      if show_details:
        print("Found in bow: %s" %vocab_word)
  return np.array(vector)


In [17]:
#filter out prediction below a threshold
def predict_class(sentence,model):
  """Return the intents predicted for `sentence`, most probable first.

  The sentence is encoded against the global vocabulary `words`, scored by
  `model`, and every class whose score exceeds ERROR_THRESHOLD is reported
  as a dict with keys 'intent' (tag from `classes`) and 'probability'
  (the score as a string). The list is empty when nothing clears the
  threshold.
  """
  ERROR_THRESHOLD=0.25
  features=bow(sentence,words,show_details=False)
  scores=model.predict(np.array([features]))[0]

  # keep (class index, score) pairs above the threshold, best score first
  candidates=[[index,score] for index,score in enumerate(scores) if score>ERROR_THRESHOLD]
  ranked=sorted(candidates,key=lambda pair:pair[1],reverse=True)

  return [{'intent':classes[index],"probability":str(score)} for index,score in ranked]

In [18]:
intents_json=intents
def getResponse(ints,intents):
  """Pick a random canned response for the top predicted intent.

  Parameters:
    ints: list of prediction dicts ordered best-first; only the 'intent'
      key of the first entry is read. May be empty.
    intents: dict with an 'intents' list whose entries carry 'tag' and
      'responses'.

  Returns:
    One of the responses of the matching intent. If `ints` is empty, the tag
    is unknown, or the matching intent has no responses, a response of the
    'noanswer' intent is used instead; if that is unavailable too, a fixed
    apology string is returned. (Previously these cases raised IndexError or
    UnboundLocalError.)
  """
  fallback_tag='noanswer'
  default_reply="Sorry, can't understand you"

  def responses_for(wanted):
    # responses of the first intent whose tag matches, or None if there is none
    for entry in intents['intents']:
      if entry['tag']==wanted:
        return entry['responses']
    return None

  # no prediction cleared the threshold -> treat the input as "not understood"
  tag=ints[0]['intent'] if ints else fallback_tag

  for candidate in (tag,fallback_tag):
    responses=responses_for(candidate)
    if responses:
      return random.choice(responses)
  return default_reply

In [19]:
def chatbot_response(text):
  """Return the bot's reply to `text`.

  Predicts the intents with the global `model`, then picks a response from
  the global `intents` data.
  """
  return getResponse(predict_class(text,model),intents)

In [20]:
# Simple console chat: answer every line typed until the user enters 'quit'
while True:
  message=input("")
  if message=='quit':
    break
  print(chatbot_response(message))

Hi
Good to see you again
quit
