# Chat Bot

### This is a **AI** based chat bot trained in a small dataset. 

## Importing all the required model

In [1]:
import tensorflow as tf
from tensorflow.keras import layers
import random
import numpy as np
import json
import spacy
import matplotlib.pyplot as plt

## Importing the user data

In [2]:
nlp = spacy.load("en_core_web_sm")

d = open('data.json')
json_data = json.load(d)
json_data

{'intends': [{'tag': ['greeting'],
   'question': ['hello', "what's up", 'hey', 'hi', 'hey there', 'greeting'],
   'answer': ['Hi!']},
  {'tag': ['name'],
   'question': ['what is your name',
    'introduce',
    'introduce yourself',
    'who',
    'name',
    'who are you',
    'may i have your name',
    'would you mind telling me your name',
    'how can i call you'],
   'answer': ['My name is Prajwal Bhandari']},
  {'tag': ['health'],
   'question': ['how are you',
    'fine',
    'how is your health',
    'health',
    'how are you feeling',
    'how are you doing now',
    'how is yours health been',
    'is everything okay about your health'],
   'answer': ["Yes I'm fine!"]},
  {'tag': ['situation'],
   'question': ['how is it going',
    'are you okay',
    'how do you do',
    'are you good',
    'how is your study going on'],
   'answer': ['Thikai!']},
  {'tag': ['address'],
   'question': ['where do you live',
    'home',
    'where is your home',
    'what is your address'

## Data Preprocessing

### Lemmanizing and Tokenizing the data

In [3]:
words = []
documents = []
classes = []



for intents in json_data['intends']:
    for question in intents['question']:
        
        
        doc = nlp(question.lower())
        text = [token.lemma_ for token in doc]
        
        
       
        words.extend(text)
        
        documents.append((text, intents["tag"]))
        
        if intents["tag"] not in list(classes):
            classes.append(intents["tag"])

        
        


        
        

In [4]:
words = sorted(set(words))
words

['I',
 'about',
 'address',
 'ask',
 'background',
 'base',
 'be',
 'bye',
 'call',
 'can',
 'care',
 'college',
 'could',
 'current',
 'currently',
 'do',
 'during',
 'education',
 'educational',
 'everything',
 'feel',
 'field',
 'fine',
 'from',
 'give',
 'go',
 'good',
 'goodbye',
 'greet',
 'have',
 'health',
 'hello',
 'hey',
 'hi',
 'home',
 'how',
 'introduce',
 'it',
 'later',
 'live',
 'locate',
 'location',
 'major',
 'may',
 'mind',
 'name',
 'next',
 'now',
 'okay',
 'on',
 'please',
 'see',
 'specialize',
 'study',
 'subject',
 'take',
 'tell',
 'texte',
 'there',
 'time',
 'university',
 'up',
 'what',
 'where',
 'which',
 'who',
 'would',
 'you',
 'your',
 'yours',
 'yourself']

In [5]:
documents

[(['hello'], ['greeting']),
 (['what', 'be', 'up'], ['greeting']),
 (['hey'], ['greeting']),
 (['hi'], ['greeting']),
 (['hey', 'there'], ['greeting']),
 (['greet'], ['greeting']),
 (['what', 'be', 'your', 'name'], ['name']),
 (['introduce'], ['name']),
 (['introduce', 'yourself'], ['name']),
 (['who'], ['name']),
 (['name'], ['name']),
 (['who', 'be', 'you'], ['name']),
 (['may', 'I', 'have', 'your', 'name'], ['name']),
 (['would', 'you', 'mind', 'tell', 'I', 'your', 'name'], ['name']),
 (['how', 'can', 'I', 'call', 'you'], ['name']),
 (['how', 'be', 'you'], ['health']),
 (['fine'], ['health']),
 (['how', 'be', 'your', 'health'], ['health']),
 (['health'], ['health']),
 (['how', 'be', 'you', 'feel'], ['health']),
 (['how', 'be', 'you', 'do', 'now'], ['health']),
 (['how', 'be', 'yours', 'health', 'be'], ['health']),
 (['be', 'everything', 'okay', 'about', 'your', 'health'], ['health']),
 (['how', 'be', 'it', 'go'], ['situation']),
 (['be', 'you', 'okay'], ['situation']),
 (['how', 'do

### Changing string data in numeric format

In [6]:
training_data =[]
output_empty = [0] * len(classes)
for document in documents:
    bag = []
   
    
    word_pattern = document[0]
            
    
    for word in words:
        bag.append(1) if word in word_pattern else bag.append(0)

    output_row = list(output_empty)
    output_row[classes.index(document[1])] = 1
    training_data.append([bag, output_row]) 
    
    


In [7]:
random.shuffle(training_data )


In [8]:
training_data = np.array(training_data)

train_x = list(training_data[:,0])
train_y = list(training_data[:,1])




  training_data = np.array(training_data)


## Creating our first model

In [9]:
inputs = layers.Input(shape = (len(train_x[0]),), dtype = tf.int32)

x = layers.Dense(264, activation = "relu")(inputs)
# lstm_rnn = layers.Bidirectional(layers.LSTM(512), name = "LSTM_layer")(first_dense)

x = layers.Dropout(0.5)(x)
x = layers.Dense(264, activation = "relu")(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(264, activation = "relu")(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(264, activation = "relu")(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(264, activation = "relu")(x)




outputs = layers.Dense(len(classes), activation = "softmax")(x)

model_0 = tf.keras.Model(inputs , outputs)

model_0.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 71)]              0         
                                                                 
 dense (Dense)               (None, 264)               19008     
                                                                 
 dropout (Dropout)           (None, 264)               0         
                                                                 
 dense_1 (Dense)             (None, 264)               69960     
                                                                 
 dropout_1 (Dropout)         (None, 264)               0         
                                                                 
 dense_2 (Dense)             (None, 264)               69960     
                                                                 
 dropout_2 (Dropout)         (None, 264)               0     

In [10]:
model_0.compile(loss = "categorical_crossentropy",
               optimizer = "adam",
               metrics = ["accuracy"])

model_0_history = model_0.fit(x = np.array(train_x), y = np.array(train_y), 
           epochs = 200 )

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

## Evaluating the data

In [11]:
val = "where do you study"

val_in_num = [0] * len(words)
for i ,word in enumerate(words):
        if word in val:
            val_in_num[i] = 1


In [12]:
pred = model_0.predict(np.array([val_in_num]))
pred



array([[7.6179288e-07, 1.2135545e-08, 2.1360047e-09, 1.2003484e-05,
        1.1935554e-05, 9.9978262e-01, 5.0141603e-05, 7.4370830e-05,
        6.8114517e-05]], dtype=float32)

In [13]:
pred = tf.argmax(pred ,1)
pred

<tf.Tensor: shape=(1,), dtype=int64, numpy=array([5], dtype=int64)>

In [14]:
classes

[['greeting'],
 ['name'],
 ['health'],
 ['situation'],
 ['address'],
 ['college'],
 ['education'],
 ['location'],
 ['goodbye']]

## Saving model and some data

In [15]:
import pickle

pickle.dump(words, open("words.p", "wb"))
pickle.dump(classes, open("classes.p", "wb"))

In [16]:
model_0.save('model.h5')