In [198]:
## The Main Library we're going to use in this Project
import nltk
## We'll be trying three different Stemming Algorithms and Choose one of 'em at the end ' 
from nltk.stem.lancaster import LancasterStemmer
from nltk.stem.snowball import SnowballStemmer
from nltk.stem import WordNetLemmatizer

## to create and train the Neural Network we'll use Tensorflow library especially the Keras
from tensorflow.keras.layers import Dropout, Activation, Dense
from tensorflow.keras.models import Sequential
## bec of confusion we'll experiment both the SGD and Adam Optimizers together and choose one at the end
from tensorflow.keras.optimizers import SGD, Adam ## unfortunately there's no longer Momentem Optimizer available in  TF 2.0

## Importing the essential Packages to read some external files
import pickle
import json

## The last Two essential Libraries
import numpy as np
import random as rd

   # ................................................ Now We're Ready ................................................

# ====> Data Preprocessing <====

In [199]:
## firstly creating the proper lists 
words = []
classes = []
docs = []

## special list of Characters we're going to ignore
IGNORE = ['?', '!', '_', '-', '.'] ## that list will be updated later so stay tuned !!

## context manager to read the JSON file
intents = json.load(open('intents.json')) ## i've tried other method using [with] but it failed :(


In [200]:
## Tokenizing and preprocessing
## some traditional steps at the beginning
for intent in intents['intents']:
    for pattern in intent['patterns']: ## just take care , here we should use [intent] as we've already entered the big dict
        ## tokenize each word , it's used to to split paragraphs and sentences into smaller units that can be more easily assigned meaning
        w = nltk.word_tokenize(pattern, language='English') ## we'll choose the English language
        ## add 'em to the main list
        words.extend(w) ## we've used [extend] not [append] to be able to add multiple elements at the end of the list unlike append
        ## add the tags to the docs main list
        docs.append((w, intent['tag']))
        
        ## if we didn't find the tag in the labels we should add it
        ## since it's only a single element so no need to use [extend]
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# Debugging Line

In [201]:
## debugging
classes ## that's all the Topics we're able to discuss with the BOT

['greeting',
 'goodbye',
 'thanks',
 'options',
 'order_tracking',
 'order_components',
 'missing_id',
 'Location',
 'search_person_by_id',
 'appointment status',
 'check_leave',
 'cost_lowering',
 'forgot_password',
 'email_id',
 'manufacturing_problems',
 'search_department',
 'competitors_in_market',
 'key_customers',
 'supplier_info',
 'highest_grossing',
 'connect_people',
 'project_handling_queries',
 'solve_problems',
 'version_update',
 'HR_related_problem',
 'factors_impacting_sale',
 'predict_performance',
 'customer_satisfaction',
 'maintainence',
 'gadgets',
 'commission',
 'invalid',
 'noans',
 'turnover',
 'predict_delay',
 'name',
 'about',
 'configuration',
 'Weather',
 'leave',
 'hours',
 'cabin',
 'domain']

In [202]:
## something to explain before we dig on 
## there's difference between Lemmatizing and stemming
## Stemming uses the stem of the word, while lemmatization uses the context in which the word is being used
## we'll use both here :)

## our stemmers
#lanc = LancasterStemmer()
#Snow = SnowballStemmer(language='english')

## the only lemmetizer here
lemma = WordNetLemmatizer()

## we're going to make three copies to check which one is the best
#words_lan = words.copy() ## to Lancaster
#words_snow = words.copy() ## to Snowball
words_lemma = words.copy() ## to lemmatizer

## Now applying everthing on each one according to the conditions
## here we remove the duplicates using set() then make 'em as a list then sort 'em
#words_lan = [lanc.stem(w.lower()) for w in words_lan if w not in IGNORE]
#words_lan = sorted(list(set(words_lan)))
## once for snowball
#words_snow = [Snow.stem(w.lower()) for w in words_snow if w not in IGNORE]
#words_snow = sorted(list(set(words_snow)))
## another for lemmatizer
words_lemma = [lemma.lemmatize(w.lower()) for w in words_lemma if w not in IGNORE]
words_lemma = sorted(list(set(words_lemma)))

## sort the classes
classes = sorted(list(set(classes)))

# Debugging Line

In [203]:
print(f"We've {len(classes)} Labels and {len(docs)} Documents.\nWe got also {len(words_lan)} Words for Lancaster Stemmer\n,{len(words_snow)} Words for Snowball Stemmer \nand Finally {len(words_lemma)} Words for the Lemmatizer.")

We've 43 Labels and 121 Documents.
We got also 225 Words for Lancaster Stemmer
,225 Words for Snowball Stemmer 
and Finally 240 Words for the Lemmatizer.


In [204]:
## create files to store the objects that will be used later for prediction
## one file for each pack of words  
#pickle.dump(words_lan ,open('words_lan.pickle', 'wb')) 
#pickle.dump(words_snow ,open('words_snow.pickle', 'wb'))
pickle.dump(words_lemma ,open('words_lemma.pickle', 'wb')) 

pickle.dump(classes, open('labels.pickle', 'wb'))

# Create and Perform Training

In [205]:
## create our training list
training = []
## create an empty output list
output = [0] * len(classes)

for x, doc in enumerate(docs):
    ## intialize the bag of words 
    bag = [] ## that's the list where it tells us how frequent the word appears
    ## list of tokenized words 
    pattern_wrds = doc[0]
    
    ## now apply each lemmatization and stemmizer
    ##wrds_lan = [lanc.stem(w.lower()) for w in pattern_wrds]
    ##wrds_snow = [Snow.stem(w.lower()) for w in pattern_wrds]
    wrds_lemma = [lemma.lemmatize(w.lower()) for w in pattern_wrds]
    
    ## now building our bag list whether the word exists or not
    ## 1 --> if it exists , 0 --> if it's not the desired or selected tag
    ## we can't add all of these in the same [bag] list so we'll try each algorithm alone
    for _ in words_lemma:
        if _ in wrds_lemma:
            bag.append(1)
        else:
            bag.append(0)
    ## it'll be 0 if the tag doesn't match , else 1
    out_row = list(output)
    out_row[classes.index(doc[1])] = 1
    training.append([bag, out_row]) ## add all of this to the training list and create a multidimensional array
    
## time to shuffle features and convert it to Numoy arrays
## shuffle takes a sequence, like a list, and reorganize the order of the items
rd.shuffle(training)
training = np.array(training)

## now a simple step , create train,test sets in the form of x,y
X_train = list(training[:,0])
y_train = list(training[:,1])

## just a checking message to make sure it's completed
print('Training sets Created')

Training sets Created




# Debugging Line

In [206]:
training

array([[list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
        list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])],
       [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

# Create a Neural Network to get Predictions

In [207]:
## Neural Network with two relu activation functions and output SoftMax function
model = Sequential()

## adding two relu activation functions
model.add(Dense(128, input_shape=(len(X_train[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
## the out is traditionally with SoftMax
model.add(Dense(len(y_train[0]), activation='softmax'))

## compiling the loss function using crossEntropy Loss
## since we can't test both SGD and ADAM together we'll use each one alone and choose one at the end
adam = Adam(learning_rate=.001)
model.compile(loss='categorical_crossentropy', 
              optimizer=adam, metrics=['accuracy'])
## display the results
model.summary()

Model: "sequential_31"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_84 (Dense)            (None, 128)               30848     
                                                                 
 dropout_20 (Dropout)        (None, 128)               0         
                                                                 
 dense_85 (Dense)            (None, 64)                8256      
                                                                 
 dropout_21 (Dropout)        (None, 64)                0         
                                                                 
 dense_86 (Dense)            (None, 43)                2795      
                                                                 
Total params: 41,899
Trainable params: 41,899
Non-trainable params: 0
_________________________________________________________________


In [208]:
## Fiting the data and getting predictions
## after executing : SGD ==> max accuarcy 71%, Adam > 97%
hist = model.fit(np.array(X_train), np.array(y_train), epochs=200, batch_size=5, verbose=1)
model.save('chatbot.h5', hist)
print('\nModel is Successfully Created')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

# Finishing Up

In [209]:
## Now the Model is successfully created and trained and ready
## the following code is used temporarily instead of GUI that we'll build it later
## list of many functions used for these stuff

## that function will be changed using another algorithms
def clean_sentence_up(sentence):
    ## as we know some traditional steps
    ## tokenaize the words
    wrds = nltk.word_tokenize(sentence)
    ## return stemmed each word
    return [lemma.lemmatize(w.lower()) for w in wrds]

## create bag of words array [0,1] even if it exists or not
def bow(sentence, wrds):
    ## applying the function above
    sw = clean_sentence_up(sentence)
    ## get bag of words, intialized as empty
    bag = [0] * len(wrds)
    for s in sw:
        for i,w in enumerate(wrds):
            ## if we found a compitable then add 1 to the list
            if s == w:
                bag[i] = 1
    return (np.array(bag))

## predict the target class
def predict(sentence, model):
    p = bow(sentence, words_lemma)
    res = model.predict(np.array([p]))[0]
    results = [[i,r] for i,r in enumerate(res) if r >.25] ## error = .25
    
    ## sort by strength of probability
    results.sort(key=lambda x:x[1], reverse=True)
    return_l = []
    
    for r in results:
        return_l.append({"intent":classes[r[0]], "probability": str(r[1])})
    return return_l

## get response from model
def get_response(ints, intents_json):
    tag = ints[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if (i['tag'] == tag) :
            result = rd.choice(i['responses'])
            break
    return result

## predict class and get response
def chatbotResponse(text):
    ints = predict(text, model)
    return get_response(ints, intents)

## the main function that will start the chat bot
def START():
    print("Bot: Hello Sir here is H723IU2 Model , I'm here to help you :)")
    while True:
        ip = str(input()).lower()
        if ip == "end" or ip == 'bye':
            print('See you soon')
            break
        if ip in IGNORE:
            print("Sorry i don't understand")
        else:
            print(f"Bot: {chatbotResponse(ip)}" + "\n")
            print("-"*50)

In [211]:
START()

Bot: Hello Sir here is H723IU2 Model , I'm here to help you :)
hello im omar 
Bot: Good to see you again

--------------------------------------------------
email_id
Bot: Can't Understand Your Question

--------------------------------------------------
can i get coffe?
Bot: please elaborate your question

--------------------------------------------------
im hungry
Bot: please elaborate your question

--------------------------------------------------
what's time now
Bot: Can't Understand Your Question

--------------------------------------------------
what the fuck
Bot: Recent news of Demonetisation & recession

--------------------------------------------------
kids
Bot: please elaborate your question

--------------------------------------------------
hours
Bot: Can't Understand Your Question

--------------------------------------------------
time in hours?
Bot: It is open from Monday-Saturday 9:00am-7:30pm

--------------------------------------------------
1+21?
Bot: please ela