# <font color='cyan'>Chatbot API with Keras Model</font>

### Main challenges:
- Classify user input to recognise intent
- Keep context

In [1]:
# NLTK supplies the tokenizer and stemmer used for text preprocessing
# (the Keras classification model is imported in a later cell)
# Lancaster stemming library used to collapse distinct word forms
import nltk
from nltk.stem.lancaster import LancasterStemmer
# Single shared stemmer instance, reused by the vocabulary, training and
# helper-function cells further down.
stemmer = LancasterStemmer()

In [None]:
# things we need for Tensorflow
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import pandas as pd
import pickle
import random

Using TensorFlow backend.


In [None]:
'''
Chatbot intents & patterns to learn defined in a plain JSON file
Classification model can be created for small vocabulary
Need to build vocabulary > patterns processed
each word stemmed to produce generic root > help cover more combi for inputs
'''

### Read the JSON file

In [None]:
import json

# Read the intent definitions (tags and example patterns) used for training.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform default (which differs on Windows).
with open('chat_intents.json', encoding='utf-8') as intent_file:
    intents = json.load(intent_file)

In [None]:
# Fetch NLTK's "punkt" resource.
# NOTE(review): presumably required by nltk.word_tokenize used below —
# confirm the resource name against the installed NLTK version.
nltk.download("punkt")

In [None]:
# Tokens that must never enter the vocabulary.
ignore_words = ['?']

# Corpus: one (tokenized pattern, intent tag) pair per training pattern.
documents = [
    (nltk.word_tokenize(pattern), intent['tag'])
    for intent in intents['intents']
    for pattern in intent['patterns']
]

# Vocabulary: every non-ignored token, lower-cased and stemmed,
# de-duplicated and sorted.
words = sorted({
    stemmer.stem(token.lower())
    for tokens, _tag in documents
    for token in tokens
    if token not in ignore_words
})

# Classes: the distinct intent tags that own at least one pattern, sorted.
classes = sorted({tag for _tokens, tag in documents})

# documents = combination between patterns & intents
print(len(documents), 'documents', documents)
# classes = intents
print(len(classes), "classes", classes)
# words = all words, vocab
print(len(words), "unique stemmed words", words)

In [None]:
'''
Training won't be run based on vocab of words (words meaningless for machine)
Need to translate words into bags of words with arr containing 0/1

Arr length will be equal to vocab size & 1 set when word from current
pattern is located in a given position
'''

In [None]:
# create our training data
# Accumulator for the training rows; the loop in the next cell appends one
# [bag, output_row] pair per document.
training = []
# create an empty array for our output
# Template of zeros with one slot per class; each document copies it and
# sets the slot of its own tag to 1.
output_empty = [0] * len(classes)

In [None]:
# Build the training set: one [bag-of-words, one-hot intent] pair per document.
# The list is (re)created here so this cell can be re-run on its own without
# appending duplicate rows to an earlier result.
training = []
for doc in documents:
    # doc is a (tokenized pattern, intent tag) pair.
    # Stem each token so related word forms share one vocabulary entry
    # (see stemming vs. lemmatization); a set keeps the per-word membership
    # test below cheap.
    pattern_words = {stemmer.stem(word.lower()) for word in doc[0]}

    # Bag of words: 1 where the vocabulary word occurs in the current
    # pattern, 0 otherwise. Its length always equals len(words).
    bag = [1 if w in pattern_words else 0 for w in words]

    # output is '0' for each tag & '1' for the current tag, i.e. the
    # class/intent this pattern belongs to.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

In [None]:
# Shuffle the [bag, output_row] pairs and store them as an (N, 2) object array.
# Each row is copied into a fresh list first, so the cell also works when
# `training` is already an array left over from a previous run.
shuffled_rows = [list(row) for row in training]
random.shuffle(shuffled_rows)

# A bag and its output row normally have different lengths, so the pairs are
# ragged; recent NumPy versions raise ValueError when asked to build an array
# from ragged nested lists. Filling a pre-sized object array keeps every bag
# and every output row as a single element whatever their lengths are.
training = np.empty((len(shuffled_rows), 2), dtype=object)
for row_index, (bag_vector, target_vector) in enumerate(shuffled_rows):
    training[row_index, 0] = bag_vector
    training[row_index, 1] = target_vector

In [None]:
# Split the pairs column-wise: train_x holds the bag-of-words patterns,
# train_y the matching one-hot intents.
train_x, train_y = (list(training[:, column]) for column in (0, 1))
print(train_x)
print(len(train_x[0]))

In [None]:
#with open('model.pickle', 'wb') as f:
#  pickle.dump((words, labels, training))

In [None]:
'''
Training data - X (pattern converted into array [0,1,0,1...,0])
Y (intents converted into arr [1,0,0,0,..,0]), will be single 1 for intents arr

Model built on 3 layers

Classification output will be multiclass arr > help identify encoded intent

Use Softmax activation to produce multiclass classification output
(result returns arr of 0/1: [1,0,0...,0] - identifies encoded intent)
'''

## <font color='red'>**NOTE**</font> <br>
Ok so Cael & Joe, <br>
This part below is basically what makes up a Neural network in ML

So basically a neural network has hidden layers (the main part of the network) stacked between its input and its output,
and each layer tries to sift out features to identify what is going on and learn

In each layer we have a number of neurons to help us know what's going on <br>
> Simple Eg. When you touch something you have nerves to tell you if you really touched something or not

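An activation function decides how strongly each neuron fires. In the model below the two hidden layers use ReLU, and the final layer uses Softmax to turn its outputs into one probability per intent <br>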
> Eg. Did you really touch something?? How do you know? > Your nerves felt something and sent signals to your brain

Input shape = the shape of the data the 1st layer receives (here, the length of one bag-of-words array)

In [None]:
# Feed-forward classifier built from three Dense layers:
#   128 ReLU units -> 64 ReLU units -> one softmax unit per intent.
# A Dropout layer (rate 0.5) follows each of the two hidden layers.
n_features = len(train_x[0])   # length of one bag-of-words vector
n_intents = len(train_y[0])    # length of one one-hot intent vector

model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_intents, activation='softmax'),
])

### Compile Keras model with SGD optimiser

In [None]:
# Optimiser: stochastic gradient descent with Nesterov accelerated gradient,
# which gives good results for this model.
# NOTE(review): newer Keras releases name the learning-rate argument
# `learning_rate` and handle decay differently — confirm against the
# installed Keras version before upgrading.
sgd = SGD(
    lr=0.01,
    decay=1e-6,
    momentum=0.9,
    nesterov=True,
)

# Multi-class problem with one-hot targets, hence categorical cross-entropy.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

### Fit model

In [None]:
# Execute training & construct classification model

In [None]:
# Train the classifier.
# An epoch is one full pass over the training set; the data is fed to the
# model in batches of 5 samples, with progress output enabled.
model.fit(
    x=np.array(train_x),
    y=np.array(train_y),
    epochs=200,
    batch_size=5,
    verbose=1,
)

### Define helper functions

In [None]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its tokens lower-cased and stemmed.

    The result is a list with one stemmed entry per token, in the order
    the tokenizer produced them.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

In [None]:
# Translate user sentence into bag of words with arr 0/1
# 0 or 1 for ea word in bag that exists in the sentence
def bow(sentence, words, show_details = True):
    """Encode ``sentence`` as a bag-of-words vector over the vocabulary ``words``.

    Args:
        sentence: Raw text; it is tokenized and stemmed with
            ``clean_up_sentence`` before matching.
        words: Vocabulary list; position ``i`` of the result corresponds
            to ``words[i]``.
        show_details: When true, print every vocabulary word that matched.

    Returns:
        A NumPy array of ``len(words)`` entries holding 1 where the
        vocabulary word occurs in the sentence and 0 elsewhere.
    """
    bag = [0] * len(words)

    for token in clean_up_sentence(sentence):
        # Every vocabulary position whose word equals the current token.
        matches = [
            index for index, vocab_word in enumerate(words)
            if vocab_word == token
        ]
        for index in matches:
            bag[index] = 1
            if show_details:
                print(f"Found in bag: {words[index]}")

    return np.array(bag)

### Example translating sentence into bag of words

In [None]:
# Encode a sample sentence, then show the vector followed by the class list.
p = bow("what is cca", words)
print(p, classes, sep="\n")

In [None]:
'''
Good practice to save trained model into pickle file to reuse to publish
through Flask REST API
'''

In [None]:
# Use pickle to load in pre-trained model
# global graph
# graph = tf.get_default_graph()

# with open(f"katana-assistant-model.pkl", 'rb') as f:
#     model = pickle.load(f)

In [None]:
'''
before publishing model through Flask REST API, always good to run extra test
use model.predict to classify user input & based on calculated
probability return intent (multiple intents can be returned)
'''

In [None]:
def classify_local(sentence, error_threshold=0.25, show_details=True):
    """Classify ``sentence`` with the trained model and rank the likely intents.

    Args:
        sentence: Raw user input text.
        error_threshold: Only predictions whose probability is strictly
            greater than this value are returned. Defaults to 0.25, the
            value that used to be hard-coded.
        show_details: When true (the default, which keeps the previous
            verbose behaviour) print the matched vocabulary words, the
            input vector and the raw model output.

    Returns:
        A list of ``(intent, probability)`` tuples sorted by descending
        probability, where ``probability`` is the string form of the
        model's score. The list is empty when no prediction exceeds the
        threshold.
    """
    # model.predict works on batches, so the single bag-of-words vector is
    # wrapped into a 2-D float array with one row.
    input_data = np.array(
        [bow(sentence, words, show_details=show_details)], dtype=float
    )
    if show_details:
        print(input_data)

    # Probabilities for the one sentence in the batch, one per class.
    results = model.predict(input_data)[0]
    if show_details:
        print(results)

    # Drop weak predictions, then order the rest strongest first.
    ranked = sorted(
        (
            (index, probability)
            for index, probability in enumerate(results)
            if probability > error_threshold
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Map class indices back to intent names.
    return [(classes[index], str(probability)) for index, probability in ranked]

In [None]:
# Smoke-test the classifier on a sample question; being the last expression
# in the cell, the returned (intent, probability) list is shown as output.
classify_local('how to create a cca')

### Publish same function through REST endpoint