In [15]:
import json
import string
import random 
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer 
import tensorflow as tf 
from tensorflow.keras import Sequential 
from tensorflow.keras.layers import Dense, Dropout



In [16]:
# Read the intents definition from its JSON file into a plain dictionary
with open("../training_data/intents.json", mode="r", encoding="utf-8") as intents_file:
    data = json.load(intents_file)

print(data)

{'intents': [{'tag': 'greetings', 'patterns': ['Hi', 'Hey', 'Hello', 'Good day'], 'responses': ["Hello, I'm the University Chatbot"], 'context_set': 'hwru?'}, {'tag': 'howami', 'patterns': ['How are you?', 'How are you feeling?', "What's up?"], 'responses': ["I'm feeling great as usual, how about you?"], 'context_filter': 'hwru?', 'context_set': 'question'}, {'tag': 'imgood', 'patterns': ["I'm good", "I'm okay", "I'm feeling great as well"], 'responses': ["That's good! So how can I help you today?"], 'context_filter': 'question'}, {'tag': 'imnot', 'patterns': ['Feeling bad', 'Not okay', 'I feel sad', "I don't feel so good", 'depressed', 'sad', 'not good'], 'responses': ['Oh, I see. I hope everything will turn out well in the end. Anyway, what can I do for you?'], 'context_filter': 'question'}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", "Thank's a lot!"], 'responses': ['Happy to help!', 'Any time!', 'My pleasure'], 'context_set': ''}, {'tag': 'closing', 'pat

In [17]:
# Lemmatizer used to reduce every token to its dictionary form
lemmatizer = WordNetLemmatizer()

# Containers filled from the intents file:
#   words   - tokens from every pattern (normalised and de-duplicated below)
#   classes - intent tags (de-duplicated below)
#   doc_X   - the raw pattern strings
#   doc_y   - the tag belonging to the pattern at the same index in doc_X
words = []
classes = []
doc_X = []
doc_y = []

# Tokenize each pattern and record the tokens, the pattern and its tag
for intent in data["intents"]:
    tag = intent["tag"]
    for pattern in intent["patterns"]:
        words.extend(nltk.word_tokenize(pattern))
        doc_X.append(pattern)
        doc_y.append(tag)
    # duplicates are removed by the set() conversion further down
    classes.append(tag)

# Lower-case and lemmatize every token, dropping tokens that consist solely of
# punctuation characters. The per-character test matters: `word in
# string.punctuation` is a substring check against one long string, so
# multi-character punctuation tokens such as "..." would slip through.
words = [
    lemmatizer.lemmatize(word.lower())
    for word in words
    if not all(char in string.punctuation for char in word)
]

# Sort alphabetically and de-duplicate so indices are stable between runs
words = sorted(set(words))
classes = sorted(set(classes))
print(words)

["'m", "'s", 'a', 'about', 'ac', 'accountancy', 'address', 'am', 'an', 'and', 'applicant', 'application', 'applying', 'arch', 'archi', 'architecture', 'are', 'art', 'available', 'b', 'ba', 'bad', 'beced', 'beed', 'bio', 'biology', 'bpe', 'bsarchi', 'bsba', 'bsba-em', 'bsba-hrdm', 'bsce', 'bsche', 'bscs', 'bsed', 'bsee', 'bsem', 'bsge', 'bshm', 'bsie', 'bsit', 'bsme', 'bsmls', 'bspolsci', 'bsswk', 'by', 'bye', 'can', 'checklist', 'com', 'computing', 'contact', 'could', 'course', 'curiculim', 'curiculum', 'currently', 'day', 'dean', 'depressed', 'description', 'do', 'doe', 'education', 'email', 'eng', 'engineer', 'engineering', 'enginering', 'engl', 'enroll', 'enrolled', 'enrollment', 'entrance', 'entrep', 'exam', 'examination', 'existing', 'failed', 'fee', 'feel', 'feeling', 'finance', 'finman', 'for', 'freshies', 'freshman', 'funny', 'good', 'goodbye', 'grade', 'great', 'have', 'hello', 'helpful', 'hey', 'hi', 'hotline', 'how', 'i', 'if', 'in', 'incoming', 'industrial', 'information', 

In [18]:
# Build the training set: one bag-of-words vector and one one-hot label per pattern
training = []
out_empty = [0] * len(classes)

for idx, doc in enumerate(doc_X):
    # Tokenize and normalise the pattern the same way the vocabulary was built
    # (lower-case, then lemmatize each token). Matching whole tokens instead of
    # testing `word in sentence_string` avoids substring hits, e.g. the vocab
    # entry 'a' firing for every pattern that merely contains the letter "a".
    doc_tokens = {
        lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)
    }
    bow = [1 if word in doc_tokens else 0 for word in words]

    # One-hot encode the class that the current pattern belongs to
    output_row = list(out_empty)
    output_row[classes.index(doc_y[idx])] = 1

    training.append([bow, output_row])

# Shuffle so that patterns of the same intent are not fed to the model in order
random.shuffle(training)

# Split the features and the target labels into dense arrays
train_X = np.array([row[0] for row in training])
train_y = np.array([row[1] for row in training])

# Keep `training` available as an object array, as before
training = np.array(training, dtype=object)

In [19]:
# Shapes are derived from the training matrices; `epochs` controls training length
input_shape = (len(train_X[0]),)
output_shape = len(train_y[0])
epochs = 200

# Feed-forward classifier: two hidden ReLU layers with dropout, softmax output
# with one unit per intent class
model = Sequential()
model.add(Dense(128, input_shape=input_shape, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(output_shape, activation="softmax"))

# NOTE(review): the `decay` argument is accepted by the Keras version that
# produced the saved output; confirm it is still supported before upgrading
# TensorFlow.
adam = tf.keras.optimizers.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=["accuracy"])

# summary() prints the table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output
model.summary()

# Use the `epochs` setting defined above instead of repeating the literal
model.fit(x=train_X, y=train_y, epochs=epochs, verbose=1)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 128)               25984     
_________________________________________________________________
dropout_4 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_5 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 41)                2665      
Total params: 36,905
Trainable params: 36,905
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epo

Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epo

Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<tensorflow.python.keras.callbacks.History at 0x1badaa60fc8>

In [20]:
# Conversation state: maps a user ID to the context most recently set by an intent
context = dict()

def clean_text(text): 
    """Tokenize ``text`` and normalise each token like the training vocabulary.

    The vocabulary is built from lower-cased, lemmatized tokens, so the same
    normalisation is applied here. Without lower-casing, capitalised input such
    as "Hello" would never match its vocabulary entry.

    Args:
        text: The raw user message.

    Returns:
        A list of lower-cased, lemmatized tokens in their original order.
    """
    tokens = nltk.word_tokenize(text)
    return [lemmatizer.lemmatize(token.lower()) for token in tokens]

def bag_of_words(text, vocab): 
    """Encode ``text`` as a binary bag-of-words vector over ``vocab``.

    Args:
        text: The raw message; it is tokenized via ``clean_text``.
        vocab: Sequence of vocabulary words; position i of the result
            corresponds to ``vocab[i]``.

    Returns:
        A NumPy array with one entry per vocabulary word: 1 where the word
        equals one of the message tokens, 0 otherwise.
    """
    present = set(clean_text(text))
    return np.array([1 if entry in present else 0 for entry in vocab])

def pred_class(text): 
    """Predict the intent classes for ``text``.

    The message is encoded against the global vocabulary ``words`` and passed
    to the global ``model``; classes whose score does not exceed the threshold
    are discarded.

    Args:
        text: The raw user message.

    Returns:
        A list of ``(class_name, score)`` tuples sorted by descending score.
        The list is empty when no class scores above the threshold.
    """
    features = bag_of_words(text, words)
    scores = model.predict(np.array([features]))[0]
    thresh = 0.2

    # keep (class index, score) pairs above the threshold, best first
    candidates = [[pos, score] for pos, score in enumerate(scores) if score > thresh]
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    return [(classes[pos], score) for pos, score in candidates]

def get_response(text, userID='context', show_details=True): 
    """Return the bot's reply to ``text`` and update the user's context.

    The top-ranked class from ``pred_class`` selects an intent in ``data``.
    An intent may set a context (``context_set``) and may require one
    (``context_filter``) before it is allowed to answer.

    Args:
        text: The raw user message.
        userID: Key under which this user's context is stored in ``context``.
        show_details: When true, print the context and tag that were used.

    Returns:
        One of the matched intent's responses, chosen at random, or a fallback
        apology string when nothing was predicted, no intent carries the
        predicted tag, or the intent's context filter does not match.
    """
    fallback = "Sorry, I can't understand your query."

    results = pred_class(text)
    if not results:
        # nothing scored above the threshold; previously this returned None
        return fallback

    top_tag = results[0][0]
    for intent in data["intents"]:
        if intent["tag"] != top_tag:
            continue

        has_filter = "context_filter" in intent
        has_setter = "context_set" in intent

        if has_filter and has_setter:
            # NOTE(review): intents that both filter on and set a context
            # answer without checking the filter. Existing behaviour is kept;
            # confirm that this is intended.
            context[userID] = intent["context_set"]
            return random.choice(intent["responses"])

        if has_setter:
            context[userID] = intent["context_set"]
            if show_details:
                print('context:', intent['context_set'])
                print(context[userID])

        filter_matches = (
            has_filter
            and userID in context
            and intent["context_filter"] == context[userID]
        )
        if not has_filter or filter_matches:
            if show_details:
                print('tag:', intent['tag'])
            return random.choice(intent["responses"])

        return fallback

    # No intent carries the predicted tag (previously a NameError on `resultsss`)
    return fallback
        

In [None]:
from flask import Flask, render_template, request, redirect, url_for 

# Flask application object; its static folder is 'static'
app = Flask(__name__, static_folder='static')

@app.route("/")
def home():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template("index.html")
    return page
    
@app.route("/get")
def get_bot_response():
    """Answer the message passed in the ``msg`` query parameter.

    Returns:
        The reply from ``get_response`` as a string. When ``msg`` is missing or
        blank, the fallback apology is returned instead of passing ``None`` or
        an empty string on to the tokenizer.
    """
    userText = request.args.get('msg', '')
    if not userText.strip():
        return "Sorry, I can't understand your query."
    return str(get_response(userText))

# Start the Flask server only when this code runs as the main module
if __name__ == "__main__":
    app.run()


 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [21/Jul/2021 17:12:40] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [21/Jul/2021 17:12:41] "GET /static/styles/style.css HTTP/1.1" 304 -
127.0.0.1 - - [21/Jul/2021 17:12:43] "GET /get?msg=picture HTTP/1.1" 200 -


context: 

tag: test
