In [1]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy as np
import tflearn
import tensorflow as tf
import random
import json

Instructions for updating:
non-resource variables are not supported in the long term
curses is not supported on this machine (please install/reinstall curses for an optimal experience)


In [12]:
# Shared Lancaster stemmer used to reduce a word to its stem (e.g. 'saying' -> 'say').
stemmer = LancasterStemmer()

In [13]:
# gives stemmed, tokenized words list from sentence pattern without words in ignore_words list
def clean_pattern(pattern, ignore_words):
    """Tokenize ``pattern`` and return the stems of the tokens that are kept.

    Args:
        pattern: Sentence to process.
        ignore_words: Collection of tokens to drop. Membership is checked on
            the token exactly as produced by the tokenizer, before lower-casing.

    Returns:
        List with one stem per kept token, in sentence order. Each kept token
        is lower-cased and passed through the module-level ``stemmer``.
    """
    return [
        stemmer.stem(token.lower())
        for token in nltk.word_tokenize(pattern)
        if token not in ignore_words
    ]

In [14]:
# Read the intent definitions. JSON text is UTF-8, so the encoding is given
# explicitly instead of relying on the platform's locale default.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)
print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Whats up'], 'responses': ['Hello!', 'Good to see you again!', 'Hi there, how can I help?'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['cya', 'See you later', 'Goodbye', 'I am Leaving', 'Have a Good day'], 'responses': ['Sad to see you go :(', 'Talk to you later', 'Goodbye!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['how old', 'how old is tim', 'what is your age', 'how old are you', 'age?'], 'responses': ['I am 18 years old!', '18 years young!'], 'context_set': ''}, {'tag': 'name', 'patterns': ['what is your name', 'what should I call you', 'whats your name?'], 'responses': ['You can call me Tim.', "I'm Tim!", "I'm Tim aka Tech With Tim."], 'context_set': ''}, {'tag': 'shop', 'patterns': ['Id like to buy something', 'whats on the menu', 'what do you reccommend?', 'could i get something to eat'], 'responses': ['We sell chocolate chip cookies for $2!', 'Cookies ar

#### Preparing the training data and training the model

In [17]:
# Load the cached training data when a usable cache exists; otherwise rebuild
# it from `data` and write a fresh cache.
#
# `pickle` is imported before its first use. Previously it was imported only
# after the `try`, so on a fresh kernel the load raised NameError, the bare
# `except` hid that error, and the cache was never actually used.
import pickle

try:
    with open('data.pickle', 'rb') as file:
        # NOTE(review): unpickling can execute arbitrary code -- only load a
        # data.pickle that this notebook wrote itself.
        stemmed_words, tags, ignore_words, X, y = pickle.load(file)

except (OSError, EOFError, pickle.UnpicklingError, AttributeError,
        ImportError, IndexError, TypeError, ValueError) as cache_error:
    # Missing, truncated or incompatible cache: say why and rebuild.
    # Anything else (e.g. KeyboardInterrupt or a genuine bug) now propagates.
    print("Rebuilding training data; could not load 'data.pickle':", repr(cache_error))

    # Some cleaning
    stemmed_words = []
    tags = []
    ignore_words = ['!', '?', '.']
    corpus = []  # list of (stemmed tokens of one pattern, tag of its intent)

    for intent in data['intents']:
        for pattern in intent['patterns']:
            stemmed_pattern = clean_pattern(pattern, ignore_words)
            stemmed_words.extend(stemmed_pattern)
            corpus.append((stemmed_pattern, intent['tag']))
        if intent['tag'] not in tags:
            tags.append(intent['tag'])

    # remove duplicates and sort
    stemmed_words = sorted(set(stemmed_words))
    tags = sorted(set(tags))

    print(stemmed_words)
    print(tags)
    print(corpus)

    # Creating numeric features out of cleaned data:
    # for each item in corpus, X gets a 0/1 row over stemmed_words (1 if the
    # word occurs in the pattern) and y gets a 0/1 row over tags (1 for the
    # item's own tag, 0 for every other tag).
    X = []
    y = []
    for stemmed_pattern, current_tag in corpus:
        pattern_words = set(stemmed_pattern)  # constant-time membership tests
        X.append([1 if w in pattern_words else 0 for w in stemmed_words])
        y.append([1 if tag == current_tag else 0 for tag in tags])

    X = np.array(X)
    y = np.array(y)
    print(X)
    print(y)

    # Save the rebuilt variables for future runs. The cache is written only
    # when it was rebuilt; after a successful load there is nothing new to save.
    with open('data.pickle', 'wb') as file:
        pickle.dump((stemmed_words, tags, ignore_words, X, y), file)

# #-------------------------------------------------------------------------------------
# # Some cleaning
# stemmed_words = []
# tags = []
# ignore_words = ['!', '?', '.']
# corpus = []

# for intent in data['intents']:
#     for pattern in intent['patterns']:
#         stemmed_pattern = clean_pattern(pattern, ignore_words)
#         stemmed_words.extend(stemmed_pattern)
#         corpus.append((stemmed_pattern, intent['tag']))
#     if intent['tag'] not in tags:
#         tags.append(intent['tag'])

# # remove duplicates and sort
# stemmed_words = sorted(list(set(stemmed_words)))
# tags = sorted(list(set(tags)))

# print(stemmed_words)
# print(tags)
# print(corpus)

# # Creating numeric features out of cleaned data
# X = []
# y = []
# for item in corpus:
#     bag = [] #array of 1 and 0. 1 if stemmed word is present stemmed pattern
#     stemmed_pattern = item[0]
#     for w in stemmed_words:
#         if w in stemmed_pattern:
#             bag.append(1)
#         else:
#             bag.append(0)

#     tags_row = [] #array of 1 and 0. 1 for current tag and for everything else 0.
#     current_tag = item[1]
#     for tag in tags:
#         if tag == current_tag:
#             tags_row.append(1)
#         else:
#             tags_row.append(0)

#     #for each item in corpus, X will be array indicating stemmed words and y array indicating tags
#     X.append(bag)
#     y.append(tags_row) 

# X = np.array(X)
# y = np.array(y)
# print(X)
# print(y)

# # saving some variables in pickle for future reference
# import pickle
# with open('data.pickle', 'wb') as file:
#     pickle.dump((stemmed_words, tags, ignore_words, X, y), file)


In [18]:
# Start from an empty default graph so re-running this cell does not stack
# a second copy of the network on top of the first one.
tf.compat.v1.reset_default_graph()

n_features = len(X[0])  # width of one input row
n_classes = len(y[0])   # width of one target row

# Layers: input -> two hidden layers of 8 units each -> softmax output.
network = tflearn.input_data(shape=[None, n_features])
for hidden_units in (8, 8):
    network = tflearn.fully_connected(network, hidden_units)
network = tflearn.fully_connected(network, n_classes, activation='softmax')
network = tflearn.regression(network)

# tensorboard_dir is the directory where training logs are written.
model = tflearn.DNN(network, tensorboard_dir='tflearn_logs')

In [21]:
# Reuse the saved weights when they can be restored; otherwise train the
# model and save it for the next run.
try:
    model.load("chatbot_model.tflearn")
except Exception as load_error:
    # `Exception` rather than a bare `except`, so that interrupting the load
    # (KeyboardInterrupt) stops the cell instead of silently starting training.
    print("Could not load 'chatbot_model.tflearn'; training a new model:", repr(load_error))
    # n_epoch: number of times the model will see the same data
    model.fit(X, y, n_epoch=80, batch_size=8, show_metric=True, shuffle=True)
    model.save("chatbot_model.tflearn")

# model.fit(X, y, n_epoch=500, batch_size=8, show_metric=True, shuffle= True) #n_epoch:no. of times model will see same data
# model.save("chatbot_model.tflearn")

INFO:tensorflow:Restoring parameters from F:\ChatBot\chatbot_model.tflearn


#### Chatbot: loading the trained model

In [20]:
# Restore the saved parameters from "chatbot_model.tflearn" into `model`.
# This repeats the load already attempted in the training cell.
model.load("chatbot_model.tflearn")

INFO:tensorflow:Restoring parameters from F:\ChatBot\chatbot_model.tflearn
