In [9]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy as np
import tflearn
import random
import json
from tensorflow.python.framework import ops
import pickle
stemmer = LancasterStemmer()

# Load the intent definitions (each intent carries a tag, example patterns and
# responses) used by the rest of the script. JSON text is UTF-8 by
# specification, so decode it explicitly instead of relying on the platform's
# locale encoding, which would garble non-ASCII patterns/responses.
with open('intents.json', encoding='utf-8') as file:
    data = json.load(file)
try:
    # Reuse the cached preprocessing results when they are available.
    # NOTE(security): unpickling can execute arbitrary code; only keep a
    # data.pickle that this script wrote itself.
    # NOTE: the cache is never compared against intents.json, so delete
    # data.pickle after editing the intents to force a rebuild.
    with open('data.pickle', 'rb') as f:
        words, labels, training, output = pickle.load(f)
except Exception:
    # Cache missing, truncated or otherwise unreadable: rebuild everything from
    # the intents. Catching Exception (rather than a bare except) keeps this
    # best-effort while letting KeyboardInterrupt / SystemExit propagate.
    words = []    # every token seen in any pattern (stemmed and de-duplicated below)
    labels = []   # every distinct intent tag
    docs_x = []   # tokenized patterns, one entry per pattern
    docs_y = []   # tag of the pattern at the same position in docs_x

    for intent in data['intents']:
        for pattern in intent['patterns']:
            # split the pattern into individual word tokens
            wrds = nltk.word_tokenize(pattern)
            words.extend(wrds)
            docs_x.append(wrds)
            docs_y.append(intent['tag'])
        if intent['tag'] not in labels:
            labels.append(intent['tag'])

    # Vocabulary: lower-case and stem each token (so e.g. "runs" and "running"
    # collapse together), drop the '?' token, de-duplicate, and sort so that
    # every word has a stable position in the bag-of-words vector.
    words = sorted({stemmer.stem(w.lower()) for w in words if w != '?'})

    labels = sorted(labels)
    # tag -> position in the one-hot output row (labels are already unique)
    label_index = {label: i for i, label in enumerate(labels)}

    training = []
    output = []
    for doc, tag in zip(docs_x, docs_y):
        # Normalize exactly like the vocabulary above (and like bag_of_words
        # at inference time); a set makes each membership test O(1).
        doc_stems = {stemmer.stem(w.lower()) for w in doc}
        training.append([1 if w in doc_stems else 0 for w in words])

        # one-hot encode the tag of this pattern
        output_row = [0] * len(labels)
        output_row[label_index[tag]] = 1
        output.append(output_row)

    training = np.array(training)
    output = np.array(output)

    # Cache the results so later runs can skip the preprocessing.
    with open('data.pickle', 'wb') as f:
        pickle.dump((words, labels, training, output), f)
    
    
# Start from a clean TensorFlow graph so re-running this code does not stack
# layers on top of a previously built network.
ops.reset_default_graph()

# The network maps a bag-of-words vector (one slot per vocabulary word) to a
# probability for each intent tag.
net = tflearn.input_data(shape=[None, len(training[0])])
# two fully connected hidden layers of 8 neurons each
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
# one output neuron per tag; softmax turns the outputs into probabilities
net = tflearn.fully_connected(net, len(output[0]), activation='softmax')
# attach tflearn's regression (training) layer with its default settings
net = tflearn.regression(net)

model = tflearn.DNN(net)

try:
    # Reuse previously saved weights when they can be loaded.
    # NOTE: a checkpoint is used whenever loading succeeds; nothing here checks
    # that it was trained on the current intents.
    model.load('model.tflearn')
except Exception:
    # No usable checkpoint: train from scratch and save the result. Catching
    # Exception (rather than a bare except) means Ctrl-C during load is not
    # mistaken for "no model" and does not start a 1000-epoch training run.
    model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)
    model.save('model.tflearn')

def bag_of_words(s, words):
    """Encode sentence *s* as a bag-of-words vector over the vocabulary *words*.

    The sentence is tokenized, lower-cased and stemmed; position ``i`` of the
    result is 1 when ``words[i]`` equals one of the resulting stems and 0
    otherwise.

    Args:
        s: the sentence typed by the user.
        words: the vocabulary (already stemmed), in the order the model expects.

    Returns:
        A numpy array with one 0/1 entry per element of *words*.
    """
    tokens = nltk.word_tokenize(s)
    # A set gives O(1) membership tests, so the vocabulary is scanned once
    # instead of once per token in the sentence.
    stems = {stemmer.stem(token.lower()) for token in tokens}
    return np.array([1 if w in stems else 0 for w in words])


def chat(confidence_threshold=0.7):
    """Run an interactive console chat loop until the user types ``quit``.

    Each input line is encoded with ``bag_of_words`` and passed to the model.
    When the most likely tag's score exceeds *confidence_threshold*, a random
    response from the intent with that tag is printed; otherwise a fallback
    message is printed.

    Args:
        confidence_threshold: minimum score the best tag must exceed for the
            bot to answer. Defaults to 0.7.
    """
    print('Start talking with the bot! (type quit to stop)')
    while True:
        inp = input('You: ')
        if inp.lower() == 'quit':
            break

        results = model.predict([bag_of_words(inp, words)])[0]
        # index of the highest score, and the tag stored at that index
        results_index = np.argmax(results)
        tag = labels[results_index]

        # Reset on every turn so a missing tag can neither raise
        # UnboundLocalError nor silently reuse the previous turn's responses.
        responses = None
        if results[results_index] > confidence_threshold:
            for tg in data['intents']:
                if tg['tag'] == tag:
                    responses = tg['responses']

        # Also covers an intent whose responses list is empty, which would
        # make random.choice raise IndexError.
        if responses:
            print(random.choice(responses))
        else:
            print("I didn't get that, can you try again")
# Start the interactive chat loop as soon as this code runs.
chat()

INFO:tensorflow:Restoring parameters from C:\Users\oscar\Jupyter\ML Course Basics\AI Chat Bot\model.tflearn
Start talking with the bot! (type quit to stop)
You: hello
Good to see you again
You: hello
Hi there, how can I help?
You: when are you open
Our hours are 9am-9pm every day
You: when are you opb
Our hours are 9am-9pm every day
You: dfgdfsag
I didn't get that, can you try again
You: what can iuu pay with
I didn't get that, can you try again
You: what can i pay with
I didn't get that, can you try again
You: pay
I didn't get that, can you try again
You: quit
