In [1]:
import json

# Load the intents definition (each intent carries a tag and its example patterns).
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding, which differs between systems.
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)

### Data Preprocessing

In [2]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
# Single Lancaster stemmer instance, referenced as a global by the functions in this notebook
stemmer = LancasterStemmer()

def stemming():
    """Build the vocabulary, tag list and pattern-to-tag lookup from ``data``.

    Reads the notebook-level ``data`` dict and tokenizes every pattern of
    every intent with ``nltk.word_tokenize``.

    Returns:
        A ``(words, labels, patterns_dict)`` tuple where
        ``words`` is the sorted list of unique stemmed tokens ("?" excluded),
        ``labels`` is the sorted list of tags that own at least one pattern,
        and ``patterns_dict`` maps each tokenized pattern (a tuple of the raw
        tokens) to the tag of the intent it came from. Patterns that tokenize
        identically share one key, so the tag seen last wins.
    """
    all_tokens = []
    seen_tags = []
    tag_by_pattern = {}

    for intent in data["intents"]:
        for pattern in intent["patterns"]:
            # Split the pattern into its word/punctuation tokens
            tokens = nltk.word_tokenize(pattern)

            all_tokens += tokens
            tag_by_pattern[tuple(tokens)] = intent["tag"]

            if intent["tag"] not in seen_tags:
                seen_tags.append(intent["tag"])

    # Reduce every token to its root (e.g. programmer, programming -> program);
    # the set drops duplicates before sorting
    stems = {stemmer.stem(token.lower()) for token in all_tokens if token != "?"}

    return sorted(stems), sorted(seen_tags), tag_by_pattern

# Run the preprocessing once; the results are reused by the later cells
words, labels, patterns_dict = stemming()

# Print the preprocessing results for a quick visual check
print(f"All stemmed words = {words}")
print(f"\n All labels = {labels}")
print(f"\n All patterns and their respective tags = {patterns_dict}")

All stemmed words = ["'d", "'s", 'a', 'about', 'am', 'amaz', 'ar', 'aw', 'bad', 'bye', 'cal', 'cat', 'cool', 'day', 'do', 'fantast', 'good', 'goodby', 'gre', 'greet', 'hav', 'hello', 'hey', 'hi', 'i', 'is', 'lat', 'leav', 'lik', 'nam', 'sad', 'see', 'should', 'talk', 'to', 'what', 'yo', 'you']

 All labels = ['cat', 'goodbye', 'greeting', 'name', 'negative', 'positive', 'talk']

 All patterns and their respective tags = {('hi',): 'greeting', ('hello',): 'greeting', ('greetings',): 'greeting', ('good', 'day'): 'greeting', ('hey',): 'greeting', ('bye',): 'goodbye', ('see', 'you', 'later'): 'goodbye', ('goodbye',): 'goodbye', ('i', 'am', 'Leaving'): 'goodbye', ('have', 'a', 'good', 'day'): 'goodbye', ('do', 'you', 'like', 'cats'): 'cat', ('cats', 'are', 'cool'): 'cat', ('what', 'is', 'your', 'name'): 'name', ('what', 'should', 'I', 'call', 'you'): 'name', ('what', "'s", 'your', 'name', '?'): 'name', ('i', "'d", 'like', 'to', 'talk'): 'talk', ('what', 'to', 'talk', 'about'): 'talk', ('good

In [3]:
import numpy as np

def one_hot_encoding(words, labels, patterns):
    """Convert tokenized patterns into bag-of-words rows and one-hot tag rows.

    Args:
        words: Ordered vocabulary of stemmed words; one bag column per entry.
        labels: Ordered list of tags; one output column per entry.
        patterns: Mapping from a tokenized pattern (iterable of tokens) to
            the tag it belongs to.

    Returns:
        A ``(train, output)`` tuple of NumPy arrays with one row per entry of
        ``patterns``. A ``train`` row holds 1 in every column whose vocabulary
        word appears among the pattern's stemmed tokens and 0 elsewhere; the
        matching ``output`` row holds a single 1 in the column of the
        pattern's tag.

    Raises:
        ValueError: If a pattern's tag is not present in ``labels``.
    """
    train = []
    output = []

    # Walk the ``patterns`` argument itself (not a notebook-level global) so
    # the function works for whatever mapping the caller passes in; items()
    # yields each pattern together with its tag in a single pass.
    for pattern, tag in patterns.items():
        stemmed_words = {stemmer.stem(token) for token in pattern}

        # 1 if the vocabulary word is present in the pattern, otherwise 0
        bag = [1 if word in stemmed_words else 0 for word in words]

        output_row = np.zeros(len(labels))
        output_row[labels.index(tag)] = 1

        train.append(bag)
        output.append(output_row)

    return np.array(train), np.array(output)
        
# Encode every pattern in patterns_dict into the training inputs and their one-hot targets
train, output = one_hot_encoding(words, labels, patterns_dict)

In [5]:
import tensorflow as tf
import tflearn as tfl

def model():
    """Build, train and save the intent classifier.

    Reads the notebook-level ``train`` and ``output`` arrays: the input layer
    is as wide as one row of ``train`` and the softmax output layer is as wide
    as one row of ``output``. The fitted network is saved to "model.tflearn";
    nothing is returned.
    """
    tf.compat.v1.reset_default_graph()

    net = tfl.input_data(shape=[None, len(train[0])])

    # Two hidden layers with 8 neurons each
    for _ in range(2):
        net = tfl.fully_connected(net, 8)

    # Output layer: one neuron per column of ``output``; softmax turns the
    # activations into a probability for each of them
    net = tfl.fully_connected(net, len(output[0]), activation="softmax")
    net = tfl.regression(net)

    classifier = tfl.DNN(net)
    classifier.fit(train, output, n_epoch=1000, batch_size=8, show_metric=False)
    classifier.save("model.tflearn")

# Train the network and save it to "model.tflearn"
model()

Training Step: 2999  | total loss: [1m[32m0.00389[0m[0m | time: 0.002s
| Adam | epoch: 1000 | loss: 0.00389 -- iter: 16/24
Training Step: 3000  | total loss: [1m[32m0.00368[0m[0m | time: 0.003s
| Adam | epoch: 1000 | loss: 0.00368 -- iter: 24/24
--
INFO:tensorflow:C:\Users\amber\Github\BasicChatbot\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.
