In [1]:
import json

# Load the intent definitions (tags, patterns and responses) that drive the bot.
# JSON text is UTF-8; passing the encoding explicitly avoids decoding the file
# with the platform default (e.g. cp1252 on Windows), which would corrupt or
# reject non-ASCII characters.
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)

### Data Preprocessing

In [2]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
# Single Lancaster stemmer instance shared by the preprocessing, encoding and prediction code below.
stemmer = LancasterStemmer()

def stemming():
    """Extract the vocabulary, tag list and pattern lookup from ``data``.

    Reads the module-level ``data["intents"]``; every pattern is tokenized
    with ``nltk.word_tokenize``.

    Returns:
        A 3-tuple ``(words, labels, patterns_dict)``:
        * words - sorted, de-duplicated stems of every lower-cased token
          except the literal ``"?"``.
        * labels - sorted list of the distinct tags that own at least one
          pattern.
        * patterns_dict - maps each tokenized pattern (a tuple of the raw,
          unstemmed tokens) to its intent tag, in the order encountered.
    """
    tag_by_pattern = {}
    raw_tokens = []
    seen_tags = []

    for intent in data["intents"]:
        for sentence in intent["patterns"]:
            # Split the sentence into its individual word tokens.
            tokens = nltk.word_tokenize(sentence)
            raw_tokens += tokens
            tag_by_pattern[tuple(tokens)] = intent["tag"]

            if intent["tag"] in seen_tags:
                continue
            seen_tags.append(intent["tag"])

    # Reduce every token to its stem (e.g. programmer, programming -> program).
    stems = {stemmer.stem(token.lower()) for token in raw_tokens if token != "?"}

    return sorted(stems), sorted(seen_tags), tag_by_pattern

# Build the vocabulary, label list and pattern lookup used by the cells below.
words, labels, patterns_dict = stemming()

# Print what was extracted so the preprocessing can be checked by eye.
print(
    f"All stemmed words = {words}",
    f"\n All labels = {labels}",
    f"\n All patterns and their respective tags = {patterns_dict}",
    sep="\n",
)

All stemmed words = ["'d", "'s", 'a', 'about', 'am', 'amaz', 'ar', 'aw', 'bad', 'been', 'bye', 'cal', 'cat', 'cool', 'day', 'do', 'doing', 'fantast', 'going', 'good', 'goodby', 'gre', 'greet', 'hav', 'hello', 'hey', 'hi', 'how', 'i', 'is', 'it', 'lat', 'leav', 'lik', 'nam', 'sad', 'see', 'should', 'talk', 'to', 'up', 'what', 'yo', 'you']

 All labels = ['cat', 'goodbye', 'greeting', 'how are you', 'name', 'negative', 'positive', 'talk']

 All patterns and their respective tags = {('hi',): 'greeting', ('hello',): 'greeting', ('greetings',): 'greeting', ('good', 'day'): 'greeting', ('hey',): 'greeting', ('bye',): 'goodbye', ('see', 'you', 'later'): 'goodbye', ('goodbye',): 'goodbye', ('i', 'am', 'Leaving'): 'goodbye', ('have', 'a', 'good', 'day'): 'goodbye', ('how', 'are', 'you'): 'how are you', ('how', 'have', 'you', 'been'): 'how are you', ('how', 'are', 'you', 'doing'): 'how are you', ('what', "'s", 'up'): 'how are you', ('how', "'s", 'it', 'going'): 'how are you', ('do', 'you', 'like

In [3]:
import numpy as np

# Converts words data into numerical values
def one_hot_encoding(words, labels, patterns):
    """Encode tokenized patterns as bag-of-words rows with one-hot label rows.

    Args:
        words: Vocabulary of stemmed words; fixes the column order of each
            training row.
        labels: List of all tags; fixes the column order of each output row.
        patterns: Mapping from a tokenized pattern (iterable of tokens) to
            its tag.

    Returns:
        A tuple ``(train, output)`` of numpy arrays with one row per entry of
        ``patterns``, in the mapping's iteration order. ``train[i][j]`` is 1
        when ``words[j]`` is among the stems of pattern ``i`` and 0 otherwise;
        ``output[i]`` is all zeros except a 1 at the index of the pattern's
        tag in ``labels``.

    Raises:
        ValueError: If a pattern's tag is not present in ``labels``.
    """
    train = []
    output = []

    # Iterate the mapping that was passed in (not a notebook-level global), and
    # take pattern and tag together so each row is paired with its own label.
    for pattern, tag in patterns.items():
        # Lower-case before stemming, matching how the vocabulary and the
        # user's input are stemmed elsewhere in this notebook.
        stemmed_words = {stemmer.stem(w.lower()) for w in pattern}

        # 1 where the vocabulary word occurs in the pattern, otherwise 0.
        bag = [1 if w in stemmed_words else 0 for w in words]

        output_row = np.zeros(len(labels))
        output_row[labels.index(tag)] = 1

        train.append(bag)
        output.append(output_row)

    return np.array(train), np.array(output)
        
# Encode every known pattern into a bag-of-words row (train) and a one-hot tag row (output).
train, output = one_hot_encoding(words, labels, patterns_dict)

### Creating The Model

In [4]:
import tensorflow as tf
import tflearn as tfl

def create_model():
    """Build, train and save the intent classification network.

    Reads the module-level ``train`` and ``output`` arrays: the input width is
    the length of a training row and the output width is the length of an
    output row. The fitted model is written to ``model.tflearn``.

    Returns:
        The trained ``tflearn.DNN`` instance.
    """
    # Start from a clean default graph so re-running this cell does not stack layers.
    tf.compat.v1.reset_default_graph()

    net = tfl.input_data(shape=[None, len(train[0])])

    # Two hidden layers with 8 neurons each.
    for _ in range(2):
        net = tfl.fully_connected(net, 8)

    # Output layer: one neuron per label, with softmax as the activation function.
    net = tfl.fully_connected(net, len(output[0]), activation="softmax")
    net = tfl.regression(net)

    classifier = tfl.DNN(net)
    classifier.fit(train, output, n_epoch=1000, batch_size=8, show_metric=True)
    classifier.save("model.tflearn")

    return classifier

# Builds, trains (1000 epochs) and saves the network; re-running this cell retrains from scratch.
model = create_model()

Training Step: 3999  | total loss: [1m[32m0.00343[0m[0m | time: 0.004s
| Adam | epoch: 1000 | loss: 0.00343 - acc: 1.0000 -- iter: 24/29
Training Step: 4000  | total loss: [1m[32m0.00334[0m[0m | time: 0.006s
| Adam | epoch: 1000 | loss: 0.00334 - acc: 1.0000 -- iter: 29/29
--
INFO:tensorflow:C:\Users\amber\Github\BasicChatbot\model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [5]:
import random

# Predicts the intent of the user's input and returns one of that intent's responses
def predict(input, words, model):
    """Return a randomly chosen response for the intent predicted from ``input``.

    Args:
        input: Raw text typed by the user.
        words: Vocabulary list; position ``i`` of the bag corresponds to
            ``words[i]``.
        model: Object whose ``predict`` method accepts an array of shape
            ``(1, len(words))`` and returns one score per label.

    Returns:
        One entry picked at random from the ``responses`` of the intent in the
        module-level ``data["intents"]`` whose tag equals the highest-scoring
        entry of the module-level ``labels``.
    """
    # Stem the lower-cased tokens of the user's text.
    user_stems = {stemmer.stem(token.lower()) for token in nltk.word_tokenize(input)}

    # Bag of words: 1 where a vocabulary word occurs in the input, otherwise 0.
    bag = np.zeros(len(words))
    for position, vocab_word in enumerate(words):
        if vocab_word in user_stems:
            bag[position] = 1

    # Ask the model for label scores and keep the most likely tag.
    scores = model.predict(bag.reshape(1, len(words)))
    tag = labels[np.argmax(scores)]

    # Find the responses that belong to the predicted tag.
    for intent in data["intents"]:
        if intent["tag"] != tag:
            continue
        responses = intent["responses"]

    return random.choice(responses)
    
# Quick check that the trained model answers a simple input.
print(predict("hi", words, model))

Hey!


### Running the Chatbot

In [None]:
def chat():
    """Run an interactive loop that answers each line the user types.

    Prints a greeting, then reads lines from standard input and prints the
    bot's reply to each one. The loop ends when the user enters exactly ``q``.
    """
    print("Hello, I am Catabot! Feel free to ask me anything!")

    # iter(callable, sentinel) keeps calling input() until it returns "q".
    for user_input in iter(input, "q"):
        print(predict(user_input.lower(), words, model))

# Starts the interactive session; this cell blocks until "q" is entered.
chat()

Hello, I am Catabot! Feel free to ask me anything!


### Sample Conversation
**Chatbot:** Hello, I am Catabot! Feel free to ask me anything!  
**User:** Hello  
**Chatbot:** Hi!    
**User:** How are you?  
**Chatbot:** I am doing well.  
**User:** What can we talk about?  
**Chatbot:** Anything at all! How was your day?  
**User:** It was fantastic.  
**Chatbot:** Excellent to hear!  
**User:** Do you like cats?  
**Chatbot:** Yes, I love cats!  
**User:** See you soon. Goodbye  
**Chatbot:** Talk to you later  