In [1]:
import nltk # Natural Language Toolkit in python
# nltk.download()  # Run once on a fresh machine to download the NLTK data (the tokenizer needs it)

In [2]:
# Load the intents definition: each intent has a tag, example patterns and canned responses
import json

# Decode explicitly as UTF-8 so the result does not depend on the platform's default encoding
with open('json/intents.json', encoding='utf-8') as file:
    data = json.load(file)

print(data)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi', 'Hey', 'How are you', 'Is anyone there?', 'Hello', 'Good day', 'Whats up'], 'responses': ['Hello!', 'Good to see you!', 'Hi there'], 'context_set': ''}, {'tag': 'goodbye', 'patterns': ['cya', 'See you later', 'Goodbye', 'I am Leaving', 'Have a Good day'], 'responses': ['Sad to see you go :(', 'Talk to you later', 'Goodbye!'], 'context_set': ''}, {'tag': 'age', 'patterns': ['how old', 'how old is tim', 'what is your age', 'how old are you', 'age?'], 'responses': ['I am not sure...', 'I have no memory about that', 'Perhaps 1?, haha...'], 'context_set': ''}, {'tag': 'name', 'patterns': ['what is your name', 'what should I call you', 'whats your name?', 'name?', 'Your name?'], 'responses': ['You can call me HX.', "I'm HX!", "I'm HX.", 'My name is HX'], 'context_set': ''}, {'tag': 'work', 'patterns': ['What is your job?', 'what is your work?', 'what do you do?', 'work for someone?'], 'responses': ['I am not yet sure my work.', 'Perhaps, I 

In [5]:
# Flatten the intents into parallel lists:
#   words    - every token seen in any pattern (duplicates included)
#   patterns - one token list per example sentence
#   tags     - the intent tag of each entry in `patterns` (same order)
#   labels   - the distinct intent tags, in first-seen order
words, labels, patterns, tags = [], [], [], []

for entry in data['intents']:
    for sentence in entry['patterns']:
        # word_tokenize separates words and punctuation,
        # e.g. 'How old are you?' => ['How', 'old', 'are', 'you', '?']
        tokens = nltk.word_tokenize(sentence)
        words += tokens
        patterns.append(tokens)
        tags.append(entry['tag'])

    label = entry['tag']
    if not (label in labels):
        labels.append(label)

print(labels)
print(tags)

['greeting', 'goodbye', 'age', 'name', 'work', 'hours']
['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'age', 'age', 'age', 'age', 'age', 'name', 'name', 'name', 'name', 'name', 'work', 'work', 'work', 'work', 'hours', 'hours', 'hours', 'hours']


In [4]:
# Reduce every token to its stem so that inflected forms share one vocabulary entry
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()

# NOTE: `words` is overwritten here, so re-running this cell stems the stems again
unique_stems = set()
for token in words:
    if token == "?":
        continue  # question marks are not part of the vocabulary
    unique_stems.add(stemmer.stem(token.lower()))

words = sorted(unique_stems)  # Sorted, de-duplicated vocabulary of stems
print(words)

labels = sorted(labels)

['a', 'ag', 'am', 'anyon', 'ar', 'cal', 'can', 'cya', 'day', 'do', 'for', 'good', 'goodby', 'hav', 'hello', 'hey', 'hi', 'hour', 'how', 'i', 'is', 'job', 'lat', 'leav', 'nam', 'of', 'old', 'op', 'period', 'see', 'should', 'someon', 'talk', 'ther', 'tim', 'to', 'up', 'what', 'when', 'work', 'yo', 'you']


In [5]:
import numpy as np

# Build the training matrices: one row per example pattern
training = []  # binary bag-of-words rows (one column per vocabulary stem)
output = []    # one-hot label rows (one column per intent label)

# All-zero template that each one-hot label row is copied from
out_empty = [0] * len(labels)

for idx, tokens in enumerate(patterns):
    # Stems present in this pattern
    stems_in_pattern = {stemmer.stem(tok.lower()) for tok in tokens}

    # 1 where the vocabulary stem occurs in the pattern, 0 otherwise (presence, not a count)
    bag = [int(vocab_stem in stems_in_pattern) for vocab_stem in words]

    # Set the column of this pattern's tag to 1
    output_row = list(out_empty)
    output_row[labels.index(tags[idx])] = 1

    training.append(bag)
    output.append(output_row)

# The network expects numpy arrays rather than nested lists
training = np.array(training)
output = np.array(output)

# print(training.shape)

In [6]:
# Define the classifier: bag-of-words input -> two 8-unit dense layers -> softmax over the labels
from operator import le, ne
import tflearn

input_width = len(training[0])  # one input per vocabulary stem
n_classes = len(output[0])      # one output per intent label

net = tflearn.input_data(shape=[None, input_width])

# Two hidden layers of 8 units each
for hidden_units in (8, 8):
    net = tflearn.fully_connected(net, hidden_units)

# Softmax output: one score per intent label
net = tflearn.fully_connected(net, n_classes, activation='softmax')
net = tflearn.regression(net)

model = tflearn.DNN(net)


Instructions for updating:
non-resource variables are not supported in the long term
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [7]:
# Train the network on the bag-of-words rows and their one-hot labels, then save it
import os

# Make sure the target directory exists before training, so that the save at the
# end cannot fail on a fresh checkout because "model/" is missing
os.makedirs("model", exist_ok=True)

model.fit(training, output, n_epoch=1000, batch_size=8, show_metric=True)
model.save("model/chatbot_model.tflearn")

Training Step: 3999  | total loss: [1m[32m0.89081[0m[0m | time: 0.009s
| Adam | epoch: 1000 | loss: 0.89081 - acc: 0.8519 -- iter: 24/28
Training Step: 4000  | total loss: [1m[32m0.82572[0m[0m | time: 0.011s
| Adam | epoch: 1000 | loss: 0.82572 - acc: 0.8667 -- iter: 28/28
--
INFO:tensorflow:/home/opex-dev/Haoxuan_workspace/ML/Chatbox/naive_chatbot/model/chatbot_model.tflearn is not in all_model_checkpoint_paths. Manually adding it.


In [9]:
# Encode an input sentence as a bag-of-words vector
# Example: "What is your name?" => [0, 0, 1, ... ,1, 0, ... 0]
# The encoded vector is what gets fed into the network
def bag_of_words(s, words):
    """Return a binary bag-of-words vector for the sentence ``s``.

    The sentence is tokenized with ``nltk.word_tokenize`` and every token is
    lower-cased and stemmed with the notebook-level ``stemmer``, i.e. the same
    preprocessing that is applied to the training patterns.

    Args:
        s: The sentence to encode.
        words: The vocabulary (sequence of stems). Its order defines the
            positions of the returned vector.

    Returns:
        A numpy array with one entry per element of ``words``: 1 if that
        stem occurs in ``s``, 0 otherwise. Repeated words still give 1.
    """
    # A set gives constant-time membership tests, so the vocabulary is scanned
    # once instead of once per token of the sentence
    sentence_stems = {stemmer.stem(token.lower()) for token in nltk.word_tokenize(s)}

    return np.array([1 if w in sentence_stems else 0 for w in words])

In [None]:
import random

def chat(min_confidence=0.0):
    """Run an interactive chat loop on stdin/stdout until the user types "quit".

    Each input line is encoded with ``bag_of_words``, classified by the
    notebook-level ``model``, and answered with a random response of the
    predicted intent taken from ``data["intents"]``.

    Args:
        min_confidence: Minimum score the winning label must reach for the
            bot to answer with one of its responses; below it a fallback
            message is printed instead. The default of 0.0 never rejects a
            prediction.
    """
    # Build the tag -> responses lookup once instead of rescanning the intents
    # on every turn. If a tag occurs twice, the later entry wins.
    responses_by_tag = {
        intent['tag']: intent.get('responses') for intent in data["intents"]
    }

    print("Start talking with the bot (type quit to stop)!")
    while True:
        # Strip so that " quit " (stray whitespace) still ends the session
        inp = input("You: ").strip()
        if inp.lower() == "quit":
            break
        if not inp:
            # A blank line encodes to an all-zero vector; there is nothing to classify
            continue

        # The encoded bag of words is fed into the network; one input row
        # gives one row of scores, ordered like `labels`
        results = model.predict([bag_of_words(inp, words)])
        scores = np.asarray(results)[0]
        results_index = int(np.argmax(scores))
        tag = labels[results_index]  # tag used to pick the response

        responses = responses_by_tag.get(tag)
        if scores[results_index] < min_confidence or not responses:
            # Low confidence, or no responses defined for the predicted tag
            print("I didn't get that, try again.")
            continue

        # Pick one of the predefined responses at random
        print(random.choice(responses))

In [None]:
# Uncomment the next line to start an interactive chat session (type quit to stop)
# chat()