In [None]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy
import tflearn
import tensorflow as tf
import random
import json

In [None]:
#Load the intent definitions (tags, patterns, responses) from disk.
#JSON text is UTF-8 by specification, so the encoding is passed explicitly
#instead of relying on the platform default (which is not UTF-8 everywhere).
with open("intents.json", encoding="utf-8") as intents_file:
    intent_data = json.load(intents_file)

In [None]:
#all_words     -> every token seen in any pattern (later stemmed and de-duplicated)
#all_labels    -> every distinct intent tag
all_words, all_labels = [], []

#all_patterns  -> one token list per pattern sentence
#all_responses -> the intent tag of the pattern at the same position in all_patterns
all_patterns, all_responses = [], []

In [None]:
#single Lancaster stemmer instance, reused for the vocabulary, the training bags and user input
stemmer = LancasterStemmer()

In [None]:
#walk over every intent and collect its tokenized patterns
for intent in intent_data['intents']:
    tag = intent['tag']

    for pattern in intent['patterns']:
        #split the sentence into individual word tokens
        words = nltk.word_tokenize(pattern)
        #the flat vocabulary gets every token ...
        all_words += words
        #... while the pattern and its tag are stored at matching positions
        all_patterns.append(words)
        all_responses.append(tag)

    #record each tag only once
    if tag not in all_labels:
        all_labels.append(tag)

In [None]:
#quick sanity check: first few tokens, all tags, first few patterns and their tags
for preview in (all_words[:5], all_labels, all_patterns[:5], all_responses[:5]):
    print(preview)

In [None]:
#Stemming reduces each word to a root form (e.g. "Whats" is meant to become "what"),
#so that different surface forms of a word share one vocabulary entry and the
#model can generalize better. Tokens are lower-cased first and the "?" token is dropped.
non_question_tokens = (token for token in all_words if token != "?")
all_words = [stemmer.stem(token.lower()) for token in non_question_tokens]
print(all_words)
print(len(all_words))

In [None]:
#keep each stem once and order the vocabulary alphabetically
all_words = sorted(set(all_words))
len(all_words)

In [None]:
#order the tags alphabetically; their positions are used as class indices later on
all_labels = list(all_labels)
all_labels.sort()
print(len(all_labels))

In [None]:
#A neural network cannot work on raw strings, so every pattern is turned into
#a fixed-length vector of 0/1 flags ("bag of words"): position i is 1 when the
#i-th vocabulary word occurs in the sentence.
#Example with the vocabulary [the, she, he, him, they, was, a, guy, person]:
#"he was a person" -> [0, 0, 1, 0, 0, 1, 1, 0, 1]
#Here {all_words} is the vocabulary every sentence is mapped against.

#network inputs (bags) and expected outputs (one-hot tags)
training = []
output = []

#all-zero template with one slot per tag, e.g. [0,0,0,0,0,0]
out_empty = [0] * len(all_labels)

#all_patterns and all_responses are filled in lockstep, so they can be paired up
for pattern, pattern_tag in zip(all_patterns, all_responses):
    #stems present in this pattern
    pattern_stems = {stemmer.stem(token.lower()) for token in pattern}

    #1 where the vocabulary word occurs in the pattern, 0 otherwise
    bag = [1 if vocab_word in pattern_stems else 0 for vocab_word in all_words]

    #fresh copy of the template with the slot of this pattern's tag switched on
    output_row = list(out_empty)
    output_row[all_labels.index(pattern_tag)] = 1

    training.append(bag)
    output.append(output_row)

#tflearn expects numpy arrays
training = numpy.array(training)
output = numpy.array(output)

#the data is now ready to be used for training

In [None]:
#Start from a clean default graph, so that re-running this cell does not stack
#a second copy of the layers on top of the ones built by a previous run
tf.compat.v1.reset_default_graph()

#input layer - one unit per vocabulary word (bag of words length)
net = tflearn.input_data(shape=[None, len(training[0])])
#two hidden layers with 8 units each
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
#output layer - one unit per tag, softmax turns the scores into probabilities
net = tflearn.fully_connected(net, len(output[0]), activation='softmax')
net = tflearn.regression(net)
#Regular deep neural network wrapping the layers above
model = tflearn.DNN(net)

print(training.shape)
print(output.shape)

In [None]:
#Reset any previous settings
# tf.compat.v1.reset_default_graph()

In [None]:
#Train on the full data set and persist the result.
#(Loading a previously saved "chatbot.tflearn" instead of retraining is currently disabled.)
model.fit(
    X_inputs=training,
    Y_targets=output,
    n_epoch=1000,
    batch_size=8,
    show_metric=True,
)
model.save("chatbot.tflearn")

In [None]:
#convert the input text into bag of words
def bag_of_words(text, all_words):
    """Encode ``text`` as a bag-of-words vector over the vocabulary ``all_words``.

    The text is tokenized, lower-cased and stemmed with the notebook's shared
    ``stemmer``. The returned numpy array has one entry per vocabulary word:
    1 when that word equals one of the stems found in the text, 0 otherwise.
    """
    text_stems = {stemmer.stem(token.lower()) for token in nltk.word_tokenize(text)}
    return numpy.array([1 if vocab_word in text_stems else 0 for vocab_word in all_words])

In [None]:
def chat(confidence_threshold=0.8):
    """Run an interactive console chat loop until the user types 'quit'.

    Each user message is converted to a bag of words, classified by the global
    ``model``, and - when the winning tag's probability exceeds
    ``confidence_threshold`` - answered with a random response of the matching
    intent. An intent carrying a 'context_filter' only answers when that
    filter equals the context left by the previously answered intent;
    otherwise, and whenever the model is not confident enough, one of the
    default fallback responses is printed.

    Args:
        confidence_threshold: probability the predicted tag must exceed for
            its intent to be used (defaults to 0.8).
    """
    #Reset context on start, due to there being no context
    context = None
    #default responses if no proper valid match
    default_responses = [
        "Sorry, can't understand you, I am not perfect :'(",
        "Please give me more info :(",
        "Not sure I understand :(",
        "Please be more specific",
    ]

    print("Start talking! (quit to stop)")

    while True:
        user_input = str(input("You: ")).lower()
        if user_input == 'quit':
            break

        #predict() returns one row of probabilities per input sample; only one
        #sentence is passed in, so take the first (and only) row
        results = model.predict([bag_of_words(user_input, all_words)])[0]
        #index of the highest probability
        result_index = numpy.argmax(results)
        #Corresponding tag of the prediction
        result_tag = all_labels[result_index]

        #only answer from the intents if the model is quite confident
        if results[result_index] > confidence_threshold:
            for intent in intent_data['intents']:
                #skip everything but the predicted intent
                if intent['tag'] != result_tag:
                    continue

                #NOTE(review): for 'goodbye'/'thanks' the search stops here and
                #no reply is printed - confirm this is intended
                if result_tag == 'goodbye' or result_tag == 'thanks':
                    break

                #answer only if the intent has no context filter, or its
                #filter matches the current context
                if 'context_filter' not in intent or intent['context_filter'] == context:
                    #responses of the corresponding intent
                    responses = intent['responses']

                    #remember the context this intent sets (None if it sets none)
                    context = intent.get('context')

                    #choose some random response
                    print("CHANCO: " + random.choice(responses))
                else:
                    print("CHANCO: " + random.choice(default_responses))

        #if not so confident - print a default text
        else:
            print("CHANCO: " + random.choice(default_responses))
        print()

In [None]:
#start the interactive session; it keeps prompting until the user types 'quit'
chat()