In [18]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy
import tflearn
import tensorflow as tf
import random
import json

In [26]:
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer

In [27]:
#open the json and store it
with open("intents.json") as intents:
    intent_data = json.load(intents)
# intent_data

In [28]:
#will hold all words
all_words = []

#will hold all possible intent tags
all_labels = []

#will hold all pattern data, each pattern in a list of itself
all_patterns = []

#will hold the type of intent of the corresponding word in all_patterns
all_responses = []

In [30]:
stemmer = SnowballStemmer("english")

In [31]:
#loop through the intent dictionary
for intent in intent_data['intents']:
    #loop through each pattern in each patterns list
    for pattern in intent['patterns']:
        #tokenize basically separates each sentence into individual words
        words = nltk.word_tokenize(pattern)
        #add all the words into all words
        all_words.extend(words)
        all_patterns.append(words)
        all_responses.append(intent['tag'])
        
    if intent['tag'] not in all_labels:
        all_labels.append(intent['tag'])

In [32]:
print(all_words[:5])
print(all_labels)
print(all_patterns[:5])
print(all_responses[:5])

['Hi', 'How', 'are', 'you', 'hey']
['greeting', 'goodbye', 'name', 'name_yes', 'name_no', 'thanks', 'first', 'lc_initial', 'lc_main', 'bc_initial', 'bc_main', 'sc_initial', 'sc_main']
[['Hi'], ['How', 'are', 'you'], ['hey'], ['yo'], ['Is', 'anyone', 'there', '?']]
['greeting', 'greeting', 'greeting', 'greeting', 'greeting']


In [33]:
#removes the ends of the words. Basically reducing the words to their root type
#for example if there was a word that is "Whats" it removes the "s" returning only "what"
#the reason for doing this is to get the plain meaning of the word whilst ignoring any unnecessary
#additions that might confuse the model, making it to be able to generalize more
all_words = [stemmer.stem(word.lower()) for word in all_words if word != "?"]
print(all_words)
print(len(all_words))

['hi', 'how', 'are', 'you', 'hey', 'yo', 'is', 'anyon', 'there', 'hello', 'good', 'day', 'what', 'up', 'cya', 'see', 'you', 'later', 'goodby', 'i', 'am', 'leav', 'have', 'a', 'good', 'day', 'what', 'is', 'your', 'name', 'what', 'should', 'i', 'call', 'you', 'what', 'your', 'name', 'yes', 'haha', 'yes', 'yep', 'nice', 'inde', 'yup', 'nah', 'no', 'nope', 'not', 'realli', 'ew', 'cring', 'thank', 'thank', 'you', 'that', "'s", 'help', 'awesom', ',', 'thank', 'thank', 'for', 'help', 'me', 'i', 'am', 'not', 'feel', 'well', 'i', 'feel', 'sick', 'someth', 'is', 'wrong', 'with', 'me', 'i', 'do', 'not', 'think', 'i', 'am', 'of', 'perfect', 'health', 'cough', 'chest', 'pain', 'chest', 'ach', 'i', 'feel', 'unusu', 'tire', 'i', 'feel', 'too', 'tire', 'i', 'feel', 'exhaust', 'i', 'get', 'tire', 'too', 'easili', 'no', 'energi', 'lack', 'in', 'energi', 'i', 'dont', 'want', 'to', 'eat', 'i', 'dont', 'have', 'an', 'appetit', 'i', 'dont', 'want', 'to', 'eat', 'yes', 'yea', 'cough', 'blood', 'i', 'cough', 

In [34]:
#remove any duplicate words, and sort for easiness
all_words = sorted(list(set(all_words)))
len(all_words)

101

In [35]:
all_labels = sorted(all_labels)
print(len(all_labels))

13


In [36]:
#A neural network cannot interpret these Strings
#However, they can be one-hot encoded to numbers
#One hot encoding - Bag of Words (if word is there - "hot", represented with a 1)
#the mapping is [the, she, he, him, they, was, a, guy, person]
#one-hot representation -> [0, 0, 1, 0, 0, 1, 1, 0, 1], for the sentence "he was a person"
#We'll use this representation for each sentence, using {all_words} as its list to mapped against

#training and output list
training = []
output = []

#creating a list of all 0's  to use as a starting point
out_empty = [0 for _ in range(len(all_labels))]
# print(out_empty) -> [0,0,0,0,0,0]

for index, pattern in enumerate(all_patterns):
    #the bag of words
    bag = []
    words = [stemmer.stem(word.lower()) for word in pattern]
    
    for word in all_words:
        #loop and check whether each word consists
        if word in words:
            #if it does append 1
            bag.append(1)
        else:
            bag.append(0)
    
    #create a copy of out_empty
    output_row = out_empty[:]
    
    #set the position of the tag equal to 1
    output_row[all_labels.index(all_responses[index])] = 1
#     print(output_row)
    training.append(bag)
    output.append(output_row)
    
#convert to arrays, for tflearn to accept
training = numpy.array(training)
output = numpy.array(output)
# print()
# print(training)

#the data is now ready to be used to train

In [37]:
#input data size - bag of words length
net = tflearn.input_data(shape=[None, len(training[0])])
#hidden layers
net = tflearn.fully_connected(net, 8)
net = tflearn.fully_connected(net, 8)
#output layer, size equal to number of possibilities
net = tflearn.fully_connected(net, len(output[0]), activation='softmax')
net = tflearn.regression(net)
#Regular deep neural network
model = tflearn.DNN(net)

print(training.shape)
print(output.shape)

(71, 101)
(71, 13)


In [38]:
#Reset any previous settings
# tf.compat.v1.reset_default_graph()

In [39]:
# try:
#     model.load("chatbot.tflearn")
# except:
model.fit(X_inputs=training, Y_targets=output, n_epoch=1000, batch_size=8, show_metric=True)
model.save("chatbot.tflearn")

IndexError: list index out of range

In [14]:
#convert the input text into bag of words
def bag_of_words(text, all_words):
    bag = [0 for _ in range(len(all_words))]
                            
    text_words = nltk.word_tokenize(text)
    text_words = [stemmer.stem(word.lower()) for word in text_words]
    
    for se in text_words:
        for index, word in enumerate(all_words):
            if word==se:
                bag[index] = 1
    
    return numpy.array(bag)

In [15]:
def chat():
    #Reset context on start, due to there being no context
    context = None
    #default responses if no proper valid match
    default_responses = [
    "Sorry, can't understand you, I am not perfect :'(", "Please give me more info :(", "Not sure I understand :(",
    "Please be more specific"
    ]
    
    print("Start talking! (quit to stop)")

    while True:
        user_input = str(input("You: ")).lower()
        if user_input == 'quit':
            break
    
        #will hold a list of probabilities (softmax)
        results = model.predict([bag_of_words(user_input, all_words)])[0]
        #will return index of highest probability
        result_index = numpy.argmax(results)
        #Corresponding tag of prediction
        result_tag = all_labels[result_index]
        
        #only if the model is quite confident do this
        if results[result_index] > 0.8:
            for intent in intent_data['intents']:
                #predicted intent
                if intent['tag'] == result_tag:
                    if result_tag == 'goodbye' or result_tag == 'thanks':
                        break
                        
                    #this if condition checks to see whether the context_filter intent has the same value as the context of the
                    #intent it is referring to
                    if ('context_filter' not in intent) or ('context_filter' in intent and intent['context_filter'] == context):
                        #responses of corresponding intent
                        responses = intent['responses']
                        
                        #does the current intent have context
                        if 'context' in intent:
                            context = intent['context']
                        else:
                            context = None

                        #choose some random response
                        print("CHANCO: " + random.choice(responses))
                    else:
                        print("CHANCO: " + random.choice(default_responses))
        
        #if not so confident - print a default text
        else :
            print("CHANCO: " + random.choice(default_responses))
        print()

In [16]:
chat()

Start talking! (quit to stop)
You: hi
CHANCO: Hello!

You: no
CHANCO: Please give me more info :(

You: yes
CHANCO: Please give me more info :(

You: yo
CHANCO: Hello there!, how are you feeling today?

You: sick
CHANCO: Please give me more info :(

You: i feel sick
CHANCO: not to worry, you have me! lets get started, what do have?

You: cough
CHANCO: I am sorry to hear that... These symptoms however can be less serious than you think! :) So please let me know further symptoms that you might have (Coughing up blood, a cough that does not stop, breathlessness)

You: yes
CHANCO: Please give me more info :(

You: cough blood
CHANCO: I suspect a chance of you having lung cancer... But there's always a chance of positivity! I suggest that you please get an x-ray uploaded on our detection section so that we can confirm

You: dd
CHANCO: Please be more specific

You: breast swell
CHANCO: I am sorry to hear that... But fret not! Please do not be alarmed, because this can be not as serious as yo

KeyboardInterrupt: Interrupted by user