In [2]:
import nltk
from nltk.stem.lancaster import LancasterStemmer
import numpy
# import tflearn
# import tensorflow as tf
from tensorflow import keras
import random
import json
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer

In [3]:
#open the json and store it
with open("intents.json") as intents:
    intent_data = json.load(intents)
# intent_data

In [4]:
#will hold all words
all_words = []

#will hold all possible intent tags
all_labels = []

#will hold all pattern data, each pattern in a list of itself
all_patterns = []

#will hold the type of intent of the corresponding word in all_patterns
all_responses = []

In [5]:
stemmer = SnowballStemmer("english")
lemmatizer = WordNetLemmatizer() 

In [6]:
#loop through the intent dictionary
for intent in intent_data['intents']:
    #loop through each pattern in each patterns list
    for pattern in intent['patterns']:
        #tokenize basically separates each sentence into individual words
        words = nltk.word_tokenize(pattern)
        #add all the words into all words
        all_words.extend(words)
        all_patterns.append(words)
        all_responses.append(intent['tag'])
        
    if intent['tag'] not in all_labels:
        all_labels.append(intent['tag'])

In [7]:
print(all_words[:5])
print(all_labels)
print(all_patterns[:5])
print(all_responses[:5])

['Hi', 'How', 'are', 'you', 'hey']
['greeting', 'goodbye', 'name', 'name_yes', 'name_no', 'thanks', 'first', 'lc_initial', 'lc_main', 'bc_initial', 'bc_main', 'sc_main']
[['Hi'], ['How', 'are', 'you'], ['hey'], ['yo'], ['Is', 'anyone', 'there']]
['greeting', 'greeting', 'greeting', 'greeting', 'greeting']


In [8]:
#removes the ends of the words. Basically reducing the words to their root type
#for example if there was a word that is "Whats" it removes the "s" returning only "what"
#the reason for doing this is to get the plain meaning of the word whilst ignoring any unnecessary
#additions that might confuse the model, making it to be able to generalize more
all_words = [lemmatizer.lemmatize(word.lower()) for word in all_words]
all_words = [stemmer.stem(word.lower()) for word in all_words if word]
print(all_words)
print(len(all_words))

['hi', 'how', 'are', 'you', 'hey', 'yo', 'is', 'anyon', 'there', 'hello', 'good', 'day', 'what', 'up', 'sup', 'what', 'are', 'you', 'who', 'are', 'you', 'cya', 'see', 'you', 'later', 'goodby', 'im', 'leav', 'have', 'a', 'good', 'day', 'bye', 'what', 'is', 'your', 'name', 'what', 'should', 'i', 'call', 'you', 'what', 'your', 'name', 'name', 'your', 'name', 'yes', 'haha', 'yes', 'yep', 'nice', 'inde', 'yup', 'yea', 'nah', 'no', 'nope', 'not', 'realli', 'ew', 'cring', 'thank', 'thank', 'you', 'that', 'is', 'help', 'awesom', 'thank', 'thank', 'for', 'help', 'me', 'sick', 'i', 'do', 'not', 'feel', 'good', 'not', 'well', 'i', 'am', 'not', 'feel', 'well', 'i', 'feel', 'sick', 'someth', 'is', 'wrong', 'with', 'me', 'i', 'do', 'not', 'think', 'i', 'am', 'of', 'perfect', 'health', 'cough', 'i', 'have', 'a', 'cough', 'i', 'have', 'a', 'chest', 'pain', 'chest', 'pain', 'i', 'have', 'a', 'chest', 'ach', 'chest', 'ach', 'i', 'feel', 'unusu', 'tire', 'i', 'feel', 'too', 'tire', 'i', 'feel', 'exhaust'

In [9]:
#remove any duplicate words, and sort for easiness
all_words = sorted(list(set(all_words)))
len(all_words)

126

In [10]:
all_labels = sorted(all_labels)
print(len(all_labels))

12


In [11]:
#A neural network cannot interpret these Strings
#However, they can be one-hot encoded to numbers
#One hot encoding - Bag of Words (if word is there - "hot", represented with a 1)
#the mapping is [the, she, he, him, they, was, a, guy, person]
#one-hot representation -> [0, 0, 1, 0, 0, 1, 1, 0, 1], for the sentence "he was a person"
#We'll use this representation for each sentence, using {all_words} as its list to mapped against

#training and output list
training = []
output = []

#creating a list of all 0's  to use as a starting point
out_empty = [0 for _ in range(len(all_labels))]
# out_empty -> [0,0,0,0,0,0]

for index, pattern in enumerate(all_patterns):
    #the bag of words
    bag = []
    words = [lemmatizer.lemmatize(word.lower()) for word in pattern]
    words = [stemmer.stem(word.lower()) for word in words]
    
    for word in all_words:
        #loop and check whether each word consists
        if word in words:
            #if it does append 1
            bag.append(1)
        else:
            bag.append(0)
    
    #create a copy of out_empty
    output_row = out_empty[:]
    
    #set the position of the tag equal to 1
    output_row[all_labels.index(all_responses[index])] = 1
    training.append(bag)
    output.append(output_row)
    
#convert to arrays, for tflearn to accept
training = numpy.array(training)
output = numpy.array(output)
#the data is now ready to be used to train

In [12]:
# #input data size - bag of words length
# net = tflearn.input_data(shape=[None, len(training[0])])
# #hidden layers
# net = tflearn.fully_connected(net, 8)
# net = tflearn.fully_connected(net, 8)
# #output layer, size equal to number of possibilities
# net = tflearn.fully_connected(net, len(output[0]), activation='softmax')
# net = tflearn.regression(net)
# #Regular deep neural network
# model = tflearn.DNN(net)

# print(training.shape)
# print(output.shape)

In [13]:
model = keras.models.Sequential()
# model.add(keras.layers.Input(shape=[len(training[0])]))
# model.add(keras.layers.Dense(32))
# model.add(keras.layers.Dense(32))
# model.add(keras.layers.Dense(32))
# model.add(keras.layers.Dense(len(output[0]), activation='softmax'))

In [15]:
# try:
model = keras.models.load_model('chatbot.h5')
# except:
#     model.compile(optimizer=keras.optimizers.Adam(lr=1e-5), metrics=['accuracy'], loss='categorical_crossentropy')
#     model.fit(training, output, epochs=1000, batch_size=32)
#     model.save("chatbot.h5")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [16]:
# model.save("chatbot.h5")

In [17]:
#convert the input text into bag of words
def bag_of_words(text, all_words):
    bag = [0 for _ in range(len(all_words))]
                            
    text_words = nltk.word_tokenize(text)
    text_words = [lemmatizer.lemmatize(word.lower()) for word in text_words]
    text_words = [stemmer.stem(word.lower()) for word in text_words]
    
    for wrd in text_words:
        for index, word in enumerate(all_words):
            if word == wrd:
                bag[index] = 1
    
    return numpy.array(bag)

In [18]:
def chat(username):
    print(f'Hi {username}, how can I help you today?')
    
    #Reset context on start, due to there being no context
    context = None
    #default responses if no proper valid match
    default_responses = [
    "Sorry, can't understand you, I am not perfect :'(", "Please give me more info :(", "Not sure I understand :(",
    "Please be more specific", "Please provide me more information"
    ]
    
    while True:
        user_input = str(input("You: ")).lower()
        if user_input == 'quit':
            break
    
        #will hold a list of probabilities (softmax)
        bag = bag_of_words(user_input, all_words)
        bag = bag.reshape(1, -1)
        results = model.predict([bag])[0]
        print(results)
        #will return index of highest probability
        result_index = numpy.argmax(results)
        print(result_index)
        #Corresponding tag of prediction
        result_tag = all_labels[result_index]
           
#         print("Prediction Probability", results[result_index])
            
        #only if the model is quite confident do this
        if results[result_index] > 0.8:
            #break loop if end
            if result_tag == 'goodbye' or result_tag == 'thanks':
                responses = intent_data['intents'][1]['responses'] if result_tag == 'goodbye' else intent_data['intents'][5]['responses']
                print("CHANCO: " + random.choice(responses))
                break
            
            for intent in intent_data['intents']:
                #predicted intent
                if intent['tag'] == result_tag:
                    print(result_tag)
#                     print('context_filter' in intent and intent['context_filter'] == context)

                    #this if condition checks to see whether the context_filter intent has the same value as the context of the
                    #intent it is referring to, if so it means that the context_filter intent will be given authority to provide a response
                    if 'context_filter' not in intent or 'context_filter' in intent and intent['context_filter'] == context:
                        #responses of corresponding intent
                        responses = intent['responses']
                        
                        #does the current intent have context
                        if 'context' in intent:
                            context = intent['context']
                        else:
                            context = None

                        #choose some random response
                        print("CHANCO: " + random.choice(responses))

                    #if user enters smth directly about the main cancer symptoms
                    elif intent.get('direct_access'):
                        responses = intent['responses']
                        print("CHANCO: " + random.choice(responses))
                        
                    else:
                        print("CHANCO: " + random.choice(default_responses))
        
        #if not so confident - print a default text
        else :
            print("CHANCO: " + random.choice(default_responses))
        print()

In [19]:
chat("bro")

Hi bro, how can I help you today?
You: hi




[0.01325454 0.00677617 0.05350413 0.02315326 0.7322471  0.0076578
 0.02001894 0.03273576 0.03589428 0.05453176 0.01746304 0.00276315]
4
CHANCO: Sorry, can't understand you, I am not perfect :'(

You: hello
[0.0116023  0.00215118 0.06231675 0.01895966 0.8084692  0.00157818
 0.01345231 0.03157056 0.0119796  0.02951449 0.0055725  0.00283332]
4
greeting
CHANCO: Hi there, how can I help?

You: i dont feel good
[8.0889557e-05 3.5163537e-06 4.6951124e-01 5.4427117e-02 9.1760969e-03
 4.4571206e-01 9.5159542e-03 7.4833916e-03 2.5438846e-03 6.1919447e-04
 5.2720941e-05 8.7399641e-04]
2
CHANCO: Please provide me more information

You: i do not feel good
[1.4564373e-06 4.1856003e-09 9.8564512e-01 2.0980432e-03 7.0207552e-03
 1.6844743e-03 3.2640406e-04 3.0828740e-03 2.6212660e-05 9.8800621e-05
 7.0147408e-08 1.5735121e-05]
2
first
CHANCO: Not to worry! I am very dependable when it comes to your health! Tell me, what do you have



KeyboardInterrupt: Interrupted by user