In [64]:
import nltk
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
lemmatizer = WordNetLemmatizer()
import json
import pickle

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [65]:
#This line initializes an empty list named words.
#It will be used to store unique words from the dataset.
words=[]

# this line initializes an empty list named classes.
# It will be used to store unique classes or categories of intents from the dataset.
categories = []

#This line initializes an empty list named documents.
#It will be used to store tuples where each tuple consists of a list of words (representing a pattern or sentence) and its corresponding class
documents = []

#This line initializes a list named ignore_words containing characters that should be ignored or treated separately.
ignore_words = ['?', '!', '@', '#', '`', '$', '%', '^', '&', '*', '(', ')', '_', '+']

In [66]:
data_file = open('/content/Chabotdataset.json').read()
intents = json.loads(data_file)
print(intents)

{'intents': [{'tag': 'greeting', 'patterns': ['Hi there', 'How are you', 'Is anyone there?', 'Hey', 'Hola', 'Hello', 'Good day'], 'responses': ['Hello, thanks for asking', 'Good to see you again', 'Hi there, how can I help?'], 'context': ['']}, {'tag': 'goodbye', 'patterns': ['Bye', 'See you later', 'Goodbye', 'Nice chatting to you, bye', 'Till next time'], 'responses': ['See you!', 'Have a nice day', 'Bye! Come back again soon.'], 'context': ['']}, {'tag': 'thanks', 'patterns': ['Thanks', 'Thank you', "That's helpful", 'Awesome, thanks', 'Thanks for helping me'], 'responses': ['Happy to help!', 'Any time!', 'My pleasure'], 'context': ['']}, {'tag': 'noanswer', 'patterns': [], 'responses': ["Sorry, can't understand you", 'Please give me more info', 'Not sure I understand'], 'context': ['']}, {'tag': 'options', 'patterns': ['How you could help me?', 'What you can do?', 'What help you provide?', 'How you can be helpful?', 'What support is offered'], 'responses': ['I am like your virtual 

In [67]:
for intent in intents['intents']:

  for pattern in intent['patterns']:
    #take each pattern and tokenize it
    w = nltk.word_tokenize(pattern)

    #adding w into words list
    words.extend(w)

    #adding documents
    documents.append((w,intent['tag']))

    #adding categories to our category list
    if intent['tag'] not in categories:
      categories.append(intent['tag'])

print(words)
print(documents)
print(categories)

['Hi', 'there', 'How', 'are', 'you', 'Is', 'anyone', 'there', '?', 'Hey', 'Hola', 'Hello', 'Good', 'day', 'Bye', 'See', 'you', 'later', 'Goodbye', 'Nice', 'chatting', 'to', 'you', ',', 'bye', 'Till', 'next', 'time', 'Thanks', 'Thank', 'you', 'That', "'s", 'helpful', 'Awesome', ',', 'thanks', 'Thanks', 'for', 'helping', 'me', 'How', 'you', 'could', 'help', 'me', '?', 'What', 'you', 'can', 'do', '?', 'What', 'help', 'you', 'provide', '?', 'How', 'you', 'can', 'be', 'helpful', '?', 'What', 'support', 'is', 'offered', 'You', 'okay', '?', 'How', 'are', 'you', 'feeling', 'today', '?', 'Are', 'you', 'feeling', 'ill', '?', 'Do', 'you', 'need', 'my', 'help', '?', 'Hope', 'you', 'tell', 'me', 'what', "'s", 'bothering', 'you', '.', 'Is', 'something', 'bothering', 'you', '?', 'I', "'m", 'all', 'ears', 'if', 'you', 'got', 'something', 'to', 'say', 'I', 'like', 'you', 'There', "'s", 'no', 'one', 'like', 'you', 'You', 'are', 'one', 'in', 'a', 'million', 'You', 'are', 'so', 'helpful', '!', 'You', 'are

In [68]:
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
print(words)

['hi', 'there', 'how', 'are', 'you', 'is', 'anyone', 'there', 'hey', 'hola', 'hello', 'good', 'day', 'bye', 'see', 'you', 'later', 'goodbye', 'nice', 'chatting', 'to', 'you', ',', 'bye', 'till', 'next', 'time', 'thanks', 'thank', 'you', 'that', "'s", 'helpful', 'awesome', ',', 'thanks', 'thanks', 'for', 'helping', 'me', 'how', 'you', 'could', 'help', 'me', 'what', 'you', 'can', 'do', 'what', 'help', 'you', 'provide', 'how', 'you', 'can', 'be', 'helpful', 'what', 'support', 'is', 'offered', 'you', 'okay', 'how', 'are', 'you', 'feeling', 'today', 'are', 'you', 'feeling', 'ill', 'do', 'you', 'need', 'my', 'help', 'hope', 'you', 'tell', 'me', 'what', "'s", 'bothering', 'you', '.', 'is', 'something', 'bothering', 'you', 'i', "'m", 'all', 'ear', 'if', 'you', 'got', 'something', 'to', 'say', 'i', 'like', 'you', 'there', "'s", 'no', 'one', 'like', 'you', 'you', 'are', 'one', 'in', 'a', 'million', 'you', 'are', 'so', 'helpful', 'you', 'are', 'amazing', 'please', 'can', 'i', 'talk', 'with', 'you

In [69]:
# list of lemmatized words converted to a set to ensure uniqueness and then back to a sorted list.
# This list now contains all unique lemmatized words.
words = sorted(list(set(words)))
print(words)

["'m", "'s", ',', '.', 'a', 'again', 'aim', 'all', 'amazing', 'and', 'anyone', 'are', 'awesome', 'be', 'bothering', 'boyfriend', 'bye', 'can', 'chatbot', 'chatting', 'could', 'cringe', 'day', 'do', 'ear', 'favourite', 'feel', 'feeling', 'for', 'girlfriend', 'good', 'good-for-nothing', 'goodbye', 'got', 'had', 'happy', 'have', 'hello', 'help', 'helpful', 'helping', 'hey', 'hi', 'hobby', 'hola', 'hope', 'how', 'i', 'if', 'ill', 'in', 'is', 'it', 'later', 'life', 'like', 'lost', 'mad', 'made', 'make', 'me', 'messed', 'million', 'mistake', 'movie', 'my', 'need', 'next', 'nice', 'no', 'offered', 'okay', 'one', 'or', 'please', 'provide', 'rough', 'rude', 'sad', 'say', 'see', 'so', 'someone', 'something', 'song', 'stupid', 'support', 'talk', 'tell', 'thank', 'thanks', 'that', 'there', 'till', 'time', 'to', 'today', 'totally', 'up', 'useless', 'what', 'why', 'with', 'wrong', 'you', 'your']


In [70]:
# Similar to the words, the list of classes is converted to a set for uniqueness and then back to a sorted list.
categories = sorted(list(set(categories)))
print(categories)

['Appreciation', 'Concern', 'Criticism', 'Personal', 'Please', 'goodbye', 'greeting', 'options', 'thanks']


In [71]:
pickle.dump(words,open('words.pkl','wb'))
pickle.dump(categories,open('categories.pkl','wb'))

In [72]:
# initializing training data
training = []
output_empty = [0] * len(categories)
for doc in documents:
    # initializing bag of words
    bag = []
    # list of tokenized words for the pattern
    pattern_words = doc[0]
    # lemmatize each word - create base word, in attempt to represent related words
    pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]
    # create our bag of words array with 1, if word match found in current pattern
    for w in words:
        bag.append(1) if w in pattern_words else bag.append(0)

    # output is a '0' for each tag and '1' for current tag (for each pattern)
    output_row = list(output_empty)
    output_row[categories.index(doc[1])] = 1

    training.append([bag, output_row])
# shuffle our features and turn into np.array
random.shuffle(training)
training = np.array(training)
# create train and test lists. X - patterns, Y - intents
train_x = list(training[:,0])
train_y = list(training[:,1])
print("Training data created")

Training data created


  training = np.array(training)


In [73]:
# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of intents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

#fitting and saving the model
hist = model.fit(np.array(train_x), np.array(train_y), epochs=200, batch_size=5, verbose=1)
model.save('chatbot_model.h5', hist)

print("model created")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

  saving_api.save_model(


In [75]:
from keras.models import load_model

model = load_model('/content/chatbot_model.h5')

In [76]:
user_input = "Hi, how are you?"
input_words = nltk.word_tokenize(user_input)
input_words = [lemmatizer.lemmatize(word.lower()) for word in input_words]

# Create a bag-of-words representation
input_bag = [1 if w in input_words else 0 for w in words]

In [77]:
input_bag = np.array([input_bag])  # Convert the input to a NumPy array
predictions = model.predict(input_bag)[0]




In [78]:
predicted_class_index = np.argmax(predictions)
predicted_intent = categories[predicted_class_index]
predicted_intent


'greeting'

In [None]:
test_data = [
    {"input": "Hello", "expected_intent": "greeting"},
    {"input": "Bye", "expected_intent": "goodbye"}
]

In [82]:
import tensorflow as tf


In [84]:
# Assuming you have test data in test_data
test_x = []
test_y = []

for test_case in test_data:
    user_input = test_case["input"]
    expected_intent = test_case["expected_intent"]

    input_words = nltk.word_tokenize(user_input)
    input_words = [lemmatizer.lemmatize(word.lower()) for word in input_words]

    input_bag = [1 if w in input_words else 0 for w in words]

    test_x.append(input_bag)
    test_y.append(categories.index(expected_intent))

# Convert test_y to one-hot encoded format
test_y = tf.keras.utils.to_categorical(test_y, num_classes=len(categories))

test_x = np.array(test_x)
test_y = np.array(test_y)

# Evaluate the model
accuracy = model.evaluate(test_x, test_y, verbose=1)
print(f"Test Accuracy: {accuracy[1] * 100:.2f}%")


Test Accuracy: 100.00%


In [86]:
import nltk
import numpy as np
from keras.models import load_model

nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer

# Load the trained model
model = load_model('/content/chatbot_model.h5')

# Load preprocessed data (words and categories)
import pickle
words = pickle.load(open('/content/words.pkl', 'rb'))
categories = pickle.load(open('/content/categories.pkl', 'rb'))

lemmatizer = WordNetLemmatizer()

def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [lemmatizer.lemmatize(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words, show_details=True):
    sentence_words = clean_up_sentence(sentence)
    bag = [0]*len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
                if show_details:
                    print("found in bag: %s" % w)
    return(np.array(bag))

def predict_class(sentence, model):
    p = bow(sentence, words, show_details=False)
    res = model.predict(np.array([p]))[0]
    ERROR_THRESHOLD = 0.25
    results = [[i, r] for i, r in enumerate(res) if r > ERROR_THRESHOLD]

    results.sort(key=lambda x: x[1], reverse=True)
    return_list = []
    for r in results:
        return_list.append({"intent": categories[r[0]], "probability": str(r[1])})
    return return_list

def get_response(intents_list, intents_json):
    tag = intents_list[0]['intent']
    list_of_intents = intents_json['intents']
    for i in list_of_intents:
        if i['tag'] == tag:
            result = random.choice(i['responses'])
            break
    return result

def chatbot_response(user_input):
    intents = predict_class(user_input, model)
    response = get_response(intents, intents_json)
    return response


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [88]:
import json

# Load your intents JSON file
with open('/content/Chabotdataset.json', 'r') as file:
    intents_json = json.load(file)


In [90]:
while True:
    user_input = input("You: ")

    if user_input.lower() == 'bye':
        print("Chatbot: Goodbye!")
        break

    response = chatbot_response(user_input)
    print("Chatbot:", response)


You: Hi there
Chatbot: Good to see you again
You: what can you do for me
Chatbot: I can answer your questions related to machine learning or any general anser. Come on try me
You: how are you feeling
Chatbot: No thanks
You: you okay
Chatbot: No thanks
You: There's no one like you
Chatbot: Tell me something I don't know
You: I like you
Chatbot: Of course, you do, sweetheart
You: you are amazing
Chatbot: I know
You: Please can I talk with you
Chatbot: Awww..welcome to my life!
You: you are useless
Chatbot: Thank you so much for correcting me
You: do you like me
Chatbot: I don't know...I'm a bot!
You: bye
Chatbot: Goodbye!
