## Import and Load data file

In [2]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD
import random

ModuleNotFoundError: No module named 'tensorflow'

In [1]:
import json
import pickle

import nltk
from nltk.stem import WordNetLemmatizer

# Single lemmatizer instance reused by the pre-processing and inference cells.
lemmatizer = WordNetLemmatizer()

# Read the intent definitions.  The context manager closes the file handle
# as soon as the text is read instead of leaving it open in the kernel.
with open("../data/intents.json") as intents_file:
    data_file = intents_file.read()
intents = json.loads(data_file)

ModuleNotFoundError: No module named 'tensorflow'

## Pre-process data

Here we iterate through the patterns, tokenize each sentence with the `nltk.word_tokenize()` function, and append every token to the `words` list. We also build the list of classes from the intent tags.

In [14]:
# Start from empty containers so this cell works on a fresh kernel and can be
# re-run without accumulating duplicate tokens and documents.
words = []      # every token seen in any pattern (duplicates included)
classes = []    # distinct intent tags, in first-seen order
documents = []  # (token list, tag) pairs, one per pattern

for intent in intents["intents"]:
    for pattern in intent["patterns"]:

        # tokenize each word
        w = nltk.word_tokenize(pattern)
        words.extend(w)

        # add documents in the corpus
        documents.append((w, intent["tag"]))

        # add to our classes list
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

Lemmatize each word and remove duplicate words from the list.

In [15]:
# Tokens dropped from the vocabulary.  Defined here so the cell does not rely
# on a name left over from an earlier kernel session; "?" is the token the
# training-data cell also filters, which keeps the pickled vocabulary aligned
# with the one the model is trained on.
# NOTE(review): extend this list if other punctuation should be ignored.
ignore_words = ["?"]

# lemmatize, lower each word and remove duplicates
words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
words = sorted(set(words))
# sort classes
classes = sorted(set(classes))
# documents = combination between patterns and intents
print(len(documents), "documents")
# classes = intents
print(len(classes), "classes", classes)
# words = all words, vocabulary
print(len(words), "unique lemmatized words", words)

# Persist the vocabulary and class list for the inference cells; the context
# managers make sure the files are flushed and closed.
with open("../models/words.pkl", "wb") as words_file:
    pickle.dump(words, words_file)
with open("../models/classes.pkl", "wb") as classes_file:
    pickle.dump(classes, classes_file)

125 documents
43 classes ['age', 'art', 'book_recommendation', 'books', 'compliment', 'current_events', 'dance', 'dreams', 'education', 'environment', 'fashion', 'favorite_color', 'food', 'fun_fact', 'goodbye', 'greeting', 'happiness', 'health', 'help', 'history', 'hobby', 'insult', 'joke', 'math', 'meaning_of_life', 'movie_recommendation', 'movies', 'music', 'music_recommendation', 'name', 'options', 'pets', 'philosophy', 'politics', 'random_fact', 'science', 'sing', 'sports', 'technology', 'thanks', 'time', 'travel', 'weather']
144 unique lemmatized words ["'re", "'s", '+', '2', 'a', 'about', 'age', 'any', 'appreciate', 'are', 'art', 'assist', 'assistance', 'athlete', 'awesome', 'background', 'belief', 'book', 'bye', 'can', 'capability', 'color', 'concept', 'created', 'current', 'dance', 'delicious', 'destination', 'did', 'do', 'dream', 'eat', 'education', 'educational', 'emotion', 'environment', 'environmental', 'event', 'existence', 'experience', 'fact', 'famous', 'fashion', 'favor

## Create training data

In [16]:
# Initialize lists to hold words, classes, and document tuples
words = []
classes = []
documents = []

# Process each intent in the intents dictionary
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        # Tokenize each word in the pattern
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        # Add documents in the corpus
        documents.append((w, intent["tag"]))
        # Add to our classes list
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

# Lemmatize and lower-case each token, drop "?", and remove duplicates.
# An equality test is used on purpose: `w not in "?"` is a substring check
# against a string, not a membership test against a collection.
words = sorted({lemmatizer.lemmatize(w.lower()) for w in words if w != "?"})

# Sort classes
classes = sorted(set(classes))

# Prepare training data: one [bag_of_words, one_hot_tag] pair per document
training = []
# Template for the one-hot output row
output_empty = [0] * len(classes)

for pattern_tokens, tag in documents:
    # Lemmatize the pattern's tokens; a set gives constant-time lookups below
    pattern_words = {lemmatizer.lemmatize(token.lower()) for token in pattern_tokens}
    # 1 where the vocabulary word occurs in the pattern, else 0
    bag = [1 if vocab_word in pattern_words else 0 for vocab_word in words]

    # Output is '0' for each tag and '1' for the current tag
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([bag, output_row])

# Shuffle the samples in place.  No seed is set, so the order differs per run.
random.shuffle(training)

# Build the feature and label matrices straight from the shuffled pairs.
# This avoids a ragged dtype=object array, which changes shape (and leaves
# object-dtype features) whenever len(words) happens to equal len(classes).
# Only training data is produced here; there is no held-out test split.
train_x = np.array([bag for bag, _ in training])
train_y = np.array([output_row for _, output_row in training])

print("Training data created")

Training data created


## Create Model

In [17]:
# Create model - 3 layers. First layer 128 neurons, second layer 64 neurons and 3rd output layer contains number of neurons
# equal to number of intents to predict output intent with softmax
model = Sequential()
model.add(Dense(128, input_shape=(len(train_x[0]),), activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation="softmax"))

# Compile model. Stochastic gradient descent with Nesterov accelerated gradient gives good results for this model
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])

# Fit the model. train_x / train_y are already NumPy arrays, so they are
# passed directly instead of being copied through np.array() again.
hist = model.fit(train_x, train_y, epochs=200, batch_size=5, verbose=1)

# Save the trained model. Only the path is passed: the second positional
# parameter of Model.save() is `overwrite`, so passing `hist` there was
# merely interpreted as a truthy flag and the history was never stored.
model.save("../models/chatbot_model.h5")

print("model created")

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [18]:
import json
import random

from keras.models import load_model

# Reload the trained network saved by the training cell.
model = load_model("../models/chatbot_model.h5")

# Reload the intent definitions; the context manager closes the file.
with open("../data/intents.json") as intents_file:
    intents = json.loads(intents_file.read())

# Reload the vocabulary and class list written by the pre-processing cell.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that this notebook produced itself.
with open("../models/words.pkl", "rb") as words_file:
    words = pickle.load(words_file)
with open("../models/classes.pkl", "rb") as classes_file:
    classes = pickle.load(classes_file)

In [19]:
def clean_up_sentence(sentence):
    """Tokenize ``sentence`` and return its lower-cased, lemmatized tokens.

    Args:
        sentence: Raw text to split with ``nltk.word_tokenize``.

    Returns:
        A list with one entry per token, each lower-cased and then passed
        through the module-level ``lemmatizer``.
    """
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]


# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence

In [20]:
def bow(sentence, words, show_details=True):
    """Encode ``sentence`` as a 0/1 bag-of-words vector over ``words``.

    Args:
        sentence: Raw text; it is normalised with ``clean_up_sentence``.
        words: Vocabulary sequence; position ``i`` of the result refers to
            ``words[i]``.
        show_details: When true, print a line for every vocabulary hit.

    Returns:
        A NumPy array with one entry per vocabulary word, set to 1 where the
        word equals one of the sentence tokens and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    vector = [0 for _ in range(len(words))]
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            # mark the vocabulary slot that matches this token
            vector[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)
    return np.array(vector)

In [21]:
def predict_class(sentence, model):
    """Rank the intents the model predicts for ``sentence``.

    The sentence is encoded with ``bow`` against the module-level ``words``
    vocabulary and passed to ``model.predict`` as a batch of one.  Scores that
    are not strictly greater than the threshold are discarded.

    Args:
        sentence: Raw user text.
        model: Object exposing ``predict``; the first row of its result is
            read as one score per entry of the module-level ``classes``.

    Returns:
        A list of ``{"intent": <class name>, "probability": <score as str>}``
        dicts ordered from highest to lowest score; empty when no score
        exceeds the threshold.
    """
    ERROR_THRESHOLD = 0.25
    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]

    # keep only confident predictions, strongest first
    confident = [[idx, score] for idx, score in enumerate(scores) if score > ERROR_THRESHOLD]
    ranked = sorted(confident, key=lambda pair: pair[1], reverse=True)

    return [
        {"intent": classes[idx], "probability": str(score)}
        for idx, score in ranked
    ]

In [22]:
def getResponse(ints, intents_json):
    """Pick a random response for the top-ranked predicted intent.

    Args:
        ints: Predictions ordered best-first; each item is a dict with an
            ``"intent"`` key.  May be empty when nothing was predicted.
        intents_json: Dict with an ``"intents"`` list whose entries carry a
            ``"tag"`` and a ``"responses"`` list.

    Returns:
        One entry of the matching intent's ``"responses"``, chosen at random.
        A fixed fallback message is returned when ``ints`` is empty, when no
        intent has the predicted tag, or when the matching intent has no
        responses, instead of raising IndexError / UnboundLocalError.
    """
    fallback = "Sorry, I didn't understand that."

    # No prediction survived the threshold: nothing to look up.
    if not ints:
        return fallback

    tag = ints[0]["intent"]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag:
            responses = intent["responses"]
            # random.choice raises on an empty sequence, so guard it.
            return random.choice(responses) if responses else fallback

    # The predicted tag is unknown to the intents file.
    return fallback

In [23]:
def chatbot_response(text):
    """Return the bot's reply to ``text``.

    Classifies the text with ``predict_class`` using the module-level
    ``model`` and hands the ranked intents to ``getResponse`` together with
    the module-level ``intents``.
    """
    return getResponse(predict_class(text, model), intents)

In [24]:
import streamlit as st

# Function to send message


def send_message(input_message):
    """Echo the user's message and the bot's reply through ``st.write``.

    Leading and trailing whitespace is stripped first; nothing is written
    when the stripped message is empty.
    """
    msg = input_message.strip()
    if not msg:
        return
    st.write("You: " + msg)
    reply = chatbot_response(msg)
    st.write("Bot: " + reply)


# Define the layout of the Streamlit app


def main():
    """Render the chat page: a title, a text box and a "Send" button.

    The current text-box content is passed to ``send_message`` when the
    button reports a click.
    """
    st.title("Chat with Bot")
    user_text = st.text_input("Enter message:", "")
    send_clicked = st.button("Send")
    if send_clicked:
        send_message(user_text)


# Run the Streamlit app
# NOTE(review): inside a notebook kernel `__name__` is also "__main__", so
# this guard does not keep main() from running when the cell executes. The
# captured cell output points at the `streamlit run` command, which suggests
# the UI is meant to be launched from a script rather than from this cell -
# confirm how it should be started.
if __name__ == "__main__":
    main()

2024-03-04 14:11:58.789 
  command:

    streamlit run C:\Users\vinot\AppData\Roaming\Python\Python311\site-packages\ipykernel_launcher.py [ARGUMENTS]
