***Import Libraries***

In [15]:
import random
import json
import pickle

import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.optimizers import SGD

import numpy as np


random – To randomly select responses or shuffle data (used in chatbot reply logic).

json – To load and parse structured data from a .json file (e.g., chatbot intents).

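pickle – To save and load Python objects such as the cleaned vocabulary (words) and the intent labels (classes). The trained model itself is saved with Keras' own model.save, not with pickle.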

nltk – For natural language processing tasks such as tokenization and lemmatization.

WordNetLemmatizer (from nltk.stem) – To reduce words to their base form for consistency in text processing.

Sequential (from tensorflow.keras.models) – To build neural network models in a linear (stacked) fashion.

Dense (from tensorflow.keras.layers) – To create fully connected layers in the neural network.

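Activation (from tensorflow.keras.layers) – To apply an activation function (e.g., ReLU, softmax) as a standalone layer. This notebook instead passes the activation directly to each Dense layer through its activation argument.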

Dropout (from tensorflow.keras.layers) – To prevent overfitting by randomly disabling neurons during training.

SGD (from tensorflow.keras.optimizers) – To optimize the neural network weights using Stochastic Gradient Descent.

numpy (np) – For handling numerical data and arrays efficiently, used throughout data processing and model training.


 ***Chatbot Data Preprocessing – Building Vocabulary and Labels***

In [16]:
# Download necessary NLTK data packages
nltk.download('punkt_tab')
nltk.download('wordnet')  # Download the 'wordnet' dataset

lemmatizer = WordNetLemmatizer()

# Read the intents file through a context manager so the handle is closed
# as soon as parsing finishes instead of lingering until garbage collection.
with open("/content/intents.json", encoding="utf-8") as intents_file:
    intents = json.load(intents_file)

words = []      # every token seen in any pattern (deduplicated below)
classes = []    # intent tags that own at least one pattern
documents = []  # (token list, tag) pairs, one per training pattern

ignore_letters = ["?", "!", ".", ","]

for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent["tag"]))

        if intent["tag"] not in classes:
            classes.append(intent["tag"])

# Lowercase before lemmatizing. Both the training bags and the inference code
# lowercase their tokens, so a vocabulary entry that kept its capital letter
# (e.g. "Hi") could never match and would be a permanently-zero feature.
words = [lemmatizer.lemmatize(word.lower())
         for word in words if word not in ignore_letters]

# Sorted, de-duplicated lists give a stable feature / label ordering.
words = sorted(set(words))
classes = sorted(set(classes))

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Loads and tokenizes patterns from an intents JSON file.

Lemmatizes and cleans the words to build a vocabulary.

Creates a list of unique intent classes (labels).

Prepares the data structure for further feature extraction and training a machine learning model.

 ***Saving Preprocessed Data for Future Use with Pickle***


In [17]:
# Persist the vocabulary and label list so the inference cell uses exactly the
# same feature/label ordering as training. The context managers guarantee the
# files are flushed and closed before anything tries to read them back.
with open('/content/words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('/content/classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)


This code uses pickle to serialize and save the cleaned vocabulary (words) and unique intent tags (classes) to files. This ensures consistency and avoids repeating preprocessing steps every time the chatbot runs.

***Converting Text Data into Training Data for Chatbot Model***

In [18]:
# Build one [bag-of-words, one-hot label] pair per training pattern.
dataset = []
template = [0] * len(classes)

for word_patterns, tag in documents:
    # Normalise the pattern tokens the same way the inference code does
    # (lowercase, then lemmatize); a set makes each membership test O(1).
    pattern_lemmas = {lemmatizer.lemmatize(word.lower())
                      for word in word_patterns}

    # 1 where the vocabulary word occurs in this pattern, 0 otherwise.
    bag = [1 if word in pattern_lemmas else 0 for word in words]

    output_row = list(template)
    output_row[classes.index(tag)] = 1
    dataset.append([bag, output_row])

random.shuffle(dataset)

# Pack the pairs into an (N, 2) object array cell by cell. Calling
# np.array(pairs, dtype=object) directly would silently produce an (N, 2, K)
# array whenever the vocabulary size happens to equal the number of classes,
# which turns train_x / train_y into object-dtype data Keras cannot consume.
pairs = dataset
dataset = np.empty((len(pairs), 2), dtype=object)
for row, (bag, output_row) in enumerate(pairs):
    dataset[row, 0] = np.array(bag)
    dataset[row, 1] = np.array(output_row)

train_x = list(dataset[:, 0])
train_y = list(dataset[:, 1])

Converts each user input pattern into a bag of words vector.

Converts each class label into a one-hot encoded vector.

Combines them into a final training dataset suitable for a neural network model.

***Training a Neural Network for Intent Classification***

In [19]:
# Feed-forward classifier: bag-of-words vector in, one probability per intent out.
model = Sequential()
model.add(Dense(256, input_shape=(len(train_x[0]),),
                activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(train_y[0]), activation='softmax'))

# NOTE(review): recent Keras releases appear to ignore or reject the `decay`
# argument - confirm against the installed version before relying on it.
sgd = SGD(learning_rate=0.01, decay=1e-6,
          momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd, metrics=['accuracy'])

hist = model.fit(np.array(train_x), np.array(train_y),
                 epochs=200, batch_size=5, verbose=1)

# Save only the model. The training History used to be passed as a second
# positional argument, but that slot is the `overwrite` flag, not something
# that gets stored; `hist` remains available in the notebook for inspection.
model.save("chatbot_model.h5")
print("Done!")


Epoch 1/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.0497 - loss: 3.6042
Epoch 2/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.1392 - loss: 3.3646
Epoch 3/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2326 - loss: 3.0533
Epoch 4/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.2615 - loss: 2.7324
Epoch 5/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4337 - loss: 2.2642
Epoch 6/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5300 - loss: 1.9129
Epoch 7/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5750 - loss: 1.6020
Epoch 8/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6636 - loss: 1.3427
Epoch 9/200
[1m81/81[0m [32m━━━━━━━━━━━━━━━━━



Done!


This cell defines and trains a neural network that classifies user input into predefined intents using a bag-of-words representation. The network has two hidden layers with dropout regularization and a softmax output layer for multi-class prediction. It is trained with categorical cross-entropy loss and the SGD optimizer, then saved for later use in the chatbot system.

***Chatbot Response System Using Neural Network Model***

In [None]:
from tensorflow.keras.models import load_model # Import the load_model function
lemmatizer = WordNetLemmatizer()

# Load intents, words, and classes
# NOTE(review): these relative paths only resolve to the files written by the
# earlier cells (which use /content/...) when the working directory is
# /content - confirm for environments other than Colab.
with open("intents.json", encoding="utf-8") as intents_file:
    intents = json.load(intents_file)

# pickle can execute arbitrary code while loading; only load files that this
# notebook produced itself.
with open("words.pkl", "rb") as words_file:
    words = pickle.load(words_file)
with open("classes.pkl", "rb") as classes_file:
    classes = pickle.load(classes_file)

model = load_model("/content/chatbot_model.h5")

# Tokenize and normalise a sentence
def clean_up_sentence(sentence):
    """Split `sentence` into tokens and return their lowercased lemmas.

    Tokenization uses nltk.word_tokenize; each token is lowercased and then
    passed through the module-level `lemmatizer`. The result is a list in the
    original token order.
    """
    lemmas = []
    for token in nltk.word_tokenize(sentence):
        lemmas.append(lemmatizer.lemmatize(token.lower()))
    return lemmas

# Convert a sentence into a bag-of-words array
def bow(sentence, words):
    """Encode `sentence` as a 0/1 vector aligned with the vocabulary `words`.

    Args:
        sentence: Raw user text.
        words: Vocabulary list; position i of the result corresponds to words[i].

    Returns:
        A NumPy array of len(words) entries holding 1 where the vocabulary
        word occurs among the cleaned tokens of `sentence`, and 0 elsewhere.
    """
    # A set turns each vocabulary lookup into O(1) instead of rescanning the
    # whole vocabulary once per sentence token.
    present = set(clean_up_sentence(sentence))
    return np.array([1 if w in present else 0 for w in words])

# Predict the intent
def predict_class(sentence, error_threshold=0.25):
    """Return the intents the model predicts for `sentence`, best first.

    Args:
        sentence: Raw user text.
        error_threshold: Predictions whose score is not strictly greater than
            this value are discarded. Defaults to 0.25.

    Returns:
        A list of {"intent": <tag>, "probability": <score as str>} dicts sorted
        by descending score. The list is empty when no score clears the
        threshold.
    """
    bow_input = bow(sentence, words)
    # verbose=0 keeps Keras' per-call progress bar out of the chat transcript.
    res = model.predict(np.array([bow_input]), verbose=0)[0]
    results = [(index, score) for index, score in enumerate(res)
               if score > error_threshold]

    # Sort by probability
    results.sort(key=lambda pair: pair[1], reverse=True)
    return [{"intent": classes[index], "probability": str(score)}
            for index, score in results]

# Get response based on intent
def get_response(intents_list, intents_json,
                 fallback="I'm sorry, I didn't understand that."):
    """Pick a random response for the top-ranked predicted intent.

    Args:
        intents_list: Predictions ordered best-first; each item is a dict with
            an 'intent' key holding the tag.
        intents_json: Parsed intents data with an 'intents' list whose entries
            carry 'tag' and 'responses'.
        fallback: Text returned when no response can be chosen.

    Returns:
        A randomly chosen response string for the best intent, or `fallback`
        when `intents_list` is empty, the tag is not found in `intents_json`,
        or the matching intent has no responses.
    """
    if not intents_list:
        # Nothing was predicted; indexing [0] would raise IndexError.
        return fallback

    tag = intents_list[0]['intent']
    for i in intents_json['intents']:
        if i['tag'] == tag:
            responses = i.get('responses')
            # random.choice raises on an empty sequence, so guard it.
            return random.choice(responses) if responses else fallback

    # Unknown tag: return the fallback instead of an implicit None.
    return fallback

# Produce the bot's reply for one user message
def chatbot_response(text):
    """Return the chatbot's reply to `text`.

    The text is classified with predict_class. If at least one intent is
    predicted, a response is chosen via get_response from the module-level
    `intents`; otherwise a fixed apology string is returned.
    """
    predicted = predict_class(text)
    if not predicted:
        return "I'm sorry, I didn't understand that."
    return get_response(predicted, intents)

# Example conversation loop
print("Chat with the bot! Type 'quit' to exit.")
while True:
    try:
        message = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: end the chat
        # cleanly instead of surfacing a traceback.
        break

    # Ignore surrounding whitespace so " quit " still exits.
    message = message.strip()
    if message.lower() == "quit":
        break
    if not message:
        # Nothing to classify; prompt again rather than querying the model.
        continue

    response = chatbot_response(message)
    print("Bot:", response)




Chat with the bot! Type 'quit' to exit.
You: Hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
Bot: Hi there, how can I help?
You: what is fees of collage
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Bot: For Fee detail visit <a target="_blank" href="LINK"> here</a>
You: is there any sport events
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
Bot: Our university encourages all-round development of students and hence provides sports facilities in the campus. For more details visit<a target="_blank" href=/"(LINK IF HAVE)">here</a>
You: tell me about subject and domain
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
Bot: Our university encourages all-round development of students and hence provides sports facilities in the campus. For more details visit<a target="_blank" href=/"(LINK IF HAVE)">here</a>
You: hostel is available
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3

This code implements a chatbot system that classifies user input using a pre-trained neural network model. It converts the user input into a bag-of-words vector, predicts the most likely intent, and returns an appropriate response from a set of predefined responses. The chatbot can continuously interact with the user until the user types 'quit'.