# Problem Statement

**Our institution currently faces challenges in providing timely, accurate, and efficient support to students, faculty, and staff. The existing support systems, such as help desks and FAQs, are often overwhelmed, leading to long wait times, information overload, and decreased user satisfaction.**

# Objective

**To develop a deep-learning-powered chatbot that addresses these challenges by providing:**

24/7 availability: Ensuring round-the-clock support without relying on human agents.

Improved efficiency: Automating routine inquiries and reducing response times.

Enhanced user experience: Offering a personalized and intuitive interaction.

In [1]:
import numpy as np 
import os
# Show every file available under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

/kaggle/input/chatbot-model/intents.json


In [2]:
#Import needed Libraries for ChatBot creation

import json
import random

import nltk.tokenize
from nltk.stem.porter import PorterStemmer

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

In [3]:
# Load the intents dataset into the workspace.
# The context manager guarantees the file handle is closed, and the explicit
# encoding keeps the read independent of the machine's locale default.
with open('/kaggle/input/chatbot-model/intents.json', encoding='utf-8') as intents_fh:
    data_file = intents_fh.read()
intents = json.loads(data_file)

In [4]:
# Natural language processing helpers

stemmer = PorterStemmer()

class nltk_process:
    """Namespace of text-preprocessing helpers used for training and inference.

    All helpers are static: call them on the class
    (``nltk_process.tokenize(...)``) or on an instance.
    """

    @staticmethod
    def tokenize(sentence):
        """Split ``sentence`` into a list of tokens using NLTK's word tokenizer."""
        return nltk.tokenize.word_tokenize(sentence)

    @staticmethod
    def stem(word):
        """Return the Porter stem of the lower-cased ``word``."""
        return stemmer.stem(word.lower())

    @staticmethod
    def bag_of_words(tokenized_sentence, all_words):
        """Encode a tokenized sentence as a binary bag-of-words vector.

        Args:
            tokenized_sentence: iterable of raw (unstemmed) tokens.
            all_words: sequence of stemmed vocabulary words; position ``i`` of
                the result corresponds to ``all_words[i]``.

        Returns:
            A float32 NumPy array of length ``len(all_words)`` holding 1.0
            where the vocabulary word's stem occurs in the sentence, else 0.0.
        """
        # A set gives O(1) membership tests instead of rescanning the token
        # list for every vocabulary word.
        sentence_stems = {nltk_process.stem(w) for w in tokenized_sentence}

        bag = np.zeros(len(all_words),dtype=np.float32)
        for idx,w in enumerate(all_words):
            if w in sentence_stems:
                bag[idx] = 1.0
        return bag

In [5]:
# Feed-forward neural network

class NeuralNet(nn.Module):
    """Four-layer fully connected classifier with ReLU between the layers.

    The final layer returns raw, un-normalised class scores (no softmax).
    """

    def __init__(self,input_size,hidden_size,num_classess):
        super().__init__()
        # Input projection followed by two hidden-to-hidden layers.
        self.l1 = nn.Linear(input_size,hidden_size)
        self.l2 = nn.Linear(hidden_size,hidden_size)
        self.l3 = nn.Linear(hidden_size,hidden_size)
        # Output layer: one score per class.
        self.l4 = nn.Linear(hidden_size,num_classess)

        self.relu = nn.ReLU()

    def forward(self,x):
        """Return the class scores for the input batch ``x``."""
        hidden = x
        # Every layer except the last is followed by the ReLU activation.
        for layer in (self.l1, self.l2, self.l3):
            hidden = self.relu(layer(hidden))
        return self.l4(hidden)

In [6]:
# Creating Dataset for Model training

class ChatDataset(Dataset):
    """Map-style dataset pairing feature vectors with their labels.

    Args:
        x_data: indexable collection of feature vectors. When omitted, the
            notebook-level ``x_train`` is used (original behaviour).
        y_data: indexable collection of labels aligned with ``x_data``. When
            omitted, the notebook-level ``y_train`` is used.
    """

    def __init__(self, x_data=None, y_data=None) :
        # Fall back to the notebook globals so existing ``ChatDataset()``
        # calls keep working unchanged.
        if x_data is None:
            x_data = x_train
        if y_data is None:
            y_data = y_train

        self.n_samples = len(x_data)
        self.x_data = x_data
        self.y_data = y_data

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index],self.y_data[index]

    def __len__(self):
        """Return the number of samples in the dataset."""
        return self.n_samples

In [7]:
# Build the vocabulary, the tag list and the (tokens, tag) training pairs

ignore_words = ['?','.',',','!']
xy = []
tags = []

for intent in intents['intents']:
    tags.append(intent['tag'])
    # One (tokenized pattern, tag) pair per example sentence of this intent.
    xy.extend(
        (nltk_process.tokenize(sample), intent['tag'])
        for sample in intent["patterns"]
    )

# Vocabulary: unique stems of every non-punctuation token, in sorted order.
all_words = sorted({
    nltk_process.stem(token)
    for tokens, _ in xy
    for token in tokens
    if token not in ignore_words
})
tags = sorted(set(tags))
print(tags)

['admission', 'canteen', 'college intake', 'committee', 'computerhod', 'course', 'creator', 'document', 'event', 'extchod', 'facilities', 'fees', 'floors', 'goodbye', 'greeting', 'hod', 'hostel', 'hours', 'infrastructure', 'ithod', 'library', 'location', 'menu', 'name', 'number', 'placement', 'principal', 'ragging', 'random', 'salutaion', 'scholarship', 'sem', 'sports', 'swear', 'syllabus', 'task', 'uniform']


In [8]:
# Encode the (tokens, tag) pairs as NumPy training arrays

# Features: one bag-of-words vector per pattern sentence.
x_train = np.array(
    [nltk_process.bag_of_words(tokens, all_words) for tokens, _ in xy]
)
# Labels: position of each sentence's tag in the sorted tag list.
y_train = np.array([tags.index(tag_name) for _, tag_name in xy])


In [9]:
# Training configuration
learning_rate = 0.001
num_epochs = 1000
batch_size = 7
hidden_size = 7

# Network dimensions follow from the data: vocabulary size in, tag count out.
input_size = x_train.shape[1]
output_size = len(tags)
print(input_size, output_size)

# Shuffled mini-batches, loaded in the main process (no worker processes).
dataset = ChatDataset()
train_loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)

243 37


In [10]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the classifier, then move its parameters onto the chosen device.
model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

# Cross-entropy over the raw class scores, optimised with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [11]:
# Optimise the network over mini-batches
for epoch in range(num_epochs):
    for features, targets in train_loader:
        features = features.to(device)
        # The loss expects integer class indices (not one-hot vectors).
        targets = targets.to(device=device, dtype=torch.long)

        # Clear gradients left over from the previous step, then run forward.
        optimizer.zero_grad()
        loss = criterion(model(features), targets)

        # Back-propagate and apply the parameter update.
        loss.backward()
        optimizer.step()

    # Report every 100th epoch; the value shown is the last mini-batch's loss.
    if not (epoch+1) % 100:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.0243
Epoch [200/1000], Loss: 0.0009
Epoch [300/1000], Loss: 0.0040
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
final loss: 0.0000


In [12]:
# Persist the trained weights together with everything needed to rebuild
# the model and to encode new sentences at inference time.

FILE = "data.pth"

data = dict(
    model_state=model.state_dict(),
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
    all_words=all_words,
    tags=tags,
)
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

training complete. file saved to data.pth


In [13]:
def chatBot():
    """Run an interactive console chat session with the trained classifier.

    Loads the checkpoint from ``data.pth``, rebuilds ``NeuralNet`` from the
    stored sizes and weights, then repeatedly reads a line from standard
    input, predicts its intent tag and prints a random response for that tag
    taken from the notebook-level ``intents``. A response is only given when
    the predicted probability exceeds 0.75; otherwise a fallback message is
    printed. The loop ends when the user types ``quit`` or input reaches EOF.

    Relies on the notebook-level names ``device``, ``intents``, ``NeuralNet``
    and ``nltk_process``.
    """
    FILE = "data.pth"
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
    data = torch.load(FILE, map_location=device)

    input_size = data["input_size"]
    hidden_size = data["hidden_size"]
    output_size = data["output_size"]
    all_words = data['all_words']
    tags = data['tags']
    model_state = data["model_state"]

    model = NeuralNet(input_size, hidden_size, output_size).to(device)
    model.load_state_dict(model_state)
    model.eval()

    bot_name = "QuartZ"
    print("Let's chat! (type 'quit' to exit)")
    while True:
        try:
            sentence = input("You: ")
        except EOFError:
            # Input stream closed (e.g. piped input ran out): end the session.
            break

        # Tolerate surrounding whitespace and capitalisation in the exit word.
        if sentence.strip().lower() == "quit":
            break

        tokens = nltk_process.tokenize(sentence)
        X = nltk_process.bag_of_words(tokens, all_words)
        if not X.any():
            # Empty input or no known vocabulary word: the all-zero vector
            # carries no information, so do not let the network guess.
            print(f"{bot_name}: I do not understand...")
            continue

        X = X.reshape(1, X.shape[0])
        X = torch.from_numpy(X).to(device)

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            output = model(X)
            probs = torch.softmax(output, dim=1)

        # Softmax is monotonic, so the most probable class is also the one
        # with the highest raw score.
        prob, predicted = torch.max(probs, dim=1)
        tag = tags[predicted.item()]

        if prob.item() > 0.75:
            for intent in intents['intents']:
                if tag == intent["tag"]:
                    print(f"{bot_name}: {random.choice(intent['responses'])}")
        else:
            print(f"{bot_name}: I do not understand...")

In [14]:
# Run the MKCE chatbot: starts the interactive chat session, which loops until the user types 'quit'.
chatBot()

Let's chat! (type 'quit' to exit)


You:  Hi


QuartZ: Hello!


You:  who made you


QuartZ: I was created by Kavin


You:  timing of college


QuartZ: College is open 8.40am-5pm Monday-Saturday!


You:  what are the courses offered in your college?


QuartZ: Our university offers Information Technology, Computer Engineering, Mechanical engineering, Civil engineering, Electranical Engineering, Electrical and Communication Engineering, Artificial Intelligence & Data Science and Artificial Intelligence and Data Science


You:  Hod


QuartZ: HODs differ for each branch, please be more specific like: (HOD it)


You:  hod it


QuartZ: All engineering departments have only one hod XYZ who available on (Place name)


You:  AI hod


QuartZ: HODs differ for each branch, please be more specific like: (HOD it)


You:  Who is computer HOD


QuartZ: All engineering departments have only one hod JXXXX who available on 1st Block 3rd Floor


You:  principal name


QuartZ: P.S. Murugan is college principal and if you need any help then call your branch hod first. That is more appropriate


You:  number of seats per branch


QuartZ: For IT, Computer and extc 60 per branch and seat may be differ for different department.


You:  college dresscode


QuartZ: No Uniform but dress code is mandatory- Boys- Formal-Casual [Shirt & Pants]


You:  quit
