In [1]:
# model
import torch
import torch.nn as nn

class NeuralNet(nn.Module):
    """Feed-forward classifier: three linear layers with ReLU in between.

    Args:
        input_size: number of input features.
        hidden_size: number of neurons in each of the two hidden layers.
        num_classes: number of output classes.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # The attribute names below become the state_dict keys
        # (l1.*, l2.*, l3.*), so they must stay as they are.
        self.l1 = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.l2 = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.l3 = nn.Linear(hidden_size, num_classes)   # hidden -> output
        # ReLU (Rectified Linear Unit) introduces the non-linearity
        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the forward pass and return the output of the last linear layer.

        No activation or softmax is applied to the returned scores.
        """
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

In [2]:
# nltk_utils
import numpy as np
import nltk
# natural language toolkit
# nltk.word_tokenize() needs the Punkt tokenizer data. Older NLTK releases
# load it from the 'punkt' package, while NLTK 3.9 and later look for
# 'punkt_tab' instead, so fetch both to work with either.
for _punkt_resource in ('punkt', 'punkt_tab'):
    nltk.download(_punkt_resource)
from nltk.stem.porter import PorterStemmer
# single shared stemmer instance, used by stem() below
stemmer = PorterStemmer()

# splits sentence to array of words or tokens

# uses NLTK's word_tokenize function for splitting
def tokenize(sentence):
    """
    Break a sentence into a list of tokens.

    A token can be a word, a punctuation character, or a number.
    The work is delegated to nltk.word_tokenize and its result is
    returned unchanged.
    """
    tokens = nltk.word_tokenize(sentence)
    return tokens


def stem(word):
    """
    Reduce a word to its root form.

    The word is lower-cased first and then handed to the module-level
    PorterStemmer instance.

    example:
    words = ["organize", "organizes", "organizing"]
    [stem(w) for w in words] -> ["organ", "organ", "organ"]
    """
    lowered = word.lower()
    return stemmer.stem(lowered)


def bag_of_words(tokenized_sentence, words):
    """
    Encode a tokenized sentence as a bag-of-words vector over `words`.

    Every token is stemmed with stem(); position i of the result is 1 when
    words[i] equals one of those stems and 0 otherwise.

    example:
    sentence = ["hello", "how", "are", "you"]
    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
    bag   = [  0 ,    1 ,    0 ,   1 ,    0 ,    0 ,      0]

    Args:
        tokenized_sentence: iterable of token strings.
        words: sequence of vocabulary entries. They are compared against
            the stemmed tokens as-is (this function does not stem them).

    Returns:
        numpy float32 array of length len(words).
    """
    # Build a set of stems once: membership checks are then O(1) instead of
    # rescanning a list of stems for every vocabulary word.
    sentence_stems = {stem(token) for token in tokenized_sentence}
    # initialize bag with 0 for each word
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, known_word in enumerate(words):
        if known_word in sentence_stems:
            bag[idx] = 1

    return bag

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Pc\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [3]:
# train
import numpy as np
import random
import json

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# JSON is UTF-8 by specification; pass the encoding explicitly so the file is
# not decoded with the platform default (e.g. cp1252 on Windows).
with open('intents.json', 'r', encoding='utf-8') as f:
    intents = json.load(f)

all_words = []  # every token from every pattern (stemmed and de-duplicated below)
tags = []       # one entry per intent (de-duplicated below)
xy = []         # (tokenized pattern, tag) pairs
# loop through each sentence in our intents patterns
for intent in intents['intents']:
    tag = intent['tag']
    # add to tag list
    tags.append(tag)
    for pattern in intent['patterns']:
        # split the pattern sentence into tokens
        w = tokenize(pattern)
        # add to our words list
        all_words.extend(w)
        # add to xy pair
        xy.append((w, tag))

# stem and lower each word, dropping the punctuation tokens listed here
ignore_words = ['?', '.', '!']
all_words = [stem(w) for w in all_words if w not in ignore_words]
# remove duplicates and sort, which fixes the feature and label index order
all_words = sorted(set(all_words))
tags = sorted(set(tags))

print(len(xy), "patterns")
print(len(tags), "tags:", tags)
print(len(all_words), "unique stemmed words:", all_words)

# create training data
# Map each tag to its position in the sorted tag list once, instead of doing
# a linear tags.index() scan for every pattern.
tag_to_label = {tag_name: position for position, tag_name in enumerate(tags)}
X_train = []
y_train = []
for (pattern_sentence, tag) in xy:
    # X: bag of words for each pattern_sentence
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
    label = tag_to_label[tag]
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)

# Hyper-parameters
num_epochs = 1000
batch_size = 8
learning_rate = 0.001
# Each bag-of-words row has one slot per vocabulary word, so this equals
# len(X_train[0]) but does not fail when there are no patterns at all.
input_size = len(all_words)
hidden_size = 8
output_size = len(tags)
print(input_size, output_size)

class ChatDataset(Dataset):
    """Map-style dataset pairing feature vectors with class labels.

    Args:
        x_data: indexable collection of feature vectors. When omitted, the
            notebook-level ``X_train`` is used.
        y_data: indexable collection of labels aligned with ``x_data``. When
            omitted, the notebook-level ``y_train`` is used.

    Raises:
        ValueError: if the two collections do not have the same length.
    """

    def __init__(self, x_data=None, y_data=None):
        # Fall back to the notebook globals only when nothing is passed, so
        # the existing no-argument ChatDataset() call keeps working.
        self.x_data = X_train if x_data is None else x_data
        self.y_data = y_train if y_data is None else y_data
        if len(self.x_data) != len(self.y_data):
            raise ValueError(
                f"x_data has {len(self.x_data)} samples but "
                f"y_data has {len(self.y_data)}"
            )
        self.n_samples = len(self.x_data)

    # support indexing such that dataset[i] can be used to get i-th sample
    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    # we can call len(dataset) to return the size
    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

# Wrap the training arrays in a dataset and serve them in shuffled mini-batches
dataset = ChatDataset()
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# Use the GPU when one is available, otherwise stay on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size)
model = model.to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        # CrossEntropyLoss is given the labels as int64 class indices
        labels = labels.to(device, dtype=torch.long)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass
        # (if y were one-hot encoded we would first need
        #  labels = torch.max(labels, 1)[1])
        outputs = model(words)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

    # Report every 100th epoch; the value shown is the loss of the last batch
    if not (epoch + 1) % 100:
        print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


print(f'final loss: {loss.item():.4f}')

# Bundle the weights with everything needed to rebuild the model and to
# encode new sentences
data = {
    "model_state": model.state_dict(),
    "input_size": input_size,
    "hidden_size": hidden_size,
    "output_size": output_size,
    "all_words": all_words,
    "tags": tags,
}

FILE = "data.pth"
torch.save(data, FILE)

print(f'training complete. file saved to {FILE}')

259 patterns
26 tags: ['admission', 'campus area', 'canteen', 'committee', 'course', 'document', 'event', 'facilities', 'fees', 'goodbye', 'greeting', 'hostel', 'hours', 'infrastructure', 'library', 'location', 'number', 'placement', 'principal', 'ragging', 'ranking', 'scholarship', 'sem', 'sports', 'uniform', 'vacation']
178 unique stemmed words: ["'s", 'a', 'about', 'ac', 'activ', 'address', 'admis', 'admiss', 'against', 'ai/ml', 'an', 'and', 'ani', 'antirag', 'are', 'area', 'at', 'automobil', 'avail', 'averag', 'be', 'between', 'big', 'book', 'boy', 'branch', 'bring', 'build', 'bye', 'cafetaria', 'campu', 'can', 'canteen', 'capac', 'case', 'casual', 'ce', 'chemic', 'civil', 'code', 'colleg', 'committ', 'committe', 'comp', 'compani', 'comput', 'conduct', 'contact', 'cours', 'date', 'day', 'detail', 'differ', 'distanc', 'do', 'document', 'doe', 'done', 'dress', 'dresscod', 'dure', 'each', 'end', 'engin', 'event', 'exam', 'facil', 'far', 'fee', 'first', 'food', 'for', 'fourth', 'from',

In [4]:
# chat
import random
import json
import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load contents
# Read the intents from the same relative path the training cell uses, so the
# responses match the tags the model was trained on. The previous hard-coded
# absolute Windows path only existed on one machine and contained invalid
# backslash escapes (such as "\A" and "\8") in a non-raw string literal.
with open('intents.json', 'r', encoding='utf-8') as json_data:
    intents = json.load(json_data)

# Load preprocessed data and model from a saved file
FILE = "data.pth"
# map_location lets a checkpoint that was saved on a CUDA machine be loaded
# on a CPU-only one (and vice versa).
data = torch.load(FILE, map_location=device)

# Extraction of components from loaded data
input_size = data["input_size"]
hidden_size = data["hidden_size"]
output_size = data["output_size"]
all_words = data['all_words']
tags = data['tags']
model_state = data["model_state"]

model = NeuralNet(input_size, hidden_size, output_size).to(device)
model.load_state_dict(model_state)
model.eval()

bot_name = "ThaparBot"
# Minimum softmax probability required before the bot answers with an intent
CONFIDENCE_THRESHOLD = 0.75

print("Let's chat! \nType 'quit' to exit")
while True:
    sentence = input("You: ")
    if sentence == "quit":
        break

    # tokenize user input
    sentence = tokenize(sentence)
    # convert user input to bag of words representation
    X = bag_of_words(sentence, all_words)
    X = X.reshape(1, X.shape[0])  # single-row batch: (1, vocabulary size)
    X = torch.from_numpy(X).to(device)

    # make prediction with trained model; gradients are not needed for
    # inference, so skip building the autograd graph
    with torch.no_grad():
        output = model(X)
        probs = torch.softmax(output, dim=1)
    _, predicted = torch.max(output, dim=1)

    tag = tags[predicted.item()]

    # confidence of the predicted class
    prob = probs[0][predicted.item()]
    if prob.item() > CONFIDENCE_THRESHOLD:
        for intent in intents['intents']:
            if tag == intent["tag"]:
                print(f"{bot_name}: {random.choice(intent['responses'])}")
    else:
        print(f"{bot_name}: Sorry, I do not understand.")

Let's chat! 
Type 'quit' to exit
ThaparBot: You can contact at: 0175 239 3021
ThaparBot: Our university offers: 
1. Chemical Engineering, 
2. Civil Engineering, 
3. Computer Engineering, 
4. Electrical Engineering, 
5. Electronics and Communication Engineering, 
6. Electronics Instrumentation and Control Engineering, 
7. Electronics and Computer Engineering, 
8. Mechanical Engineering, 
9. Mechatronics, 
10. Biotechnology.
ThaparBot: University fees per semester is around 1.5-2 lacs.And hostel fees ranges between Rs.40500-70000 depending upon the type of accomodation.
ThaparBot: Our University has Excellent Infrastructure. Campus is clean. Good IT Labs With Good Speed of Internet connection. You may also visit https://tiet360.in/ for campus tour.
ThaparBot: Our university's Engineering department provides fully AC Lab with internet connection, smart classroom, Auditorium, library,canteen
ThaparBot: Our university's Engineering department provides fully AC Lab with internet connection, 