In [37]:
import nltk
from nltk.stem.porter import PorterStemmer
import json
import numpy as np

import torch 
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Contextual chatbot - PyTorch


## The idea:

### There is a pre-defined set of categories.
### Once the user enters text, the model's job is to predict which category the input belongs to.
### Then a random response is picked from that category's pre-defined responses and used as the reply.

# <img src="pics/idea.jpg">

In [30]:
# Single Porter stemmer instance; stem() below reuses it for every word.
portStemmer = PorterStemmer()
def tokenize(sentence):
    """Split ``sentence`` into tokens using NLTK's ``word_tokenize``."""
    tokens = nltk.word_tokenize(sentence)
    return tokens


def stem(word):
    """Lower-case ``word`` and return its stem from the shared Porter stemmer."""
    lowered = word.lower()
    return portStemmer.stem(lowered)


def bag_words(input_sentence, all_words):
    """Encode a tokenized sentence as a binary bag-of-words vector.

    Args:
        input_sentence: iterable of word tokens; each one is stemmed here
            before it is compared against the vocabulary.
        all_words: sequence of vocabulary words to compare against. Its order
            fixes which vector position each word occupies.

    Returns:
        A 1-D ``np.float32`` array of length ``len(all_words)`` holding ``1.0``
        at every position whose vocabulary word equals one of the stemmed
        tokens, and ``0.0`` everywhere else.
    """
    # A set gives constant-time membership checks; scanning a list of stems
    # once per vocabulary word made the loop below quadratic.
    sentence_stems = {stem(w) for w in input_sentence}
    bag = np.zeros(len(all_words), dtype=np.float32)
    for i, w in enumerate(all_words):
        if w in sentence_stems:
            bag[i] = 1.0
    return bag
  
    

In [31]:
# Sanity check: the inflected form "works" should reduce to the stem "work".
stem("works")

'work'

In [32]:
total_words = ['this', 'is', 'how', 'stemming', 'works']    # toy vocabulary (stemmed in the call below)
input_words = ['how', 'works']                              # tokens to encode against that vocabulary
# Expect 1.0 at the positions of 'how' and 'works' (stemmed to 'work') and 0.0 elsewhere.
bag_words(input_words, [stem(x) for x in total_words])

array([0., 0., 1., 0., 1.], dtype=float32)

## Preparing the training data

In [38]:
# Load the intent definitions; the cells below read the "intents" list and,
# for each entry, its "tag" and "patterns".
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which can mis-decode non-ASCII text in the file.
with open("categories.json", "r", encoding="utf-8") as f:
    intents = json.load(f)

In [39]:
all_words = []      # every token of every pattern (duplicates and punctuation included)
tags = []           # one entry per intent, in file order
pattern_tags = []   # (tokenized pattern, tag) pairs, one per pattern
for intent in intents["intents"]:
    tag = intent["tag"]
    tags.append(tag)
    for pattern in intent["patterns"]:
        tokenized_pattern = tokenize(pattern)
        all_words.extend(tokenized_pattern)
        # Keep the tokens together with their tag so each pattern can later be
        # turned into one (input, label) training pair.
        pattern_tags.append((tokenized_pattern, tag))

# Punctuation tokens to leave out when the vocabulary is built from all_words.
ignore_chars = ["?", "!", ".", ","]



# <img src="pics/json_iteration.jpg">

In [40]:
# Raw tokens collected from every pattern: original casing, duplicates and punctuation included.
print(all_words)

['Hi', 'Hey', 'How', 'are', 'you', 'Is', 'anyone', 'there', '?', 'Hello', 'Good', 'day', 'Bye', 'See', 'you', 'later', 'Goodbye', 'Thanks', 'Thank', 'you', 'That', "'s", 'helpful', 'Thank', "'s", 'a', 'lot', '!', 'Which', 'items', 'do', 'you', 'have', '?', 'What', 'kinds', 'of', 'items', 'are', 'there', '?', 'What', 'do', 'you', 'sell', '?', 'Do', 'you', 'take', 'credit', 'cards', '?', 'Do', 'you', 'accept', 'Mastercard', '?', 'Can', 'I', 'pay', 'with', 'Paypal', '?', 'Are', 'you', 'cash', 'only', '?', 'How', 'long', 'does', 'delivery', 'take', '?', 'How', 'long', 'does', 'shipping', 'take', '?', 'When', 'do', 'I', 'get', 'my', 'delivery', '?', 'Tell', 'me', 'a', 'joke', '!', 'Tell', 'me', 'something', 'funny', '!', 'Do', 'you', 'know', 'a', 'joke', '?']


In [41]:
# Stem every token except the punctuation listed in ignore_chars, collecting
# the stems in a set so that each one is kept only once.
all_words = {stem(token) for token in all_words if token not in ignore_chars}

In [42]:
# Vocabulary after stemming and de-duplication; it is a set here, so the printed order is arbitrary.
print(all_words)

{'long', 'when', 'get', 'is', 'paypal', 'someth', 'you', 'a', 'hey', 'my', 'good', 'day', 'me', 'with', 'thank', 'have', 'mastercard', 'tell', 'accept', 'of', 'know', 'hi', 'are', 'anyon', 'item', 'help', 'do', 'what', 'hello', 'doe', 'ship', 'there', 'bye', 'kind', 'onli', 'lot', 'which', 'pay', 'credit', 'goodby', 'later', 'sell', 'card', 'deliveri', 'how', 'that', 'cash', 'see', 'i', 'funni', "'s", 'joke', 'take', 'can'}


In [43]:
# Turn the vocabulary into a sorted list so every stem has a fixed position,
# then show the words followed by how many there are.
all_words = list(all_words)
all_words.sort()
print(all_words, len(all_words), sep="\n")

["'s", 'a', 'accept', 'anyon', 'are', 'bye', 'can', 'card', 'cash', 'credit', 'day', 'deliveri', 'do', 'doe', 'funni', 'get', 'good', 'goodby', 'have', 'hello', 'help', 'hey', 'hi', 'how', 'i', 'is', 'item', 'joke', 'kind', 'know', 'later', 'long', 'lot', 'mastercard', 'me', 'my', 'of', 'onli', 'pay', 'paypal', 'see', 'sell', 'ship', 'someth', 'take', 'tell', 'thank', 'that', 'there', 'what', 'when', 'which', 'with', 'you']
54


In [10]:
# De-duplicate and sort the tags; a tag's index in this list is used as its class label below.
tags = sorted(set(tags))
print(tags)

['delivery', 'funny', 'goodbye', 'greeting', 'items', 'payments', 'thanks']


In [11]:
# One bag-of-words vector per pattern (network inputs) ...
X_train = np.array([bag_words(tokens, all_words) for tokens, _ in pattern_tags])
# ... and the index of each pattern's tag in `tags` as its integer class label.
Y_train = np.array([tags.index(tag_name) for _, tag_name in pattern_tags], dtype=np.int64)
print(type(X_train))

<class 'numpy.ndarray'>


In [12]:
class ChatDataSet(Dataset):
    """Map-style dataset pairing bag-of-words vectors with integer class labels.

    Args:
        features: NumPy array with one row per sample. When omitted, the
            notebook-level ``X_train`` is used.
        labels: NumPy array with one class index per sample. When omitted, the
            notebook-level ``Y_train`` is used.
        verbose: when true (the default), print the types and contents of the
            converted tensors as a quick sanity check.

    Raises:
        ValueError: if ``features`` and ``labels`` differ in length.
    """

    def __init__(self, features=None, labels=None, verbose=True):
        # Fall back to the notebook globals so a bare ``ChatDataSet()`` call
        # keeps working exactly as before.
        if features is None:
            features = X_train
        if labels is None:
            labels = Y_train
        if len(features) != len(labels):
            raise ValueError(
                f"features and labels differ in length: {len(features)} != {len(labels)}"
            )

        self.n_samples = len(features)
        # torch.from_numpy wraps the arrays without copying them.
        self.x_data = torch.from_numpy(features)
        self.y_data = torch.from_numpy(labels)

        if verbose:
            print(type(self.x_data))
            print(type(self.x_data[0]))
            print(self.x_data)
            print(type(self.y_data))
            print(self.y_data)

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair stored at ``index``."""
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        """Return the number of samples."""
        return self.n_samples

# Mini-batch size handed to the DataLoader below.
# NOTE(review): the hyper-parameter cell further down defines BATCH_SIZE = 8 as well,
# but only this lower-case name is read by any code in the notebook.
batch_size = 8

dataset = ChatDataSet()
# NOTE(review): shuffle=True draws on random state and no seed is set anywhere in this notebook.
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True) # wraps the whole training set and yields it in shuffled mini-batches


<class 'torch.Tensor'>
<class 'torch.Tensor'>
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        ...,
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 1.]])
<class 'torch.Tensor'>
tensor([3, 3, 3, 3, 3, 3, 2, 2, 2, 6, 6, 6, 6, 4, 4, 4, 5, 5, 5, 5, 0, 0, 0, 1,
        1, 1])


## Defining the neural network structure

In [13]:
class Model(nn.Module):
    """Feed-forward classifier: two ReLU-activated linear layers, then a linear output layer.

    Args:
        input_size: number of features in each input vector.
        hidden_size: width of both hidden layers.
        classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, classes):
        super().__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the raw output-layer scores for ``x``; no softmax is applied here."""
        hidden = self.relu(self.l1(x))
        hidden = self.relu(self.l2(hidden))
        return self.l3(hidden)

# <img src="pics/structure.jpg">

In [14]:
# Number of distinct tags; the same value is used as the network's output size.
print(len(tags))

7


In [15]:
INPUT_SIZE = len(X_train[0])    # length of one bag-of-words vector
OUTPUT_SIZE = len(tags)         # one output score per tag
HIDDEN_SIZE = 8
# NOTE(review): BATCH_SIZE is not read by any code in this notebook; the
# DataLoader was built from the lower-case `batch_size` defined earlier.
BATCH_SIZE = 8

LEARN_RATE = 0.001
EPOCHS = 1000
SEED = 42

print(INPUT_SIZE, len(all_words))
print(OUTPUT_SIZE, tags)

# Seed PyTorch's global RNG before the model is built so that the weight
# initialisation is repeatable from run to run. A DataLoader created with
# shuffle=True and no explicit generator takes its shuffling seed from this
# same global RNG, so the batch order becomes repeatable as well.
torch.manual_seed(SEED)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Model(INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE).to(device)

54 54
7 ['delivery', 'funny', 'goodbye', 'greeting', 'items', 'payments', 'thanks']


## Training the network

# <img src="pics/logloss.jpg">

In [16]:
# Optimisation: cross-entropy loss on the model's raw outputs, minimised with Adam.

xEntropyLoss = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARN_RATE)

for epoch in range(EPOCHS):
    for words, labels in train_loader:
        words, labels = words.to(device), labels.to(device)

        # Clear the gradients left over from the previous step, then run the forward pass.
        optimizer.zero_grad()
        outputs = model(words)
        loss = xEntropyLoss(outputs, labels)

        # Back-propagate the loss and update the weights.
        loss.backward()
        optimizer.step()

    # Report every 100th epoch; the value shown is the loss of that epoch's last mini-batch.
    if epoch % 100 == 99:
        print(f"epoch: {epoch+1} / {EPOCHS}, loss: {loss.item()} ")


print(f"final loss: {loss.item()}")

epoch: 100 / 1000, loss: 1.2412502765655518 
epoch: 200 / 1000, loss: 0.08783357590436935 
epoch: 300 / 1000, loss: 0.016463223844766617 
epoch: 400 / 1000, loss: 0.015537867322564125 
epoch: 500 / 1000, loss: 0.006050540134310722 
epoch: 600 / 1000, loss: 0.004362071864306927 
epoch: 700 / 1000, loss: 0.0003328182501718402 
epoch: 800 / 1000, loss: 0.0006490105297416449 
epoch: 900 / 1000, loss: 0.0023323972709476948 
epoch: 1000 / 1000, loss: 0.001341348048299551 
final loss: 0.001341348048299551


In [17]:
# Checkpoint contents: the trained weights, the three sizes that were passed
# to Model(...), the sorted vocabulary and the sorted tag list.
data = {
    'model_state': model.state_dict(),
    'input_size': INPUT_SIZE,
    'output_size': OUTPUT_SIZE,
    'hidden_size': HIDDEN_SIZE,
    'all_words': all_words,
    'tags': tags
}

# Serialize the whole dictionary to a single file in the working directory.
file = 'data.pth'
torch.save(data, file)
