Table of contents:

1. [Imports](#Import)
2. [Function definitions](#Functions)
3. [Create JSON file](#JSON)
4. [Prepare dataset](#Prep)
5. [Define neural net](#NN)
6. [Training](#Train)

<a name = "Import"></a>
## 1. Imports

In [1]:
import nltk
from nltk.stem.porter import PorterStemmer
import pandas as pd
import numpy as np
nltk.download('punkt')
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\charl\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


<a name = "Functions"></a>
## 2. Function definitions

In [2]:
def tokenize(sentence):
    return nltk.word_tokenize(sentence)

def stem(word):
    stemmer = PorterStemmer()
    return stemmer.stem(word=word.lower())

def bag_of_words(tokenized_sentence, all_words):
    tokenized_sentence = [stem(w) for w in tokenized_sentence]
    
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in tokenized_sentence:
            bag[idx] = 1.0
            
    return bag

<a name = "JSON"></a>
## 3. Create JSON file

In [3]:
import pandas as pd
import json

intents_key = pd.read_csv('intents-key.csv')
intents_df = pd.read_csv('intents.csv')
df = pd.read_csv('dataset_appended.csv')
df = df.rename(columns={'Unnamed: 0': 'id', 'intent': 'tag', 'prompt': 'patterns', 'completion': 'responses'})

intents = {'intents': []}
tag_list = list(set(df['tag']))
for i, tag in enumerate(tag_list):
    intents['intents'].append(dict())
    intents['intents'][i]['tag'] = tag
    intents['intents'][i]['patterns'] = list(df[df['tag']==tag]['patterns'])
    intents['intents'][i]['responses'] = list(set(intents_df[intents_df['intent']==tag]['completion']))
    intents['intents'][i]['area'] = list(intents_df[intents_df['intent']==tag]['area'])[0]
    intents['intents'][i]['context'] = intents_key[intents_key['area']==intents['intents'][i]['area']]['context'].iloc[0]
    
with open('intents.json', 'w') as f:
    json.dump(intents, f) 
    
with open('intents.json', 'r') as f:
    intents = json.load(f)

<a name = "Prep"></a>
## 4. Prepare Dataset

In [4]:
all_words = []
tags = []
Xy = []

for intent in intents['intents']:
    tag = intent['tag']
    tags.append(tag)
    for pattern in intent['patterns']:
        words = tokenize(pattern)
        all_words.extend(words)
        Xy.append((words, tag))    
        
ignore_words = ['?', '!', '.', ',']
all_words = [stem(w) for w in all_words if w not in ignore_words]
all_words = sorted(set(all_words))
tags = sorted(set(tags))

In [5]:
X_train = []
y_train = []

for pattern_sentence, tag in Xy:
    bag = bag_of_words(pattern_sentence, all_words)
    X_train.append(bag)
    
    label = tags.index(tag)
    y_train.append(label)
    
X_train = np.array(X_train)
y_train = np.array(y_train)

print(X_train.shape)
print(y_train.shape)

(4911, 559)
(4911,)


In [6]:
class ChatDataset(Dataset):
    def __init__(self, X_train, y_train):
        self.n_samples = len(X_train)
        self.x_data = torch.from_numpy(X_train)
        self.y_data = torch.from_numpy(y_train).type(torch.LongTensor)
        
    def __getitem__(self, idx):
        return self.x_data[idx], self.y_data[idx]
    
    def __len__(self):
        return self.n_samples
    
batch_size = 2048
    
dataset = ChatDataset(X_train, y_train)
train_loader = DataLoader(dataset=dataset, batch_size=batch_size, num_workers=0, shuffle=True)

<a name = "NN"></a>
## 5. Define Neural Net
Define neural network for intent inference

In [7]:
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.l1 = nn.Linear(input_size, hidden_size)
        self.l2 = nn.Linear(hidden_size, hidden_size)
        self.l3 = nn.Linear(hidden_size, num_classes)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)
        out = self.relu(out)
        out = self.l3(out)
        return out

input_size = len(X_train[0])
hidden_size = 8
output_size = len(tags)
    
model = NeuralNet(input_size, hidden_size, output_size).to(device)

<a name = "Train"></a>
## 6. Training

In [8]:
learning_rate = 0.001
num_epochs = 500

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for words, labels in train_loader:
        words, labels = words.to(device), labels.to(device)
        
        # forward
        outputs = model(words)
        loss = criterion(outputs, labels)
        
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    if (epoch+1)%50 == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss = {loss.item():.4f}")
    
print(f"Final loss = {loss.item():.4f}")

Epoch 50/500, Loss = 2.9721
Epoch 100/500, Loss = 2.0750
Epoch 150/500, Loss = 1.2561
Epoch 200/500, Loss = 0.7099
Epoch 250/500, Loss = 0.4482
Epoch 300/500, Loss = 0.3302
Epoch 350/500, Loss = 0.2997
Epoch 400/500, Loss = 0.2271
Epoch 450/500, Loss = 0.1784
Epoch 500/500, Loss = 0.1427
Final loss = 0.1427


In [9]:
data = {
    'model_state': model.state_dict(),
    'input_size': input_size,
    'output_size': output_size,
    'hidden_size': hidden_size,
    'all_words': all_words,
    'tags': tags
}

torch.save(data, "model.pth")