In [1]:
import wandb
import nltk
from nltk.stem.porter import *
from torch.nn import *
from torch.optim import *
import numpy as np
import pandas as pd
import torch,torchvision
import random
from tqdm import *
from torch.utils.data import Dataset,DataLoader
stemmer = PorterStemmer()
PROJECT_NAME = 'chatbot'
device = 'cuda'

In [2]:
def tokenize(sentence):
    return nltk.word_tokenize(sentence.lower())

In [3]:
tokenize('$100')

['$', '100']

In [4]:
def stem(word):
    return stemmer.stem(word.lower())

In [5]:
stem('organic')

'organ'

In [6]:
def bag_of_words(tokenized_words,words):
    tokenized_words = [stem(w) for w in tokenized_words]
    bag = np.zeros(len(words))
    for idx,w in enumerate(words):
        if w in tokenized_words:
            bag[idx] = 1.0
    return bag

In [7]:
bag_of_words(['hi'],['hi','how','hi'])

array([1., 0., 1.])

In [8]:
# data = pd.read_csv('./data.csv').sample(frac=1)[:3250]

In [9]:
# data.to_csv('./cleaned-data.csv',index=False)

In [10]:
data = pd.read_json('./data.json')

In [11]:
dict(data.iloc[0]['intents'])

{'tag': 'greeting',
 'patterns': ['Hi',
  'Hey',
  'How are you',
  'Is anyone there?',
  'Hello',
  'Good day'],
 'responses': ['Hey :-)',
  'Hello, thanks for visiting',
  'Hi there, what can I do for you?',
  'Hi there, how can I help?']}

In [12]:
new_data = {'Question':[],'Tag':[]}
responses = {'Tag':[],'Response':[]}

In [13]:
for i in range(len(data)):
    info = data.iloc[i]['intents']
    for q in list(info['patterns']):
        new_data['Tag'].append(info['tag'])
        new_data['Question'].append(q)
    for r in list(info['responses']):
        responses['Tag'].append(info['tag'])
        responses['Response'].append(r)

In [14]:
data = pd.DataFrame(new_data)

In [15]:
data.to_csv('./data.csv')

In [16]:
responses = pd.DataFrame(responses)

In [17]:
X = data['Question']
y = data['Tag']
X_words = []
labels = {}
labels_r = {}
idx = 0
data = []

In [18]:
for label in y:
    if label not in list(labels.keys()):
        idx += 1
        labels[label] = idx
        labels_r[idx] = label

In [19]:
labels

{'greeting': 1,
 'goodbye': 2,
 'thanks': 3,
 'items': 4,
 'payments': 5,
 'delivery': 6,
 'funny': 7}

In [20]:
for X_batch,y_batch in tqdm(zip(X,y)):
    X_batch = tokenize(X_batch)
    new_X = []
    for Xb in X_batch:
        new_X.append(stem(Xb))
    X_words.extend(new_X)
    
    data.append([
        X_words,
        labels[y_batch]-1
    ])

26it [00:00, 6218.74it/s]


In [21]:
X_words = sorted(set(X_words))
np.random.shuffle(data)

In [22]:
X = []
y = []

In [23]:
for input_sentence,output_sentence in tqdm(data):
    X.append(bag_of_words(input_sentence,X_words))
    y.append(output_sentence)

100%|██████████| 26/26 [00:00<00:00, 912.35it/s]


In [24]:
X_train = torch.from_numpy(np.array(X)).to(device).long()
y_train = torch.from_numpy(np.array(y)).to(device).long()
X_test = torch.from_numpy(np.array(X)).to(device).long()
y_test = torch.from_numpy(np.array(X)).to(device).long()

In [25]:
def get_loss(model,X,y,criterion):
    preds = model(X)
    loss = criterion(preds,y)
    return loss.item()

In [26]:
def get_accuracy(model,X,y,):
    preds = model(X)
    correct = 0
    total = 0
    for pred,yb in zip(preds,y):
        pred = int(torch.argmax(pred))
        yb = int(torch.argmax(yb))
        if pred == yb:
            correct += 1
        total += 1
    acc = round(correct/total,3)*100
    return acc

In [70]:
class Model(Module):
    def __init__(self):
        super().__init__()
        self.hidden = 8
        self.activation = ReLU()
        self.batchnorm = BatchNorm1d(self.hidden)
        self.linear1 = Linear(len(X_words),self.hidden)
        self.linear2 = Linear(self.hidden,self.hidden)
        self.linear3 = Linear(self.hidden,self.hidden)
        self.linear4 = Linear(self.hidden,self.hidden)
        self.linear5 = Linear(self.hidden,self.hidden)
        self.output = Linear(self.hidden,len(labels)) # TODO trying adding 1 or -1
    
    def forward(self,X):
        preds = self.linear1(X)
        preds = self.activation(self.linear2(preds))
        preds = self.activation(self.linear3(preds))
        preds = self.activation(self.linear4(preds))
        preds = self.activation(self.linear5(preds))
        preds = self.output(preds)
        return preds

In [71]:
model = Model().to(device)
criterion = CrossEntropyLoss()
optimizer = Adam(model.parameters(),lr=0.001)
epochs = 1000
batch_size = 8

In [72]:
wandb.init(project=PROJECT_NAME,name='baseline')
for _ in tqdm(range(epochs)):
    preds = model(X_train.float())
    loss = criterion(preds,y_train.long())
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    torch.cuda.empty_cache()
    wandb.log({'Loss':get_loss(model,X_train.float(),y_train,criterion)})
    torch.cuda.empty_cache()
#     wandb.log({'Val Loss':get_loss(model,X_test.float(),y_test,criterion)})
    torch.cuda.empty_cache()
    wandb.log({'Acc':get_accuracy(model,X_train.float(),y_train)})
    torch.cuda.empty_cache()
    wandb.log({'Val Acc':get_accuracy(model,X_test.float(),y_test)})
    torch.cuda.empty_cache()
wandb.finish()
torch.cuda.empty_cache()

VBox(children=(Label(value=' 0.07MB of 0.07MB uploaded (0.03MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

100%|██████████| 1000/1000 [00:05<00:00, 192.44it/s]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Acc,▁▁██████████████████████████████████████
Loss,█▆▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
Val Acc,▁▁██████████████████████████████████████

0,1
Acc,100.0
Loss,1.91101
Val Acc,100.0


In [73]:
torch.save(model.to('cpu'),'model.pt')
torch.save(model.to('cpu'),'model.pth')
torch.save(model.to('cpu').state_dict(),'model-sd.pt')
torch.save(model.to('cpu').state_dict(),'model-sd.pth')
torch.save(X_words,'words.pt')
torch.save(X_words,'words.pth')
torch.save(data,'data.pt')
torch.save(data,'data.pth')
torch.save(labels,'labels.pt')
torch.save(labels,'labels.pth')
torch.save(labels_r,'labels_r.pt')
torch.save(labels_r,'labels_r.pth')
torch.save(idx,'idx.pt')
torch.save(idx,'idx.pth')
torch.save(y_train,'y_train.pt')
torch.save(y_test,'y_test.pth')
torch.save(y,'y.pt')
torch.save(y,'y.pth')

In [74]:
model.eval()
# torch.argmax(model(torch.from_numpy(bag_of_words(['hi','how','are','you'],X_words)).float()))

Model(
  (activation): ReLU()
  (batchnorm): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=56, out_features=8, bias=True)
  (linear2): Linear(in_features=8, out_features=8, bias=True)
  (linear3): Linear(in_features=8, out_features=8, bias=True)
  (linear4): Linear(in_features=8, out_features=8, bias=True)
  (linear5): Linear(in_features=8, out_features=8, bias=True)
  (output): Linear(in_features=8, out_features=7, bias=True)
)

In [75]:
responses

Unnamed: 0,Tag,Response
0,greeting,Hey :-)
1,greeting,"Hello, thanks for visiting"
2,greeting,"Hi there, what can I do for you?"
3,greeting,"Hi there, how can I help?"
4,goodbye,"See you later, thanks for visiting"
5,goodbye,Have a nice day
6,goodbye,Bye! Come back again soon.
7,thanks,Happy to help!
8,thanks,Any time!
9,thanks,My pleasure


In [76]:
model.train()

Model(
  (activation): ReLU()
  (batchnorm): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=56, out_features=8, bias=True)
  (linear2): Linear(in_features=8, out_features=8, bias=True)
  (linear3): Linear(in_features=8, out_features=8, bias=True)
  (linear4): Linear(in_features=8, out_features=8, bias=True)
  (linear5): Linear(in_features=8, out_features=8, bias=True)
  (output): Linear(in_features=8, out_features=7, bias=True)
)

In [77]:
X_train[0].shape

torch.Size([56])

In [78]:
preds = model(X_train.cpu().float()) #[25].view(1,-1).to('cpu').float().to('cpu')

In [79]:
preds

tensor([[ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.239

In [80]:
preds[0]

tensor([ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
       grad_fn=<SelectBackward>)

In [87]:
Softmax()(preds[2])

  Softmax()(preds[2])


tensor([0.2307, 0.1154, 0.1539, 0.1154, 0.1538, 0.1154, 0.1154],
       grad_fn=<SoftmaxBackward>)

In [82]:
model.eval()

Model(
  (activation): ReLU()
  (batchnorm): BatchNorm1d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=56, out_features=8, bias=True)
  (linear2): Linear(in_features=8, out_features=8, bias=True)
  (linear3): Linear(in_features=8, out_features=8, bias=True)
  (linear4): Linear(in_features=8, out_features=8, bias=True)
  (linear5): Linear(in_features=8, out_features=8, bias=True)
  (output): Linear(in_features=8, out_features=7, bias=True)
)

In [83]:
preds = model(X_train.cpu().float()) #[25].view(1,-1).to('cpu').float().to('cpu')

In [84]:
preds

tensor([[ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.2393, -0.0481, -0.0476],
        [ 0.6446, -0.0477,  0.2397, -0.0476,  0.239

In [85]:
torch.argmax(preds)

tensor(0)

In [86]:
responses.to_csv('./responses.csv')