In [2]:
import sys
sys.path.append("..")

import json
from pathlib import Path
import pickle

import torch

data_dir = Path("../data/intent")
cache_dir = Path("../cache/intent")

Load vocab

In [3]:
with open(cache_dir / "vocab.pkl", "rb") as f:
    vocab = pickle.load(f)
print(vocab.tokens[:5])

['[PAD]', '[UNK]', 'in', 'originally', 'mane']


Load index

In [4]:
intent_idx_path = cache_dir / "intent2idx.json"
intent2idx = json.loads(intent_idx_path.read_text())
next(iter(intent2idx.items()))

('damaged_card', 0)

Load dataset

In [5]:
dataset_path = data_dir / "eval.json"
dataset = json.loads(dataset_path.read_text())
dataset[0]

{'text': 'how long should i cook steak for',
 'intent': 'cook_time',
 'id': 'eval-0'}

In [6]:
valid_y = [d["intent"] for d in dataset]
torch.tensor([intent2idx[y] for y in valid_y]).shape

torch.Size([3000])

In [7]:
for d in dataset:
    if ',' in d['text']:
        print(d)
        break

{'text': "for friday, add a doctor's appointment to my calendar", 'intent': 'calendar_update', 'id': 'eval-48'}


Load embeddings

In [8]:
embeddings = torch.load(cache_dir / "embeddings.pt")
embeddings.shape

torch.Size([5963, 300])

In [9]:
from torch.nn import Embedding

embed = Embedding.from_pretrained(embeddings, freeze=False)

In [18]:
texts = [d['text'].split() for d in dataset]
length = torch.tensor([len(t) for t in texts])
x = torch.tensor(vocab.encode_batch(texts))
x.shape

torch.Size([3000, 24])

In [11]:
embed(x).shape

torch.Size([3000, 24, 300])

In [12]:
embeddings.shape

torch.Size([5963, 300])

In [35]:
from torch.nn import RNN

embedding_dim = embeddings.shape[1]
hidden_size = 100
num_layers = 3

rnn = RNN(
    input_size=embedding_dim,
    hidden_size=hidden_size,
    num_layers=num_layers,
    batch_first=True,
    bidirectional=True
)
rnn

RNN(300, 100, num_layers=3, batch_first=True, bidirectional=True)

In [36]:
len(dataset)

3000

In [37]:
embed(x).shape

torch.Size([3000, 24, 300])

In [55]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
packed_features = pack_padded_sequence(
    input=embed(x),
    lengths=length.cpu(),
    batch_first=True,
    enforce_sorted=False
)
packed_output_features, _ = rnn(packed_features)
output_features, input_length = pad_packed_sequence(
    sequence=packed_output_features,
    batch_first=True
)
output_features.shape

torch.Size([3000, 24, 200])

In [75]:
forward_features, backward_features = torch.chunk(output_features, 2, dim=2)
last_features = torch.cat((forward_features[:, -1, :], backward_features[:, 0, :]), dim=1)

In [78]:
last_features.shape

torch.Size([3000, 200])

In [27]:
output, hn = rnn(embed(x))
output.shape, hn.shape

(torch.Size([3000, 24, 100]), torch.Size([3, 3000, 100]))

In [16]:
output[:, -1, :].shape

torch.Size([3000, 100])

In [17]:
from torch import nn

fc = nn.Sequential(
    nn.Linear(hidden_size, 10),
    nn.Softmax(dim=1)
)

In [18]:
embedding = embed(x)
output, _ = rnn(embedding)
output = output[:, -1, :]
fc(output).shape

torch.Size([3000, 10])

In [19]:
from src.intent.models import IntentClassifier

In [98]:
clf = IntentClassifier(
    embeddings=embeddings,
    hidden_size=100,
    num_layers=3,
    dropout=.5,
    bidirectional=False,
    num_class=len(intent2idx)
)

In [99]:
clf.rnn(clf.embedding(x))[0].shape

torch.Size([3000, 24, 100])

In [22]:
len(intent2idx)

150

In [23]:
import torch

torch.cuda.is_available()

True

In [43]:
[torch.device(i) for i in range(torch.cuda.device_count())]

[device(type='cuda', index=0),
 device(type='cuda', index=1),
 device(type='cuda', index=2),
 device(type='cuda', index=3),
 device(type='cuda', index=4),
 device(type='cuda', index=5),
 device(type='cuda', index=6),
 device(type='cuda', index=7)]

In [46]:
torch.device("cpu").type

'cpu'

In [47]:
from pytorch_lightning import Trainer, seed_everything
seed_everything(42)
trainer = Trainer(devices=1, accelerator="gpu", deterministic=True)

2022-02-24 01:19:48 | INFO | Global seed set to 42
2022-02-24 01:19:48 | INFO | GPU available: True, used: True
2022-02-24 01:19:48 | INFO | TPU available: False, using: 0 TPU cores
2022-02-24 01:19:48 | INFO | IPU available: False, using: 0 IPUs


In [48]:
loss = nn.CrossEntropyLoss()

In [64]:
pred = clf(x)
valid_y = [d["intent"] for d in dataset]
valid_y = torch.tensor([intent2idx[y] for y in valid_y])
loss(pred, valid_y)

tensor(5.0106, grad_fn=<NllLossBackward0>)

In [82]:
(pred.argmax(dim=1) == valid_y).float().mean()

tensor(0.0057)

In [89]:
pred.sum(dim=1)

tensor([1.0000, 1.0000, 1.0000,  ..., 1.0000, 1.0000, 1.0000],
       grad_fn=<SumBackward1>)