In [1]:
import sys
sys.path.append("..")

import json
from pathlib import Path
import pickle

import torch
from torch.utils import data

from src.intent.models import IntentClassifier
from src.intent.data import IntentDataset

In [2]:
from types import SimpleNamespace

# Experiment configuration: every tunable value lives in this one dict so that
# later cells read their settings from `args` instead of hard-coding them.
params = {
    'data_dir': Path('../data/intent/'),    # train.json / eval.json are read from here
    'cache_dir': Path('../cache/intent/'),  # vocab.pkl, intent2idx.json, embeddings.pt
    'hidden_size': 64,
    'num_layers': 2,
    'dropout': .1,
    'bidirectional': True,
    # NOTE: the model-building cell sizes the classifier with len(intent2idx),
    # not with this value; keep the two in agreement.
    'num_class': 150,
    'lr': 1e-3,
    'max_len': 64,       # values <= 0 are forwarded to the vocab encoder as None
    'batch_size': 128,
    'num_workers': 0,
    'seed': 42,          # seed for the global torch RNG (see below)
}

# Build the attribute-style view (args.lr, args.max_len, ...) in one step.
args = SimpleNamespace(**params)

# Seed torch's global generator so shuffling, weight initialisation and dropout
# are repeatable under Restart Kernel -> Run All. The trailing semicolon keeps
# the returned Generator object from being echoed as cell output.
torch.manual_seed(args.seed);

In [4]:
def load_data(data_path: Path):
    """Read an intent-classification JSON file into token lists and labels.

    Args:
        data_path: Path to a JSON file holding a list of records, each with a
            "text" string and an "intent" string.

    Returns:
        A ``(texts, intents)`` tuple: ``texts`` is a list of whitespace-split
        token lists and ``intents`` the matching list of intent labels, both
        in file order.

    Raises:
        KeyError: If a record has no "text" or "intent" key.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform's
    # locale encoding. The local is called `records` rather than `data` so it
    # does not shadow the `torch.utils.data` module imported under that name.
    records = json.loads(data_path.read_text(encoding="utf-8"))
    texts = [record["text"].split() for record in records]
    intents = [record["intent"] for record in records]
    return texts, intents

# Load data: raw token lists and intent-label strings. They get their own names
# so that train_x / train_y below are only ever bound to tensors.
train_tokens, train_intents = load_data(args.data_dir / "train.json")
valid_tokens, valid_intents = load_data(args.data_dir / "eval.json")

# Load vocab
# SECURITY: pickle.load can execute arbitrary code while deserialising; only
# load a vocab.pkl that was produced by this project's own preprocessing.
vocab_path = args.cache_dir / "vocab.pkl"
with vocab_path.open("rb") as f:
    vocab = pickle.load(f)
# A non-positive max_len is forwarded to the encoder as None.
max_len = args.max_len if args.max_len > 0 else None
train_x = torch.tensor(vocab.encode_batch(train_tokens, max_len=max_len))
valid_x = torch.tensor(vocab.encode_batch(valid_tokens, max_len=max_len))

# Load intent2idx (label string -> class index). Read as UTF-8 explicitly so
# decoding does not depend on the platform locale.
intent_idx_path = args.cache_dir / "intent2idx.json"
intent2idx = json.loads(intent_idx_path.read_text(encoding="utf-8"))
# A label missing from intent2idx raises KeyError here, which is intentional:
# an unknown class cannot be trained on or evaluated.
train_y = torch.tensor([intent2idx[label] for label in train_intents])
valid_y = torch.tensor([intent2idx[label] for label in valid_intents])

train_dataset = IntentDataset(train_x, train_y)
valid_dataset = IntentDataset(valid_x, valid_y)

# Only the training loader shuffles; validation order stays fixed.
train_dataloader = data.DataLoader(
    dataset=train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.num_workers
)
valid_dataloader = data.DataLoader(
    dataset=valid_dataset,
    batch_size=args.batch_size,
    shuffle=False,
    num_workers=args.num_workers
)

# Load embeddings
# Nothing in this notebook moves the model or the batches off the CPU, so pin
# the loaded tensor to CPU; this also lets a file saved from a GPU session load
# on a CPU-only machine.
embeddings_path = args.cache_dir / "embeddings.pt"
embeddings = torch.load(embeddings_path, map_location="cpu")

# The number of classes comes from the label mapping itself (args.num_class is
# not consulted here).
model = IntentClassifier(
    embeddings=embeddings,
    hidden_size=args.hidden_size,
    num_layers=args.num_layers,
    dropout=args.dropout,
    bidirectional=args.bidirectional,
    num_class=len(intent2idx),
    lr=args.lr
)


In [5]:
# Pull a single mini-batch from the (shuffled) training loader so the forward
# pass can be traced by hand in the cells that follow.
# Presumably x holds the encoded token ids and y the intent indices, as
# yielded by IntentDataset — TODO confirm against src.intent.data.
x, y = next(iter(train_dataloader))
# Show both shapes as the cell output.
x.shape, y.shape

(torch.Size([128, 28]), torch.Size([128]))

In [57]:
# Put the model in training mode and make sure autograd is switched on before
# the manual forward/backward walk-through in the following cells.
model.train()
# This is the cell's last expression, so the notebook echoes the repr of the
# object it returns.
torch.set_grad_enabled(True)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fdf19c4c100>

In [58]:
# Step 1 of the manual forward pass: look up the batch in the model's embedding
# layer.
# NOTE(review): this rebinds `embeddings`, which the data-loading cell set to
# the tensor loaded from embeddings.pt. After this cell the name refers to the
# batch activations instead; a distinct name would avoid the hidden-state trap.
embeddings = model.embedding(x)
embeddings.shape

torch.Size([128, 28, 300])

In [59]:
# Step 2: run the recurrent layer over the embedded batch. Only the first
# return value is kept; the second (presumably the final hidden state — TODO
# confirm against IntentClassifier) is discarded.
output_features, _ = model.rnn(embeddings)
# The recorded last dimension (128) equals 2 * hidden_size, consistent with the
# bidirectional setting in the config.
output_features.shape

torch.Size([128, 28, 128])

In [60]:
# Step 3: keep only the final time step's feature vector for every sequence in
# the batch (index -1 along dimension 1).
# NOTE(review): if the batches are right-padded, position -1 may be a padding
# step, and for a bidirectional RNN the backward half at that position has seen
# only one input — confirm this matches IntentClassifier.forward.
last_features = output_features[:, -1]
last_features.shape

torch.Size([128, 128])

In [61]:
# Step 4: project the per-sequence features through the model's `fc` layer to
# obtain one score per class (the recorded output has 150 columns).
output = model.fc(last_features)
output.shape

torch.Size([128, 150])

In [62]:
# Batch accuracy: the fraction of rows whose highest-scoring class index equals
# the target label.
output.argmax(dim=1).eq(y).float().mean()

tensor(0.0078)

In [63]:
# Step 5: evaluate the model's loss criterion on the class scores and the
# target labels. The recorded grad_fn (NllLossBackward0) suggests a
# cross-entropy / NLL style criterion — TODO confirm in IntentClassifier.
loss = model.loss(input=output, target=y)
loss

tensor(5.0349, grad_fn=<NllLossBackward0>)

In [64]:
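# Uncomment to list every parameter name registered on the model
# (useful for finding names such as 'rnn.weight_ih_l0'):
# for n, p in model.named_parameters():
#     print(n)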

In [65]:
# Snapshot taken before the optimizer step: the first ten entries of row 0 of
# the RNN parameter 'rnn.weight_ih_l0', to compare against the same slice once
# the update has been applied.
model.get_parameter('rnn.weight_ih_l0')[0,:10]

tensor([-0.1062,  0.0032,  0.0097, -0.0351, -0.0981, -0.0112, -0.0721, -0.0804,
         0.0920,  0.0230], grad_fn=<SliceBackward0>)

In [66]:
# Perform one manual optimization step using the loss computed earlier.
# configure_optimizers() is defined on the project model; this cell assumes it
# returns a single optimizer object — TODO confirm in IntentClassifier.
optimizer = model.configure_optimizers()
# Clear any gradients left over from earlier backward passes.
optimizer.zero_grad()

# Backpropagate through the graph built by the manual forward pass.
# NOTE(review): re-running this cell without first recomputing `loss` is
# expected to fail, since backward() is called without retain_graph.
loss.backward()

# Apply the parameter update.
optimizer.step()

In [67]:
# Same slice as the pre-step snapshot, read again after optimizer.step() to
# confirm the weights actually moved.
model.get_parameter('rnn.weight_ih_l0')[0,:10]

tensor([-0.1052,  0.0022,  0.0087, -0.0361, -0.0971, -0.0102, -0.0711, -0.0794,
         0.0910,  0.0240], grad_fn=<SliceBackward0>)