In [284]:
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchtext.vocab as vocab
from torchtext.data import get_tokenizer
from torchtext.datasets import SST2
from torch.utils.data import DataLoader
from torchtext.vocab import build_vocab_from_iterator
import nlpaug.augmenter.word as naw

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Execute the companion notebook inside this kernel. Presumably this is what defines
# the `RNN` class instantiated further down — confirm against models/RNN.ipynb.
%run models/RNN.ipynb

In [285]:
# SST-2 train / dev splits; samples are unpacked as (text, label) pairs further down.
train_iter = SST2(split="train")
val_iter = SST2(split="dev")
# test_iter = SST2(split="test")

# spaCy-backed tokenizer, shared by vocabulary construction and the collate functions.
tokenizer = get_tokenizer('spacy')
def yield_tokens(data_iter):
    """Lazily yield the token list of every (text, label) sample in ``data_iter``.

    The label is ignored; each text is run through the module-level ``tokenizer``.
    """
    yield from (tokenizer(sentence) for sentence, _ in data_iter)

# Build the token -> index mapping from the training split only.
# "<pad>" is registered explicitly because the collate functions pad batches with
# vocabulary["<pad>"]; if it is not a real entry, that lookup silently falls back to
# the default index and padding becomes indistinguishable from "<unk>".
vocabulary = build_vocab_from_iterator(yield_tokens(train_iter), specials=["<unk>", "<pad>"])
# Tokens missing from the vocabulary resolve to "<unk>".
vocabulary.set_default_index(vocabulary["<unk>"])

In [286]:
def yield_tokens_adv(data_iter):
    """Generate one token list per sample of ``data_iter``.

    Every sample must unpack into exactly two fields, ``(text, label)``; only the
    text is used and it is tokenized with the module-level ``tokenizer``.
    """
    for sample in data_iter:
        sentence, _ = sample
        tokens = tokenizer(sentence)
        yield tokens

# Rebuild the vocabulary from the training split; this assignment replaces any earlier
# `vocabulary` object in the kernel.
# "<pad>" is registered explicitly because the collate functions pad batches with
# vocabulary["<pad>"]; if it is not a real entry, that lookup silently falls back to
# the default index and padding becomes indistinguishable from "<unk>".
vocabulary = build_vocab_from_iterator(yield_tokens_adv(train_iter), specials=["<unk>", "<pad>"])
# Tokens missing from the vocabulary resolve to "<unk>".
vocabulary.set_default_index(vocabulary["<unk>"])

In [287]:
# Pretrained GloVe "6B" embeddings with 100 dimensions per token.
glove_vectors = vocab.GloVe(name='6B', dim=100)
# Look up one vector per vocabulary token, in the order returned by get_itos(), and
# store the result on the vocabulary object as `vectors`.
vocabulary.vectors = glove_vectors.get_vecs_by_tokens(vocabulary.get_itos())

In [288]:
BATCH_SIZE = 128  # batch size passed to every DataLoader below
MAX_VOCAB_SIZE = 400000  # NOTE(review): not referenced anywhere in the visible cells
aug = naw.AntonymAug()  # nlpaug word-level augmenter used by collate_adv

def collate(x):
    """Turn a list of (text, label) samples into one padded batch.

    Args:
        x: Sequence of samples where ``item[0]`` is the raw text and ``item[1]`` is
            the integer label.

    Returns:
        Tuple ``(tokens, labels)``. ``tokens`` is a ``(batch, max_len)`` int64 tensor of
        vocabulary indices, right-padded with ``vocabulary["<pad>"]``. ``labels`` is a
        ``(batch, 1)`` tensor built from the sample labels.
    """
    # The dtype is forced to int64: torch.tensor([]) defaults to float32, so a text that
    # tokenizes to nothing could otherwise turn the whole padded batch into floats.
    batch = [
        torch.tensor([vocabulary[word] for word in tokenizer(item[0])], dtype=torch.long)
        for item in x
    ]
    l = torch.tensor([item[1] for item in x]).unsqueeze(dim=1)
    return nn.utils.rnn.pad_sequence(batch, padding_value=vocabulary["<pad>"], batch_first=True), l

def collate_adv(x):
    """Turn a list of (text, label) samples into one padded, augmented batch.

    Each text is first passed through ``aug.augment`` and the first element of its
    result is tokenized; labels are left untouched.

    Args:
        x: Sequence of samples where ``item[0]`` is the raw text and ``item[1]`` is
            the integer label.

    Returns:
        Tuple ``(tokens, labels)``. ``tokens`` is a ``(batch, max_len)`` int64 tensor of
        vocabulary indices, right-padded with ``vocabulary["<pad>"]``. ``labels`` is a
        ``(batch, 1)`` tensor built from the sample labels.
    """
    batch = []
    for item in x:
        # NOTE(review): `[0]` assumes augment() returns a list of strings — confirm
        # against the installed nlpaug version.
        sentence = aug.augment(item[0])[0]
        indices = [vocabulary[word] for word in tokenizer(sentence)]
        # The dtype is forced to int64: torch.tensor([]) defaults to float32, so an empty
        # token list could otherwise turn the whole padded batch into floats.
        batch.append(torch.tensor(indices, dtype=torch.long))
    l = torch.tensor([item[1] for item in x]).unsqueeze(dim=1)
    return nn.utils.rnn.pad_sequence(batch, padding_value=vocabulary["<pad>"], batch_first=True), l

In [289]:
# The collate functions are handed to DataLoader directly; wrapping them in a lambda is
# redundant and would make the loaders unpicklable if worker processes were enabled.
train_loader = DataLoader(train_iter, batch_size=BATCH_SIZE, collate_fn=collate, drop_last=True)
val_loader = DataLoader(val_iter, batch_size=BATCH_SIZE, collate_fn=collate, drop_last=True)
# NOTE(review): unlike val_loader, this loader keeps the final partial batch, so the two
# validation loaders do not cover exactly the same samples — confirm this is intended.
val_adv_loader = DataLoader(val_iter, batch_size=BATCH_SIZE, collate_fn=collate_adv)

In [323]:
EMBEDDING_DIM = 100  # same value as dim=100 used for the GloVe vectors
HIDDEN_DIM = 100
OUTPUT_DIM = 2  # train/evaluate one-hot the labels with num_classes=2

# RNN is not defined in the visible cells; presumably it comes from
# `%run models/RNN.ipynb` — confirm the constructor signature there.
model = RNN(vocabulary, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

In [324]:
def count_parameters(model):
    """Return the total element count of all parameters that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report the trainable-parameter count with thousands separators.
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 40,802 trainable parameters



## Train the Model

In [325]:
# Move the model to the target device (as float32) before constructing the optimizer,
# which is the order the torch.optim documentation recommends.
model = model.to(device).float()
optimizer = optim.Adam(model.parameters(), lr=5e-3)
# The loss module only needs to be moved to the device once.
criterion = nn.CrossEntropyLoss().to(device)

In [326]:
def binary_accuracy(preds, y):
    """Fraction of entries where the thresholded prediction equals the target.

    ``preds`` is squashed with a sigmoid and rounded to the nearest integer before
    being compared element-wise with ``y``. The number of matches is divided by the
    size of the first dimension of the comparison result.
    """
    hard_preds = preds.sigmoid().round()
    # Cast the boolean matches to float so the ratio below is a float tensor.
    matches = hard_preds.eq(y).float()
    return matches.sum() / len(matches)

In [327]:
def train(model, iterator, optimizer, criterion):
    """Run one optimisation pass over ``iterator`` and report mean loss and accuracy.

    Args:
        model: Module whose forward pass returns a 2-tuple; the first element is used
            as the class logits. Assumes logits are ``(batch, 2)`` — TODO confirm
            against the RNN definition.
        iterator: Iterable of ``(inputs, labels)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        criterion: Loss called as ``criterion(logits, one_hot_labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)`` as floats, averaged over batches (every batch
        carries the same weight regardless of its size).

    Raises:
        ValueError: If ``iterator`` yields no batches.
    """
    epoch_loss = 0.0
    epoch_acc = 0.0
    n_batches = 0

    model.train()
    for inputs, labels in iterator:
        n_batches += 1
        inputs = inputs.to(device)
        labels = labels.view(-1).to(device)  # flatten labels to 1-D

        optimizer.zero_grad()
        predictions, _ = model(inputs)
        predictions = predictions.float()
        loss = criterion(predictions, F.one_hot(labels, num_classes=2).float())
        # Compare the predicted class indices with the labels directly. Feeding argmax
        # indices through a sigmoid + round only works because sigmoid(0) == 0.5
        # happens to round down to 0.
        acc = (predictions.argmax(dim=-1) == labels).float().mean()

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    if n_batches == 0:
        raise ValueError("iterator yielded no batches")
    return epoch_loss / n_batches, epoch_acc / n_batches

In [328]:
def evaluate(model, iterator, criterion):
    """Score ``model`` on ``iterator`` without updating it.

    The model is switched to eval mode and all forward passes run under
    ``torch.no_grad()``.

    Args:
        model: Module whose forward pass returns a 2-tuple; the first element is used
            as the class logits. Assumes logits are ``(batch, 2)`` — TODO confirm
            against the RNN definition.
        iterator: Iterable of ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(logits, one_hot_labels)``.

    Returns:
        ``(mean_loss, mean_accuracy)`` as floats, averaged over batches (every batch
        carries the same weight regardless of its size).

    Raises:
        ValueError: If ``iterator`` yields no batches.
    """
    epoch_loss = 0.0
    epoch_acc = 0.0
    n_batches = 0
    model.eval()

    with torch.no_grad():
        for inputs, labels in iterator:
            n_batches += 1
            inputs = inputs.to(device)
            labels = labels.view(-1).to(device)  # flatten labels to 1-D
            predictions, _ = model(inputs)
            predictions = predictions.float()
            loss = criterion(predictions, F.one_hot(labels, num_classes=2).float())
            # Compare the predicted class indices with the labels directly. Feeding
            # argmax indices through a sigmoid + round only works because
            # sigmoid(0) == 0.5 happens to round down to 0.
            acc = (predictions.argmax(dim=-1) == labels).float().mean()

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    if n_batches == 0:
        raise ValueError("iterator yielded no batches")
    return epoch_loss / n_batches, epoch_acc / n_batches

In [329]:
import time

def epoch_time(start_time, end_time):
    """Split the span between two timestamps (in seconds) into whole minutes and seconds.

    Returns a ``(minutes, seconds)`` tuple of ints; fractional seconds are truncated.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [331]:
from tqdm import tqdm

N_EPOCHS = 20

# Lowest validation loss seen so far. It starts at +inf so that the first epoch always
# counts as an improvement and writes a checkpoint.
best_valid_loss = float('inf')

# Per-epoch history, consumed by the plotting cells.
train_losses = []
val_losses = []
train_accuracy = []
val_accuracy = []

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss, train_acc = train(model, train_loader, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, val_loader, criterion)

    train_losses.append(train_loss)
    train_accuracy.append(train_acc)
    val_losses.append(valid_loss)
    val_accuracy.append(valid_acc)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    # Checkpoint only when the validation loss improves. The comparison must be `<`:
    # with `>` nothing can ever exceed the initial +inf, so no checkpoint is saved.
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'models/rnn.pt')

    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. Acc: {valid_acc*100:.2f}%')

Epoch: 01 | Epoch Time: 0m 7s
	Train Loss: 0.434 | Train Acc: 79.59%
	 Val. Loss: 0.515 |  Val. Acc: 74.22%
Epoch: 02 | Epoch Time: 0m 7s
	Train Loss: 0.443 | Train Acc: 79.82%
	 Val. Loss: 0.486 |  Val. Acc: 75.26%
Epoch: 03 | Epoch Time: 0m 7s
	Train Loss: 0.422 | Train Acc: 80.98%
	 Val. Loss: 0.486 |  Val. Acc: 76.17%
Epoch: 04 | Epoch Time: 0m 7s
	Train Loss: 0.403 | Train Acc: 81.97%
	 Val. Loss: 0.489 |  Val. Acc: 74.74%
Epoch: 05 | Epoch Time: 0m 7s
	Train Loss: 0.389 | Train Acc: 82.79%
	 Val. Loss: 0.490 |  Val. Acc: 76.17%
Epoch: 06 | Epoch Time: 0m 7s
	Train Loss: 0.379 | Train Acc: 83.33%
	 Val. Loss: 0.484 |  Val. Acc: 76.30%
Epoch: 07 | Epoch Time: 0m 7s
	Train Loss: 0.371 | Train Acc: 83.61%
	 Val. Loss: 0.478 |  Val. Acc: 76.69%
Epoch: 08 | Epoch Time: 0m 7s
	Train Loss: 0.367 | Train Acc: 83.79%
	 Val. Loss: 0.504 |  Val. Acc: 74.61%
Epoch: 09 | Epoch Time: 0m 7s
	Train Loss: 0.383 | Train Acc: 83.11%
	 Val. Loss: 0.501 |  Val. Acc: 75.26%
Epoch: 10 | Epoch Time: 0m 8

In [None]:
# Loss curves: one line per split, with the epoch index on the x-axis.
for history, split in ((train_losses, 'train'), (val_losses, 'val')):
    plt.plot(history, label=split)
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.grid()
plt.show()

In [None]:
# Accuracy curves: one line per split, with the epoch index on the x-axis.
accuracy_curves = {'train': train_accuracy, 'val': val_accuracy}
for split_name, values in accuracy_curves.items():
    plt.plot(values, label=split_name)
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
plt.legend()
plt.grid()
plt.show()