In [91]:
from imdb_preprocess import get_iterator
from bilstm_model import BiLSTMModel

In [92]:
import torch
import torch.nn as nn
import torch.optim as optim

torch.backends.cudnn.deterministic = True

In [93]:
train_iterator, valid_iterator, test_iterator, text = get_iterator()

In [94]:
VOCAB_SIZE = len(text.vocab)
EMBEDDING_DIM = 100
HIDDEN_DIM = 256
N_CLASSES = 2
DROPOUT = 0.5
pretrained_embeddings = text.vocab.vectors

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [95]:
UNK_IDX = text.vocab.stoi[text.unk_token]
PAD_IDX = text.vocab.stoi[text.pad_token]

model = BiLSTMModel(VOCAB_SIZE,
                    EMBEDDING_DIM,
                    HIDDEN_DIM,
                    PAD_IDX,
                    N_CLASSES,
                    pretrained_embeddings,
                    DROPOUT)

  "num_layers={}".format(dropout, num_layers))


In [96]:
print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 3,234,410 trainable parameters


In [97]:
model.embed.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.embed.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [98]:
print(model.embed.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0382, -0.2449,  0.7281,  ..., -0.1459,  0.8278,  0.2706],
        ...,
        [-0.0980,  0.5190, -0.3514,  ...,  0.4989,  2.1672, -0.5434],
        [-1.0313,  1.4758, -0.4346,  ..., -1.1830,  1.3063, -0.8754],
        [-0.2333,  0.2726,  0.7370,  ..., -0.4732, -0.1464,  0.0424]])


In [99]:
def get_num_corrects(predictions, targets):
#     num_corrects = (torch.max(predictions, 1)[1].view(targets.size()).data == targets.data).sum()
    num_corrects = (predictions.max(1)[1].data == targets.data).sum()
    return num_corrects


def train(model, iterator, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.train()

    for batch in iterator:
        optimizer.zero_grad()

        # [seq_len * bs]
        text, text_lengths = batch.text
        target = torch.autograd.Variable(batch.label).long()

        predictions = model(text, text_lengths)
        loss = criterion(predictions, target)
          
        num_corrects = get_num_corrects(predictions, target)
        acc = 100.0 * num_corrects / text.size(1)

        loss.backward()

        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)


def evaluate(model, iterator, criterion):
    epoch_loss = 0
    epoch_acc = 0

    model.eval()

    with torch.no_grad():
        for batch in iterator:
            text, text_lengths = batch.text

            predictions = model(text, text_lengths)
            target = torch.autograd.Variable(batch.label).long()

            loss = criterion(predictions, target)

            num_corrects = get_num_corrects(predictions, target)
            acc = 100.0 * num_corrects / text.size(1)

            epoch_loss += loss.item()
            epoch_acc += acc.item()

    return epoch_loss / len(iterator), epoch_acc / len(iterator)


def get_deivce():
    return 'cuda' if torch.cuda.is_available() else 'cpu'

In [100]:
N_EPOCHS = 5

best_valid_loss = float('inf')

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss().to(get_deivce())
model = model.to(get_deivce())

for epoch in range(N_EPOCHS):

    train_loss, train_acc = train(model, train_iterator, optimizer, criterion)
    valid_loss, valid_acc = evaluate(model, valid_iterator, criterion)

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), './bi_lstm_glove_100dim_model.pt')

    print(f'Train Loss: {train_loss} | Train Acc: {train_acc}%')
    print(f'Val Loss: {valid_loss} |  Val Acc: {valid_acc}%')

Train Loss: 0.6368060878811092 | Train Acc: 63.565693430656935%
Val Loss: 0.5752079135785668 |  Val Acc: 68.94067796610169%
Train Loss: 0.46949362488341156 | Train Acc: 77.33941605839416%
Val Loss: 0.4135060675315938 |  Val Acc: 80.13559322033899%
Train Loss: 0.34502649568293214 | Train Acc: 84.66423357664233%
Val Loss: 0.35530106912730103 |  Val Acc: 85.0%
Train Loss: 0.23123428736724994 | Train Acc: 90.35766423357664%
Val Loss: 0.3405611390026949 |  Val Acc: 86.33050847457628%
Train Loss: 0.1467401851648397 | Train Acc: 94.01094890510949%
Val Loss: 0.34862592386997354 |  Val Acc: 84.66101694915254%


FileNotFoundError: [Errno 2] No such file or directory: 'bi_lstm_glove_100dim_model'

In [101]:
model.load_state_dict(torch.load('./bi_lstm_glove_100dim_model.pt'))

test_loss, test_acc = evaluate(model, test_iterator, criterion)

print(f'Test Loss: {test_loss:} | Test Acc: {test_acc:f}%')

Test Loss: 0.35645644507749613 | Test Acc: 85.531969%
