In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import IMDB
from torchtext import data
from torchtext.vocab import GloVe

import os
import json
import time
import random
import copy
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, f1_score, classification_report

from IPython.display import Image

from seq_label import SeqLabel


# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

# Seed every random number generator this notebook imports so that a
# "Restart Kernel -> Run All" starts from the same random state each time.
SEED = 1
random.seed(SEED)
np.random.seed(SEED)  # NumPy's global RNG was previously left unseeded
torch.manual_seed(SEED)
# Request deterministic cuDNN kernels and switch off the cuDNN autotuner,
# which may otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

### Common Parameters

In [0]:
# --- Settings shared by every model trained in this notebook ---

# Optimisation
learning_rate = 1e-3   # step size handed to Adam for the LSTM-based models
epochs = 200           # number of training epochs passed to task.train
batch_size = 16        # batch size handed to imdb.create_data_loader

# Data
split_ratio = 0.8      # fraction of the training data kept for training
# Embedding dimension of each token vector (the GloVe output dimension),
# not the number of words in the vocabulary.
embed_dim = 300

## IMDb data

In [11]:
# Project-local wrapper around the IMDb sentiment dataset.
from imdb import IMDB_dataset

# Create the dataset object, load its splits (printing their sizes) and
# build the vocabulary with `embed_dim`-dimensional vectors.
imdb = IMDB_dataset(split_ratio, SEED)
imdb.load(verbose=True)
imdb.build_vocab(embed_dim)

# create_data_loader yields three loaders, unpacked in the order
# train / validation / test.
loaders = imdb.create_data_loader(batch_size, device)
train_loader, valid_loader, test_loader = loaders

# Size of the text vocabulary; passed to the models as their input dimension.
vocab_len = len(imdb.TEXT.vocab)

Training data size:    20000
Validation data size:  5000
Test data size:        25000


# LSTM

#### LSTM Parameters

In [0]:
# --- Architecture settings for the LSTM models ---

# Sizes
hidden_dim = 64        # number of hidden nodes
output_dim = 1         # number of output nodes
layers = 1             # number of LSTM cells to be stacked

# Switches
bidirectional = True   # whether the LSTM is bidirectional
layernorm = True       # whether LayerNorm is used

### Our implementation

In [31]:
## Our implementation

from seq_label import LSTMSeqLabel

# Initializing model: the project's own LSTM sequence labeller, built from the
# shared hyperparameters and the pretrained embedding weights held by `imdb`.
model = LSTMSeqLabel(vocab_len, embed_dim, hidden_dim, output_dim,
                      imdb.pretrained_weights, layers, bidirectional,
                      layernorm)
model.to(device)

print('Model parameters: ', model.count_parameters())

# Initializing optimizer and loss.
# BCEWithLogitsLoss applies the sigmoid itself, so the model is expected to
# emit raw logits.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_criterion = nn.BCEWithLogitsLoss()

# Initializing task: bundles model, optimizer, loss and device for
# training and evaluation.
task = SeqLabel(model, optimizer, loss_criterion, device)

# Training
freq = 5    # epoch interval to calculate F1 score and save models
# Trailing slash added so this path follows the same convention as every
# other output directory in the notebook; without it a trainer that
# concatenates out_dir with a file name would write next to, not inside,
# the "lstm" directory.
out_dir = "results/seq_label/lstm/"
# out_dir = "/content/drive/My Drive/colab/seq_label/"

Model parameters:  93505


In [0]:
# Train the task configured in the previous cell for `epochs` epochs.
# The call returns a pair that is unpacked into `model` and `stats`.
model, stats = task.train(
    epochs,
    train_loader,
    valid_loader,
    freq,
    out_dir,
)

### PyTorch implementation

In [30]:
## PyTorch implementation

from seq_label import PyTorchBaseline

# Where and how often training reports and saves.
freq = 5    # epoch interval to calculate F1 score and save models
out_dir = "results/seq_label/pytorch/"
# out_dir = "/content/drive/My Drive/colab/seq_label/"

# Baseline model, built from the same hyperparameters as the custom LSTM
# except that no `layernorm` flag is passed.
# NOTE: this rebinds `model`, `optimizer` and `task` from the previous section.
model = PyTorchBaseline(
    vocab_len,
    embed_dim,
    hidden_dim,
    output_dim,
    imdb.pretrained_weights,
    layers,
    bidirectional,
)
model.to(device)

print('Model parameters: ', model.count_parameters())

# Adam on all model parameters, with binary cross-entropy computed on logits.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
loss_criterion = nn.BCEWithLogitsLoss()

# Bundle everything into the sequence-labelling task used for train/evaluate.
task = SeqLabel(model, optimizer, loss_criterion, device)

Model parameters:  93761


In [0]:
# Train the task configured in the previous cell for `epochs` epochs.
# The call returns a pair that is unpacked into `model` and `stats`.
model, stats = task.train(
    epochs,
    train_loader,
    valid_loader,
    freq,
    out_dir,
)

### Evaluate

In [0]:
# Testing
# `task` is whichever SeqLabel was constructed most recently; when the cells
# run top to bottom that is the PyTorch baseline, not the custom LSTM.
f1_test = task.evaluate(
    test_loader,
    verbose=True,
)
print('F1 score: ', f1_test)

# Transformer

### Our implementation

In [33]:
from seq_label import TransformerSeqLabel
from transformer import NoamOpt

# Where and how often training reports and saves.
freq = 5    # epoch interval to calculate F1 score and save models
out_dir = "results/seqLabel/transformer/"

# NOTE(review): an earlier size note here read "117k", but the recorded output
# of this cell reports 171137 parameters -- confirm which is current.
model = TransformerSeqLabel(
    in_dim=vocab_len,
    out_dim=1,
    N=1,
    heads=4,
    embed_dim=embed_dim,
    model_dim=128,
    ff_dim=256,
    key_dim=32,
    value_dim=32,
    batch_first=False,
    pretrained_vec=imdb.pretrained_weights,
)
model = model.to(device)

print('Model parameters: ', model.count_parameters())

# Adam starts at lr=0 because NoamOpt wraps it and drives the optimisation.
# Presumably the positional arguments are (model size, factor, warmup steps,
# optimizer) -- TODO confirm against transformer.NoamOpt.
# Plain alternative: torch.optim.Adam(model.parameters(), lr=learning_rate)
base_optimizer = torch.optim.Adam(
    model.parameters(),
    lr=0,
    betas=(0.9, 0.98),
    eps=1e-9,
)
optimizer = NoamOpt(model.model_dim, 1, 2000, base_optimizer)
loss_criterion = nn.BCEWithLogitsLoss()

# Bundle everything into the sequence-labelling task used for train/evaluate.
task = SeqLabel(model, optimizer, loss_criterion, device)

Model parameters:  171137


In [0]:
# The Transformer is trained for fewer epochs than the shared `epochs` setting.
transformer_epochs = 50
model, stats = task.train(
    transformer_epochs,
    train_loader,
    valid_loader,
    freq,
    out_dir,
)

### Evaluate

In [0]:
# Testing
# The recorded output shows that evaluate() returns a 2-tuple here; the first
# entry lines up with the reported F1, the meaning of the second is not shown
# in this notebook -- TODO confirm against SeqLabel.evaluate.
f1_test = task.evaluate(
    test_loader,
    verbose=True,
)
print('F1 score: ', f1_test)

Confusion Matrix: 
 [[10179  2321]
 [ 1361 11139]]
Classification Report: 
               precision    recall  f1-score   support

         0.0       0.88      0.81      0.85     12500
         1.0       0.83      0.89      0.86     12500

    accuracy                           0.85     25000
   macro avg       0.85      0.85      0.85     25000
weighted avg       0.85      0.85      0.85     25000

F1 score:  (0.8581664098613251, 0.4305374167611678)


In [0]:
# Optional Google Colab helper, left disabled: archive a results directory and
# download the archive through the browser. Uncomment all three lines to use.
# NOTE(review): only results/seqLabel/ is archived, which is where the
# Transformer run writes; the LSTM runs write under results/seq_label/.
# !zip -r results-seq_label.zip results/seqLabel/

# from google.colab import files
# files.download('results-seq_label.zip')