In [16]:
# from google.colab import drive
# drive.mount('/content/drive')

In [1]:
import numpy as np
import pickle
import random
import torch

from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
from torch.utils.data import DataLoader

In [2]:
seed = 1

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print(torch.randn(5))

tensor([ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519])


In [3]:
y_trn = pickle.load(open("../preprocessed_embeddings/elmo_trn_title_labels.pkl", "rb"))
y_val = pickle.load(open("../preprocessed_embeddings/elmo_val_title_labels.pkl", "rb"))
y_tst = pickle.load(open("../preprocessed_embeddings/elmo_tst_title_labels.pkl", "rb"))

In [4]:
x_trn = pickle.load(open("../preprocessed_embeddings/elmo_trn_title.pkl", "rb")).tolist()
x_val = pickle.load(open("../preprocessed_embeddings/elmo_val_title.pkl", "rb")).tolist()
x_tst = pickle.load(open("../preprocessed_embeddings/elmo_tst_title.pkl", "rb")).tolist()

In [5]:
batch_size = 128

## Training set
trn_dataset = []
for i in range(len(x_trn)):
    trn_dataset.append((torch.tensor(x_trn[i]), y_trn[i]))

del x_trn
del y_trn
trn_dataloader = DataLoader(trn_dataset, batch_size)

### Validation set
val_dataset = []
for i in range(len(x_val)):
    val_dataset.append((torch.tensor(x_val[i]), y_val[i]))

del x_val
del y_val
val_dataloader = DataLoader(val_dataset, batch_size)

### Test set
tst_dataset = []
for i in range(len(x_tst)):
    tst_dataset.append((torch.tensor(x_tst[i]), y_tst[i]))

del x_tst
del y_tst
tst_dataloader = DataLoader(tst_dataset, batch_size)

In [6]:
class RNN(nn.Module):
  def __init__(self, embedding_dim, hidden_size, output_size, num_layers=1):
    super(RNN, self).__init__()
    self.embedding_dim = embedding_dim 
    self.hidden_size = hidden_size
    self.output_size = output_size 
    self.num_layers = num_layers
    self.dropout = nn.Dropout(0.5)
    self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, self.output_size) # fully connected layer 
  def init_state(self, batch_size): # create dummy state for (h0, c0)
    return (torch.zeros(self.num_layers, batch_size, self.hidden_size), torch.zeros(self.num_layers, batch_size, self.hidden_size))
  def forward(self, x):
    batch_size = x.shape[0]
    h0, c0 = self.init_state(batch_size)
    h0 = h0.to(device)
    c0 = c0.to(device)
    output, hidden = self.lstm(x, (h0, c0))
    fc_output = self.fc(output)
    fc_output = torch.sum(fc_output, dim=1)
#     print(fc_output.shape)
    return fc_output

In [7]:
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
grad_clip = 1.0

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def batch_train(batch, labels, model, optimizer):
    model.train()
    texts = batch.to(device)
    h0, c0 = model.init_state(batch_size=texts.shape[0])
    h0 = h0.to(device)
    c0 = c0.to(device)
    
    predictions = model(texts)
    targets = labels.to(device)
    cost_function = nn.CrossEntropyLoss()
    h0 = h0.detach() 
    c0 = c0.detach()

    # Cross Entropy Loss 
    loss = cost_function(predictions, targets)
    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(model.parameters(), grad_clip)
    optimizer.step()
    return model, loss.item()

def eval(data_loader, model):
    # set in the eval model, which will trun off the features only used for training, such as droput
    model.eval()
    # records
    val_loss, val_batch = 0, 0 
    correct_pred, total_sample = 0, 0
    # iterate all the mini batches for evaluation
    for batch, label in data_loader:
        batch = batch.to(device)
        label = label.to(device)
        # Forward: prediction
        h0, c0 = model.init_state(batch_size=batch.shape[0])
        h0 = h0.to(device)
        c0 = c0.to(device)
        predictions = model(batch)
        
        pred_label = predictions.argmax(dim=1)
        correct_pred += (pred_label == label).sum().item()
        total_sample += label.size()[0]
        
        cost_function = nn.CrossEntropyLoss()
        h0 = h0.detach() 
        c0 = c0.detach()
        loss = cost_function(predictions, label)

        val_batch += 1
        val_loss += loss.item()
    return (val_loss/val_batch), (correct_pred/total_sample)

In [15]:
model = RNN(embedding_dim=1024, hidden_size=128, output_size=2).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

epoch = 5
total_batch = 0
val_step = 50
best_val_loss = 20
for e in range(epoch):
    if total_batch > 0:
        print('\n')
        total_batch = 0
    for batch, label in trn_dataloader:
        total_batch += 1
        # Update parameters with one batch
        model, loss = batch_train(batch, label, model, optimizer)
        # Compute validation loss after each val_step
        if total_batch % val_step == 0:
            val_loss, val_acc = eval(val_dataloader, model)
            trn_loss, trn_acc = eval(trn_dataloader, model)
              
            print(f"Epoch: {e}, Batch: {total_batch},"
                  f" Trn loss: {trn_loss:.5f}, Trn acc: {trn_acc:.5f},"
                  f" Val loss: {val_loss:.5f}, Val acc: {val_acc:.5f}")

Epoch: 0, Batch: 50, Trn loss: 0.02708, Trn acc: 0.99593, Val loss: 0.02488, Val acc: 0.99469
Epoch: 0, Batch: 100, Trn loss: 0.03575, Trn acc: 0.99535, Val loss: 0.04111, Val acc: 0.99074
Epoch: 0, Batch: 150, Trn loss: 0.01222, Trn acc: 0.99781, Val loss: 0.01935, Val acc: 0.99480
Epoch: 0, Batch: 200, Trn loss: 0.03533, Trn acc: 0.98841, Val loss: 0.07382, Val acc: 0.97154


Epoch: 1, Batch: 50, Trn loss: 0.01130, Trn acc: 0.99726, Val loss: 0.01567, Val acc: 0.99593
Epoch: 1, Batch: 100, Trn loss: 0.00572, Trn acc: 0.99832, Val loss: 0.01022, Val acc: 0.99684
Epoch: 1, Batch: 150, Trn loss: 0.00390, Trn acc: 0.99884, Val loss: 0.01138, Val acc: 0.99639
Epoch: 1, Batch: 200, Trn loss: 0.01513, Trn acc: 0.99377, Val loss: 0.04792, Val acc: 0.98035


Epoch: 2, Batch: 50, Trn loss: 0.01205, Trn acc: 0.99758, Val loss: 0.02662, Val acc: 0.99503
Epoch: 2, Batch: 100, Trn loss: 0.00731, Trn acc: 0.99852, Val loss: 0.01766, Val acc: 0.99639
Epoch: 2, Batch: 150, Trn loss: 0.00431, Trn acc: