In [168]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [169]:
import numpy as np
import pickle
import random
import torch

from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
from torch.utils.data import DataLoader

In [170]:
seed = 1

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print(torch.randn(5))

tensor([ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519])


In [171]:
y_trn = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_trn_title_labels.pkl", "rb"))
y_val = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_val_title_labels.pkl", "rb"))
y_tst = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_tst_title_labels.pkl", "rb"))

In [172]:
x_trn = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_trn_title.pkl", "rb")).tolist()
x_val = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_val_title.pkl", "rb")).tolist()
x_tst = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_tst_title.pkl", "rb")).tolist()

In [173]:
batch_size = 16

## Training set
trn_dataset = []
for i in range(len(x_trn)):
    trn_dataset.append((torch.tensor(x_trn[i]), y_trn[i]))

del x_trn
del y_trn
trn_dataloader = DataLoader(trn_dataset, batch_size)

### Validation set
val_dataset = []
for i in range(len(x_val)):
    val_dataset.append((torch.tensor(x_val[i]), y_val[i]))

del x_val
del y_val
val_dataloader = DataLoader(val_dataset, batch_size)

### Test set
tst_dataset = []
for i in range(len(x_tst)):
    tst_dataset.append((torch.tensor(x_tst[i]), y_tst[i]))

del x_tst
del y_tst
tst_dataloader = DataLoader(tst_dataset, batch_size)

In [174]:
val_iter = iter(val_dataloader)

i = next(val_iter)

print(i[0].shape) # B x L x E 
print(i[1].shape)

torch.Size([16, 7, 1024])
torch.Size([16])


In [228]:
class RNN(nn.Module):
  def __init__(self, embedding_dim, hidden_size, output_size, num_layers=1):
    super(RNN, self).__init__()
    self.embedding_dim = embedding_dim 
    self.hidden_size = hidden_size
    self.output_size = output_size 
    self.num_layers = num_layers
    self.dropout = nn.Dropout(0.5)
    self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, self.output_size) # fully connected layer 
  def init_state(self, batch_size): # create dummy state for (h0, c0)
    return (torch.zeros(self.num_layers, batch_size, self.hidden_size), torch.zeros(self.num_layers, batch_size, self.hidden_size))
  def forward(self, x):
    batch_size = x.shape[0]
    h0, c0 = self.init_state(batch_size)
    h0 = h0.to(device)
    c0 = c0.to(device)
    output, hidden = self.lstm(x, (h0, c0))
    fc_output = self.fc(output)
    print(fc_output.shape)
    return fc_output

In [231]:
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
grad_clip = 1.0

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

def batch_train(batch, labels, model, optimizer):
    model.train()
    texts = batch.to(device)
    h0, c0 = model.init_state(batch_size=texts.shape[0])
    h0 = h0.to(device)
    c0 = c0.to(device)
    
    predictions = model(texts)
    targets = labels.to(device)
    cost_function = nn.CrossEntropyLoss()
    h0 = h0.detach() 
    c0 = c0.detach()

    # Cross Entropy Loss 
    loss = cost_function(predictions, targets.unsqueeze(0))
    optimizer.zero_grad()
    loss.backward()
    clip_grad_norm(model.parameters(), grad_clip)
    optimizer.step()
    return model, loss.item()

def eval(data_loader, model):
    # set in the eval model, which will trun off the features only used for training, such as droput
    model.eval()
    # records
    val_loss = 0
    val_batch = 0 
    # iterate all the mini batches for evaluation
    for batch, label in data_loader:
        texts = batch.to(device)
        targets = label.to(device)
        # Forward: prediction
        h0, c0 = model.init_state(batch_size=texts.shape[0])
        h0 = h0.to(device)
        c0 = c0.to(device)
        predictions = model(texts)
        cost_function = nn.CrossEntropyLoss()
        h0 = h0.detach() 
        c0 = c0.detach()
        loss = cost_function(predictions, targets.unsqueeze(0))

        val_batch += 1
        val_loss += loss.item()
    return (val_loss/val_batch)

model = RNN(embedding_dim=1024, hidden_size=128, output_size=1).to(device)
#optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)

epoch, val_step = 10, 250
total_batch = 0
best_val_loss = 20
for e in range(epoch):
    for batch, label in trn_dataloader:
        total_batch += 1
        # Update parameters with one batch
        model, loss = batch_train(batch, label, model, optimizer)
        # Compute validation loss after each val_step
        if total_batch % val_step == 0:
            val_loss = eval(val_loader, model)
            if val_loss < best_val_loss:
              best_val_loss = val_loss
              torch.save(model.state_dict(), "./drive/MyDrive/best_models/best_rnn_weights.pth")
              
            print("Epoch: {}, Batch: {}, Current val loss: {}. Best val loss: {}".format(e, b, val_loss, best_val_loss))


torch.Size([16, 6, 1])


ValueError: ignored