In [168]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [169]:
import numpy as np
import pickle
import random
import torch

from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.nn.utils import clip_grad_norm_ as clip_grad_norm
from torch.utils.data import DataLoader

In [170]:
seed = 1

random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
print(torch.randn(5))

tensor([ 0.6614,  0.2669,  0.0617,  0.6213, -0.4519])


In [171]:
y_trn = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_trn_title_labels.pkl", "rb"))
y_val = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_val_title_labels.pkl", "rb"))
y_tst = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_tst_title_labels.pkl", "rb"))

In [172]:
x_trn = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_trn_title.pkl", "rb")).tolist()
x_val = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_val_title.pkl", "rb")).tolist()
x_tst = pickle.load(open("./drive/MyDrive/elmo_embeddings/elmo_tst_title.pkl", "rb")).tolist()

In [173]:
batch_size = 16

## Training set
trn_dataset = []
for i in range(len(x_trn)):
    trn_dataset.append((torch.tensor(x_trn[i]), y_trn[i]))

del x_trn
del y_trn
trn_dataloader = DataLoader(trn_dataset, batch_size)

### Validation set
val_dataset = []
for i in range(len(x_val)):
    val_dataset.append((torch.tensor(x_val[i]), y_val[i]))

del x_val
del y_val
val_dataloader = DataLoader(val_dataset, batch_size)

### Test set
tst_dataset = []
for i in range(len(x_tst)):
    tst_dataset.append((torch.tensor(x_tst[i]), y_tst[i]))

del x_tst
del y_tst
tst_dataloader = DataLoader(tst_dataset, batch_size)

In [174]:
val_iter = iter(val_dataloader)

i = next(val_iter)

print(i[0].shape) # B x L x E 
print(i[1].shape)

torch.Size([16, 7, 1024])
torch.Size([16])


In [188]:
class RNN(nn.Module):
  def __init__(self, embedding_dim, hidden_size, output_size, num_layers=1):
    super(RNN, self).__init__()
    self.embedding_dim = embedding_dim 
    self.hidden_size = hidden_size
    self.output_size = output_size 
    self.num_layers = num_layers
    self.dropout = nn.Dropout(0.5)
    self.lstm = nn.LSTM(input_size=self.embedding_dim, hidden_size=self.hidden_size, num_layers=self.num_layers, batch_first=True)
    self.fc = nn.Linear(self.hidden_size, self.output_size) # fully connected layer 
  def init_state(self, batch_size): # create dummy state for (h0, c0)
    return (torch.zeros(self.num_layers, batch_size, self.hidden_size), torch.zeros(self.num_layers, batch_size, self.hidden_size))
  def forward(self, x):
    batch_size = x.shape[0]
    hidden = self.init_state(batch_size)
    output, hidden = self.lstm(x, hidden)
    fc_output = self.fc(hidden)
    print(fc_output.shape)
    return fc_output

In [197]:
def train(model, trn_dataloader, optimizer, criterion, device=torch.device('cpu')):
    total_batch, correct_pred, total_sample = 0, 0, 0
    model.train()

    for batch, label in trn_dataloader:
        total_batch += 1

        batch = batch.to(device)
        label = label.to(device)

        optimizer.zero_grad()
        h0, c0 = model.init_state(batch.shape[0])
        h0 = h0.to(device)
        c0 = c0.to(device)
        output = model(batch, (h0, c0))
        loss = criterion(output, label)
        loss.backward()
        clip_grad_norm(model.parameters(), 1.0)
        optimizer.step()
        trn_loss = loss.data.item()
        pred_label = output.argmax(dim=1)
        correct_pred += (pred_label == label).sum()
        total_sample += label.size()[0]

        if total_batch % 50 == 0:
            print(f"#{total_batch}: trn loss = {trn_loss:.4f} | trn acc = {(correct_pred/total_sample):.4f}")
        
    return model

def eval(model, dataloader, criterion, device=torch.device('cpu')):
    total_loss, total_batch = 0, 0
    correct_pred, total_sample = 0, 0
    model.eval()

    for batch, label in dataloader:
        batch = batch.to(device)
        label = label.to(device)

        output = model(batch)

        total_batch += 1
        loss = criterion(output, label)
        total_loss += loss.data.item()

        pred_label = output.argmax(dim=1)
        correct_pred += (pred_label == label).sum()
        total_sample += label.size()[0]

    avg_loss = total_loss / total_batch
    acc = correct_pred.item() / total_sample

    return avg_loss, acc

In [196]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

model = RNN(1024, 128, 2, 1).to(device)

optimizer = optim.Adam(
    params=model.parameters(),
    lr=0.001
)

criterion = nn.CrossEntropyLoss()

for e in range(5):
    print(f'Epoch {e}\n') if e == 0 else print(f'\nEpoch {e}\n')
    model = train(model, trn_dataloader, optimizer, criterion, device)
    trn_loss, trn_acc = eval(model, trn_dataloader, criterion, device)
    val_loss, val_acc = eval(model, val_dataloader, criterion, device)
    print(f"\ntrn loss = {trn_loss} trn acc = {trn_acc} val loss = {val_loss} val acc = {val_acc}")

Epoch 0



TypeError: ignored