In [1]:
pip install tqdm torch transformers



In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import sys
from google.colab import drive
drive.mount('/content/drive')

root_folder = "/content/drive/My Drive/final/"
dataset_folder = "/content/drive/My Drive/final/data/"
sys.path.append(root_folder)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
from torch import nn, optim
from tqdm import tqdm
import numpy as np
import torch as th

from model import NovelModel, NaiveBertModel
from etl import *

In [5]:
device = th.device("cuda" if th.cuda.is_available() else "cpu")
print(device)
list_to_device = lambda th_obj: [tensor.to(device) for tensor in th_obj]

cuda


In [6]:
def loss_fn(pred, target):
    if output_shape == 5:
        criterion = nn.CrossEntropyLoss()
    else:
        criterion = nn.MSELoss()
    loss = criterion(pred, target)
    return loss

In [7]:
'''Evaluation'''
def evaluate_model(model, X_val, y_val, batch_size=2):
  val_size = len(X_val)
  losses = []
  accuracies = []
  t = tqdm(range(0, val_size // batch_size))
  for i in t:
    X = X_val[i * batch_size: (i + 1) * batch_size]
    y = y_val[i * batch_size: (i + 1) * batch_size]
    val_tokenized = tokenize(X, max_length=128)
    val_tokenized, y = list_to_device((val_tokenized, y))
    val_predict = model.forward(val_tokenized)
    losses.append(loss_fn(val_predict, y) * y.shape[0])
    accuracies.append(th.eq(val_predict.argmax(dim=1,keepdim=False),y).sum() / y.size()[0] * y.shape[0])

  return np.sum(losses)/val_size, np.sum(accuracies)/val_size

In [8]:
bert_model_link = 'bert-base-uncased'
output_shape = 5
lstm_hidden_size = 256
num_lstm_layers = 1
dropout = .5
residual = False

In [9]:
# Load data, train/val split
text, labels = load_data(root_folder)
text = text[:200]
labels = labels[:200]
X_train, y_train, X_val, y_val = partition(text, labels)

In [13]:
# Model params
optimizer_class = optim.Adam
lr = 1e-3
epochs = 2
batch_size = 16 # do not use 1 lol

In [11]:
model = NaiveBertModel(bert_model_link=bert_model_link, output_shape=output_shape)
model.to(device)
for params in model.bert.parameters():
    params.require_grad = False
optimizer = optimizer_class(model.parameters(), lr=lr)

In [12]:
evaluate_model(model, X_val, y_val, batch_size=2)

100%|██████████| 20/20 [00:21<00:00,  1.09s/it]


(tensor(1.7257, device='cuda:0', grad_fn=<DivBackward0>),
 tensor(0.1250, device='cuda:0'))

In [14]:
'''Training'''
model.train()
training_losses = []
val_losses = []
val_accs = []
losses = []
for epoch in range(epochs):
    indices = np.random.permutation(range(len(X_train)))
    t = tqdm(range(0, len(X_train) // batch_size))

    for i in t:
        batch_indices = indices[i * batch_size: (i + 1) * batch_size]
        batch_entries = [X_train[b_i] for b_i in batch_indices]
        batch_labels = y_train[batch_indices]
        batch_tokenized = tokenize(batch_entries, max_length=128)
        batch_labels, batch_tokenized = list_to_device((batch_labels, batch_tokenized))

        batch_predict = model.forward(batch_tokenized)
        batch_loss = loss_fn(batch_predict, batch_labels)
        losses.append(batch_loss.item())

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        if i % 4 == 0:
            t.set_description(f"Epoch: {epoch} Iteration: {i} Loss: {np.mean(losses[-4:])}")

    val_loss, val_acc = evaluate_model(model, X_val, y_val)
    val_losses.append(val_loss)
    val_accs.append(val_acc)
    training_losses.append(np.mean(training_losses[-4:]))

  0%|          | 0/10 [00:00<?, ?it/s]

RuntimeError: ignored

In [None]:
# # Initialize model
# model = LanguageModel(dropout=dropout, bert_model_link=bert_model_link, output_shape=output_shape)
# model.to(device)
# for params in model.bert.parameters():
#     params.require_grad = False
# optimizer = optimizer_class(model.parameters(), lr=lr)