In [7]:
from models import BaselineClassifier
from data import load_imdb, load_imdb_synth, load_xor
import torch
import torch.nn as nn
from torch import device
from q1 import pad_batch
import random
import torch.nn.functional as F

In [8]:
# Load the three datasets used in this notebook. Each loader's return value is unpacked as
#   (train inputs, train labels), (val inputs, val labels), (i2w, w2i), number of classes.
# w2i is indexed by token string further down (w2i_1['.pad']) and len(i2w) is used as the
# vocabulary size, so i2w / w2i are presumably index->word and word->index lookups.
# NOTE(review): final=False presumably selects a train/validation split rather than the
# held-out test split — confirm against data.load_imdb.
(x_train_1, y_train_1), (x_val_1, y_val_1), (i2w_1, w2i_1), numcls_1 = load_imdb(final=False)
# Bundle inputs and labels into the (x_list, y_list) pair that iterate_batches expects.
train_data1 = (x_train_1, y_train_1)
val_data1   = (x_val_1, y_val_1)
# Datasets 2 and 3 (synthetic IMDb-style data and XOR data, judging by the loader names);
# they are loaded here but not used by the cells visible in this part of the notebook.
(x_train_2, y_train_2), (x_val_2, y_val_2), (i2w_2, w2i_2), numcls_2 = load_imdb_synth()
(x_train_3, y_train_3), (x_val_3, y_val_3), (i2w_3, w2i_3), numcls_3 = load_xor()

In [9]:
def iterate_batches(dataset, batch_size, pad_idx, shuffle=True):
    """
    Split a dataset into padded mini-batches.

    dataset: (x_list, y_list) pair; x_list[i] is one sequence, y_list[i] its label
    batch_size: maximum number of examples per batch (the last batch may be smaller)
    pad_idx: padding value forwarded to pad_batch
    shuffle: when True, the example order is randomised with the global `random` RNG

    returns a list of (x_batch, y_batch) tuples, where x_batch is whatever
    pad_batch returns (expected to be a (B, T) tensor) and y_batch is a
    torch.long tensor of shape (B,)
    """
    sequences, labels = dataset

    # Batches are built from a permutation of example positions, not from the data itself.
    order = list(range(len(sequences)))
    if shuffle:
        random.shuffle(order)

    def collate(chunk):
        # Gather both lists first, then pad the inputs and tensorise the labels.
        seqs = [sequences[k] for k in chunk]
        targets = [labels[k] for k in chunk]
        return pad_batch(seqs, pad_idx), torch.tensor(targets, dtype=torch.long)

    return [
        collate(order[lo:lo + batch_size])
        for lo in range(0, len(order), batch_size)
    ]


In [10]:
def train_epoch(model, train_data, batch_size, pad_idx, optimizer):
    """
    Run one optimisation pass over train_data and report the epoch metrics.

    model: torch.nn.Module mapping a padded batch x to logits of shape (B, num_classes)
    train_data: (x_list, y_list) pair, as accepted by iterate_batches
    batch_size: number of examples per mini-batch
    pad_idx: padding value forwarded to iterate_batches
    optimizer: optimizer over the model's parameters; stepped once per batch

    returns (avg_loss, acc): the example-weighted mean cross-entropy and the
    fraction of correct argmax predictions, both measured on the logits
    computed before each optimizer step. The same values are also printed.

    raises ZeroDivisionError if train_data yields no batches (empty dataset).
    """
    # Make sure dropout / batch-norm layers are in training behaviour even if the
    # model was left in eval mode by earlier code.
    model.train()

    total_loss = 0.0
    total_correct = 0
    total_examples = 0

    for (x, y) in iterate_batches(train_data, batch_size, pad_idx, shuffle=True):
        optimizer.zero_grad()
        output = model(x)                 # (B, num_classes)
        loss = F.cross_entropy(output, y) # scalar, mean over the batch

        loss.backward()
        optimizer.step()

        # cross_entropy returns a per-batch mean; weight it by the batch size so the
        # final average is per example even when the last batch is smaller.
        batch_size_actual = x.size(0)
        total_loss += loss.item() * batch_size_actual

        # compute accuracy for this batch
        preds = output.argmax(dim=1)          # (B,)
        total_correct += (preds == y).sum().item()
        total_examples += batch_size_actual

    avg_loss = total_loss / total_examples
    acc = total_correct / total_examples
    print(f'Training loss: {avg_loss} and training accuracy: {acc}')
    return avg_loss, acc


In [11]:
def evaluate(model, val_data, batch_size, pad_idx):
    """
    Compute loss and accuracy of model on val_data without updating it.

    model: torch.nn.Module mapping a padded batch x to logits of shape (B, num_classes)
    val_data: (x_list, y_list) pair, as accepted by iterate_batches
    batch_size: number of examples per mini-batch
    pad_idx: padding value forwarded to iterate_batches

    The model is put in eval mode for the duration of the pass (so dropout is
    disabled and batch-norm uses its running statistics) and every submodule's
    previous train/eval flag is restored afterwards, even on error.

    returns (avg_loss, acc): the example-weighted mean cross-entropy and the
    fraction of correct argmax predictions. The same values are also printed.

    raises ZeroDivisionError if val_data yields no batches (empty dataset).
    """
    total_loss = 0.0
    total_correct = 0
    total_examples = 0

    # Remember each submodule's mode so the caller gets the model back exactly as
    # it was; a plain model.train() afterwards would also flip layers that were
    # deliberately kept in eval mode.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        with torch.no_grad():
            for x, y in iterate_batches(val_data, batch_size, pad_idx, shuffle=False):
                output = model(x)
                loss = F.cross_entropy(output, y)

                # Weight the per-batch mean by batch size to get a per-example average.
                batch_size_actual = x.size(0)
                total_loss += loss.item() * batch_size_actual

                preds = output.argmax(dim=1)
                total_correct += (preds == y).sum().item()
                total_examples += batch_size_actual
    finally:
        for module, was_training in previous_modes:
            module.training = was_training

    avg_loss = total_loss / total_examples
    acc = total_correct / total_examples
    print(f'Validation loss: {avg_loss} and validation accuracy: {acc}')
    return avg_loss, acc

In [12]:
# Seed the RNGs before building the model so this cell gives the same numbers on every
# Restart & Run All: `random` drives the batch shuffling in iterate_batches, and torch's
# global RNG is presumably what BaselineClassifier uses to initialise its weights.
# Metrics will differ from runs recorded before the seed was introduced.
SEED = 0
random.seed(SEED)
torch.manual_seed(SEED)

# Baseline model on dataset 1 (IMDb); the vocabulary size comes from the index->word list.
baseline = BaselineClassifier(vocab_size=len(i2w_1))
optimizer = torch.optim.Adam(baseline.parameters(), lr=0.001)
batch_size = 64
pad_idx1 = w2i_1['.pad']  # index of the padding token in the dataset-1 vocabulary

# One training epoch followed by a pass over the validation split.
train_loss, train_acc = train_epoch(baseline, train_data1, batch_size, pad_idx1, optimizer)
val_loss,   val_acc   = evaluate(baseline, val_data1, batch_size, pad_idx1)

Training loss: 0.6812857497215271 and training accuracy: 0.5653
Validation loss: 0.5806595430374145 and validation accuracy: 0.7342
