In [86]:
from models import BaselineClassifier
from data import load_imdb, load_imdb_synth, load_xor
import torch
import torch.nn as nn
from torch import device
from q1 import pad_batch
import random
import torch.nn.functional as F
import itertools

In [87]:
# Dataset 1: splits, vocabulary maps and class count returned by load_imdb(final=False).
(x_train_1, y_train_1), (x_val_1, y_val_1), (i2w_1, w2i_1), numcls_1 = load_imdb(final=False)

# Bundle inputs and labels as (x_list, y_list) pairs, the layout iterate_batches unpacks.
train_data1, val_data1 = (x_train_1, y_train_1), (x_val_1, y_val_1)

# Number of training examples.
print(len(x_train_1))

20000


In [88]:
# Dataset 2: splits, vocabulary maps and class count returned by load_imdb_synth().
(x_train_2, y_train_2), (x_val_2, y_val_2), (i2w_2, w2i_2), numcls_2 = load_imdb_synth()

# Bundle inputs and labels as (x_list, y_list) pairs, the layout iterate_batches unpacks.
train_data2, val_data2 = (x_train_2, y_train_2), (x_val_2, y_val_2)

# Number of training examples.
print(len(x_train_2))

25000


In [89]:
# Dataset 3: splits, vocabulary maps and class count returned by load_xor().
(x_train_3, y_train_3), (x_val_3, y_val_3), (i2w_3, w2i_3), numcls_3 = load_xor()

# Bundle inputs and labels as (x_list, y_list) pairs, the layout iterate_batches unpacks.
train_data3, val_data3 = (x_train_3, y_train_3), (x_val_3, y_val_3)

# Number of training examples.
print(len(x_train_3))

25000


In [90]:
def iterate_batches(dataset, batch_size, pad_idx, shuffle=True):
    """
    Split a dataset into padded mini-batches.

    dataset: (x_list, y_list), two parallel sequences of inputs and labels
    batch_size: maximum number of examples per batch; the last batch holds
        whatever is left over and may be smaller
    pad_idx: padding index, forwarded unchanged to pad_batch
    shuffle: when True the examples are visited in a random order drawn from
        the `random` module's global RNG (a new order on every call); when
        False they are visited in dataset order
    returns a list of (x_batch, y_batch) tuples
    raises ValueError if batch_size is not positive
    """
    if batch_size <= 0:
        # A zero step makes range() fail with an unrelated message and a
        # negative step silently yields no batches at all, so reject both here.
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    x_data, y_data = dataset
    indices = list(range(len(x_data)))
    if shuffle:
        # Permute the index list only: the caller's data is left untouched and
        # every input stays paired with its own label.
        random.shuffle(indices)

    batches = []
    for start in range(0, len(indices), batch_size):
        batch_idx = indices[start:start + batch_size]
        x_seqs = [x_data[j] for j in batch_idx]
        y_labels = [y_data[j] for j in batch_idx]

        x = pad_batch(x_seqs, pad_idx)              # (B, T)
        y = torch.tensor(y_labels, dtype=torch.long)  # (B,)
        batches.append((x, y))
    return batches


In [91]:
def train_epochs(model, train_data, batch_size, pad_idx, optimizer, num_epochs=5):
    """
    Train `model` on `train_data` for `num_epochs` passes and print per-epoch stats.

    model: callable module mapping an input batch to class logits; it is put
        into training mode before the first epoch
    train_data: (x_list, y_list) pair, batched through iterate_batches
    batch_size: batch size forwarded to iterate_batches
    pad_idx: padding index forwarded to iterate_batches
    optimizer: optimizer that updates the model's parameters
    num_epochs: number of passes over the training data
    returns (avg_loss, acc) of the LAST epoch, both averaged per example;
        (nan, nan) when no epoch ran or the data produced no batches
    """
    # Bound up front so that num_epochs == 0 returns a value instead of
    # failing with UnboundLocalError at the return statement.
    avg_loss, acc = float("nan"), float("nan")

    # Make sure training-only layers (e.g. dropout) are active even if the
    # model was left in eval mode by an earlier call.
    model.train()

    for epoch in range(1, num_epochs + 1):
        total_loss = 0.0
        total_correct = 0
        total_examples = 0

        print(f"\nEpoch {epoch}/{num_epochs}")

        for x, y in iterate_batches(train_data, batch_size, pad_idx, shuffle=True):

            optimizer.zero_grad()
            output = model(x)
            loss = F.cross_entropy(output, y)

            loss.backward()
            optimizer.step()

            # stats for this batch; cross_entropy returns the batch mean, so it
            # is re-weighted by the batch size to get a true per-example average
            batch_size_actual = x.size(0)
            total_loss += loss.item() * batch_size_actual
            preds = output.argmax(dim=1)
            total_correct += (preds == y).sum().item()
            total_examples += batch_size_actual

        # epoch metrics (guarded: an empty dataset yields no examples)
        if total_examples:
            avg_loss = total_loss / total_examples
            acc = total_correct / total_examples
        else:
            avg_loss, acc = float("nan"), float("nan")

        print(f"Training loss: {avg_loss:.4f}  |  accuracy: {acc:.4f}")

    return avg_loss, acc

In [92]:
def evaluate(model, val_data, batch_size, pad_idx):
    """
    Compute the average loss and accuracy of `model` on `val_data`.

    model: module mapping an input batch to class logits; it is switched to
        eval mode for the duration of the call and its previous mode is
        restored afterwards
    val_data: (x_list, y_list) pair, batched in dataset order
    batch_size: batch size forwarded to iterate_batches
    pad_idx: padding index forwarded to iterate_batches
    returns (avg_loss, acc), both averaged per example; (nan, nan) when the
        data produced no batches
    """
    total_loss = 0.0
    total_correct = 0
    total_examples = 0

    # Evaluation must not run training-only behaviour such as dropout;
    # remember the current mode so the caller's model is handed back unchanged.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in iterate_batches(val_data, batch_size, pad_idx, shuffle=False):
                output = model(x)
                loss = F.cross_entropy(output, y)

                # cross_entropy returns the batch mean; re-weight by batch size
                batch_size_actual = x.size(0)
                total_loss += loss.item() * batch_size_actual

                preds = output.argmax(dim=1)
                total_correct += (preds == y).sum().item()
                total_examples += batch_size_actual
    finally:
        model.train(was_training)

    if total_examples == 0:
        # Nothing was evaluated; avoid ZeroDivisionError.
        return float("nan"), float("nan")

    avg_loss = total_loss / total_examples
    acc = total_correct / total_examples
    return avg_loss, acc

In [93]:
# Padding index of dataset 1's vocabulary; passed to grid_search as pad_idx further down.
pad_idx1 = w2i_1['.pad']
batch_size = 128

# Stand-alone baseline model sized to dataset 1's vocabulary, with its Adam optimizer.
baseline = BaselineClassifier(vocab_size=len(i2w_1))
optimizer = torch.optim.Adam(baseline.parameters(), lr=0.001)

In [94]:
def grid_search(train_data, val_data, vocab_size, num_classes, pad_idx,
                pools=('mean', 'max', 'first'), lrs=(1e-3,), batch_sizes=(64,),
                num_epochs=5):
    """
    Train and evaluate one fresh BaselineClassifier per hyper-parameter combination.

    train_data: (x_list, y_list) pair used for training
    val_data: (x_list, y_list) pair used for validation
    vocab_size: vocabulary size passed to BaselineClassifier
    num_classes: accepted for interface compatibility; currently NOT forwarded
        to the model
    pad_idx: padding index forwarded to train_epochs and evaluate
    pools: pooling settings to try (passed as BaselineClassifier's `pool`)
    lrs: Adam learning rates to try
    batch_sizes: batch sizes to try
    num_epochs: number of training epochs per combination
    returns a list of (pool, lr, batch_size, train_acc, val_acc) tuples, one
        per combination, in itertools.product order (pools vary slowest)
    """
    results = []

    for pool, lr, batch_size in itertools.product(pools, lrs, batch_sizes):
        # A new model and optimizer per combination, so every run starts from scratch.
        model = BaselineClassifier(vocab_size=vocab_size, pool=pool)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # train_acc is the accuracy measured during the last training epoch
        train_loss, train_acc = train_epochs(
            model, train_data, batch_size, pad_idx, optimizer, num_epochs=num_epochs
        )
        val_loss, val_acc = evaluate(model, val_data, batch_size, pad_idx)

        print(f'pool={pool}, lr={lr}, batch={batch_size} | train_acc={train_acc:.3f}, val_acc={val_acc:.3f}')
        results.append((pool, lr, batch_size, train_acc, val_acc))

    return results

In [95]:
# Grid search on dataset 1 (the load_imdb splits).
results1 = grid_search(
    train_data1,
    val_data1,
    vocab_size=len(i2w_1),
    num_classes=numcls_1,
    pad_idx=pad_idx1,
)



Epoch 1/5
Training loss: 0.6044  |  accuracy: 0.7315

Epoch 2/5
Training loss: 0.4089  |  accuracy: 0.8516

Epoch 3/5
Training loss: 0.2976  |  accuracy: 0.8961

Epoch 4/5
Training loss: 0.2338  |  accuracy: 0.9212

Epoch 5/5
Training loss: 0.1891  |  accuracy: 0.9394
pool=mean, lr=0.001, batch=64 | train_acc=0.939, val_acc=0.884

Epoch 1/5
Training loss: 0.6783  |  accuracy: 0.5773

Epoch 2/5
Training loss: 0.5804  |  accuracy: 0.7153

Epoch 3/5
Training loss: 0.4793  |  accuracy: 0.8074

Epoch 4/5
Training loss: 0.3940  |  accuracy: 0.8500

Epoch 5/5
Training loss: 0.3309  |  accuracy: 0.8767
pool=max, lr=0.001, batch=64 | train_acc=0.877, val_acc=0.835

Epoch 1/5
Training loss: 0.7115  |  accuracy: 0.5232

Epoch 2/5
Training loss: 0.6839  |  accuracy: 0.5524

Epoch 3/5
Training loss: 0.6687  |  accuracy: 0.5722

Epoch 4/5
Training loss: 0.6534  |  accuracy: 0.5839

Epoch 5/5
Training loss: 0.6397  |  accuracy: 0.5918
pool=first, lr=0.001, batch=64 | train_acc=0.592, val_acc=0.548


In [96]:
# Grid search on dataset 2 (the load_imdb_synth splits).
results2 = grid_search(
    train_data2,
    val_data2,
    vocab_size=len(i2w_2),
    num_classes=numcls_2,
    pad_idx=w2i_2['.pad'],
)



Epoch 1/5
Training loss: 0.6976  |  accuracy: 0.4952

Epoch 2/5
Training loss: 0.6965  |  accuracy: 0.5007

Epoch 3/5
Training loss: 0.6964  |  accuracy: 0.5002

Epoch 4/5
Training loss: 0.6964  |  accuracy: 0.5002

Epoch 5/5
Training loss: 0.6963  |  accuracy: 0.5004
pool=mean, lr=0.001, batch=64 | train_acc=0.500, val_acc=0.491

Epoch 1/5
Training loss: 0.4318  |  accuracy: 0.8732

Epoch 2/5
Training loss: 0.0623  |  accuracy: 1.0000

Epoch 3/5
Training loss: 0.0137  |  accuracy: 1.0000

Epoch 4/5
Training loss: 0.0054  |  accuracy: 1.0000

Epoch 5/5
Training loss: 0.0028  |  accuracy: 1.0000
pool=max, lr=0.001, batch=64 | train_acc=1.000, val_acc=1.000

Epoch 1/5
Training loss: 0.6985  |  accuracy: 0.4966

Epoch 2/5
Training loss: 0.6982  |  accuracy: 0.4962

Epoch 3/5
Training loss: 0.6981  |  accuracy: 0.4962

Epoch 4/5
Training loss: 0.6980  |  accuracy: 0.4965

Epoch 5/5
Training loss: 0.6979  |  accuracy: 0.4974
pool=first, lr=0.001, batch=64 | train_acc=0.497, val_acc=0.499


In [97]:
# Grid search on dataset 3 (the load_xor splits).
results3 = grid_search(
    train_data3,
    val_data3,
    vocab_size=len(i2w_3),
    num_classes=numcls_3,
    pad_idx=w2i_3['.pad'],
)


Epoch 1/5
Training loss: 0.6996  |  accuracy: 0.4966

Epoch 2/5
Training loss: 0.6991  |  accuracy: 0.5028

Epoch 3/5
Training loss: 0.6990  |  accuracy: 0.4994

Epoch 4/5
Training loss: 0.6989  |  accuracy: 0.4996

Epoch 5/5
Training loss: 0.6987  |  accuracy: 0.5018
pool=mean, lr=0.001, batch=64 | train_acc=0.502, val_acc=0.500

Epoch 1/5
Training loss: 0.0190  |  accuracy: 0.9969

Epoch 2/5
Training loss: 0.0006  |  accuracy: 1.0000

Epoch 3/5
Training loss: 0.0002  |  accuracy: 1.0000

Epoch 4/5
Training loss: 0.0001  |  accuracy: 1.0000

Epoch 5/5
Training loss: 0.0001  |  accuracy: 1.0000
pool=max, lr=0.001, batch=64 | train_acc=1.000, val_acc=1.000

Epoch 1/5
Training loss: 0.6995  |  accuracy: 0.5046

Epoch 2/5
Training loss: 0.6999  |  accuracy: 0.5021

Epoch 3/5
Training loss: 0.6998  |  accuracy: 0.5011

Epoch 4/5
Training loss: 0.6997  |  accuracy: 0.5004

Epoch 5/5
Training loss: 0.6996  |  accuracy: 0.5001
pool=first, lr=0.001, batch=64 | train_acc=0.500, val_acc=0.500


In [99]:
# Repeat run of the grid search on dataset 3 (the load_xor splits); rebinds results3.
# No RNG seed is set in the cells shown here, so the numbers vary slightly between runs.
results3 = grid_search(
    train_data3,
    val_data3,
    vocab_size=len(i2w_3),
    num_classes=numcls_3,
    pad_idx=w2i_3['.pad'],
)


Epoch 1/5
Training loss: 0.6996  |  accuracy: 0.4800

Epoch 2/5
Training loss: 0.6992  |  accuracy: 0.4970

Epoch 3/5
Training loss: 0.6991  |  accuracy: 0.5010

Epoch 4/5
Training loss: 0.6990  |  accuracy: 0.4957

Epoch 5/5
Training loss: 0.6988  |  accuracy: 0.4922
pool=mean, lr=0.001, batch=64 | train_acc=0.492, val_acc=0.500

Epoch 1/5
Training loss: 0.0301  |  accuracy: 0.9915

Epoch 2/5
Training loss: 0.0009  |  accuracy: 1.0000

Epoch 3/5
Training loss: 0.0004  |  accuracy: 1.0000

Epoch 4/5
Training loss: 0.0002  |  accuracy: 1.0000

Epoch 5/5
Training loss: 0.0001  |  accuracy: 1.0000
pool=max, lr=0.001, batch=64 | train_acc=1.000, val_acc=1.000

Epoch 1/5
Training loss: 0.6994  |  accuracy: 0.5038

Epoch 2/5
Training loss: 0.6999  |  accuracy: 0.5026

Epoch 3/5
Training loss: 0.6997  |  accuracy: 0.5008

Epoch 4/5
Training loss: 0.6995  |  accuracy: 0.5010

Epoch 5/5
Training loss: 0.6994  |  accuracy: 0.4999
pool=first, lr=0.001, batch=64 | train_acc=0.500, val_acc=0.499


In [100]:
# Another repeat run of the grid search on dataset 3 (the load_xor splits); rebinds results3.
# No RNG seed is set in the cells shown here, so the numbers vary slightly between runs.
results3 = grid_search(
    train_data3,
    val_data3,
    vocab_size=len(i2w_3),
    num_classes=numcls_3,
    pad_idx=w2i_3['.pad'],
)


Epoch 1/5
Training loss: 0.6995  |  accuracy: 0.4896

Epoch 2/5
Training loss: 0.6994  |  accuracy: 0.4935

Epoch 3/5
Training loss: 0.6993  |  accuracy: 0.4926

Epoch 4/5
Training loss: 0.6991  |  accuracy: 0.4974

Epoch 5/5
Training loss: 0.6990  |  accuracy: 0.4924
pool=mean, lr=0.001, batch=64 | train_acc=0.492, val_acc=0.500

Epoch 1/5
Training loss: 0.0240  |  accuracy: 0.9937

Epoch 2/5
Training loss: 0.0007  |  accuracy: 1.0000

Epoch 3/5
Training loss: 0.0003  |  accuracy: 1.0000

Epoch 4/5
Training loss: 0.0002  |  accuracy: 1.0000

Epoch 5/5
Training loss: 0.0001  |  accuracy: 1.0000
pool=max, lr=0.001, batch=64 | train_acc=1.000, val_acc=1.000

Epoch 1/5
Training loss: 0.7005  |  accuracy: 0.5008

Epoch 2/5
Training loss: 0.6998  |  accuracy: 0.5024

Epoch 3/5
Training loss: 0.6997  |  accuracy: 0.5016

Epoch 4/5
Training loss: 0.6995  |  accuracy: 0.5011

Epoch 5/5
Training loss: 0.6994  |  accuracy: 0.5003
pool=first, lr=0.001, batch=64 | train_acc=0.500, val_acc=0.499
