In [1]:
import os
import random
import time
import json
from copy import deepcopy
from collections import OrderedDict

import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torchtext import data, datasets

In [2]:
# Ensure the output directories exist before anything is written:
#   history/ receives the per-run training histories (JSON) and result tables,
#   models/  receives the saved model checkpoints.
# exist_ok=True makes this cell safe to re-run.
os.makedirs("history", exist_ok=True)
os.makedirs("models", exist_ok=True)

### Preparing Data

In [3]:
# Seed PyTorch and Python's `random` module so repeated runs start from the same RNG state.
# NOTE(review): numpy is imported in this notebook but is not seeded here — confirm nothing relies on it.
SEED = 1234
torch.manual_seed(SEED)
random.seed(SEED)
# Request deterministic cuDNN kernels.
torch.backends.cudnn.deterministic = True

# TEXT and LABEL fields
# include_lengths=True: the training code below unpacks batch.text as (text, text_lengths).
# pad_first=True: presumably pads at the start of each sequence rather than the end —
# TODO confirm against the torchtext Field documentation.
TEXT = data.Field(tokenize='spacy',
                  tokenizer_language='en_core_web_sm',
                  include_lengths=True,
                  pad_first=True)
# Labels are produced as float tensors; the training code feeds them to BCEWithLogitsLoss.
LABEL = data.LabelField(dtype=torch.float)

In [4]:
# load dataset
# Keep the full training split under its own name so re-reading this cell never
# confuses the 25k-example set with the train/valid subsets derived from it.
full_train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
print(f'Number of training examples: {len(full_train_data)}')
print(f'Number of testing examples: {len(test_data)}')

# split train -> train/valid
# `random.seed()` returns None, so passing its result as `random_state` only
# worked through the side effect of re-seeding the global RNG. Seed explicitly
# and hand over the resulting RNG state instead.
random.seed(SEED)
train_data, valid_data = full_train_data.split(random_state=random.getstate())

# build vocab (from the training subset only, so validation/test tokens do not leak in)
MAX_VOCAB_SIZE = 25_000
TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE)
LABEL.build_vocab(train_data)

# iterators
BATCH_SIZE = 64
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_iterator, valid_iterator, test_iterator = data.BucketIterator.splits(
    (train_data, valid_data, test_data),
    batch_size=BATCH_SIZE,
    sort_within_batch=True,
    device=DEVICE
)

Number of training examples: 25000
Number of testing examples: 25000


### Building Models

In [5]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Only parameters with ``requires_grad`` set are counted; frozen
    parameters are ignored.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [6]:
# RNN (simple)
class RNN(nn.Module):
    """Vanilla RNN classifier: embedding -> nn.RNN -> linear layer on the final hidden state."""

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        self.rnn = nn.RNN(input_size=embedding_dim, hidden_size=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, text, text_lengths=None):
        """Score a batch of token-id sequences.

        text: [sent_len, batch]. ``text_lengths`` is accepted for interface
        parity with the other models but is not used.
        """
        token_vectors = self.embedding(text)
        _, final_state = self.rnn(token_vectors)
        # final_state has a leading layer axis of size one; drop it before the linear layer
        return self.fc(final_state.squeeze(0))


In [7]:
# Feed-forward (pooled embeddings)
class FeedForwardText(nn.Module):
    """Feed-forward classifier over pooled token embeddings.

    Token embeddings are reduced with ``pool_fn`` (by default the mean over
    dim 0), then passed through one ReLU + dropout stage per entry of
    ``hidden_dims`` and a final linear output layer.
    """

    def __init__(self, input_dim, embedding_dim, hidden_dims, output_dim, dropout=0.5, pool_fn=lambda emb: emb.mean(dim=0)):
        super().__init__()
        self.embedding = nn.Embedding(input_dim, embedding_dim)
        self.pool_fn = pool_fn
        self.dropout = nn.Dropout(dropout)
        # Consecutive (in, out) widths: embedding_dim -> h1 -> h2 -> ...
        widths = [embedding_dim, *hidden_dims]
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )
        self.fc_out = nn.Linear(widths[-1], output_dim)
        self.activation = nn.functional.relu

    def forward(self, text, text_lengths=None):
        """Return one output row per batch element; ``text_lengths`` is unused."""
        x = self.pool_fn(self.embedding(text))
        for layer in self.hidden_layers:
            x = self.dropout(self.activation(layer(x)))
        return self.fc_out(x)

In [8]:
# CNNText (three filter sizes)
class CNNText(nn.Module):
    """1-D convolutional text classifier with one convolution branch per filter size.

    Each branch convolves over the token axis, applies ReLU and max-pools over
    the whole sequence; the pooled branch outputs are concatenated, passed
    through dropout and a final linear layer.

    Args:
        input_dim: vocabulary size for the embedding table.
        embedding_dim: embedding width (also the convolution input channels).
        n_filters: output channels per branch. An int is applied to every
            branch; a sequence must provide one value per entry of
            ``filter_sizes``.
        filter_sizes: kernel width of each branch.
        output_dim: width of the final linear layer.
        dropout: dropout probability applied to the concatenated features.
        padding_idx: forwarded to ``nn.Embedding``.

    Raises:
        ValueError: if ``n_filters`` is a sequence whose length differs from
            ``filter_sizes``.
    """

    def __init__(self, input_dim, embedding_dim, n_filters, filter_sizes=(1,2,3), output_dim=1, dropout=0.5, padding_idx=None):
        super().__init__()
        filter_sizes = tuple(filter_sizes)
        if isinstance(n_filters, int):
            # Broadcast to however many branches were requested (previously hard-coded to 3).
            n_filters = (n_filters,) * len(filter_sizes)
        else:
            n_filters = tuple(n_filters)
        if len(n_filters) != len(filter_sizes):
            raise ValueError(
                f"n_filters has {len(n_filters)} entries but filter_sizes has {len(filter_sizes)}"
            )
        self.embedding = nn.Embedding(input_dim, embedding_dim, padding_idx=padding_idx)
        # padding=fs-1 keeps every branch valid even when the sequence is shorter than the kernel.
        self.convs = nn.ModuleList([
            nn.Conv1d(in_channels=embedding_dim, out_channels=nf, kernel_size=fs, padding=fs-1)
            for nf, fs in zip(n_filters, filter_sizes)
        ])
        self.fc = nn.Linear(sum(n_filters), output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, text, text_lengths=None):
        """Score a batch; ``text_lengths`` is accepted for interface parity but unused."""
        embedded = self.embedding(text)
        # Conv1d expects channels before the sequence axis: move the embedding axis to the middle.
        x = embedded.permute(1, 2, 0)
        conv_results = []
        for conv in self.convs:
            c = nn.functional.relu(conv(x))
            # Max over the full sequence axis leaves one value per filter.
            pooled = nn.functional.max_pool1d(c, kernel_size=c.size(2)).squeeze(2)
            conv_results.append(pooled)
        cat = self.dropout(torch.cat(conv_results, dim=1))
        return self.fc(cat)

In [9]:
# LSTM single-layer
class LSTMModel(nn.Module):
    """Unidirectional LSTM classifier that reads out the top layer's final hidden state."""

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers=1, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, num_layers=n_layers)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, text, text_lengths=None):
        """Score a batch; ``text_lengths`` is accepted for interface parity but unused."""
        _, (final_hidden, _final_cell) = self.lstm(self.embedding(text))
        # The last entry along the first axis is the final state of the top layer.
        top_state = self.dropout(final_hidden[-1])
        return self.fc(top_state)

In [10]:
# Bi-LSTM
class BiLSTMModel(nn.Module):
    """Bidirectional LSTM classifier that reads out both directions' final hidden states."""

    def __init__(self, input_dim, embedding_dim, hidden_dim, output_dim, n_layers=1, dropout=0.5):
        super().__init__()
        self.embedding = nn.Embedding(num_embeddings=input_dim, embedding_dim=embedding_dim)
        self.lstm = nn.LSTM(
            input_size=embedding_dim,
            hidden_size=hidden_dim,
            num_layers=n_layers,
            bidirectional=True,
        )
        # Two directions are concatenated, hence twice the hidden width.
        self.fc = nn.Linear(in_features=2 * hidden_dim, out_features=output_dim)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, text, text_lengths=None):
        """Score a batch; ``text_lengths`` is accepted for interface parity but unused."""
        _, (final_hidden, _final_cell) = self.lstm(self.embedding(text))
        # The last two entries are the top layer's forward and backward final states.
        both_directions = torch.cat((final_hidden[-2], final_hidden[-1]), dim=1)
        return self.fc(self.dropout(both_directions))

In [11]:
def build_models(INPUT_DIM, EMBEDDING_DIM=100, HIDDEN_DIM=256, OUTPUT_DIM=1):
    """Instantiate one fresh model per architecture used in the experiments.

    Returns a dict keyed by 'ff1', 'ff2', 'ff3', 'cnn', 'lstm', 'bilstm' and
    'rnn' (in that insertion order). All models share the same vocabulary
    size, embedding width and output width; the recurrent models also share
    ``HIDDEN_DIM``.
    """
    # Hidden-layer widths of the three feed-forward variants.
    feed_forward_widths = {
        'ff1': [500],
        'ff2': [500,300],
        'ff3': [500,300,200],
    }
    models = {
        key: FeedForwardText(INPUT_DIM, EMBEDDING_DIM, hidden_dims=widths, output_dim=OUTPUT_DIM)
        for key, widths in feed_forward_widths.items()
    }
    models['cnn'] = CNNText(
        INPUT_DIM, EMBEDDING_DIM,
        n_filters=100, filter_sizes=(1,2,3),
        output_dim=OUTPUT_DIM, padding_idx=None,
    )
    models['lstm'] = LSTMModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, n_layers=1, dropout=0.5)
    models['bilstm'] = BiLSTMModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, n_layers=1, dropout=0.5)
    models['rnn'] = RNN(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)
    return models

### Training

`device_iterator`: Moves each batch's text and label tensors to the target device (the GPU when one is available) and leaves the sequence lengths untouched. Sequence lengths are just integers describing how long each sequence is; they don't require gradients or backpropagation, so there is no computational benefit to moving them to the GPU.

`binary accuracy`: (correct predictions) / (total predictions)

`train`: trains the model by passing it batches, recording the loss, and backpropagating it.

`evaluate`: the same loop as `train`, but without backpropagation.

In [12]:
def device_iterator(iterator, device):
    """Yield the batches of ``iterator`` with their tensors moved to ``device``.

    Each batch is modified in place and then yielded:
      * ``batch.text`` given as a ``(text, text_lengths)`` pair: only ``text``
        is moved; the lengths are left untouched.
      * ``batch.text`` given as a single tensor: moved as-is.
      * ``batch.label``: moved when present.

    Attributes that are missing or are not tensor-like are skipped. Unlike a
    blanket ``except Exception``, a genuine failure while moving a tensor
    (for example a CUDA error) is raised instead of being silently ignored.
    """
    for batch in iterator:
        text_field = getattr(batch, "text", None)
        # Check the container type explicitly: blindly unpacking would also
        # "succeed" on a plain tensor whose first dimension happens to be 2.
        is_text_with_lengths = (
            isinstance(text_field, (tuple, list))
            and len(text_field) == 2
            and hasattr(text_field[0], "to")
        )
        if is_text_with_lengths:
            text, text_lengths = text_field
            batch.text = (text.to(device), text_lengths)
        elif hasattr(text_field, "to"):
            batch.text = text_field.to(device)

        label = getattr(batch, "label", None)
        if hasattr(label, "to"):
            batch.label = label.to(device)
        yield batch

def binary_accuracy(preds, y):
    """Return the fraction of predictions that match ``y`` as a tensor.

    ``preds`` are raw logits: they are passed through a sigmoid and rounded
    to 0/1 before being compared element-wise with ``y``.
    """
    hard_labels = torch.sigmoid(preds).round()
    hits = (hard_labels == y).float()
    n_total = len(hits)
    return hits.sum() / n_total

def train(model, iterator, optimizer, criterion):
    """Run one training epoch and return ``(mean_loss, mean_accuracy)``.

    Args:
        model: module called as ``model(text, text_lengths)``.
        iterator: iterable of batches exposing ``batch.text`` as a
            ``(text, text_lengths)`` pair and ``batch.label``.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(predictions, labels)``.

    Returns:
        Per-batch averages of the loss and of ``binary_accuracy`` as Python
        floats, or ``(0.0, 0.0)`` when the iterator yields no batches.
    """
    device = next(model.parameters()).device
    epoch_loss = 0.0
    epoch_acc = 0.0
    batch_count = 0
    model.train()
    for batch in iterator:
        # Move the tensors to the model's device here. Calling the generator
        # function device_iterator() on a single batch and discarding the
        # result (as was done before) never executes its body, so it moved nothing.
        text, text_lengths = batch.text
        text = text.to(device)
        labels = batch.label.to(device)

        optimizer.zero_grad()
        predictions = model(text, text_lengths).squeeze(1)
        loss = criterion(predictions, labels)
        acc = binary_accuracy(predictions, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc += acc.item()
        batch_count += 1
    if batch_count == 0:
        return 0.0, 0.0
    return epoch_loss / batch_count, epoch_acc / batch_count

def evaluate(model, iterator, criterion):
    """Evaluate ``model`` over ``iterator`` without updating its weights.

    Args:
        model: module called as ``model(text, text_lengths)``.
        iterator: iterable of batches exposing ``batch.text`` as a
            ``(text, text_lengths)`` pair and ``batch.label``.
        criterion: loss called as ``criterion(predictions, labels)``.

    Returns:
        Per-batch averages of the loss and of ``binary_accuracy`` as Python
        floats, or ``(0.0, 0.0)`` when the iterator yields no batches.
    """
    device = next(model.parameters()).device
    epoch_loss = 0.0
    epoch_acc = 0.0
    batch_count = 0
    model.eval()
    with torch.no_grad():
        for batch in iterator:
            # Move the tensors to the model's device here. Calling the generator
            # function device_iterator() on a single batch and discarding the
            # result (as was done before) never executes its body, so it moved nothing.
            text, text_lengths = batch.text
            text = text.to(device)
            labels = batch.label.to(device)

            predictions = model(text, text_lengths).squeeze(1)
            loss = criterion(predictions, labels)
            acc = binary_accuracy(predictions, labels)

            epoch_loss += loss.item()
            epoch_acc += acc.item()
            batch_count += 1
    if batch_count == 0:
        return 0.0, 0.0
    return epoch_loss / batch_count, epoch_acc / batch_count

In [13]:
def epoch_time(start_time, end_time):
    """Split the interval between two timestamps (in seconds) into whole minutes and seconds.

    Returns a ``(minutes, seconds)`` tuple of ints; fractional seconds are truncated.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = duration - (whole_minutes * 60)
    return whole_minutes, int(leftover_seconds)


In [14]:
def save_history_csv_json(history, prefix):
    """Write a training history to ``history/{prefix}-history.json``.

    Args:
        history: JSON-serialisable list of per-epoch records. Nothing is
            written when it is empty or None.
        prefix: file-name prefix identifying the run.

    Note: despite the function name, only the JSON file is written; no CSV
    is produced.
    """
    if not history:
        return
    # Create the target directory on demand so the function does not depend on
    # an earlier cell having been run.
    os.makedirs("history", exist_ok=True)
    json_path = f"history/{prefix}-history.json"
    with open(json_path, "w") as jf:
        json.dump(history, jf, indent=2)

`run and record`: run the experiments and record the results.

In [15]:
def run_and_record(model_name, model, train_iter, valid_iter, n_epochs=50, lr=1e-3, output_prefix="results"):
    model = model.to(DEVICE)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss().to(DEVICE)

    best_valid_loss = float('inf')
    history = []

    os.makedirs("models", exist_ok=True)

    for epoch in range(1, n_epochs+1):
        start_time = time.time()
        train_loss, train_acc = train(model, device_iterator(train_iter, DEVICE), optimizer, criterion)
        valid_loss, valid_acc = evaluate(model, device_iterator(valid_iter, DEVICE), criterion)
        end_time = time.time()
        mins, secs = divmod(int(end_time - start_time), 60)

        # per-epoch checkpoint <commenting, cause saving the best one instead, per epoch doesnt seem relevant rn>
        #epoch_ckpt_path = f"models/{output_prefix}-{model_name}-epoch{epoch}.pt"
        #torch.save(model.state_dict(), epoch_ckpt_path)

        # save best model
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model.state_dict(), f"models/{output_prefix}-{model_name}-best.pt")

        row = OrderedDict([
            ("epoch", epoch),
            ("train_loss", train_loss),
            ("train_acc", train_acc),
            ("valid_loss", valid_loss),
            ("valid_acc", valid_acc),
            ("time_mins", mins),
            ("time_secs", secs),
            ('num_parameters', count_parameters(model))
        ])
        history.append(row)

        print(f"{model_name} | Epoch {epoch:02} | {mins}m {secs}s | Train Loss {train_loss:.4f} Train Acc {train_acc*100:.2f}% | Val Loss {valid_loss:.4f} Val Acc {valid_acc*100:.2f}%")

    # save history
    save_history_csv_json(history, f"{output_prefix}-{model_name}")

    return history, None


### Tasks

1. Warm up: Read, understand, and reimplement the example in the code base.  
    The sample experiment provided in `Assignment_1_Simple_Sentiment_Analysis.ipynb` is reimplemented as part of the optimizer sweep.
2. Conduct experiments with different optimizers: SGD, Adam, Adagrad, and record the experimental results 
3. Use Adam optimizer, conduct experiments with different numbers of epochs: 5, 10, 20, and 50. 

In [16]:
def rnn_optimizer_and_epoch_sweeps(TEXT, train_iter, valid_iter, output_prefix="results"):
    base = build_models(len(TEXT.vocab))['rnn']

    # (2) Optimizer Sweep
    optimizers_to_try = {
        "SGD": (optim.SGD, {"lr": 1e-3}), # (1) Sample code reimplementation
        #"Adam": (optim.Adam, {"lr": 1e-3}), Adam is tested in next loop
        "Adagrad": (optim.Adagrad, {"lr": 1e-3})
    }

    # sweep optimizers with 50 epochs
    for opt_name, (opt_ctor, opt_kwargs) in optimizers_to_try.items():
        print("\n" + "="*60)
        run_name = f"rnn-{opt_name}"
        model = deepcopy(base)
        run_and_record(run_name, model, train_iter, valid_iter, n_epochs=20, lr=opt_kwargs.get("lr", 1e-3), output_prefix=output_prefix)

    # (3) Adam epoch sweep (5,10,20,50)
    for n_epochs in [5,10,20,50]:
        print("\n" + "="*60)
        run_name = f"rnn-Adam-e{n_epochs}"
        model = deepcopy(base)
        run_and_record(run_name, model, train_iter, valid_iter, n_epochs=n_epochs, lr=1e-3, output_prefix=output_prefix)

    print("RNN optimizer/epoch sweeps complete. Per-epoch checkpoints and histories are in models/ and history/.")

rnn_optimizer_and_epoch_sweeps(TEXT, train_iterator, valid_iterator)


rnn-SGD | Epoch 01 | 0m 5s | Train Loss 0.6800 Train Acc 57.26% | Val Loss 0.6640 Val Acc 60.09%
rnn-SGD | Epoch 02 | 0m 4s | Train Loss 0.6790 Train Acc 55.79% | Val Loss 0.6932 Val Acc 51.14%
rnn-SGD | Epoch 03 | 0m 4s | Train Loss 0.6932 Train Acc 52.43% | Val Loss 0.6919 Val Acc 52.53%
rnn-SGD | Epoch 04 | 0m 4s | Train Loss 0.6930 Train Acc 52.21% | Val Loss 0.6890 Val Acc 53.30%
rnn-SGD | Epoch 05 | 0m 4s | Train Loss 0.6908 Train Acc 52.96% | Val Loss 0.6867 Val Acc 53.44%
rnn-SGD | Epoch 06 | 0m 4s | Train Loss 0.6873 Train Acc 53.80% | Val Loss 0.6838 Val Acc 53.48%
rnn-SGD | Epoch 07 | 0m 4s | Train Loss 0.6820 Train Acc 55.43% | Val Loss 0.6717 Val Acc 58.33%
rnn-SGD | Epoch 08 | 0m 4s | Train Loss 0.6429 Train Acc 62.76% | Val Loss 0.6257 Val Acc 67.01%
rnn-SGD | Epoch 09 | 0m 4s | Train Loss 0.5902 Train Acc 69.96% | Val Loss 0.6525 Val Acc 59.05%
rnn-SGD | Epoch 10 | 0m 4s | Train Loss 0.5673 Train Acc 71.14% | Val Loss 0.6344 Val Acc 63.15%
rnn-SGD | Epoch 11 | 0m 4s | 

Use Adam optimizer, 50 epochs, and randomly initialized embeddings, run the 
experiments with the following models: 
1. One-layer feed forward neural network, hidden dimension is 500.  
    model: ff1
2. Two-layer feed forward neural network, hidden dimensions are 500 and 300.  
    model: ff2
3. Three-layer feed-forward neural network, hidden dimensions are 500, 300 and 200  
    model: ff3
4. CNN model (using three feature maps with filter sizes 1, 2, and 3)  
    model:cnn
5. LSTM model  
    model: lstm
6. Bi-LSTM model  
    model: bilstm

These are constructed in the `build_models` function:  
    models['ff1'] = FeedForwardText(INPUT_DIM, EMBEDDING_DIM, hidden_dims=[500], output_dim=OUTPUT_DIM)  
    models['ff2'] = FeedForwardText(INPUT_DIM, EMBEDDING_DIM, hidden_dims=[500,300], output_dim=OUTPUT_DIM)  
    models['ff3'] = FeedForwardText(INPUT_DIM, EMBEDDING_DIM, hidden_dims=[500,300,200], output_dim=OUTPUT_DIM)  
    models['cnn'] = CNNText(INPUT_DIM, EMBEDDING_DIM, n_filters=100, filter_sizes=(1,2,3), output_dim=OUTPUT_DIM, padding_idx=None)  
    models['lstm'] = LSTMModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, n_layers=1, dropout=0.5)  
    models['bilstm'] = BiLSTMModel(INPUT_DIM, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM, n_layers=1, dropout=0.5)  

In [17]:
# Build models and train (50 epochs, Adam)
INPUT_DIM = len(TEXT.vocab)
EMB_DIM = 100
HID_DIM = 256
OUT_DIM = 1

models = build_models(INPUT_DIM, EMBEDDING_DIM=EMB_DIM, HIDDEN_DIM=HID_DIM, OUTPUT_DIM=OUT_DIM)

# Per-epoch history of every architecture, keyed by model name.
all_histories = {}

for name, model in models.items():
    # The vanilla RNN is covered by the optimizer/epoch sweeps, not by this comparison.
    if name == 'rnn':
        continue
    print("\n" + "="*80)
    print(f"Starting {name} (Adam, 50 epochs)")
    history, _ = run_and_record(name, model, train_iterator, valid_iterator, n_epochs=50, lr=1e-3, output_prefix="results")
    all_histories[name] = history

# Make sure the target directory exists even if the setup cell was skipped.
os.makedirs("history", exist_ok=True)
with open("history/results-all-models.json", "w") as f:
    json.dump(all_histories, f, indent=2)

# run_and_record only keeps the best checkpoint per model, so say so.
print("All models trained and best checkpoints saved in models/. Histories saved in history/.")


Starting ff1 (Adam, 50 epochs)
ff1 | Epoch 01 | 0m 2s | Train Loss 0.5885 Train Acc 67.72% | Val Loss 0.4525 Val Acc 79.14%
ff1 | Epoch 02 | 0m 2s | Train Loss 0.3651 Train Acc 84.36% | Val Loss 0.3412 Val Acc 85.94%
ff1 | Epoch 03 | 0m 2s | Train Loss 0.2735 Train Acc 89.05% | Val Loss 0.3295 Val Acc 86.39%
ff1 | Epoch 04 | 0m 2s | Train Loss 0.2253 Train Acc 91.81% | Val Loss 0.2995 Val Acc 88.06%
ff1 | Epoch 05 | 0m 2s | Train Loss 0.1751 Train Acc 93.88% | Val Loss 0.3136 Val Acc 87.82%
ff1 | Epoch 06 | 0m 2s | Train Loss 0.1255 Train Acc 95.77% | Val Loss 0.3167 Val Acc 88.40%
ff1 | Epoch 07 | 0m 2s | Train Loss 0.1004 Train Acc 96.81% | Val Loss 0.4011 Val Acc 87.55%
ff1 | Epoch 08 | 0m 2s | Train Loss 0.1178 Train Acc 97.43% | Val Loss 0.3657 Val Acc 88.43%
ff1 | Epoch 09 | 0m 2s | Train Loss 0.0922 Train Acc 98.07% | Val Loss 0.3909 Val Acc 88.23%
ff1 | Epoch 10 | 0m 2s | Train Loss 0.0373 Train Acc 99.13% | Val Loss 0.4263 Val Acc 87.96%
ff1 | Epoch 11 | 0m 2s | Train Loss 0.

### Load and View Results
Created two dataframes:
1. `val_df`: The accuracies of the model on validation set at given epoch.
2. `test_df`: results of the best model of each type, after running on test set.

In [19]:
# Directory holding the saved model checkpoints.
MODELS_DIR = "models"
# Histories are read from, and result tables written to, the same directory.
HISTORY_DIR = RESULTS_DIR = "history"
os.makedirs(RESULTS_DIR, exist_ok=True)

VALID_EPOCHS = [5, 10, 20, 50] # Load validation results on epoch 5, 10, 20 and 50

def load_validation_from_history(model_name):
    """Read a run's history file and map each epoch to its validation accuracy.

    The file is ``{HISTORY_DIR}/results-{model_name}-history.json`` and is
    expected to hold a list of per-epoch records (a single record is also
    accepted). Each record needs an ``"epoch"`` key and either ``"valid_acc"``
    or ``"valid_accuracy"``.

    Returns:
        ``{epoch: accuracy}`` with int keys and float values, or None when
        the history file does not exist. Records that are malformed or whose
        values cannot be converted are skipped.
    """
    fn = os.path.join(HISTORY_DIR, f"results-{model_name}-history.json")
    if not os.path.exists(fn):
        return None
    with open(fn, "r") as f:
        # Named `payload` rather than `data` to avoid shadowing the imported torchtext `data` name.
        payload = json.load(f)

    if isinstance(payload, dict):
        entries = [payload]
    elif isinstance(payload, list):
        entries = payload
    else:
        # Any other JSON value (number, string, null) carries no records.
        entries = []

    epoch_map = {}
    for entry in entries:
        if not isinstance(entry, dict) or "epoch" not in entry:
            continue
        if "valid_acc" in entry:
            acc_key = "valid_acc"
        elif "valid_accuracy" in entry:
            acc_key = "valid_accuracy"
        else:
            continue
        try:
            epoch_map[int(entry["epoch"])] = float(entry[acc_key])
        except (TypeError, ValueError, OverflowError):
            # Skip only records whose values are not numeric; anything else is a real error.
            continue
    return epoch_map

Load the best models of each type and get results on test set.

In [20]:
# Checkpoint labels to evaluate; "best" selects the "-best.pt" checkpoint of each run.
TEST_EPOCHS = ["best"]

def evaluate_checkpoints_on_test(models_list, test_iterator, epochs_to_check=TEST_EPOCHS, prefix="results"):
    """Evaluate saved checkpoints on the test set and write a JSON summary.

    For every name in ``models_list`` and every label in ``epochs_to_check``
    the first existing checkpoint file is loaded into a freshly built model
    and scored with ``evaluate``.

    Args:
        models_list: run names; names starting with 'rnn' use the 'rnn'
            architecture, all others must be keys of ``build_models``.
        test_iterator: iterator over test batches.
        epochs_to_check: checkpoint labels ("best" or an epoch identifier).
        prefix: file-name prefix tried first when locating checkpoints.

    Returns:
        ``{model_name: [(label, test_accuracy_or_None), ...]}``; the accuracy
        is None when no checkpoint file was found for that label.

    Side effects:
        Prints each model name with the checkpoint path used and writes
        ``results-test-summary.json`` into ``RESULTS_DIR``.
    """
    results_by_model = {}
    for model_name in models_list:
        model_results = []
        for epoch_label in epochs_to_check:
            # build candidate paths: prefixed file name first, un-prefixed as a fallback
            if epoch_label == "best":
                candidates = [
                    os.path.join(MODELS_DIR, f"{prefix}-{model_name}-best.pt"),
                    os.path.join(MODELS_DIR, f"{model_name}-best.pt"),
                ]
            else:
                candidates = [
                    os.path.join(MODELS_DIR, f"{prefix}-{model_name}-epoch{epoch_label}.pt"),
                    os.path.join(MODELS_DIR, f"{model_name}-epoch{epoch_label}.pt"),
                ]
            ckpt_path = next((p for p in candidates if os.path.exists(p)), None)
            if ckpt_path is None:
                # No checkpoint on disk: record the gap and move on.
                model_results.append((str(epoch_label), None))
                continue

            # instantiate fresh model and load state
            # NOTE(review): build_models is called with its default dimensions, so this
            # assumes the checkpoint was trained with those defaults — confirm.
            # It also builds every architecture just to pick one.
            models_dict = build_models(len(TEXT.vocab))
            if model_name.startswith('rnn'): model = models_dict['rnn']
            else: model = models_dict[model_name]

            print(model_name, ckpt_path)
            # NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
            state = torch.load(ckpt_path, map_location=DEVICE)
            # handle saved dicts that wrap state dicts
            if isinstance(state, dict) and "state_dict" in state:
                state = state["state_dict"]
            model.load_state_dict(state)
            model = model.to(DEVICE)
            criterion = nn.BCEWithLogitsLoss().to(DEVICE)
            loss, acc = evaluate(model, device_iterator(test_iterator, DEVICE), criterion)
            # Defensive conversion: evaluate as defined in this notebook already returns floats.
            if isinstance(acc, torch.Tensor):
                acc = acc.item()
            model_results.append((str(epoch_label), float(acc)))
        results_by_model[model_name] = model_results

    # save JSON summary
    out = {m: [{"epoch": e, "test_acc": (None if a is None else a)} for e, a in lst] for m, lst in results_by_model.items()}
    with open(os.path.join(RESULTS_DIR, "results-test-summary.json"), "w") as f:
        json.dump(out, f, indent=2)
    return results_by_model

In [21]:
# build validation table from histories
def build_validation_table(models_list, epochs_list=VALID_EPOCHS):
    """Build a model-by-epoch table of validation accuracies from the saved histories.

    Rows are the names in ``models_list``, columns the epochs in
    ``epochs_list``. An epoch that is absent from a model's history (or a
    model with no history file at all) is filled with NaN. The table is also
    written to ``RESULTS_DIR`` as CSV and as Markdown.

    Returns the resulting DataFrame.
    """
    table = {}
    for name in models_list:
        epoch_to_acc = load_validation_from_history(name)
        if epoch_to_acc is None:
            # No history file: every requested epoch becomes NaN.
            epoch_to_acc = {}
        table[name] = {ep: epoch_to_acc.get(ep, float("nan")) for ep in epochs_list}

    df = pd.DataFrame.from_dict(table, orient="index", columns=epochs_list)
    df.index.name = "model"

    csv_path = os.path.join(RESULTS_DIR, "validation-accuracies-by-epoch.csv")
    df.to_csv(csv_path)
    md_path = os.path.join(RESULTS_DIR, "validation-accuracies-by-epoch.md")
    with open(md_path, "w") as f:
        f.write(df.to_markdown())
    return df

In [22]:
# build test table from results_by_model
def build_test_table(results_by_model, epochs_list=TEST_EPOCHS):
    """Turn per-model ``(label, accuracy)`` pairs into a model-by-checkpoint table.

    Rows are the model names in ``results_by_model``, columns the stringified
    entries of ``epochs_list``. A missing or None accuracy becomes NaN. The
    table is also written to ``RESULTS_DIR`` as CSV and as Markdown.

    Returns the resulting DataFrame.
    """
    column_labels = [str(e) for e in epochs_list]
    table = {}
    for name, pairs in results_by_model.items():
        acc_by_label = {label: acc for label, acc in pairs}
        row = {}
        for label in column_labels:
            acc = acc_by_label.get(label)
            row[label] = float("nan") if acc is None else float(acc)
        table[name] = row

    df = pd.DataFrame.from_dict(table, orient="index", columns=column_labels)
    df.index.name = "model"

    csv_path = os.path.join(RESULTS_DIR, "test-accuracies-by-epoch.csv")
    df.to_csv(csv_path)
    md_path = os.path.join(RESULTS_DIR, "test-accuracies-by-epoch.md")
    with open(md_path, "w") as f:
        f.write(df.to_markdown())
    return df

In [23]:
# Run names to report on: the six architectures from the model comparison plus
# the RNN optimizer-sweep and Adam epoch-sweep runs. Each name matches the run
# name used when the checkpoint and history files were written.
models_list = [
 'ff1',
 'ff2',
 'ff3',
 'cnn',
 'lstm',
 'bilstm',
 'rnn-Adagrad', 
 'rnn-SGD',
 'rnn-Adam-e5',
 'rnn-Adam-e10',
 'rnn-Adam-e20',
 'rnn-Adam-e50'
]


# Score the best checkpoint of every run on the test set, then assemble both result tables.
test_results = evaluate_checkpoints_on_test(models_list, test_iterator, epochs_to_check=TEST_EPOCHS, prefix="results")
val_df = build_validation_table(models_list, epochs_list=VALID_EPOCHS)
test_df = build_test_table(test_results, epochs_list=TEST_EPOCHS)

ff1 models\results-ff1-best.pt
ff2 models\results-ff2-best.pt
ff3 models\results-ff3-best.pt
cnn models\results-cnn-best.pt
lstm models\results-lstm-best.pt
bilstm models\results-bilstm-best.pt
rnn-Adagrad models\results-rnn-Adagrad-best.pt
rnn-SGD models\results-rnn-SGD-best.pt
rnn-Adam-e5 models\results-rnn-Adam-e5-best.pt
rnn-Adam-e10 models\results-rnn-Adam-e10-best.pt
rnn-Adam-e20 models\results-rnn-Adam-e20-best.pt
rnn-Adam-e50 models\results-rnn-Adam-e50-best.pt


In [24]:
# Validation accuracy per run (rows) at the epochs listed in VALID_EPOCHS (columns); NaN = epoch not in that run's history.
val_df

Unnamed: 0_level_0,5,10,20,50
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ff1,0.878178,0.879635,0.878487,0.873014
ff2,0.873985,0.875221,0.874382,0.87372
ff3,0.876368,0.873985,0.872925,0.872528
cnn,0.856109,0.883121,0.871337,0.846354
lstm,0.8189,0.812588,0.854299,0.861538
bilstm,0.798023,0.861494,0.864936,0.865643
rnn-Adagrad,0.659031,0.672007,0.706082,
rnn-SGD,0.534428,0.631532,0.764786,
rnn-Adam-e5,0.681806,,,
rnn-Adam-e10,0.610169,0.683351,,


In [25]:
# Test accuracy of each run's best checkpoint (lowest validation loss during training).
test_df

Unnamed: 0_level_0,best
model,Unnamed: 1_level_1
ff1,0.864003
ff2,0.858488
ff3,0.858967
cnn,0.877126
lstm,0.827126
bilstm,0.848857
rnn-Adagrad,0.663059
rnn-SGD,0.736165
rnn-Adam-e5,0.695836
rnn-Adam-e10,0.697642
