# Testing and Training Models

In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from collections import Counter
from itertools import chain
from nltk.tokenize import word_tokenize  # Ensure NLTK is installed
from sklearn.metrics import accuracy_score
import os
import json
import torch
import random
from etl import * 
from translate import * 

In [2]:
### GLOBAL VARIABLES ###
SEED = 28
# Reserved vocabulary entries; each token's id is its position in this tuple.
SPECIAL_TOKENS = {
    token: index
    for index, token in enumerate(('<PAD>', '<START>', '<END>', '<UNKNOWN>'))
}
MAX_LEN = 17
### END OF GLOBAL VARIABLES ###

# Seed Python's `random` module and torch with the same value so reruns
# start from the same random state.
random.seed(SEED)
torch.manual_seed(SEED)
# Only touch the CUDA generator when a GPU is actually available.
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)

---

In [3]:
# Build the train/test/validation loaders and the English/Japanese vocabularies
# used for training. Options are collected in one mapping so they are easy to tune.
data_options = {
    'max_vocab': 9000,
    'subset_train': 0.15,  # only a small share of the training data fits our compute budget
    'subset_test': 1.0,
    'subset_val': 1.0,
}
train_loader, test_loader, val_loader, eng_vocab, jpn_vocab = load_and_process_data(**data_options)

Loading Data
Vocabularies saved successfully!


In [None]:
### TRAINING AND TESTING ###
# Trains each non-CNN candidate for EPOCHS epochs, reports per-epoch train/validation
# metrics, evaluates on the test set, and saves each model once it has been trained.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# models trial (no CNN)
# NOTE(review): the positional hyperparameters after the two vocabulary sizes are
# defined by the model classes, which are not visible in this notebook — confirm their meaning there.
models = {
    'Transformer_Seq2Seq_1': TransformerSeq2Seq(len(eng_vocab), len(jpn_vocab), 15, 3, 3, 0.2),
    'LSTM_Seq2Seq_1': LSTMSeq2Seq(len(eng_vocab), len(jpn_vocab), 15, 10, 3, 0.2),
    'GRU_Seq2Seq_1': GRUSeq2Seq(len(eng_vocab), len(jpn_vocab), 15, 5, 3, 0.2),
}

results = {}  # model name -> (test loss, test accuracy)
EPOCHS = 50
save_dir = "saved"  # loop-invariant, so it is bound once before training starts

# The loss ignores <PAD> targets; it holds no per-model state, so one instance is shared.
criterion = nn.CrossEntropyLoss(ignore_index=SPECIAL_TOKENS['<PAD>'])

# evaluate_model
for name, model in models.items():
    model.to(device)
    # Each model gets its own optimizer because the optimizer is bound to that model's parameters.
    optimizer = optim.Adam(model.parameters(), lr = 0.001, weight_decay = 1e-5)
    print(f"\nTraining {name}...")
    for epoch in range(EPOCHS):  # Adjust number of epochs
        train_loss, train_accuracy = train_model(model, train_loader, optimizer, criterion, device)
        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion, device)
        print(f"Epoch {epoch + 1}: Train Loss = {train_loss:.4f}, Train Accuracy = {train_accuracy:.4f}, Val Accuracy = {val_accuracy:.4f}")
    test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)
    print(f"{name} Test Loss = {test_loss:.4f}, Test Accuracy = {test_accuracy:.4f}")
    results[name] = (test_loss, test_accuracy)
    # Save only the model that just finished training. Passing the whole `models`
    # dict here would also write the models that have not been trained yet.
    save_model_and_params({name: model}, save_dir)

# Print comparison results
print("\nModel Comparison:")
for model_name, (loss, accuracy) in results.items():
    print(f"{model_name}: Test Loss = {loss:.4f}, Test Accuracy = {accuracy:.4f}")





Training Transformer_Seq2Seq_1...
Epoch 1: Train Loss = 4.7825, Train Accuracy = 0.2171, Val Accuracy = 0.1857
Epoch 2: Train Loss = 4.4399, Train Accuracy = 0.2496, Val Accuracy = 0.2036
Epoch 3: Train Loss = 4.3371, Train Accuracy = 0.2590, Val Accuracy = 0.2082
Epoch 4: Train Loss = 4.2648, Train Accuracy = 0.2667, Val Accuracy = 0.2181
Epoch 5: Train Loss = 4.2164, Train Accuracy = 0.2717, Val Accuracy = 0.2256
Epoch 6: Train Loss = 4.1879, Train Accuracy = 0.2744, Val Accuracy = 0.2220
Epoch 7: Train Loss = 4.1706, Train Accuracy = 0.2758, Val Accuracy = 0.2184
Epoch 8: Train Loss = 4.1594, Train Accuracy = 0.2767, Val Accuracy = 0.2192
Epoch 9: Train Loss = 4.1507, Train Accuracy = 0.2774, Val Accuracy = 0.2205
Epoch 10: Train Loss = 4.1439, Train Accuracy = 0.2780, Val Accuracy = 0.2257
Epoch 11: Train Loss = 4.1381, Train Accuracy = 0.2786, Val Accuracy = 0.2291
Epoch 12: Train Loss = 4.1330, Train Accuracy = 0.2789, Val Accuracy = 0.2261
Epoch 13: Train Loss = 4.1288, Train A

In [7]:
### TRAINING AND TESTING - CNN ###
# Trains the CNN candidate for EPOCHS epochs with periodic progress logging,
# evaluates it on the test set, and saves it once training has finished.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

cnns = {'CNN_Seq2Seq_5': CNNSeq2Seq(len(eng_vocab), len(jpn_vocab), 15, kernel_size=3, num_channels=5)}
results = {}  # model name -> (test loss, test accuracy); reset, so this holds CNN results only
EPOCHS = 50
save_dir = "saved"  # loop-invariant, so it is bound once before training starts

# The loss ignores <PAD> targets; it holds no per-model state, so one instance is shared.
criterion = nn.CrossEntropyLoss(ignore_index=SPECIAL_TOKENS['<PAD>'])

# evaluate_model
for name, model in cnns.items():
    model.to(device)
    # Each model gets its own optimizer because the optimizer is bound to that model's parameters.
    optimizer = optim.Adam(model.parameters(), lr = 0.001, weight_decay = 1e-5)
    print(f"\nTraining {name}...")
    for epoch in range(EPOCHS):  # Adjust number of epochs
        train_loss, train_accuracy = train_model(model, train_loader, optimizer, criterion, device)
        val_loss, val_accuracy = evaluate_model(model, val_loader, criterion, device)
        # Log every 10th iteration (epochs 1, 11, 21, ...) and always the final
        # epoch, so the end-of-training metrics are never missing from the log.
        if epoch % 10 == 0 or epoch == EPOCHS - 1:
            print(f"Epoch {epoch + 1}: Train Loss = {train_loss:.4f}, Train Accuracy = {train_accuracy:.4f}, Val Accuracy = {val_accuracy:.4f}")
    test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)
    print(f"{name} Test Loss = {test_loss:.4f}, Test Accuracy = {test_accuracy:.4f}")
    results[name] = (test_loss, test_accuracy)
    # Save only the model that just finished training, so adding more entries to
    # `cnns` never writes untrained models to disk.
    save_model_and_params({name: model}, save_dir)


# Print comparison results
print("\nModel Comparison:")
for model_name, (loss, accuracy) in results.items():
    print(f"{model_name}: Test Loss = {loss:.4f}, Test Accuracy = {accuracy:.4f}")



Training CNN_Seq2Seq_5...
Epoch 1: Train Loss = 1.1313, Train Accuracy = 0.8305, Val Accuracy = 0.9184
Epoch 11: Train Loss = 0.3601, Train Accuracy = 0.9318, Val Accuracy = 0.9338
Epoch 21: Train Loss = 0.3060, Train Accuracy = 0.9393, Val Accuracy = 0.9362
Epoch 31: Train Loss = 0.2927, Train Accuracy = 0.9418, Val Accuracy = 0.9396
Epoch 41: Train Loss = 0.2875, Train Accuracy = 0.9430, Val Accuracy = 0.9397
Epoch 51: Train Loss = 0.2845, Train Accuracy = 0.9436, Val Accuracy = 0.9393
CNN_Seq2Seq_5 Test Loss = 0.2690, Test Accuracy = 0.9387
Saved CNN_Seq2Seq_5 to saved/CNN_Seq2Seq_5_params.json and saved/CNN_Seq2Seq_5_weights.pt.

Model Comparison:
CNN_Seq2Seq_5: Test Loss = 0.2690, Test Accuracy = 0.9387


---

In [None]:
# Load a specific model dynamically
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Bound here so this cell works on a fresh kernel without re-running a training
# cell first; it must match the directory the training cells save into.
save_dir = "saved"
model_name = "CNN_Seq2Seq_5"  # Example: Change to "Transformer_Seq2Seq_1"
loaded_model = load_model(model_name, save_dir, device)