In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.datasets as ds
import torchvision.transforms as transforms
import torch.optim as optim
import pickle
from sklearn.model_selection import train_test_split

import ffn

In [None]:
# Preprocessing shared by both EMNIST splits: convert each image to a PyTorch
# tensor, then flatten it.
flatten_pipeline = transforms.Compose([
    transforms.ToTensor(),
    transforms.Lambda(lambda image: torch.flatten(image)),
])

# Arguments common to the train/validation pool and the held-out test set.
emnist_options = dict(root='./data', split='letters', transform=flatten_pipeline, download=True)

# train=True is the pool that is later divided into training and validation data;
# train=False is the test split.
train_validation_dataset = ds.EMNIST(train=True, **emnist_options)
test_dataset = ds.EMNIST(train=False, **emnist_options)

In [None]:
from torch.utils.data import random_split

# Split configuration, named so the values are easy to find and tune.
VAL_FRACTION = 0.15   # share of the train/validation pool held out for validation
BATCH_SIZE = 1024     # batch size used by both loaders
SPLIT_SEED = 42       # fixes the split so results are comparable across kernel restarts

# Work out how many samples go to each side of the split.
num_total = len(train_validation_dataset)
num_val = int(num_total * VAL_FRACTION)
num_train = num_total - num_val

# Seed a dedicated generator: without it random_split draws from the global RNG
# and every fresh kernel gets a different validation set.
split_generator = torch.Generator().manual_seed(SPLIT_SEED)
train_dataset, val_dataset = random_split(
    train_validation_dataset, [num_train, num_val], generator=split_generator
)

# Training batches are reshuffled each epoch; validation order does not matter,
# so it is left unshuffled.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Hyper-parameter search: train one ffn.Model per (dropout rate, architecture,
# learning rate) combination, remember the run with the highest reported
# accuracy, and pickle that run's model and parameters.

# Candidate layer-size lists, passed to ffn.Model as `layer_sizes`.
model_architectures = [
    [784, 1024, 26],          # Model 1 architecture
    [784, 1024, 2048, 26],    # Model 2 architecture
]

# Learning rates tried for every architecture.
learning_rates = [0.005, 0.001, 0.0005]

# NOTE(review): this list was never read by the search; it is kept unchanged in
# case another cell uses it. The rates actually searched are listed below.
dropout_rates = [0.3, 0.4, 0.5]

# Dropout rates actually searched: the same two passes as before (0.3, then 0.0).
searched_dropout_rates = [0.3, 0.0]

# Running best across ALL runs. Previously these names were overwritten on every
# iteration, so the pickled "best" model was simply the last one trained.
best_model, best_params = None, None
best_accuracy = float("-inf")
best_config = None  # hyper-parameters of the winning run; None until a run finishes

for dropout_rate in searched_dropout_rates:
    for architecture in model_architectures:
        for lr in learning_rates:
            print(f"Training model with architecture {architecture} and learning rate {lr}")

            # Fresh model for this combination.
            model = ffn.Model(layer_sizes=architecture, dropout_rate=dropout_rate)

            # Each run starts from best_accuracy=0.0, as the first pass always
            # did; runs are compared here, in the notebook.
            # Assumes the third return value is this run's best validation
            # accuracy - TODO confirm against ffn.Model.train.
            run_model, run_params, run_accuracy = model.train(
                train_loader, val_loader, learning_rate=lr, epochs=10, best_accuracy=0.0
            )

            # Keep this run only if it beats everything seen so far.
            if run_accuracy > best_accuracy:
                best_model, best_params, best_accuracy = run_model, run_params, run_accuracy
                best_config = {
                    "architecture": architecture,
                    "learning_rate": lr,
                    "dropout_rate": dropout_rate,
                }

# An empty search grid would otherwise write meaningless pickles.
if best_config is None:
    raise RuntimeError("Hyper-parameter search produced no runs; nothing to save.")

print(f"Best configuration: {best_config} (accuracy {best_accuracy})")

with open('best_model.pkl', 'wb') as f:
    pickle.dump(best_model, f)

with open('best_params.pkl', 'wb') as f:
    pickle.dump(best_params, f)