In [66]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import random_split


In [67]:

# Setup device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hyperparameters
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size =     100
learning_rate = 0.001


# Define subset sizes
train_subset_size = 6000
test_subset_size = 6000


In [68]:

# Fully connected neural network
class NeuralNet(nn.Module):
    def __init__(self, input_size, hidden_size, num_classes):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size) 
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)  

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        return out

In [69]:

# MNIST dataset
full_train_dataset = torchvision.datasets.MNIST(root='./data/',
                                                train=True,
                                                transform=transforms.ToTensor(),
                                                download=True)
full_test_dataset = torchvision.datasets.MNIST(root='./data/',
                                               train=False,
                                               transform=transforms.ToTensor())


# Create subsets of train and test datasets
train_subset = torch.utils.data.Subset(full_train_dataset, range(train_subset_size))
test_subset = torch.utils.data.Subset(full_test_dataset, range(test_subset_size))

train_dataset, val_dataset = random_split(train_subset, [5000, 1000])


# Data loaders
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=full_test_dataset,
                                            batch_size=batch_size,
                                            shuffle=False)

validation_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)


In [None]:

images, labels = test_loader.dataset[0]


In [75]:
def train_and_val_by_seed(seeds, train_dataset=train_loader, val_dataset=validation_loader, test_dataset=test_loader):
    seeds_val_errors = []
    seeds_test_errors = []
    for seed in seeds:
        val_errors = []
        test_errors = []

        torch.manual_seed(seed)
        
        # Fully connected neural network
        model = NeuralNet(input_size, hidden_size, num_classes).to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        
        # Train the model
        for epoch in range(num_epochs):
            # Train
            for i, (images, labels) in enumerate(train_dataset):
                images = images.reshape(-1, input_size).to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            
            # Validate
            val_loss = 0
            with torch.no_grad():
                for images, labels in val_dataset:
                    images = images.reshape(-1, input_size).to(device)
                    labels = labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    val_loss += loss.item()

            val_loss /= len(val_dataset)
            val_errors.append(val_loss)

            
            # Test
            test_loss = 0
            with torch.no_grad():
                for images, labels in test_dataset:
                    images = images.reshape(-1, input_size).to(device)
                    labels = labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    test_loss += loss.item()

            test_loss /= len(test_loader)
            test_errors.append(test_loss)
        seeds_val_errors.append(val_errors)
        seeds_test_errors.append(test_errors)


    min_val = np.inf
    for i in range (len(seeds_val_errors)):
        for j in range(len(seeds_val_errors[i])):
            if seeds_val_errors[i][j] < min_val:
                min_val = seeds_val_errors[i][j]
                min_val_epoch = j
                min_val_seed = i

    min_pair = [min_val, seeds_test_errors[min_val_seed][min_val_epoch]]
    return min_pair







In [76]:
min_pair = train_and_val_by_seed([1,2,3,4,5])
print(f'minimum pair {min_pair}')

minimum pair [0.2073454052209854, 0.2533122509531677]
