In [96]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import random_split
import csv 


Setup

In [97]:
# Default hyperparameters, used wherever a function is not given explicit values.
input_size = 28 * 28     # flattened length of one image (784)
hidden_size = 500        # width of the hidden layer
num_classes = 10         # number of output scores
num_epochs = 5
batch_size = 100
learning_rate = 0.001

# Run on the GPU when one is visible to torch, otherwise on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [98]:
# Fully connected neural network
class NeuralNet(nn.Module):
    """Two-layer perceptron: Linear -> ReLU -> Linear.

    The forward pass returns the raw output of the second linear layer;
    no softmax is applied here.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=num_classes)

    def forward(self, x):
        """Map inputs whose last dimension is `input_size` to `num_classes` scores."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [99]:

# MNIST train and test splits, both stored under ./data/ and both converted
# to tensors by the ToTensor transform. download=True is only set on the
# training split.
full_train_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
full_test_dataset = torchvision.datasets.MNIST(
    root='./data/',
    train=False,
    transform=transforms.ToTensor(),
)

Methods

In [100]:
def load_data_set(batch_size=batch_size, train_subset_size=6000, test_subset_size=6000, seed=None,
                  validation=False, validation_size=1000):
    """Build DataLoaders over the leading samples of the MNIST train/test datasets.

    Args:
        batch_size: batch size used by every returned loader.
        train_subset_size: number of leading samples taken from `full_train_dataset`.
        test_subset_size: number of leading samples taken from `full_test_dataset`.
        seed: when not None, passed to `torch.manual_seed` before anything else.
        validation: when True, `validation_size` randomly chosen samples of the
            train subset are held out and returned as a third loader.
        validation_size: size of the held-out validation set (only used when
            `validation` is True). With the default sizes this gives the
            5000/1000 split.

    Returns:
        `(train_loader, test_loader)`, or
        `(train_loader, test_loader, validation_loader)` when `validation` is True.
        The train and validation loaders shuffle; the test loader does not.

    Raises:
        ValueError: if a subset is larger than its dataset, or if
            `validation_size` does not leave at least one training sample.
    """
    if seed is not None:
        torch.manual_seed(seed)

    # Fail early instead of with an IndexError in the middle of an epoch.
    if train_subset_size > len(full_train_dataset):
        raise ValueError(
            f"train_subset_size={train_subset_size} exceeds the {len(full_train_dataset)} available training samples")
    if test_subset_size > len(full_test_dataset):
        raise ValueError(
            f"test_subset_size={test_subset_size} exceeds the {len(full_test_dataset)} available test samples")

    # Create subsets of train and test datasets
    train_subset = torch.utils.data.Subset(full_train_dataset, range(train_subset_size))
    test_subset = torch.utils.data.Subset(full_test_dataset, range(test_subset_size))

    test_loader = torch.utils.data.DataLoader(dataset=test_subset,
                                              batch_size=batch_size,
                                              shuffle=False)

    if not validation:
        train_loader = torch.utils.data.DataLoader(dataset=train_subset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        return train_loader, test_loader

    if not 0 < validation_size < train_subset_size:
        raise ValueError(
            f"validation_size must be between 1 and {train_subset_size - 1}, got {validation_size}")

    # The split sizes are derived from the arguments so that any
    # train_subset_size works, not only 6000.
    train_dataset, val_dataset = random_split(
        train_subset, [train_subset_size - validation_size, validation_size])
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True)
    validation_loader = torch.utils.data.DataLoader(dataset=val_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=True)
    return train_loader, test_loader, validation_loader

In [101]:
def _run_epoch(model, loader, criterion, model_device, optimizer=None):
    """Make one pass over `loader` and return the mean loss per sample.

    When `optimizer` is given the model is put in training mode and updated
    after every batch; otherwise it is put in eval mode and gradients are
    disabled.
    """
    training = optimizer is not None
    model.train(training)

    loss_sum = 0.0
    sample_count = 0
    with torch.set_grad_enabled(training):
        for images, labels in loader:
            # Flatten every sample to a single vector before the forward pass.
            images = images.reshape(images.size(0), -1).to(model_device)
            labels = labels.to(model_device)

            loss = criterion(model(images), labels)

            if training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight by batch size so a short final batch is not over-counted.
            batch_samples = labels.size(0)
            loss_sum += loss.item() * batch_samples
            sample_count += batch_samples

    if sample_count == 0:
        raise ValueError("loader yielded no samples; cannot compute a mean loss")
    return loss_sum / sample_count


def create_model_and_train(model, train_loader, test_loader, val_loader, num_epochs=5, lr=None):
    """Train `model` with Adam and cross-entropy, recording losses after every epoch.

    Despite its name this function does not create the model; it trains the
    one it is given, in place.

    Args:
        model: the module to train. Batches are moved to the device of its
            first parameter.
        train_loader: iterable of `(images, labels)` batches used for training.
        test_loader: iterable of `(images, labels)` batches, evaluated after
            every epoch.
        val_loader: iterable of `(images, labels)` batches, evaluated after
            every epoch.
        num_epochs: number of passes over `train_loader`.
        lr: learning rate for Adam. When None, the module-level
            `learning_rate` is used.

    Returns:
        Three numpy arrays of length `num_epochs`, in this order:
        training loss, test loss, validation loss. Each entry is the mean
        loss per sample for that epoch.

    Raises:
        ValueError: if any of the loaders yields no samples.
    """
    if lr is None:
        lr = learning_rate

    model_device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Remember the caller's train/eval mode so it can be restored afterwards.
    was_training = model.training

    train_loss_per_epoch = []
    test_loss_per_epoch = []
    val_loss_per_epoch = []
    for epoch in range(num_epochs):
        train_loss_per_epoch.append(
            _run_epoch(model, train_loader, criterion, model_device, optimizer))
        val_loss_per_epoch.append(
            _run_epoch(model, val_loader, criterion, model_device))
        test_loss_per_epoch.append(
            _run_epoch(model, test_loader, criterion, model_device))

    model.train(was_training)

    return (np.array(train_loss_per_epoch),
            np.array(test_loss_per_epoch),
            np.array(val_loss_per_epoch))

Ex4: Grid search over batch size, hidden size and learning rate

In [102]:
def grid_search(hidden_sizes=[hidden_size], batch_sizes=[batch_size], learning_rates=[learning_rate], num_epochs=5):
    """Train one `NeuralNet` for every combination of the given hyperparameters.

    The data loaders are built once per batch size and shared by every
    hidden size / learning rate trained with that batch size.

    Args:
        hidden_sizes: hidden-layer widths to try.
        batch_sizes: batch sizes to try.
        learning_rates: learning rates to try.
        num_epochs: number of epochs each model is trained for.

    Returns:
        A dict mapping `(batch_size, hidden_size, learning_rate)` to the pair
        `(test_loss_per_epoch, val_loss_per_epoch)` returned for that run.
    """
    # create_model_and_train reads the module-level `learning_rate` when it
    # builds its optimizer, so that global is rebound for each trial and put
    # back afterwards, even if a trial raises.
    global learning_rate
    original_learning_rate = learning_rate

    parameters_loss = {}
    try:
        for b_size in batch_sizes:
            train_loader, test_loader, val_loader = load_data_set(batch_size=b_size, validation=True)

            for h_size in hidden_sizes:
                for l_rate in learning_rates:
                    learning_rate = l_rate
                    # Use the hidden size under test, not the global default.
                    model = NeuralNet(input_size, h_size, num_classes).to(device)
                    losses = create_model_and_train(model, train_loader, test_loader, val_loader,
                                                    num_epochs=num_epochs)
                    # Drop the training loss; keep (test, validation), in that order.
                    parameters_loss[(b_size, h_size, l_rate)] = losses[1:]
    finally:
        learning_rate = original_learning_rate

    return parameters_loss

In [103]:
# Candidate values for the grid search: 3 x 3 x 2 = 18 combinations.
hidden_sizes = [400, 500, 600]
batch_sizes = [100, 200, 300]
learning_rates = [1e-2, 1e-3]


In [None]:
# Run the full grid; every combination trains a fresh model, so this cell is slow.
parameters_loss = grid_search(
    hidden_sizes=hidden_sizes,
    batch_sizes=batch_sizes,
    learning_rates=learning_rates,
)

In [105]:
def min_validation_loss(test_loss, val_loss):
    """Return the test and validation loss at the epoch with the lowest validation loss.

    Args:
        test_loss: per-epoch test losses.
        val_loss: per-epoch validation losses, indexed like `test_loss`.

    Returns:
        `(test_loss[i], val_loss[i])` where `i` is `np.argmin(val_loss)`.
    """
    best_epoch = np.argmin(val_loss)
    best_test, best_val = test_loss[best_epoch], val_loss[best_epoch]
    return best_test, best_val

In [106]:
# Column order of the report.
fields = ['Batch Size', 'Hidden Size', 'Learning Rate', 'Best validation error', 'Test error for same epoch']

# One row per hyperparameter combination, reporting the epoch with the
# lowest validation loss. Every value is stored as a string.
s_grid = []
for (b_size, h_size, l_rate), (test_loss, val_loss) in parameters_loss.items():
    test_loss, val_loss = min_validation_loss(test_loss, val_loss)
    row_values = (b_size, h_size, l_rate, val_loss, test_loss)
    s_grid.append(dict(zip(fields, map(str, row_values))))

# Write the header followed by all rows to the CSV file.
with open('search grid.csv', 'w', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fields)
    writer.writeheader()
    writer.writerows(s_grid)
    
    