Imports

In [1]:
import torch
import torch.nn as nn
from torchvision import datasets
import torch.nn.functional as f
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Dataset

Now we define the architectures of the fully connected (MLP) networks.

In [2]:
class shallowNet(nn.Module):
  """Fully connected classifier with a single hidden layer.

  With the default arguments the layout is:
    784 inputs -> 128 hidden units (ReLU, then dropout) -> 10 output scores.

  Args:
    input_features: size of each flattened input sample.
    h1: width of the hidden layer.
    output_features: number of output scores (one per class).
    dropout_rate: drop probability applied to the hidden activations.
  """

  def __init__(self, input_features=784, h1=128, output_features=10, dropout_rate=0):
    super().__init__()
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(in_features=input_features, out_features=h1)
    self.out = nn.Linear(in_features=h1, out_features=output_features)
    self.dropout = nn.Dropout(p=dropout_rate)

  def forward(self, x):
    """Flatten the batch, run the hidden layer, and return raw class scores."""
    hidden = f.relu(self.fc1(self.flatten(x)))
    return self.out(self.dropout(hidden))

In [3]:
class mediumNet(nn.Module):
  '''
  Fully connected classifier with three hidden layers.

  Default layout:
    Input layer (784 nodes)
    1st hidden layer: 512 nodes
    2nd hidden layer: 256 nodes
    3rd hidden layer: 128 nodes
    Output layer (10 nodes)

  Every hidden layer is followed by ReLU and then by the shared dropout
  module; the output layer returns raw class scores.
  '''

  def __init__(self, input_features=784, h1=512, h2=256, h3=128, output_features=10, dropout_rate=0):
    super().__init__()
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(in_features=input_features, out_features=h1)
    self.fc2 = nn.Linear(in_features=h1, out_features=h2)
    self.fc3 = nn.Linear(in_features=h2, out_features=h3)
    self.out = nn.Linear(in_features=h3, out_features=output_features)
    self.dropout = nn.Dropout(p=dropout_rate)

  def forward(self, x):
    '''Flatten the batch, run the three hidden layers in order, return class scores.'''
    activations = self.flatten(x)
    for hidden_layer in (self.fc1, self.fc2, self.fc3):
      activations = self.dropout(f.relu(hidden_layer(activations)))
    return self.out(activations)

In [4]:
class deepNet(nn.Module):
  '''
  Fully connected classifier with five hidden layers.

  Default layout:
    Input layer (784 nodes)
    Hidden layers: 1024 -> 512 -> 256 -> 512 -> 128 nodes
    Output layer (10 nodes)

  Every hidden layer is followed by ReLU and then by the shared dropout
  module; the output layer returns raw class scores.
  '''

  def __init__(self, input_features=784, h1=1024, h2=512, h3=256, h4=512, h5=128, output_features=10, dropout_rate=0):
    super().__init__()
    self.flatten = nn.Flatten()
    self.fc1 = nn.Linear(in_features=input_features, out_features=h1)
    self.fc2 = nn.Linear(in_features=h1, out_features=h2)
    self.fc3 = nn.Linear(in_features=h2, out_features=h3)
    self.fc4 = nn.Linear(in_features=h3, out_features=h4)
    self.fc5 = nn.Linear(in_features=h4, out_features=h5)
    self.out = nn.Linear(in_features=h5, out_features=output_features)
    self.dropout = nn.Dropout(p=dropout_rate)

  def forward(self, x):
    '''Flatten the batch, run the five hidden layers in order, return class scores.'''
    activations = self.flatten(x)
    for hidden_layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
      activations = self.dropout(f.relu(hidden_layer(activations)))
    return self.out(activations)

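Now we define the architectures of the CNNs. The sizes listed below are the flattened feature counts (channels * height * width) fed into each model's final linear layer.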

**Baseline**:
 Cifar -> 64 * 16 * 16
 MNIST -> 64 * 14 * 14

**Enhanced**:
cifar -> 64 * 16 * 16
MNIST -> 64 * 14 * 14

**Deep**
cifar -> 256 * 16 * 16
MNIST -> 256 * 14 * 14

In [5]:
class baseline_CNN(nn.Module):
  '''
  Baseline CNN: two 3x3 convolutions (32 then 64 channels, each followed by
  ReLU), one 2x2 max-pool, and a single linear layer producing 10 class scores.

  Args:
    input_channels: number of channels of the input images.
    image_size: height/width of the (square) input images. When left as None
      it is inferred from the channel count: 28 for single-channel input
      (MNIST) and 32 otherwise (CIFAR-10).
  '''
  def __init__(self, input_channels=1, image_size=None):
    super().__init__()
    if image_size is None:
      image_size = 28 if input_channels == 1 else 32
    self.conv1 = nn.Conv2d(in_channels=input_channels, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.pool = nn.MaxPool2d(kernel_size=2)
    # The padded 3x3 convolutions keep the spatial size; the single 2x2 pool
    # halves it (rounding down), giving 64 * 14 * 14 for MNIST and
    # 64 * 16 * 16 for CIFAR-10.
    pooled_size = image_size // 2
    self.fc = nn.Linear(64 * pooled_size * pooled_size, 10)
    self.flatten = nn.Flatten()

  def forward(self, x):
    '''Return raw class scores for a batch of images.'''
    x = f.relu(self.conv1(x))
    x = f.relu(self.conv2(x))
    x = self.pool(x)
    x = self.flatten(x)
    return self.fc(x)

In [6]:
class enhanced_CNN(nn.Module):
  '''
  Baseline CNN layout with batch normalisation after each convolution and
  dropout in front of the linear classifier.

  Args:
    dropout_rate: drop probability applied to the flattened features.
    input_channels: number of channels of the input images.
    image_size: height/width of the (square) input images. When left as None
      it is inferred from the channel count: 28 for single-channel input
      (MNIST) and 32 otherwise (CIFAR-10).
  '''
  def __init__(self, dropout_rate=0.2, input_channels=1, image_size=None):
    super().__init__()
    if image_size is None:
      image_size = 28 if input_channels == 1 else 32
    # convolutional block: conv -> batch norm (-> ReLU in forward), twice, then one pool
    self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
    self.batchnorm1 = nn.BatchNorm2d(32)
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.batchnorm2 = nn.BatchNorm2d(64)
    self.pool = nn.MaxPool2d(kernel_size=2)
    # fully connected block: the padded convolutions keep the spatial size and
    # the single 2x2 pool halves it (rounding down).
    pooled_size = image_size // 2
    self.fc1 = nn.Linear(64 * pooled_size * pooled_size, 10)
    self.dropout = nn.Dropout(dropout_rate)
    self.flatten = nn.Flatten()

  def forward(self, x):
    '''Return raw class scores for a batch of images.'''
    x = f.relu(self.batchnorm1(self.conv1(x)))
    x = f.relu(self.batchnorm2(self.conv2(x)))
    x = self.pool(x)
    x = self.flatten(x)
    x = self.dropout(x)
    return self.fc1(x)

In [7]:
class deep_CNN(nn.Module):
  '''
  Deeper CNN: four 3x3 convolutions (32, 64, 128, 256 channels), each followed
  by batch normalisation and ReLU, with a 2x2 max-pool after the second
  convolution, then dropout and a linear layer producing 10 class scores.

  Args:
    dropout_rate: drop probability applied to the flattened features.
    input_channels: number of channels of the input images.
    image_size: height/width of the (square) input images. When left as None
      it is inferred from the channel count: 28 for single-channel input
      (MNIST) and 32 otherwise (CIFAR-10).
    apply_second_pool: when True, `pool2` is applied after the fourth
      convolution and the classifier is sized for the smaller feature map.
      The default (False) leaves `pool2` unused, so the classifier sees
      256 * 14 * 14 features for MNIST and 256 * 16 * 16 for CIFAR-10.
  '''
  def __init__(self, dropout_rate=0.2, input_channels=1, image_size=None, apply_second_pool=False):
    super().__init__()
    if image_size is None:
      image_size = 28 if input_channels == 1 else 32
    self.apply_second_pool = bool(apply_second_pool)
    # first conv layer
    self.conv1 = nn.Conv2d(input_channels, 32, kernel_size=3, stride=1, padding=1)
    self.batchNorm1 = nn.BatchNorm2d(32)
    # second conv layer
    self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.batchNorm2 = nn.BatchNorm2d(64)
    # first pooling stage (always applied)
    self.pool1 = nn.MaxPool2d(kernel_size=2)
    # third conv layer
    self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
    self.batchNorm3 = nn.BatchNorm2d(128)
    # fourth conv layer
    self.conv4 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
    self.batchNorm4 = nn.BatchNorm2d(256)
    # second pooling stage; only used in forward() when apply_second_pool is True
    self.pool2 = nn.MaxPool2d(kernel_size=2)
    self.flatten = nn.Flatten()
    self.dropout = nn.Dropout(dropout_rate)
    # The padded convolutions keep the spatial size; each applied 2x2 pool
    # halves it (rounding down).
    feature_size = image_size // 2
    if self.apply_second_pool:
      feature_size = feature_size // 2
    self.fc1 = nn.Linear(256 * feature_size * feature_size, 10)

  def forward(self, x):
    '''Return raw class scores for a batch of images.'''
    # 1st and 2nd convolution, then the first pool
    x = f.relu(self.batchNorm1(self.conv1(x)))
    x = f.relu(self.batchNorm2(self.conv2(x)))
    x = self.pool1(x)
    # 3rd and 4th convolution
    x = f.relu(self.batchNorm3(self.conv3(x)))
    x = f.relu(self.batchNorm4(self.conv4(x)))
    if self.apply_second_pool:
      x = self.pool2(x)
    # dropout on the flattened features, then the linear classifier
    x = self.flatten(x)
    x = self.dropout(x)
    return self.fc1(x)


Here we load the data.

In [8]:
# Fetch the official train/test splits (downloaded into ./data when missing);
# ToTensor converts each image to a tensor.
mnist_train = datasets.MNIST(
    root='./data', train=True, download=True, transform=transforms.ToTensor()
)
mnist_test = datasets.MNIST(
    root='./data', train=False, download=True, transform=transforms.ToTensor()
)
cifar_train = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transforms.ToTensor()
)
cifar_test = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transforms.ToTensor()
)

# Carve a validation subset out of each training split. The generators are
# seeded so the same samples are held out on every run.
mnist_train, mnist_val = torch.utils.data.random_split(
    mnist_train,
    [50000, 10000],
    generator=torch.Generator().manual_seed(42),
)
cifar_train, cifar_val = torch.utils.data.random_split(
    cifar_train,
    [45000, 5000],
    generator=torch.Generator().manual_seed(42),
)

100%|██████████| 9.91M/9.91M [00:00<00:00, 12.6MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 334kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 3.20MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 12.3MB/s]
100%|██████████| 170M/170M [00:05<00:00, 29.7MB/s]


**Experiment**

This function trains the specified model on the specified dataset with the given hyperparameters and returns the trained model together with its validation accuracy.

In [9]:
#actual Experiment
#import matplotlib.pyplot as plt #plot for debugging
from torch.utils.data import DataLoader #to load in and iterate through the data

def experiment(model_type, data_set, learning_rate, batch_size, optimizer, dropout_rate, epochs=10):
  """Train one model on one dataset and measure its validation accuracy.

  Args:
    model_type: 'shallow', 'medium' or 'deep' for the MLPs; 'baseline',
      'enhanced' or 'deep_cnn' for the CNNs.
    data_set: 'mnist' or 'cifar'; selects the module-level train/validation splits.
    learning_rate: learning rate handed to the optimizer.
    batch_size: batch size for both the training and the validation loader.
    optimizer: 'adam' selects torch.optim.Adam; any other value selects
      torch.optim.SGD.
    dropout_rate: dropout probability forwarded to the model. The 'baseline'
      CNN takes no dropout argument, so the value is ignored for it.
    epochs: number of passes over the training split (default 10).

  Returns:
    A two-element list [model, acc]: the trained model (left in eval mode on
    the selected device) and its validation accuracy as a percentage.

  Raises:
    ValueError: if `data_set` or `model_type` is not one of the known names.
  """
  # Fail fast with a clear message instead of a NameError further down.
  if data_set not in ('mnist', 'cifar'):
    raise ValueError(f"Unknown data_set {data_set!r}; expected 'mnist' or 'cifar'")
  known_models = ('shallow', 'medium', 'deep', 'baseline', 'enhanced', 'deep_cnn')
  if model_type not in known_models:
    raise ValueError(f"Unknown model_type {model_type!r}; expected one of {known_models}")

  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

  # Data splits plus the input geometry the models need
  if data_set == 'mnist':
    train_split, val_split = mnist_train, mnist_val
    inputs, channels = 784, 1
  else:
    train_split, val_split = cifar_train, cifar_val
    inputs, channels = 3072, 3
  # Each loader gets its own seeded generator, so the shuffle order is the
  # same on every call.
  train_loader = DataLoader(train_split, batch_size=batch_size, shuffle=True, generator=torch.Generator().manual_seed(42))
  val_loader = DataLoader(val_split, batch_size=batch_size, shuffle=False, generator=torch.Generator().manual_seed(42))

  # Model
  if model_type == 'shallow':
    model = shallowNet(dropout_rate=dropout_rate, input_features=inputs).to(device)
  elif model_type == 'medium':
    model = mediumNet(dropout_rate=dropout_rate, input_features=inputs).to(device)
  elif model_type == 'deep':
    model = deepNet(dropout_rate=dropout_rate, input_features=inputs).to(device)
  elif model_type == 'baseline':
    model = baseline_CNN(input_channels=channels).to(device)
  elif model_type == 'enhanced':
    model = enhanced_CNN(dropout_rate=dropout_rate, input_channels=channels).to(device)
  else:  # 'deep_cnn'
    model = deep_CNN(dropout_rate=dropout_rate, input_channels=channels).to(device)

  # Loss and optimizer. A separate local name keeps the `optimizer` argument
  # (a string) distinct from the optimizer object.
  criterion = nn.CrossEntropyLoss()
  if optimizer == 'adam':
    opt = torch.optim.Adam(model.parameters(), lr=learning_rate)
  else:
    opt = torch.optim.SGD(model.parameters(), lr=learning_rate)

  # Training
  for epoch in range(epochs):
    model.train()  # enable dropout / batch-norm statistics updates
    for x, y in train_loader:
      x, y = x.to(device), y.to(device)
      y_pred = model(x)            # forward pass (via __call__ so hooks run)
      loss = criterion(y_pred, y)  # evaluate the loss function
      opt.zero_grad()
      loss.backward()              # backpropagation: compute gradients
      opt.step()                   # parameter update

  # Validation
  model.eval()
  total_predictions = 0
  correct_predictions = 0
  with torch.no_grad():
    for x, y in val_loader:
      x, y = x.to(device), y.to(device)
      prediction = torch.argmax(model(x), dim=1)  # predicted class per sample
      # Count matches on the tensor instead of comparing element by element in Python.
      correct_predictions += (prediction == y).sum().item()
      total_predictions += y.size(0)
  acc = 100.0 * (correct_predictions / total_predictions)
  return [model, acc]


**run_experiments**

This method does hyperparameter tuning for a specific model type on a specific dataset and returns the best model with its accuracy as well as the runtime for that specific model

**NOTE**

The runtime returned is for the specific model that is best, not the overall runtime of all the models trained

In [14]:
import time
def run_experiments(model_type, data_set):
  """Grid-search hyperparameters for one architecture on one dataset.

  Every combination of the learning rates, batch sizes, optimizers and
  dropout rates listed below is trained once via `experiment`.

  Args:
    model_type: architecture name forwarded to `experiment`.
    data_set: dataset name forwarded to `experiment`.

  Returns:
    (best_candidate, best_accuracy, best_config, best_runtime): the model
    with the highest validation accuracy (ties keep the earliest one), that
    accuracy, the hyperparameters that produced it, and the time in minutes
    spent training and validating that single model (not the whole search).
  """
  lr = [0.001]
  batch_size = [32, 64, 128]
  optimizer = ['adam']
  dropout_rate = [0.0, 0.2, 0.5]

  best_candidate = None
  best_accuracy = 0.0
  best_config = None
  best_runtime = None

  for l in lr:
    for b in batch_size:
      for o in optimizer:
        for d in dropout_rate:
          # perf_counter is a monotonic clock intended for measuring intervals.
          start_time = time.perf_counter()
          candidate, candidate_acc = experiment(model_type, data_set, l, b, o, d)
          elapsed_minutes = (time.perf_counter() - start_time) / 60.0
          # Always accept the first candidate so a result is returned even
          # when every accuracy is 0.0; afterwards require strict improvement.
          if best_candidate is None or candidate_acc > best_accuracy:
            best_candidate = candidate
            best_accuracy = candidate_acc
            best_config = {'lr': l, 'batch_size': b, 'optimizer': o, 'dropout_rate': d}
            best_runtime = elapsed_minutes
  return best_candidate, best_accuracy, best_config, best_runtime
#==================================================================================================================

Script to run all MLP architectures on MNIST

In [11]:
# Tune each MLP architecture on MNIST, keep the one with the highest
# validation accuracy, and report its accuracy on the held-out test set.
best_candidate = None
best_accuracy = 0.0
best_config = None

# finding hyperparameters for the shallow architecture (seeds the running best)
print("Working on Shallow Architecture... ")
shallow_candidate, shallow_accuracy, shallow_config, shallow_runtime = run_experiments('shallow', 'mnist')
print("Best candidate: ", shallow_config)
print("Validation Accuracy: ", shallow_accuracy)
print("Runtime: ", shallow_runtime, "Min")
best_accuracy = shallow_accuracy
best_candidate = shallow_candidate
best_config = shallow_config

# tuning hyperparameters for the medium architecture
print("Working on Medium Architecture... ")
medium_candidate, medium_accuracy, medium_config, medium_runtime = run_experiments('medium', 'mnist')
print("Best Candidate: ", medium_config)
print("Validation Accuracy: ", medium_accuracy)
print("Runtime: ", medium_runtime, "Min")
if medium_accuracy > best_accuracy:
  best_candidate = medium_candidate
  best_accuracy = medium_accuracy
  best_config = medium_config

# tuning hyperparameters for the deep architecture
print("Working on Deep Architecture...")
deep_candidate, deep_accuracy, deep_config, deep_runtime = run_experiments('deep', 'mnist')
print("Best Deep Candidate: ", deep_config)
print("Validation Accuracy: ", deep_accuracy)
print('Runtime: ', deep_runtime,   'Min')
if deep_accuracy > best_accuracy:
  best_candidate = deep_candidate
  best_accuracy = deep_accuracy
  best_config = deep_config

print("Best candidate: ", best_config)

print("Testing on the test Set: ")
# Evaluate in batches: DataLoader's default batch size is 1, which would run
# one forward pass per test image.
test_loader = DataLoader(mnist_test, batch_size=256, shuffle=False, generator=torch.Generator().manual_seed(42))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_candidate.to(device)
best_candidate.eval()  # evaluation mode
total_predictions = 0
correct_predictions = 0
with torch.no_grad():
  for x, y in test_loader:
    x, y = x.to(device), y.to(device)
    y_pred = best_candidate(x)
    prediction = torch.argmax(y_pred, dim=1)  # prediction is the largest output
    # Count matches on the tensor instead of comparing element by element in Python.
    correct_predictions += (prediction == y).sum().item()
    total_predictions += y.size(0)
acc = (100.0 * (correct_predictions/total_predictions))
print("Test Accuracy on Final MLP Model: ", acc)

Working on Shallow Architecture... 
Best candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.0}
Validation Accuracy:  97.61
Runtime:  1.2985139568646749 Min
Working on Medium Architecture... 
Best Candidate:  {'lr': 0.001, 'batch_size': 128, 'optimizer': 'adam', 'dropout_rate': 0.2}
Validation Accuracy:  97.87
Runtime:  1.0283886631329855 Min
Working on Deep Architecture...
Best Deep Candidate:  {'lr': 0.001, 'batch_size': 128, 'optimizer': 'adam', 'dropout_rate': 0.5}
Validation Accuracy:  97.87
Runtime:  1.0498501539230347 Min
Best candidate:  {'lr': 0.001, 'batch_size': 128, 'optimizer': 'adam', 'dropout_rate': 0.2}
Testing on the test Set: 
Test Accuracy on Final MLP Model:  98.0


Script to test MLP Architectures on CIFAR

In [12]:
# Tune each MLP architecture on CIFAR-10, keep the one with the highest
# validation accuracy, and report its accuracy on the held-out test set.
best_candidate = None
best_accuracy = 0.0
best_config = None

# finding hyperparameters for the shallow architecture (seeds the running best)
print("Working on Shallow Architecture... ")
shallow_candidate, shallow_accuracy, shallow_config, shallow_runtime = run_experiments('shallow', 'cifar')
print("Best candidate: ", shallow_config)
print("Validation Accuracy: ", shallow_accuracy)
print("Runtime: ", shallow_runtime, " Min")
best_accuracy = shallow_accuracy
best_candidate = shallow_candidate
best_config = shallow_config

# tuning hyperparameters for the medium architecture
print("Working on Medium Architecture... ")
medium_candidate, medium_accuracy, medium_config, medium_runtime = run_experiments('medium', 'cifar')
print("Best Candidate: ", medium_config)
print("Validation Accuracy: ", medium_accuracy)
print("Runtime: ", medium_runtime, " Min")
if medium_accuracy > best_accuracy:
  best_candidate = medium_candidate
  best_accuracy = medium_accuracy
  best_config = medium_config

# tuning hyperparameters for the deep architecture
print("Working on Deep Architecture...")
deep_candidate, deep_accuracy, deep_config, deep_runtime = run_experiments('deep', 'cifar')
print("Best Candidate: ", deep_config)
print("Validation Accuracy: ", deep_accuracy)
print('Runtime: ', deep_runtime,   ' Min')
if deep_accuracy > best_accuracy:
  best_candidate = deep_candidate
  best_accuracy = deep_accuracy
  best_config = deep_config

print("Best candidate: ", best_config)

print("Testing on the test Set: ")
# Evaluate in batches: DataLoader's default batch size is 1, which would run
# one forward pass per test image.
test_loader = DataLoader(cifar_test, batch_size=256, shuffle=False, generator=torch.Generator().manual_seed(42))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_candidate.to(device)
best_candidate.eval()  # evaluation mode
total_predictions = 0
correct_predictions = 0
with torch.no_grad():
  for x, y in test_loader:
    x, y = x.to(device), y.to(device)
    y_pred = best_candidate(x)
    prediction = torch.argmax(y_pred, dim=1)  # prediction is the largest output
    # Count matches on the tensor instead of comparing element by element in Python.
    correct_predictions += (prediction == y).sum().item()
    total_predictions += y.size(0)
acc = (100.0 * (correct_predictions/total_predictions))
print("Test Accuracy on Final MLP Model: ", acc)

Working on Shallow Architecture... 
Best candidate:  {'lr': 0.001, 'batch_size': 64, 'optimizer': 'adam', 'dropout_rate': 0.0}
Validation Accuracy:  46.339999999999996
Runtime:  1.1633050839106243  Min
Working on Medium Architecture... 
Best Candidate:  {'lr': 0.001, 'batch_size': 128, 'optimizer': 'adam', 'dropout_rate': 0.0}
Validation Accuracy:  49.120000000000005
Runtime:  1.0917295734087626  Min
Working on Deep Architecture...
Best Candidate:  {'lr': 0.001, 'batch_size': 64, 'optimizer': 'adam', 'dropout_rate': 0.0}
Validation Accuracy:  48.14
Runtime:  1.2163305242856344  Min
Best candidate:  {'lr': 0.001, 'batch_size': 128, 'optimizer': 'adam', 'dropout_rate': 0.0}
Testing on the test Set: 
Test Accuracy on Final MLP Model:  50.33


CNN Architectures for MNIST

In [15]:
# Tune each CNN architecture on MNIST, keep the one with the highest
# validation accuracy, and report its accuracy on the held-out test set.
best_candidate = None
best_accuracy = 0.0
best_config = None

# finding hyperparameters for the baseline architecture (seeds the running best)
print("Working on Baseline Architecture... ")
Baseline_candidate, Baseline_accuracy, Baseline_config, Baseline_runtime = run_experiments('baseline', 'mnist')
print("Best candidate: ", Baseline_config)
print("Validation Accuracy: ", Baseline_accuracy)
print("Runtime: ", Baseline_runtime, "Min")
best_accuracy = Baseline_accuracy
best_candidate = Baseline_candidate
best_config = Baseline_config

# tuning hyperparameters for the enhanced architecture
print("Working on Enhanced Architecture... ")
enhanced_candidate, enhanced_accuracy, enhanced_config, enhanced_runtime = run_experiments('enhanced', 'mnist')
print("Runtime: ", enhanced_runtime, " Min")
print("Validation Accuracy:" , enhanced_accuracy)
print("Best Candidate: ", enhanced_config)
if enhanced_accuracy > best_accuracy:
  best_candidate = enhanced_candidate
  best_accuracy = enhanced_accuracy
  best_config = enhanced_config

# tuning hyperparameters for the deep architecture
print("Working on Deep Architecture...")
deep_candidate, deep_accuracy, deep_config, deep_runtime = run_experiments('deep_cnn', 'mnist')
print("Best Deep Candidate: ", deep_config)
print("Validation Accuracy: ", deep_accuracy)
print('Runtime: ', deep_runtime,   ' Min')
if deep_accuracy > best_accuracy:
  best_candidate = deep_candidate
  best_accuracy = deep_accuracy
  best_config = deep_config

print("Best candidate: ", best_config)

print("Testing on the test Set: ")
# Evaluate in batches: DataLoader's default batch size is 1, which would run
# one forward pass per test image.
test_loader = DataLoader(mnist_test, batch_size=256, shuffle=False, generator=torch.Generator().manual_seed(42))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_candidate.to(device)
best_candidate.eval()  # evaluation mode
total_predictions = 0
correct_predictions = 0
with torch.no_grad():
  for x, y in test_loader:
    x, y = x.to(device), y.to(device)
    y_pred = best_candidate(x)
    prediction = torch.argmax(y_pred, dim=1)  # prediction is the largest output
    # Count matches on the tensor instead of comparing element by element in Python.
    correct_predictions += (prediction == y).sum().item()
    total_predictions += y.size(0)
acc = (100.0 * (correct_predictions/total_predictions))
# The models compared in this cell are CNNs, so label the result accordingly.
print("Test Accuracy on Final CNN Model: ", acc)

Working on Baseline Architecture... 
Best candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.2}
Validation Accuracy:  98.65
Runtime:  1.4425546646118164 Min
Working on Enhanced Architecture... 
Runtime:  1.5719817479451497  Min
Validation Accuracy: 98.78
Best Candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.2}
Working on Deep Architecture...
Best Deep Candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.5}
Validation Accuracy:  99.19
Runtime:  1.7923338770866395  Min
Best candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.5}
Testing on the test Set: 
Test Accuracy on Final MLP Model:  99.31


CNN Architectures for CIFAR Dataset

In [16]:
# Tune each CNN architecture on CIFAR-10, keep the one with the highest
# validation accuracy, and report its accuracy on the held-out test set.
best_candidate = None
best_accuracy = 0.0
best_config = None

# finding hyperparameters for the baseline architecture (seeds the running best)
print("Working on Baseline Architecture... ")
Baseline_candidate, Baseline_accuracy, Baseline_config, Baseline_runtime = run_experiments('baseline', 'cifar')
print("Best candidate: ", Baseline_config)
print("Validation Accuracy: ", Baseline_accuracy)
print("Runtime: ", Baseline_runtime, "Min")
best_accuracy = Baseline_accuracy
best_candidate = Baseline_candidate
best_config = Baseline_config

# tuning hyperparameters for the enhanced architecture
print("Working on Enhanced Architecture... ")
enhanced_candidate, enhanced_accuracy, enhanced_config, enhanced_runtime = run_experiments('enhanced', 'cifar')
print("Validation Accuracy:" , enhanced_accuracy)
print("Best Candidate: ", enhanced_config)
print("Runtime: ", enhanced_runtime, " Min")
if enhanced_accuracy > best_accuracy:
  best_candidate = enhanced_candidate
  best_accuracy = enhanced_accuracy
  best_config = enhanced_config

# tuning hyperparameters for the deep architecture
print("Working on Deep Architecture...")
deep_candidate, deep_accuracy, deep_config, deep_runtime = run_experiments('deep_cnn', 'cifar')
print("Best Deep Candidate: ", deep_config)
print("Validation Accuracy: ", deep_accuracy)
print('Runtime: ', deep_runtime,   ' Min')
if deep_accuracy > best_accuracy:
  best_candidate = deep_candidate
  best_accuracy = deep_accuracy
  best_config = deep_config

print("Best candidate: ", best_config)

print("Testing on the test Set: ")
# Evaluate in batches: DataLoader's default batch size is 1, which would run
# one forward pass per test image.
test_loader = DataLoader(cifar_test, batch_size=256, shuffle=False, generator=torch.Generator().manual_seed(42))
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
best_candidate.to(device)
best_candidate.eval()  # evaluation mode
total_predictions = 0
correct_predictions = 0
with torch.no_grad():
  for x, y in test_loader:
    x, y = x.to(device), y.to(device)
    y_pred = best_candidate(x)
    prediction = torch.argmax(y_pred, dim=1)  # prediction is the largest output
    # Count matches on the tensor instead of comparing element by element in Python.
    correct_predictions += (prediction == y).sum().item()
    total_predictions += y.size(0)
acc = (100.0 * (correct_predictions/total_predictions))
# The models compared in this cell are CNNs, so label the result accordingly.
print("Test Accuracy on Final CNN Model: ", acc)

Working on Baseline Architecture... 
Best candidate:  {'lr': 0.001, 'batch_size': 64, 'optimizer': 'adam', 'dropout_rate': 0.0}
Validation Accuracy:  67.9
Runtime:  1.2014027078946432 Min
Working on Enhanced Architecture... 
Validation Accuracy: 69.92
Best Candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.2}
Runtime:  1.541283110777537  Min
Working on Deep Architecture...
Best Deep Candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.5}
Validation Accuracy:  80.0
Runtime:  1.7887909531593322  Min
Best candidate:  {'lr': 0.001, 'batch_size': 32, 'optimizer': 'adam', 'dropout_rate': 0.5}
Testing on the test Set: 
Test Accuracy on Final MLP Model:  79.34
