# setup the model and load in the dataset


This part sets up the dataset

In [2]:
STUDENTID = 567     # this will be used for random states

In [3]:
import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
import logging
from utils import *
import torch.optim as optim

# Define data transformations
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert PIL Image to PyTorch Tensor
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize the data
])

# Download and load CIFAR-100 datasets
train_dataset = torchvision.datasets.CIFAR100(root='./data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR100(root='./data', train=False, transform=transform, download=True)


# Calculate the sizes for train, validation, and test sets
train_size = int(0.8 * len(train_dataset))
valid_size = len(train_dataset) - train_size

# Split the train dataset into train and validation sets using a random seed
train_dataset, valid_dataset = torch.utils.data.random_split(train_dataset, [train_size, valid_size], generator=torch.Generator().manual_seed(STUDENTID))

# Create data loaders for training, validation, and test sets
batch_size = 64
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Print dataset sizes
print("Number of training examples:", len(train_dataset))
print("Number of validation examples:", len(valid_dataset))
print("Number of test examples:", len(test_dataset))



Files already downloaded and verified
Files already downloaded and verified
Number of training examples: 40000
Number of validation examples: 10000
Number of test examples: 10000


Setting up the train, evaluation, and test functions


Setting up the neural network model


In [5]:
# creating a model that automatically runs the forward function i guess, since it is easier
import torch
import torch.nn as nn
import torch.optim as optim

# i can set up a parent model, where the forward and train can be overwritten so that i can customize any neural network i want
# i can probably setup the train, test, and eval function from somewhere else


class NeuralNetwork(nn.Module):
  def __init__(self, num_classes: int = 100):
    super(NeuralNetwork, self).__init__()

    # define the layers of the neural network
    self.features = nn.Sequential(
      #slightly modified version of alexnet to make it smaller

      #Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, padding_mode='zeros', device=None, dtype=None)
      nn.Conv2d(3, 64, (3, 3), (4, 4), (2, 2)),
      nn.ReLU(True),
      nn.BatchNorm2d(64),
      # MaxPool2d(kernel_size, stride=None, padding=0, dilation=1, return_indices=False, ceil_mode=False)
      nn.MaxPool2d((3, 3), (2, 2)),
      nn.Conv2d(64, 192, (3, 3), (1, 1), (1, 1)),
      nn.ReLU(True),
      nn.BatchNorm2d(192),
      nn.Conv2d(192, 192, (3, 3), (1, 1), (1, 1)),
      nn.ReLU(True),
      nn.BatchNorm2d(192),
      nn.MaxPool2d((3, 3), (2, 2)),
    )

    self.avgpool = nn.AdaptiveAvgPool2d((6, 6))

    self.classifier = nn.Sequential(
      nn.Dropout(0.5),
      # this one i need to recalculate
      nn.Linear(6912, 4096),
      nn.ReLU(True),
      nn.Dropout(0.5),
      nn.Linear(4096, 4096),
      nn.ReLU(True),
      nn.Linear(4096, num_classes),
      # it seems like i dont need a softmax activation function here, as it automatically does it with cross entropy loss
    )

    print('define the self')

  def forward(self, x):
    out = self.features(x)
    out = self.avgpool(out)
    out = torch.flatten(out, 1)   # this one is for the batches, to resize it so that it wont have an issue
    out = self.classifier(out)
    return out


In [8]:
# i would likely make it so that to use the training and evaluation function, i will just pass the arguments, network, and the logger into the function and they will train it automatically for me, seems like an easier implementation anyways compared to trying to shove it into the class itself i guess
from utils import *

def train(model, argDict, givenDataloader):
  # get all the stuff out
  # update the learning rate of the optimizer
  for param_group in argDict['optimizer'].param_groups:
    param_group['lr'] = argDict['lr']

  # get the device type, and set it to cuda
  if torch.cuda.is_available():
    device = torch.device('cuda')
  else:
    device = torch.device('cpu')

  # casting model to device
  model.to(device)

  # training for multiple epochs
  epoch_accuracy_values = []
  epoch_loss_values = []

  best_epoch_value = 0
  best_epoch_epoch = 0

  for currentEpoch in range(argDict['maxEpoch']):
    accuracy_values = []
    loss_values = []

    for idx, (data, label) in enumerate(givenDataloader):
      data = data.to(device)
      label = label.to(device)

      # this will be the training loop
      outputs = model(data)

      loss = argDict['criterion'](outputs, label)

      # backward pass and optimization
      argDict['optimizer'].zero_grad()
      loss.backward()
      argDict['optimizer'].step()

      # data logging phase, obtains loss and accuracy
      loss_values.append((loss.item()))

      # getting the accuracy
      _, predicted = torch.max(outputs, 1)
      accuracy = (predicted == label).float().mean()
      accuracy_values.append(accuracy)

    # calculating epoch losses
    epoch_loss = np.mean(loss_values)
    epoch_loss_values.append(epoch_loss)
    epoch_accuracy = torch.mean(torch.stack(accuracy_values))   # due to it being tensor
    epoch_accuracy_values.append(epoch_accuracy)

    tempString = 'currently at epoch ' + str(currentEpoch) + ' accuracy: ' + str(epoch_accuracy) + ' loss of: ' + str(epoch_loss)
    argDict['logger'].log(tempString)

    # evaluating whether to break training or not
    if epoch_accuracy > best_epoch_value:
      best_epoch_value = epoch_accuracy
      best_epoch_epoch = currentEpoch

      # save the model as well
      save_model_to_file(model, argDict['outputName'], argDict['outputName'])
    else:
      if (currentEpoch - best_epoch_epoch) > argDict['idleEpoch']:
        # this means that this is the max trained  epoch
        break

  argDict['epoch_loss_values'] = epoch_loss_values
  argDict['epoch_accuracy_values'] = epoch_accuracy_values
  argDict['trainingStopEpoch'] = currentEpoch

  # saves the dictionary as well
  return argDict


currently at epoch 0 accuracy: tensor(0.0254, device='cuda:0') loss of: 4.530574586486816
currently at epoch 1 accuracy: tensor(0.0546, device='cuda:0') loss of: 4.253593740081787
currently at epoch 2 accuracy: tensor(0.0811, device='cuda:0') loss of: 4.061206823348999
currently at epoch 3 accuracy: tensor(0.1014, device='cuda:0') loss of: 3.900673265838623
currently at epoch 4 accuracy: tensor(0.1213, device='cuda:0') loss of: 3.765455725860596
currently at epoch 5 accuracy: tensor(0.1402, device='cuda:0') loss of: 3.654823439788818
currently at epoch 6 accuracy: tensor(0.1544, device='cuda:0') loss of: 3.5533440673828127
currently at epoch 7 accuracy: tensor(0.1710, device='cuda:0') loss of: 3.464891162490845
currently at epoch 8 accuracy: tensor(0.1836, device='cuda:0') loss of: 3.380004871749878
currently at epoch 9 accuracy: tensor(0.2006, device='cuda:0') loss of: 3.2977186016082762
currently at epoch 10 accuracy: tensor(0.2112, device='cuda:0') loss of: 3.2327966472625733
curren

KeyboardInterrupt: 

In [None]:
model = NeuralNetwork()

argDict = {
  'logger': MyLogger("FirstModelTest.log"),
  'lr': 0.001,
  'maxEpoch': 250,
  'idleEpoch': 25,
  'outputName': 'FirstModelTest',
  'optimizer': optim.SGD(model.parameters(), lr=0.001),
  'criterion': nn.CrossEntropyLoss()
}

temp = train(model, argDict, train_loader)
save_dict_to_file(str(temp), argDict['outputName'], argDict['outputName'])

del model
del argDict

In [None]:
model = NeuralNetwork()

argDict = {
  'logger': MyLogger("higherLearningRate.log"),
  'lr': 0.003,
  'maxEpoch': 250,
  'idleEpoch': 25,
  'outputName': 'higherLearningRate',
  'optimizer': optim.SGD(model.parameters(), lr=0.001),
  'criterion': nn.CrossEntropyLoss()
}

temp = train(model, argDict, train_loader)
save_dict_to_file(str(temp), argDict['outputName'], argDict['outputName'])

del model
del argDict

In [None]:
model = NeuralNetwork()

argDict = {
  'logger': MyLogger("lowerLearningRate.log"),
  'lr': 0.0005,
  'maxEpoch': 250,
  'idleEpoch': 25,
  'outputName': 'lowerLearningRate',
  'optimizer': optim.SGD(model.parameters(), lr=0.001),
  'criterion': nn.CrossEntropyLoss()
}

temp = train(model, argDict, train_loader)
save_dict_to_file(str(temp), argDict['outputName'], argDict['outputName'])

del model
del argDict

In [15]:
def save_dict_to_file(dict, folder_path, filename):
  import json
  import os
  def is_json_serializable(obj):
    try:
      json.dumps(obj)
      return True
    except:
      return False

  def copy_dict_with_serializable_items(original_dict):
    new_dict = {}
    for key, value in original_dict.items():
      if is_json_serializable(value):
        new_dict[key] = value
    return new_dict

  # Check if the folder exists, and create it if not
  if not os.path.exists(folder_path):
    os.makedirs(folder_path)

  # Add the ".pth" extension to the filename if missing
  if not filename.endswith(".json"):
    filename += ".json"

  # modify the .json file so that it can be saved to file
  new_dict = copy_dict_with_serializable_items(dict)

  # Save the dictionary to a JSON file
  filename = os.path.join(folder_path, filename)
  with open(filename, "w") as json_file:
    json.dump(new_dict, json_file)

save_dict_to_file(argDict, argDict['outputName'], argDict['outputName'])