# Optimization for machine learning - Mini Project: Large mini-batch baseline

## 1. Loading libraries, modules and setting directories

In [None]:
## Run this cell if PyTorch is not installed on Colab
#!pip3 install torch torchvision

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

from google.colab import drive
drive.mount('/content/gdrive', force_remount= True)

import sys
sys.path.append('gdrive/My Drive/Colab Notebooks/OptML')

import torch
import torchvision
import torchvision.transforms as transforms

from models import CNN as CNN

import time
import math

Mounted at /content/gdrive


### Creating device and checking GPU availability 

In [None]:
# Select the compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
# Display the chosen device as the cell output.
device

device(type='cuda')

## 2. Downloading and normalizing CIFAR 10

In [None]:
# Preprocessing pipeline applied to every image:
#   1. convert the image to a tensor,
#   2. normalize each of the three channels with mean 0.5 and std 0.5.
to_tensor = transforms.ToTensor()
normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
transform = transforms.Compose([to_tensor, normalize])

In [None]:
# Arguments shared by the train and test splits: both are stored under ./data,
# downloaded when missing, and passed through the preprocessing pipeline.
cifar_kwargs = dict(root='./data', download=True, transform=transform)

trainset = torchvision.datasets.CIFAR10(train=True, **cifar_kwargs)
testset = torchvision.datasets.CIFAR10(train=False, **cifar_kwargs)

# Human-readable names of the ten CIFAR-10 classes
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

# Indices of the categories in use (all of them)
used_categories = range(len(classes))

Files already downloaded and verified
Files already downloaded and verified


## 3. Training the network 

#### Training and testing are set up as a simple validation loop (no CV) that iterates over the training set for a specified number of epochs. At every epoch, the loss, accuracy and time are captured as raw data for later plotting. If a GPU is available, the model and the input tensors are moved to the CUDA device. The torch DataLoader is used to load the data for every batch size. Cross-entropy is used as the loss function, and the optimizer used throughout training is SGD (no momentum or weight decay). The learning rate is 0.001 for all mini-batch sizes.

In [None]:

import torch.optim as optim


# Batch sizes and number of epochs. There is no need to run 400 epochs for the
# smaller batch sizes (128, 256); the suggested setup for running the code below is
# (batch size, num_epochs): (128, 100), (256, 150), (512, 200), (1024, 300), (2048, 400)
batches = [128, 256, 512, 1024, 2048]
num_epoch = 400

# Step size of plain SGD (no momentum, no weight decay), shared by all batch sizes
LEARNING_RATE = 0.001
# The running training loss is printed every LOG_INTERVAL mini-batches
LOG_INTERVAL = 100

# Lists for storing results; one entry is appended per (batch size, epoch) pair
accuracies = []
epochs = []
time_pr_epoch = []
batch_sizes = []
loss_pr_epoch = []


def _count_correct(model, loader, device):
    """Count correct top-1 predictions of `model` over all batches of `loader`.

    The model is switched to evaluation mode and gradient tracking is disabled
    for the duration of the pass, so layers such as dropout / batch-norm (if the
    model has any) behave deterministically and no autograd graph is built.
    The model's previous train/eval mode is restored before returning.

    Args:
        model: the network to evaluate (assumed to be a torch.nn.Module).
        loader: iterable yielding (inputs, labels) batches.
        device: torch.device the batches are moved to before the forward pass.

    Returns:
        (correct, total): number of correctly classified samples and number of
        samples seen.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Predicted class = index of the largest output along dim 1
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    model.train(was_training)
    return correct, total


for BATCH_SIZE in batches:
    # A new CNN instance is created for every batch size
    model = CNN()
    model.to(device)
    print("######### BATCH SIZE = ",BATCH_SIZE," ######### " )

    # Initialize train loader and test loader
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                              shuffle=True, num_workers=2)
    # Accuracy does not depend on sample order, so the test set is not shuffled
    testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                             shuffle=False, num_workers=2)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE)

    for epoch in range(num_epoch):  # loop over the dataset multiple times
        start_time = time.time()

        # Evaluation below leaves the mode untouched, but be explicit anyway
        model.train()

        running_loss = 0.0            # reset after every progress print
        running_loss_for_epoch = 0.0  # accumulated over the whole epoch
        for i, (inputs, labels) in enumerate(trainloader):
            # move the batch to the selected device
            inputs, labels = inputs.to(device), labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            batch_loss = loss.item()
            running_loss += batch_loss
            running_loss_for_epoch += batch_loss
            if i % LOG_INTERVAL == LOG_INTERVAL - 1:
                print('[epoch : %d, minibatch : %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / LOG_INTERVAL))
                running_loss = 0.0

        # Test-set accuracy after this epoch
        correct, total = _count_correct(model, testloader, device)
        accuracy = 100 * (correct / total)
        print('Accuracy of the network on the {} test images: {:4.2f} %'.format(
            testset.data.shape[0], accuracy))

        # Wall-clock time of the epoch; note that it includes the evaluation pass
        finish_time = (time.time() - start_time)

        # Mean of the per-batch losses; len(trainloader) is the number of
        # mini-batches per epoch (the last, possibly smaller, batch is kept)
        loss_pr_epoch.append(running_loss_for_epoch / len(trainloader))
        accuracies.append(accuracy)
        batch_sizes.append(BATCH_SIZE)
        epochs.append(epoch)
        time_pr_epoch.append(finish_time)

    print('Finished Training')


print("loss pr epoch", loss_pr_epoch)
print("accuracies", accuracies)
print("batch_sizes", batch_sizes)
print("epochs", epochs)
print("time_pr_epoch", time_pr_epoch)

######### BATCH SIZE =  16  ######### 
[epoch : 1, minibatch :   100] loss: 2.302
[epoch : 1, minibatch :   200] loss: 2.300
[epoch : 1, minibatch :   300] loss: 2.294
[epoch : 1, minibatch :   400] loss: 2.292
[epoch : 1, minibatch :   500] loss: 2.289
[epoch : 1, minibatch :   600] loss: 2.283
[epoch : 1, minibatch :   700] loss: 2.277
[epoch : 1, minibatch :   800] loss: 2.268
[epoch : 1, minibatch :   900] loss: 2.262
[epoch : 1, minibatch :  1000] loss: 2.256
[epoch : 1, minibatch :  1100] loss: 2.242
[epoch : 1, minibatch :  1200] loss: 2.230
[epoch : 1, minibatch :  1300] loss: 2.215
[epoch : 1, minibatch :  1400] loss: 2.191
[epoch : 1, minibatch :  1500] loss: 2.165
[epoch : 1, minibatch :  1600] loss: 2.128
[epoch : 1, minibatch :  1700] loss: 2.091
[epoch : 1, minibatch :  1800] loss: 2.051
[epoch : 1, minibatch :  1900] loss: 2.021


## 4. Saving results

#### Results are saved in a tab-separated CSV file.

In [None]:
import pandas as pd

# Collect the per-epoch measurements column by column; every list holds one
# entry per (batch size, epoch) pair recorded by the training loop.
result_columns = {
    'batch_size': batch_sizes,
    'epoch': epochs,
    'accuracy': accuracies,
    'loss': loss_pr_epoch,
    'time': time_pr_epoch,
}
results = pd.DataFrame(result_columns)

# Write the table to Google Drive as a tab-separated file.
results.to_csv('gdrive/MyDrive/results_large_batches_wlr_2048.csv', sep='\t')