In [0]:
######################################
############ 1. load data ############
######################################

# CIFAR 10 documentation: 50 000 training instances
# 32 pixels x 32 pixels x 3 channels, 10 classes, 10 000 test instances

# TUTORIAL: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html?fbclid=IwAR3VHznvftP-KPTgY0Ffu6wNLD7Lx0iYVhNzJqlrt1Ef3frcZrDqc_moru4

import torch
import torchvision
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data.sampler import SubsetRandomSampler
import torch.optim as optim
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import math

### optional data augmentation ###
# Random flip / rotation choice, followed by the same tensor conversion and
# per-channel normalisation (mean 0.5, std 0.5) as the plain pipeline below.
augmented_transform = transforms.Compose([
    transforms.RandomChoice([
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomRotation(degrees=30),
        transforms.RandomVerticalFlip(p=0.5),
    ]),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# plain pipeline: tensor conversion + normalisation only
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# batch size: number of instances processed before each gradient step (minibatch-SGD)
batch = 20

# training data (un-augmented)
traindata = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# optional training data augmentation
#traindata = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=augmented_transform)

# hold out 10 000 of the 50 000 training instances (20%) as a validation set;
# both subsets wrap `traindata`, so they share its transform
trainset, valset = torch.utils.data.random_split(traindata, [40000, 10000])

# validation loader
valloader = torch.utils.data.DataLoader(
    valset,
    batch_size=batch,
    shuffle=True,
    num_workers=2,
)

# test set and its loader (fixed order)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)
testloader = torch.utils.data.DataLoader(
    testset,
    batch_size=batch,
    shuffle=False,
    num_workers=2,
)


Files already downloaded and verified
Files already downloaded and verified


possible hyper-parameters:


*   **f**: filter size (3, 4 or 5)
*   **max_channels**: number of channels output by the last convolution (16 or 18)
*   **convs**: number of convolutional layers (2, 3 or 4; 4 layers are only defined for max_channels = 18)
*   **fcs**: number of fully connected layers (3, 4 or 5)
*   **pool**: different pooling options
        1: max pool LAST layer only
        2: avg pool LAST layer only
        3: max pool LAST two layers
        4: avg pool LAST two layers
        5: max pool before-last layer, avg pool last layer
        6: avg pool before-last layer, max pool last layer

In [0]:
##################################################
### 2. functions to generate customizable CNNs ###
##################################################

def define_stuff(f,max_channels,convs,fcs,pool):
    """Derive the convolutional layout of a customizable CNN for 32x32 inputs.

    Parameters
    ----------
    f : int
        Side length of the square convolution kernels (no padding, stride 1).
    max_channels : int
        Number of channels output by the last convolution.
    convs : int
        Number of convolutional layers.
    fcs : int
        Number of fully connected layers. Accepted to keep the signature
        uniform with ``Net``; it does not influence the result.
    pool : int
        Pooling option, 1..6. Options 1 and 2 pool only after the last
        convolution; options 3..6 pool after each of the last two convolutions.

    Returns
    -------
    channels : list of int
        Output channels of each convolution, in order.
    last_channel : int
        Output channels of the final convolution (``channels[-1]``).
    s : int
        Side length of the square feature map after the final pooling step.

    Raises
    ------
    ValueError
        If ``(convs, max_channels)`` is not a supported combination, if
        ``pool`` is not in 1..6, or if the feature map would shrink below
        1x1 for the requested kernel size.
    """
    def pool_dim(s):
        # 2x2 window with stride 2
        return math.floor(((s - 2) / 2) + 1)

    def conv_dim(s, f):
        # f x f kernel, stride 1, no padding
        return s - f + 1

    # channels output by each convolution, keyed by (convs, max_channels).
    # An explicit key avoids identifying the layout through the product
    # max_channels*convs, which is ambiguous (e.g. 24*2 == 16*3 == 12*4 == 48).
    channel_options = {
        (2, 16): [6, 16],
        (2, 18): [6, 18],
        (3, 16): [6, 10, 16],
        (3, 18): [6, 12, 18],
        (4, 18): [5, 10, 15, 18],
    }

    layout = (convs, max_channels)
    if layout not in channel_options:
        raise ValueError(
            'unsupported (convs, max_channels) combination %r; supported: %s'
            % (layout, sorted(channel_options)))
    if pool not in (1, 2, 3, 4, 5, 6):
        raise ValueError('pool must be an integer from 1 to 6, got %r' % (pool,))

    channels = channel_options[layout]
    last_channel = channels[-1]
    n_convs = len(channels)
    pool_before_last = pool not in (1, 2)

    # follow the side length of the (square) feature map through the network
    s = 32
    for layer in range(1, n_convs + 1):
        s = conv_dim(s, f)
        if layer == n_convs or (pool_before_last and layer == n_convs - 1):
            s = pool_dim(s)
        if s < 1:
            # a negative s would otherwise still give a positive s*s downstream
            raise ValueError(
                'filter size %r is too large: the feature map vanishes at '
                'convolution %d' % (f, layer))
    return channels, last_channel, s
class Net(nn.Module):
    """Customizable CNN: 2-4 convolutions followed by 3-5 fully connected layers.

    The layer layout comes from ``define_stuff``. Sub-modules are registered
    as ``conv1..convN``, ``max_pool``, ``avg_pool`` and ``fc1..fcM``; the last
    fully connected layer outputs 10 values with no activation applied.

    Parameters
    ----------
    f : int
        Side length of the convolution kernels.
    max_channels : int
        Channels output by the last convolution.
    convs : int
        Number of convolutional layers.
    fcs : int
        Number of fully connected layers (3, 4 or 5).
    pool : int
        Pooling option (1..6), see ``_POOL_PLANS``.

    Raises
    ------
    ValueError
        If ``fcs`` or ``pool`` is not a supported option.
    """

    # pool option -> (pooling after the before-last conv, pooling after the last conv),
    # given as names of the pooling attributes; None means "no pooling there"
    _POOL_PLANS = {
        1: (None, 'max_pool'),
        2: (None, 'avg_pool'),
        3: ('max_pool', 'max_pool'),
        4: ('avg_pool', 'avg_pool'),
        5: ('max_pool', 'avg_pool'),
        6: ('avg_pool', 'max_pool'),
    }

    # fcs option -> output width of each fully connected layer
    _FC_WIDTHS = {
        3: (120, 84, 10),
        4: (120, 84, 54, 10),
        5: (120, 90, 80, 64, 10),
    }

    def __init__(self,f,max_channels,convs,fcs,pool):
        super(Net, self).__init__()
        # fail early instead of building a network without fc / pooling plan
        if fcs not in self._FC_WIDTHS:
            raise ValueError('fcs must be one of %s, got %r' % (sorted(self._FC_WIDTHS), fcs))
        if pool not in self._POOL_PLANS:
            raise ValueError('pool must be one of %s, got %r' % (sorted(self._POOL_PLANS), pool))

        channels, last_channel, s = define_stuff(f,max_channels,convs,fcs,pool)

        # remember the layout so forward() does not have to recompute it per batch
        self._hparams = (f, max_channels, convs, fcs, pool)
        self._n_convs = len(channels)
        self._flat_features = last_channel * s * s

        # conv1..convN (input images have 3 channels)
        in_channels = 3
        for idx, out_channels in enumerate(channels, 1):
            setattr(self, 'conv%d' % idx, nn.Conv2d(in_channels, out_channels, f))
            in_channels = out_channels

        self.max_pool = nn.MaxPool2d(2, 2)
        self.avg_pool = nn.AvgPool2d(2, 2)

        # fc1..fcM
        in_features = self._flat_features
        for idx, out_features in enumerate(self._FC_WIDTHS[fcs], 1):
            setattr(self, 'fc%d' % idx, nn.Linear(in_features, out_features))
            in_features = out_features

    def forward(self,x,f=None,max_channels=None,convs=None,fcs=None,pool=None):
        """Run a batch through the network.

        The hyper-parameter arguments are optional and default to the values
        given at construction, so ``net(x)`` is sufficient. They are still
        accepted (and honoured when they differ) for callers that pass them
        explicitly.

        Returns the output of the last fully connected layer.

        Raises ``ValueError`` if an explicitly passed ``fcs`` or ``pool`` is
        not a supported option.
        """
        requested = tuple(
            built if given is None else given
            for given, built in zip((f, max_channels, convs, fcs, pool), self._hparams)
        )
        if requested == self._hparams:
            # common case: reuse the layout computed in __init__
            n_convs, flat_features = self._n_convs, self._flat_features
        else:
            channels, last_channel, s = define_stuff(*requested)
            n_convs, flat_features = len(channels), last_channel * s * s
        fcs, pool = requested[3], requested[4]
        if fcs not in self._FC_WIDTHS:
            raise ValueError('fcs must be one of %s, got %r' % (sorted(self._FC_WIDTHS), fcs))
        if pool not in self._POOL_PLANS:
            raise ValueError('pool must be one of %s, got %r' % (sorted(self._POOL_PLANS), pool))

        before_last_pool, last_pool = self._POOL_PLANS[pool]

        # convolutions: ReLU after each one, pooling only after the last
        # (and, depending on the plan, the before-last) convolution
        for idx in range(1, n_convs + 1):
            x = F.relu(getattr(self, 'conv%d' % idx)(x))
            if idx == n_convs:
                x = getattr(self, last_pool)(x)
            elif idx == n_convs - 1 and before_last_pool is not None:
                x = getattr(self, before_last_pool)(x)

        # flatten, then fully connected layers: ReLU on all but the last
        x = x.view(-1, flat_features)
        for idx in range(1, fcs):
            x = F.relu(getattr(self, 'fc%d' % idx)(x))
        return getattr(self, 'fc%d' % fcs)(x)

In [0]:
############################################
### 3. function to train customizable CNN ###
############################################

def _accuracy(net, loader, f, max_channels, convs, fcs, pool):
    """Return the percentage of instances in `loader` that `net` classifies correctly."""
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in loader:
            outputs = net(images,f,max_channels,convs,fcs,pool)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total


def train_model(num_epochs,f,max_channels,convs,fcs,pool, opt,batch):
    """Train a customizable CNN and write its loss / accuracy curves to CSV files.

    Reads the module-level ``trainset``, ``valloader`` and ``testloader``.

    Parameters
    ----------
    num_epochs : int
        Maximum number of passes over ``trainset``.
    f, max_channels, convs, fcs, pool
        Architecture hyper-parameters forwarded to ``Net``.
    opt : int
        Optimizer id: 1 SGD (momentum 0.9), 2 SGD (momentum 0.5), 3 Adam,
        4 Adagrad, 5 Adadelta, 6 AdamW, 8 Adamax, 9 ASGD, 10 RMSprop,
        11 Rprop. (There is no id 7.)
    batch : int
        Mini-batch size of the training loader. The validation and test
        loaders are module-level objects and keep their own batch size.

    Early stopping: from the third epoch on, training stops as soon as the
    validation accuracy is not higher than it was two epochs earlier.

    Side effects: prints progress and writes four CSV files (loss, validation,
    train and test accuracy) into the current working directory.

    Raises
    ------
    ValueError
        If ``opt`` is not one of the supported optimizer ids.
    """
    # optimizer id -> factory; built lazily so only the selected one is constructed
    optimizer_factories = {
        1: lambda params: optim.SGD(params, lr=0.001, momentum=0.9),
        2: lambda params: optim.SGD(params, lr=0.001, momentum=0.5),
        3: lambda params: optim.Adam(params,lr=0.001),
        4: lambda params: optim.Adagrad(params, lr=0.01, lr_decay=0, weight_decay=0, initial_accumulator_value=0, eps=1e-10),
        5: lambda params: optim.Adadelta(params, lr=1.0, rho=0.9, eps=1e-06, weight_decay=0),
        6: lambda params: optim.AdamW(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.01, amsgrad=False),
        8: lambda params: optim.Adamax(params, lr=0.002, betas=(0.9, 0.999), eps=1e-08, weight_decay=0),
        9: lambda params: optim.ASGD(params, lr=0.01, lambd=0.0001, alpha=0.75, t0=1000000.0, weight_decay=0),
        10: lambda params: optim.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False),
        11: lambda params: optim.Rprop(params, lr=0.01, etas=(0.5, 1.2), step_sizes=(1e-06, 50)),
    }
    if opt not in optimizer_factories:
        # otherwise `optimizer` would be undefined and fail only inside the loop
        raise ValueError('opt must be one of %s, got %r' % (sorted(optimizer_factories), opt))

    epoch_accuracy = [] # (epoch, validation accuracy) per epoch
    train_accuracy = [] # (epoch, training accuracy) per epoch
    test_accuracy = [] # (epoch, test accuracy) per epoch
    losses_tuples = [] # (epoch, iteration, average loss over the last 2000 mini-batches)

    net = Net(f,max_channels,convs,fcs,pool) # generate net
    criterion = nn.CrossEntropyLoss() # loss function: cross-entropy loss
    optimizer = optimizer_factories[opt](net.parameters())

    # shuffle=True reshuffles every time the loader is iterated, so a single
    # loader serves all epochs
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch, shuffle=True, num_workers=2)

    # loop over the dataset multiple times
    for epoch in range(num_epochs):
        net.train()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs,f,max_channels,convs,fcs,pool)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
                losses_tuples.append((epoch + 1, i + 1, running_loss/ 2000))
                running_loss = 0.0

        # accuracy on training, test and validation data after this epoch
        net.eval()
        train_accuracy.append((epoch + 1, _accuracy(net, trainloader, f, max_channels, convs, fcs, pool)))
        test_accuracy.append((epoch + 1, _accuracy(net, testloader, f, max_channels, convs, fcs, pool)))
        curr = _accuracy(net, valloader, f, max_channels, convs, fcs, pool)
        epoch_accuracy.append((epoch + 1, curr))
        print('Accuracy of the network on validation set: %d %%' % curr)

        # early stopping: no improvement over the accuracy of two epochs ago.
        # A plain comparison (rather than a counter tested with == 1) also
        # stops when three consecutive accuracies are exactly equal.
        if epoch >= 2 and epoch_accuracy[-3][1] >= curr:
            break

    # save outputs (file names and column labels kept as-is for existing consumers).
    # NOTE(review): the second column of the accuracy files is labelled
    # 'iteration' but holds the accuracy in percent; the files carry an '_aug'
    # suffix regardless of which transform is active; and the loss file has a
    # single underscore after 'pool' where the others have two.
    prefix = f'{num_epochs}epochs_{batch}batch_{f}filters_{max_channels}channels_{convs}convs_{fcs}fcs_{pool}pool_'
    pd.DataFrame(losses_tuples, columns = ['epoch','iteration','loss']).to_csv(prefix + f'{opt}optim_loss_aug.csv')
    pd.DataFrame(epoch_accuracy, columns = ['epoch','iteration']).to_csv(prefix + f'_{opt}optim_validation_accuracy_aug.csv')
    pd.DataFrame(train_accuracy, columns = ['epoch','iteration']).to_csv(prefix + f'_{opt}optim_train_accuracy_aug.csv')
    pd.DataFrame(test_accuracy, columns = ['epoch','iteration']).to_csv(prefix + f'_{opt}optim_test_accuracy_aug.csv')
    print('Finished Training')

In [0]:
#############################
### 4. Running the models ###
#############################

# Final model parameters:
num_epochs = 40    # upper bound; early stopping may end training sooner
f = 3              # 3x3 convolution filters
max_channels = 18  # channels output by the last convolution
convs = 3          # convolutional layers
fcs = 3            # fully connected layers
pool = 3           # max pool after each of the last two convolutions
opt = 8            # Adamax
batch = 20         # mini-batch size of the training loader

train_model(
    num_epochs=num_epochs,
    f=f,
    max_channels=max_channels,
    convs=convs,
    fcs=fcs,
    pool=pool,
    opt=opt,
    batch=batch,
)

[1,  2000] loss: 1.783
Accuracy of the network on validation set: 41 %
[2,  2000] loss: 1.512
Accuracy of the network on validation set: 45 %
[3,  2000] loss: 1.401
Accuracy of the network on validation set: 51 %
[4,  2000] loss: 1.311
Accuracy of the network on validation set: 52 %
[5,  2000] loss: 1.248
Accuracy of the network on validation set: 55 %
[6,  2000] loss: 1.196
Accuracy of the network on validation set: 57 %
[7,  2000] loss: 1.154
Accuracy of the network on validation set: 58 %
[8,  2000] loss: 1.121
Accuracy of the network on validation set: 58 %
[9,  2000] loss: 1.091
Accuracy of the network on validation set: 60 %
[10,  2000] loss: 1.061
Accuracy of the network on validation set: 61 %
[11,  2000] loss: 1.036
Accuracy of the network on validation set: 62 %
[12,  2000] loss: 1.013
Accuracy of the network on validation set: 62 %
[13,  2000] loss: 0.999
Accuracy of the network on validation set: 63 %
[14,  2000] loss: 0.978
Accuracy of the network on validation set: 63 %
[

In [0]:
##################################################
### 5. download each .csv file to save outputs ###
##################################################

import os 
from google.colab import files
import re

# Download every CSV file found in the current working directory.
# A suffix check is used rather than a regex search for ".csv": in a regex the
# unescaped dot matches any character and the pattern is unanchored, so names
# such as "my_csv_notes.txt" would be downloaded as well.
for item in sorted(os.listdir()):
    if item.endswith(".csv"):
        files.download(item)

# New Section

# New Section

# New Section