In [1]:
# File:        myNet-cifar10.ipynb
# Author:      Edward Hanson (eth20)
# Description: NN model for problem 5 of HW 2; CIFAR10 dataset

In [2]:
import argparse
import os, sys
import time
import datetime

# Import pytorch dependencies
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms # useful library for preprocessing
import torch.optim as optim
from tqdm import tqdm_notebook as tqdm

# Import matplotlib dependencies
import matplotlib
import matplotlib.pyplot as plt

# Additional dependencies for file manipulation
import numpy as np
import csv

# You cannot change this line.
from tools.dataloader import CIFAR10

In [3]:
# Create the neural network module: myNet (by Edward Hanson)
class myNet(nn.Module):
    def __init__(self):
        super(myNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, stride=1, padding=1), # in_channels, out_channels, kernel_size, stride=1, padding=1
            nn.BatchNorm2d(8), # insert batch normalization BEFORE ReLU
            nn.ReLU()
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(8, 32, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(32),
            nn.ReLU()
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer6 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer7 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1), 
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        
        self.layer8 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        self.layer9 = nn.Sequential(
            nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(128),
            nn.ReLU()
        )
        # bottleneck
        self.layer10 = nn.Sequential(
            nn.Conv2d(128, 64, kernel_size=1, stride=1, padding=0), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer11 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1), 
            nn.BatchNorm2d(64),
            nn.ReLU()
        )
        self.layer12 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer13 = nn.Sequential(
            nn.Linear(4 * 4 * 64, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout()
        )
        self.layer14 = nn.Linear(256, 10)
        # CrossEntropyLoss() loss function already implements nn.Softmax(), so no need to include here
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = self.layer5(out)
        out = self.layer6(out)
        out = self.layer7(out)
        out = self.layer8(out)
        out = self.layer9(out)
        out = self.layer10(out)
        out = self.layer11(out)
        out = self.layer12(out)
        out = out.view(out.size(0),-1)
        out = self.layer13(out)
        y_pred = self.layer14(out)
        return y_pred

In [4]:
# Setting some hyperparameters
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 100
INITIAL_LR = 0.035
MOMENTUM = 0.9
REG = 3e-4
EPOCHS = 100

DECAY_EPOCHS = 2
DECAY = 0.98

DATAROOT = "./data"
CHECKPOINT_PATH = "./saved_model"

**Your answer:**

In [5]:
"""Data preprocessing"""
# Specify preprocessing function.
# Reference mean/std value for 

# Data augmentation for training
# ToTensor() HAS to be placed after all PIL transforms
transform_train = transforms.Compose([ # Compose multiple transformations together
    transforms.RandomHorizontalFlip(), # probability=0.5
    transforms.ToTensor(), # convert np array to torch tensor; also normalizes array to [0,1]
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010)) # standardize array; mean , std (for each channel)
    ])
    
# Data augmentation for inference mode
transform_val = transforms.Compose([ 
    transforms.ToTensor(), 
    transforms.Normalize((0.4914, 0.4822, 0.4465),(0.2023, 0.1994, 0.2010))
    ])

**Your answer:**

In [6]:
# Call the dataset Loader
trainset = CIFAR10(root=DATAROOT, train=True, download=True, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=1) # applies batch shuffle here
valset = CIFAR10(root=DATAROOT, train=False, download=True, transform=transform_val)
valloader = torch.utils.data.DataLoader(valset, batch_size=VAL_BATCH_SIZE, shuffle=False, num_workers=1)

Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified
Using downloaded and verified file: ./data/cifar10_trainval.tar.gz
Extracting ./data/cifar10_trainval.tar.gz to ./data
Files already downloaded and verified


In [7]:
# Specify the device for computation
device = 'cuda' if torch.cuda.is_available() else 'cpu'
net = myNet()
net = net.to(device)
if device =='cuda':
    print("Train on GPU...")
else:
    print("Train on CPU...")

Train on GPU...


In [8]:
# FLAG for loading the pretrained model
TRAIN_FROM_SCRATCH = False
# Code for loading checkpoint and recover epoch id.
CKPT_PATH = "./saved_model/modelMyNet.h5"
def get_checkpoint(ckpt_path):
    try:
        ckpt = torch.load(ckpt_path)
    except Exception as e:
        print(e)
        return None
    return ckpt

ckpt = get_checkpoint(CKPT_PATH)
if ckpt is None or TRAIN_FROM_SCRATCH:
    if not TRAIN_FROM_SCRATCH:
        print("Checkpoint not found.")
    print("Training from scratch ...")
    start_epoch = 0
    current_learning_rate = INITIAL_LR
else:
    print("Successfully loaded checkpoint: %s" %CKPT_PATH)
    net.load_state_dict(ckpt['net'])
    start_epoch = ckpt['epoch'] + 1
    current_learning_rate = ckpt['lr']
    print("Starting from epoch %d " %start_epoch)

print("Starting from learning rate %f:" %current_learning_rate)

Successfully loaded checkpoint: ./saved_model/model_curr.h5
Starting from epoch 97 
Starting from learning rate 0.013271:


In [9]:
# Create loss function and specify regularization
criterion = nn.CrossEntropyLoss()
# Add optimizer
optimizer = optim.SGD(net.parameters(), lr=INITIAL_LR, momentum=MOMENTUM, weight_decay=REG)

In [10]:
""" Start the training/validation process"""
# The process should take about 5 minutes on a GTX 1070-Ti
# if the code is written efficiently.
global_step = 0
best_val_acc = 0
initialPass = True

train_accs = []
val_accs = []

for i in range(start_epoch, EPOCHS):
    print(datetime.datetime.now())
    # Switch to train mode
    net.train()
    print("Epoch %d:" %i)

    total_examples = 0
    correct_examples = 0

    train_loss = 0
    train_acc = 0
    # Train the training dataset for 1 epoch.
    print(len(trainloader))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Copy inputs to device [needs to be consistent with the device that model is loaded in]
        inputs = inputs.to(device=device, dtype=torch.float)
        targets = targets.to(device=device, dtype=torch.long)
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # Now backward loss
        loss.backward()
        # Apply gradient
        optimizer.step()
        # Calculate predicted labels
        # Outputs of neurons for each input listed across dimension 1
        # Dimension 0 lists neuron outputs for all inputs
        # Neuron with largest output value is understood as y_pred
        # Max outputs (value, index) -- in this case, the index (ie neuron ID) is what we're after
        _, predicted = outputs.max(1)
        # Calculate accuracy
        total_examples += TRAIN_BATCH_SIZE
        correct_examples += (targets == predicted).sum() # .sum() all values that match between targets and predicted

        if initialPass == True:
            print("Initial loss = " + str(loss))
            initialPass = False
        train_loss += loss

        global_step += 1
        if global_step % 100 == 0:
            avg_loss = train_loss.float() / (batch_idx + 1)
        pass
    avg_acc = correct_examples.float() / total_examples # need .float() since both are ints by default
    train_accs.append(avg_acc)
    print("Training loss: %.4f, Training accuracy: %.4f" %(avg_loss, avg_acc))
    print(datetime.datetime.now())
    # Validate on the validation dataset
    print("Validation...")
    total_examples = 0
    correct_examples = 0
    
    ## Validation process ##
    net.eval()

    val_loss = 0
    val_acc = 0
    # Disable gradient during validation
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(valloader):
            # Copy inputs to device
            inputs = inputs.to(device=device, dtype=torch.float)
            targets = targets.to(device=device, dtype=torch.long)
            # Zero the gradient
            optimizer.zero_grad()
            # Generate output from the DNN.
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            # Calculate predicted labels
            _, predicted = outputs.max(1)
            # Calculate accuracy
            total_examples += VAL_BATCH_SIZE
            correct_examples += (targets == predicted).sum()
            val_loss += loss

    avg_loss = val_loss.float() / len(valloader)
    avg_acc = correct_examples.float() / total_examples
    val_accs.append(avg_acc)
    print("Validation loss: %.4f, Validation accuracy: %.4f" % (avg_loss, avg_acc))

    if i % DECAY_EPOCHS == 0 and i != 0:
        current_learning_rate *= DECAY
        for param_group in optimizer.param_groups:
            param_group['lr'] = current_learning_rate
            
        print("Current learning rate has decayed to %f" %current_learning_rate)
    
    # Save for checkpoint
    if avg_acc > best_val_acc:
        best_val_acc = avg_acc
        if not os.path.exists(CHECKPOINT_PATH):
            os.makedirs(CHECKPOINT_PATH)
        print("Saving ...")
        state = {'net': net.state_dict(),
                 'epoch': i,
                 'lr': current_learning_rate}
        torch.save(state, os.path.join(CHECKPOINT_PATH, 'modelMyNet.h5'))

print("Optimization finished.")

2019-09-30 23:41:16.877493
Epoch 97:
704
Initial loss = tensor(0.0463, device='cuda:0', grad_fn=<NllLossBackward>)
Training loss: 0.4012, Training accuracy: 0.8662
2019-09-30 23:41:46.536905
Validation...
Validation loss: 0.5991, Validation accuracy: 0.8052
Saving ...
2019-09-30 23:41:47.746844
Epoch 98:
704
Training loss: 0.3108, Training accuracy: 0.8951
2019-09-30 23:42:09.917729
Validation...
Validation loss: 0.5252, Validation accuracy: 0.8290
Current learning rate has decayed to 0.013006
Saving ...
2019-09-30 23:42:11.238406
Epoch 99:
704
Training loss: 0.1379, Training accuracy: 0.9534
2019-09-30 23:42:33.730768
Validation...
Validation loss: 0.3828, Validation accuracy: 0.8860
Saving ...
Optimization finished.


In [10]:
# Save training results
with open('report_data/MYNET7_train_acc_curr.txt', 'w') as filehandle:
    for val in train_accs:
        filehandle.write('%s\n' % val)
with open('report_data/MYNET7_val_acc_curr.txt', 'w') as filehandle:
    for val in val_accs:
        filehandle.write('%s\n' % val)

# plot results 
fig, ax = plt.subplots()
ax.plot(range(len(train_accs)), train_accs)
ax.plot(range(len(val_accs)), val_accs)
ax.set_xlabel('Epoch', fontsize = 'large')
ax.set_ylabel('Accuracy', fontsize = 'large')
ax.set_title('myNet')
ax.legend(['Train Accuracy','Val Accuracy'], loc=4)
ax.grid(True)
#plt.xlim(80,100)
#plt.ylim(0.65,0.82)
plt.show()

print("Max Validation Accuracy: "+str(max(val_accs)))
print("Final Training Accuracy: "+str(train_accs[-1]))

In [10]:
"""Apply model to testset data"""
# Load testset
testset = np.float64(np.load('cifar10-batches-images-test.npy'))
# normalize to [0,1]
testset *= (1.0 / testset.max())
# standardize based on mean and std
for i in range(len(testset)):
    for j in range(len(testset[i])):
        for k in range(len(testset[i][j])):
            for l in range(len(testset[i][j][k])):
                if l == 0:
                    testset[i][j][k][l] = (testset[i][j][k][l] - 0.4914) / 0.2023
                if l == 1:
                    testset[i][j][k][l] = (testset[i][j][k][l] - 0.4822) / 0.1994
                if l == 2:
                    testset[i][j][k][l] = (testset[i][j][k][l] - 0.4465) / 0.2010
# Input format has to be [N,C,W,H]
testset = np.swapaxes(testset,2,3)
testset = np.swapaxes(testset,1,2)
testset = torch.from_numpy(testset)

testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False, num_workers=1)

In [10]:
# Calculate testset results
# Disable gradient during test
predictions = []
net.eval()
with torch.no_grad():
    for inputs in testloader:
        # Copy inputs to device
        inputs = inputs.to(device=device, dtype=torch.float)
        # Zero the gradient
        optimizer.zero_grad()
        # Generate output from the DNN.
        outputs = net(inputs)
        # Calculate predicted labels
        _, predicted = outputs.max(1)
        #print(outputs)
        predictions.append(predicted.item())

NameError: name 'testloader' is not defined

In [None]:
# Save testset results
with open('testpredictions.csv',mode='w') as outfile:
    predictions_writer = csv.writer(outfile, delimiter=',')
    predictions_writer.writerow(['Id','Category'])
    for idx, output in enumerate(predictions):
        predictions_writer.writerow([idx,output])