# **CNN**

## **Initial CNN (from tutorial)**

This section is from the provided tutorial: https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html#sphx-glr-beginner-blitz-cifar10-tutorial-py

Set Number of Epochs to Run

In [49]:
numepochs = 1

Importing torchvision

In [50]:
import torch
import torchvision
import torchvision.transforms as transforms
import time
from prettytable import PrettyTable  

# for an easy format later
t = PrettyTable(['Trial', 'Accuracy', 'Time/Epoch', 'Initial Loss/Batch', 'Average Loss/Batch', 'Final Loss/Batch'])

Add GPU Support

In [51]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Assuming that we are on a CUDA machine, this should print a CUDA device:

print(device)

cuda:0


Transforming imagers to tensors

In [52]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


Modified for 3-channel images

In [53]:
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(3, 1000, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(1000, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 5 * 5)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

#speed
net.to(device)

Net(
  (conv1): Conv2d(3, 1000, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(1000, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

Define loss function and optimizer

In [54]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

train network

In [55]:
tic = time.perf_counter()
numphases = 0
initloss = 0.0
aveloss = 0.0
for epoch in range(numepochs):  # loop over the dataset multiple times
    
    
    finloss = 0.0
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data[0].to(device), data[1].to(device)
 
        # zero the parameter gradients
        optimizer.zero_grad()
 
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
 
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            numphases += 1
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            if initloss == 0:
                initloss = running_loss / 2000
            aveloss += running_loss / 2000
            finloss = running_loss / 2000
            running_loss = 0.0
    
aveloss = aveloss / numphases
toc = time.perf_counter()
runtime = (toc - tic)/numepochs
print('Finished Training\n')

[1,  2000] loss: 1.947
[1,  4000] loss: 1.629
[1,  6000] loss: 1.510
[1,  8000] loss: 1.454
[1, 10000] loss: 1.367
[1, 12000] loss: 1.312
Finished Training



use network on whole dataset

In [56]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        
acc = (100 * correct / total)
print('Accuracy of the network on the 10000 test images: %d %%' % acc)

Accuracy of the network on the 10000 test images: 50 %


In [57]:
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels).squeeze()
        for i in range(4):
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1


for i in range(10):
    print('Accuracy of %5s : %2d %%' % (
        classes[i], 100 * class_correct[i] / class_total[i]))

Accuracy of plane : 27 %
Accuracy of   car : 86 %
Accuracy of  bird : 21 %
Accuracy of   cat : 19 %
Accuracy of  deer : 42 %
Accuracy of   dog : 52 %
Accuracy of  frog : 64 %
Accuracy of horse : 74 %
Accuracy of  ship : 62 %
Accuracy of truck : 52 %


Summary Table addition

In [58]:
# add to summary table
t.add_row(['Base', '%2d %%' % acc, 
        '{:.0f}m {:.0f}s'.format(runtime // 60, runtime % 60), 
        '%.3f' % initloss, '%.3f' % aveloss, '%.3f' % finloss])

## **DNN experimentation**

I had problems with not re-initializing things, so for safety I will re-init everything.

### **Learning Rate Schedule**

In [59]:
import torch
import torchvision
import torchvision.transforms as transforms
import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

var_lr_list = ['.00001', '.0001', '.01', '.1']

In [60]:
for var_lr in var_lr_list:
    print('\nRunning with %s lr:\n' % var_lr)

    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                            shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                            shuffle=False, num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    import torch.nn as nn
    import torch.nn.functional as F


    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 1000, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(1000, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 5 * 5)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x


    net = Net()

    #speed
    net.to(device)

    import torch.optim as optim

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr = float(var_lr), momentum = 0.9)

    initloss = 0.0
    phasecount = 0
    aveloss = 0.0
    
    for epoch in range(numepochs):  # loop over the dataset multiple times
        
        
        finloss = 0.0
        tic = time.perf_counter()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)
    
            # zero the parameter gradients
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                phasecount += 1
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 2000))
                if initloss == 0:
                    initloss = running_loss / 2000
                aveloss += running_loss / 2000
                finloss = running_loss / 2000
                running_loss = 0.0
        



    print('Finished Training')
    

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
    acc = (100 * correct / total)
    print('Accuracy of the network on the 10000 test images: %d %%' % acc)

    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(4):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1


    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))
    toc = time.perf_counter()
    runtime = (toc - tic)
    aveloss = aveloss / phasecount
    t.add_row(['L Rate = %s' % var_lr, '%2d %%' % acc, 
            '{:.0f}m {:.0f}s'.format(runtime // 60, runtime % 60), 
            '%.3f' % initloss, '%.3f' % (aveloss / phasecount), '%.3f' % finloss])


Running with .00001 lr:

Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.301
[1,  4000] loss: 2.298
[1,  6000] loss: 2.293
[1,  8000] loss: 2.287
[1, 10000] loss: 2.277
[1, 12000] loss: 2.262
Finished Training
Accuracy of the network on the 10000 test images: 13 %
Accuracy of plane :  0 %
Accuracy of   car :  6 %
Accuracy of  bird :  1 %
Accuracy of   cat :  1 %
Accuracy of  deer :  0 %
Accuracy of   dog :  0 %
Accuracy of  frog : 15 %
Accuracy of horse : 89 %
Accuracy of  ship : 20 %
Accuracy of truck :  1 %

Running with .0001 lr:

Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.249
[1,  4000] loss: 1.995
[1,  6000] loss: 1.852
[1,  8000] loss: 1.733
[1, 10000] loss: 1.651
[1, 12000] loss: 1.586
Finished Training
Accuracy of the network on the 10000 test images: 43 %
Accuracy of plane : 43 %
Accuracy of   car : 70 %
Accuracy of  bird : 25 %
Accuracy of   cat : 43 %
Accuracy of  deer : 13 %


### **Momentum Shift**

In [61]:
import torch
import torchvision
import torchvision.transforms as transforms
import time

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

var_mom_list = ['.3', '.5', '.95', '.99']

In [62]:
for var_mom in var_mom_list:
    print('\nRunning with %s momentum:\n' % var_mom)

    transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                            shuffle=True, num_workers=2)

    testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                            shuffle=False, num_workers=2)

    classes = ('plane', 'car', 'bird', 'cat',
            'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

    import torch.nn as nn
    import torch.nn.functional as F


    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(3, 1000, 5)
            self.pool = nn.MaxPool2d(2, 2)
            self.conv2 = nn.Conv2d(1000, 16, 5)
            self.fc1 = nn.Linear(16 * 5 * 5, 120)
            self.fc2 = nn.Linear(120, 84)
            self.fc3 = nn.Linear(84, 10)

        def forward(self, x):
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            x = x.view(-1, 16 * 5 * 5)
            x = F.relu(self.fc1(x))
            x = F.relu(self.fc2(x))
            x = self.fc3(x)
            return x


    net = Net()

    #speed
    net.to(device)

    import torch.optim as optim

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr = .001, momentum = float(var_mom))
    
    initloss = 0.0
    numphases = 0
    aveloss = 0.0
    
    for epoch in range(numepochs):  # loop over the dataset multiple times
        
        
        finloss = 0.0
        tic = time.perf_counter()
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)
    
            # zero the parameter gradients
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
            # print statistics
            running_loss += loss.item()
            if i % 2000 == 1999:    # print every 2000 mini-batches
                numphases += 1
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 2000))
                if initloss == 0:
                    initloss = running_loss / 2000
                aveloss += running_loss / 2000
                finloss = running_loss / 2000
                running_loss = 0.0
        

    print('Finished Training')
    

    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
    acc = (100 * correct / total)
    print('Accuracy of the network on the 10000 test images: %d %%' % acc)

    class_correct = list(0. for i in range(10))
    class_total = list(0. for i in range(10))
    with torch.no_grad():
        for data in testloader:
            images, labels = data[0].to(device), data[1].to(device)
            outputs = net(images)
            _, predicted = torch.max(outputs, 1)
            c = (predicted == labels).squeeze()
            for i in range(4):
                label = labels[i]
                class_correct[label] += c[i].item()
                class_total[label] += 1


    for i in range(10):
        print('Accuracy of %5s : %2d %%' % (
            classes[i], 100 * class_correct[i] / class_total[i]))
    toc = time.perf_counter()
    runtime = (toc - tic)
    t.add_row(['Momentum = %s' % var_mom, '%2d %%' % acc, 
            '{:.0f}m {:.0f}s'.format(runtime // 60, runtime % 60), 
            '%.3f' % initloss, '%.3f' % (aveloss / numphases), '%.3f' % finloss])


Running with .3 momentum:

Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.184
[1,  4000] loss: 1.870
[1,  6000] loss: 1.712
[1,  8000] loss: 1.617
[1, 10000] loss: 1.557
[1, 12000] loss: 1.517
Finished Training
Accuracy of the network on the 10000 test images: 46 %
Accuracy of plane : 52 %
Accuracy of   car : 54 %
Accuracy of  bird : 15 %
Accuracy of   cat : 23 %
Accuracy of  deer : 29 %
Accuracy of   dog : 59 %
Accuracy of  frog : 73 %
Accuracy of horse : 59 %
Accuracy of  ship : 58 %
Accuracy of truck : 37 %

Running with .5 momentum:

Files already downloaded and verified
Files already downloaded and verified
[1,  2000] loss: 2.142
[1,  4000] loss: 1.867
[1,  6000] loss: 1.694
[1,  8000] loss: 1.592
[1, 10000] loss: 1.519
[1, 12000] loss: 1.479
Finished Training
Accuracy of the network on the 10000 test images: 49 %
Accuracy of plane : 54 %
Accuracy of   car : 75 %
Accuracy of  bird : 33 %
Accuracy of   cat : 28 %
Accuracy of  deer : 

### **Summary**

Prints a summary for easy analysis

In [63]:
print('Base settings: Simple CNN, lr = .001, momentum = .9')
print('Epochs/Trial: %2d\n' % numepochs)
print(t)

Base settings: Simple CNN, lr = .001, momentum = .9
Epochs/Trial:  1

+-----------------+----------+------------+--------------------+--------------------+------------------+
|      Trial      | Accuracy | Time/Epoch | Initial Loss/Batch | Average Loss/Batch | Final Loss/Batch |
+-----------------+----------+------------+--------------------+--------------------+------------------+
|       Base      |   50 %   |   1m 3s    |       1.947        |       1.536        |      1.312       |
| L Rate = .00001 |   13 %   |   1m 22s   |       2.301        |       2.287        |      2.262       |
|  L Rate = .0001 |   43 %   |   1m 23s   |       2.249        |       1.844        |      1.586       |
|   L Rate = .01  |   16 %   |   1m 22s   |       2.164        |       2.129        |      2.100       |
|   L Rate = .1   |   10 %   |   1m 22s   |       2.360        |       2.361        |      2.359       |
|  Momentum = .3  |   46 %   |   1m 22s   |       2.184        |       1.743        |     

## **Different Architectures**

Using examples from: https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html

Going to re-import things here so I can run this section stand alone if needed

In [64]:

import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import copy

Initialize

In [65]:
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = "./data"

# Number of classes in the dataset
num_classes = 10

# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
model_name = "squeezenet"

# Batch size for training 
batch_size = 4

# Number of epochs to train for
num_epochs = numepochs

# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = True

# Classes
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

**Helper Methods**

trainer

In [66]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=num_epochs, is_inception=False):
    tic = time.time()

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    numphases = 0
    initloss = 0.0
    aveloss = 0.0
    for epoch in range(num_epochs):
        finloss = 0.0    
        
        
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            
            running_loss = 0.0
            running_corrects = 0
            run_phase = 0.0
            i = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    # Special case for inception because in training it has an auxiliary output.
                    if is_inception and phase == 'train':
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                run_phase += loss.item()

                # statistics
                if (i % 2000 == 1999) and (phase == 'train'):
                    numphases += 1
                    print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, run_phase / 2000))
                    if initloss == 0:
                        initloss = run_phase / 2000
                    finloss = run_phase / 2000
                    run_phase = 0.0
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                i += 1
                

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            aveloss += epoch_loss
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

            print()
            

    runtime = time.time() - tic
    print('Training complete in {:.0f}m {:.0f}s'.format(runtime // 60, runtime % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    
    
    t.add_row(['%s' % model_name, '%3d %%' % (100 * epoch_acc), 
        '{:.0f}m {:.0f}s'.format((runtime / num_epochs) // 60, (runtime / num_epochs) % 60), 
        '%.3f' % initloss, '%.3f' % (aveloss / num_epochs), '%.3f' % finloss])
    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

Helper to set .requires_grad to false by default

In [67]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

Initialize

In [68]:
choices = [1, 3, 5, 8, 10, 12, 15, 20, 50, 100]



def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        """ Resnet18
        """
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "alexnet":
        """ Alexnet
        """
        model_ft = models.alexnet(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "vgg":
        """ VGG11_bn
        """
        model_ft = models.vgg11_bn(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        model_ft.classifier[6] = nn.Linear(num_ftrs,num_classes)
        input_size = 224

    elif model_name == "squeezenet":
        """ Squeezenet
        """
        model_ft = models.squeezenet1_0(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        model_ft.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1,1), stride=(1,1))
        model_ft.num_classes = num_classes
        input_size = 224

    elif model_name == "densenet":
        """ Densenet
        """
        model_ft = models.densenet121(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier.in_features
        model_ft.classifier = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    elif model_name == "inception":
        """ Inception v3
        Be careful, expects (299,299) sized images and has auxiliary output
        """
        model_ft = models.inception_v3(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        # Handle the auxilary net
        num_ftrs = model_ft.AuxLogits.fc.in_features
        model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)
        # Handle the primary net
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs,num_classes)
        input_size = 299

    else:
        print("Invalid model name, exiting...")
        exit()

    return model_ft, input_size


# Initialize the model for this run
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)


### **Run for all classes**

In [69]:
# Models to choose from [resnet, alexnet, vgg, squeezenet, densenet, inception]
choices = ["resnet", "alexnet", "vgg", "squeezenet", "densenet", "inception"]

for model_name in choices:
    print('\nRunning %s:\n' % model_name)

    # Initialize the model for this run
    model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, use_pretrained=True)

    # Data augmentation and normalization for training
    # Just normalization for validation
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]),
    }

    print("Initializing Datasets and Dataloaders...")

    # Create training and validation datasets
    image_datasets = {
        'train': torchvision.datasets.CIFAR10(root='./data', train=True,
                                            download=True, transform=data_transforms['train']),
        'val': torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=data_transforms['val'])
    }
    # Create training and validation dataloaders
    dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

    # Detect if we have a GPU available
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Send the model to GPU
    model_ft = model_ft.to(device)

    # Gather the parameters to be optimized/updated in this run. If we are
    #  finetuning we will be updating all parameters. However, if we are
    #  doing feature extract method, we will only update the parameters
    #  that we have just initialized, i.e. the parameters with requires_grad
    #  is True.
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    if feature_extract:
        params_to_update = []
        for name,param in model_ft.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t",name)
    else:
        for name,param in model_ft.named_parameters():
            if param.requires_grad == True:
                print("\t",name)

    # Observe that all parameters are being optimized
    optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # Train and evaluate
    model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))


Running resnet:

Initializing Datasets and Dataloaders...
Files already downloaded and verified
Files already downloaded and verified
Params to learn:
	 fc.weight
	 fc.bias
Epoch 1/1
----------
[1,  2000] loss: 1.893
[1,  4000] loss: 1.730
[1,  6000] loss: 1.722
[1,  8000] loss: 1.719
[1, 10000] loss: 1.726
[1, 12000] loss: 1.699
train Loss: 1.7478 Acc: 0.4240

val Loss: 0.8180 Acc: 0.7250

Training complete in 1m 53s
Best val Acc: 0.725000

Running alexnet:

Initializing Datasets and Dataloaders...
Files already downloaded and verified
Files already downloaded and verified
Params to learn:
	 classifier.6.weight
	 classifier.6.bias
Epoch 1/1
----------
[1,  2000] loss: 2.646
[1,  4000] loss: 2.813
[1,  6000] loss: 2.753
[1,  8000] loss: 2.740
[1, 10000] loss: 2.784
[1, 12000] loss: 2.951
train Loss: 2.7802 Acc: 0.4181

val Loss: 1.4133 Acc: 0.6722

Training complete in 1m 12s
Best val Acc: 0.672200

Running vgg:

Initializing Datasets and Dataloaders...
Files already downloaded and ve

### **Summary**

Prints a summary for easy analysis

In [70]:
print('Base settings: Simple CNN, lr = .001, momentum = .9')
print('Epochs/Trial: %2d\n' % numepochs)
print(t)

Base settings: Simple CNN, lr = .001, momentum = .9
Epochs/Trial:  1

+-----------------+----------+------------+--------------------+--------------------+------------------+
|      Trial      | Accuracy | Time/Epoch | Initial Loss/Batch | Average Loss/Batch | Final Loss/Batch |
+-----------------+----------+------------+--------------------+--------------------+------------------+
|       Base      |   50 %   |   1m 3s    |       1.947        |       1.536        |      1.312       |
| L Rate = .00001 |   13 %   |   1m 22s   |       2.301        |       2.287        |      2.262       |
|  L Rate = .0001 |   43 %   |   1m 23s   |       2.249        |       1.844        |      1.586       |
|   L Rate = .01  |   16 %   |   1m 22s   |       2.164        |       2.129        |      2.100       |
|   L Rate = .1   |   10 %   |   1m 22s   |       2.360        |       2.361        |      2.359       |
|  Momentum = .3  |   46 %   |   1m 22s   |       2.184        |       1.743        |     

# **Linear Classifier**

Got a start from http://cs231n.stanford.edu/2017/

In [467]:
import torch, torchvision, PIL
from torchvision.datasets import CIFAR10 
from PIL import Image
import numpy as np
from io import BytesIO
import torchvision.transforms as transforms
import IPython.display

imgTransform = transforms.Compose([
    transforms.ToTensor()
])

classes = ['airplane', 'automobile', 'bird', 'cat', 'deer',
           'dog', 'frog', 'horse', 'ship', 'truck']
class2id = {name: idx for (idx, name) in enumerate(classes)}

trainset = CIFAR10(root='./data', train = True, transform = imgTransform)
valset = CIFAR10(root='./data', train = False, transform = imgTransform)

In [468]:
# Softmax function: exp(a) / sum(exp(a))
# Note that this function works for tensors of any shape, it is not a scalar function.
def softmax(a):
    max_val = a.max()  # This is to avoid variable overflows.
    exp_a = (a - max_val).exp()
    return exp_a.div(exp_a.sum())

# Classification function: y = softmax(Wx + b)
def linear(x, weight, bias):
    return torch.matmul(weight, x) + bias

# Initialize bias and weight with random values.
weight = torch.Tensor(10, 3 * 32 * 32).normal_(0, 0.01)
bias = torch.Tensor(10, 1).normal_(0, 0.01)

# Now predict the category using this un-trained classifier.
img, _ = trainset[0]
x = img.view(3 * 32 * 32, 1)
a = linear(x, weight, bias)
predictions = softmax(a)

# Show the results of the classifier.
max_score, max_label = predictions.max(0)
print('Image predicted as %s with confidence %.2f' % (classes[max_label[0]], max_score[0]))

Image predicted as ship with confidence 0.13


In [469]:
def loss(label, predictions):
    return -predictions[label].log()

y_hat[airplane] = 0.06
y_hat[automobile] = 0.11
y_hat[bird] = 0.11
y_hat[cat] = 0.08
y_hat[deer] = 0.10
y_hat[dog] = 0.09
y_hat[frog] = 0.08
y_hat[horse] = 0.12
y_hat[ship] = 0.13
y_hat[truck] = 0.12

Loss: 2.48


In [470]:
epsilon = 0.0001
shifted_weight = weight.clone()  # Make a copy of the weights.

# Initialize gradients for bias and weight with zero values.
gradWeight = torch.Tensor(10, 3 * 32 * 32).fill_(0)
gradBias = torch.Tensor(10, 1).fill_(0)

# Compute gradients for each weight w_ij
for i in range(0, weight.shape[0]):
    for j in range(0, weight.shape[1]):
        # Compute f(x + h)
        shifted_weight[i, j] = shifted_weight[i, j] + epsilon
        f1 = softmax(linear(x, shifted_weight, bias))
        loss1 = loss(class2id['frog'], f1)
        shifted_weight[i, j] = weight[i, j] # restore original value.
        
        # Compute f(x - h)
        shifted_weight[i, j] = shifted_weight[i, j] - epsilon
        f2 = softmax(linear(x, shifted_weight, bias))
        loss2 = loss(class2id['frog'], f2)
        shifted_weight[i, j] = weight[i, j] # restore original value.

        # Compute [f(x + h) - f(x - h)] / 2h.
        gradWeight[i, j] = (loss1[0] - loss2[0]) / (2 * epsilon)

numericalGradWeight = gradWeight 

In [471]:
def loss_softmax_backward(label, predictions):
    grad_inputs = predictions.clone()
    grad_inputs[label] = grad_inputs[label] - 1
    return grad_inputs

def linear_backward(x, weight, bias, gradOutput):
    gradBias = bias.clone().zero_()
    gradWeight = weight.clone().zero_()
    gradWeight = gradOutput * x.t()
    gradBias.copy_(gradOutput)
    return gradWeight, gradBias

gradOutput = loss_softmax_backward(class2id['frog'], predictions)
gradWeight, gradBias = linear_backward(x, weight, bias, gradOutput)

# Let's print the gradWeight again.
print(gradWeight)

g1 = gradWeight.view(-1, 1).squeeze()
g2 = numericalGradWeight.view(-1, 1).squeeze()
print('Distance betwen numerical and analytical gradients: %.6f' % 
      (torch.norm(g1 - g2) / torch.norm(g1 + g2)))

tensor([[0.0139, 0.0102, 0.0118,  ..., 0.0331, 0.0199, 0.0170],
        [0.0247, 0.0180, 0.0209,  ..., 0.0587, 0.0352, 0.0302],
        [0.0258, 0.0188, 0.0219,  ..., 0.0613, 0.0368, 0.0315],
        ...,
        [0.0287, 0.0209, 0.0243,  ..., 0.0682, 0.0409, 0.0351],
        [0.0294, 0.0214, 0.0249,  ..., 0.0698, 0.0419, 0.0359],
        [0.0277, 0.0202, 0.0234,  ..., 0.0656, 0.0394, 0.0337]])
Distance betwen numerical and analytical gradients: 0.001869


In [472]:

learningRates = [1e-4, 1e-5]
weightDecay = 1e-6 # Regularization strength.

# Initialize bias and weight with random values again.
weight = torch.Tensor(10, 3 * 32 * 32).normal_(0, 0.1)
bias = torch.Tensor(10, 1).normal_(0, 10)
print('Testing with biases:\n%s\n' % bias)

tic = time.perf_counter()
for epoch in range(0, numepochs):
    correct = 0.0
    cum_loss = 0.0
    learningRate = learningRates[0] if epoch < 5 else learningRates[1]
    
    # Make a pass over the training data.
    for (i, (img, label)) in enumerate(trainset):
        x = img.view(3 * 32 * 32, 1)
        
        # Forward pass. (Prediction stage)
        predictions = softmax(linear(x, weight, bias))
        cum_loss += loss(label, predictions)[0]
        max_score, max_label = predictions.max(0)
        if max_label[0] == label: correct += 1
        
        #Backward pass. (Gradient computation stage)
        gradOutput = loss_softmax_backward(label, predictions)
        gradWeight, gradBias = linear_backward(x, weight, bias, gradOutput)
        
        # Parameter updates.
        gradWeight.add_(weightDecay, weight)
        weight.add_(-learningRate, gradWeight)
        #bias = bias - learningRate * gradBias
        
        # Logging the current results on training.
        if (i + 1) % 10000 == 0:
            print('Train-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
                  (epoch, i + 1, cum_loss / (i + 1), correct / (i + 1)))
    
    
    # Make a pass over the validation data.
    correct = 0.0
    cum_loss = 0.0
    for (i, (img, label)) in enumerate(valset):
        x = img.view(3 * 32 * 32, 1)
        
        # Forward pass. (Prediction stage)
        predictions = softmax(linear(x, weight, bias))
        cum_loss += loss(label, predictions)[0]
        max_score, max_label = predictions.max(0)
        if max_label[0] == label: correct += 1
            
    # Logging the current results on validation.
    print('Validation-epoch %d. Avg-Loss: %.4f, Accuracy: %.4f' % 
          (epoch, cum_loss / len(valset), correct / len(valset)))
    
toc = time.perf_counter()
print('%5d seconds' % ((toc - tic)/num_epochs))

# Initialize weight with, bias to 0
weight = torch.Tensor(10, 3 * 32 * 32).normal_(0, 0.01)
bias = torch.Tensor(10, 1).zero_()


print('Testing with biases:\n%s\n' % bias)
tic = time.perf_counter()
for epoch in range(0, numepochs):
    correct = 0.0
    cum_loss = 0.0
    learningRate = learningRates[0] if epoch < 5 else learningRates[1]
    
    # Make a pass over the training data.
    for (i, (img, label)) in enumerate(trainset):
        x = img.view(3 * 32 * 32, 1)
        
        # Forward pass. (Prediction stage)
        predictions = softmax(linear(x, weight, bias))
        cum_loss += loss(label, predictions)[0]
        max_score, max_label = predictions.max(0)
        if max_label[0] == label: correct += 1
        
        #Backward pass. (Gradient computation stage)
        gradOutput = loss_softmax_backward(label, predictions)
        gradWeight, gradBias = linear_backward(x, weight, bias, gradOutput)
        
        # Parameter updates.
        gradWeight.add_(weightDecay, weight)
        weight.add_(-learningRate, gradWeight)
        #bias = bias - learningRate * gradBias
        
        # Logging the current results on training.
        if (i + 1) % 10000 == 0:
            print('Train-epoch %d. Iteration %05d, Avg-Loss: %.4f, Accuracy: %.4f' % 
                  (epoch, i + 1, cum_loss / (i + 1), correct / (i + 1)))
    
    
    # Make a pass over the validation data.
    correct = 0.0
    cum_loss = 0.0
    for (i, (img, label)) in enumerate(valset):
        x = img.view(3 * 32 * 32, 1)
        
        # Forward pass. (Prediction stage)
        predictions = softmax(linear(x, weight, bias))
        cum_loss += loss(label, predictions)[0]
        max_score, max_label = predictions.max(0)
        if max_label[0] == label: correct += 1
            
    # Logging the current results on validation.
    print('Validation-epoch %d. Avg-Loss: %.4f, Accuracy: %.4f' % 
          (epoch, cum_loss / len(valset), correct / len(valset)))
    
print('Confirming biases:\n%s\n' % bias)
toc = time.perf_counter()
print('%5d seconds' % ((toc - tic)/num_epochs))
        

Testing with biases:
tensor([[ 2.1986],
        [-1.1776],
        [11.7782],
        [ 3.4704],
        [-5.4980],
        [ 5.2877],
        [ 7.3121],
        [ 1.5822],
        [-1.6860],
        [ 9.9831]])

Train-epoch 0. Iteration 10000, Avg-Loss: 3.1818, Accuracy: 0.1583
Train-epoch 0. Iteration 20000, Avg-Loss: 2.9427, Accuracy: 0.1776
Train-epoch 0. Iteration 30000, Avg-Loss: 2.8110, Accuracy: 0.1958
Train-epoch 0. Iteration 40000, Avg-Loss: 2.7309, Accuracy: 0.2069
Train-epoch 0. Iteration 50000, Avg-Loss: 2.6747, Accuracy: 0.2151
Validation-epoch 0. Avg-Loss: 2.4220, Accuracy: 0.2559
Train-epoch 1. Iteration 10000, Avg-Loss: 2.4052, Accuracy: 0.2593
Train-epoch 1. Iteration 20000, Avg-Loss: 2.4094, Accuracy: 0.2587
Train-epoch 1. Iteration 30000, Avg-Loss: 2.3901, Accuracy: 0.2650
Train-epoch 1. Iteration 40000, Avg-Loss: 2.3806, Accuracy: 0.2687
Train-epoch 1. Iteration 50000, Avg-Loss: 2.3732, Accuracy: 0.2699
Validation-epoch 1. Avg-Loss: 2.3285, Accuracy: 0.2820
Train-e

# **Nearest Neighbor**

*(Never got this working)*



**Uncomment to install pykeops for Lazy Tensor**

In [336]:
#!pip install pykeops[full] > install.log

In [362]:
import numpy as np
import os
import torch.nn as nn
import torch.nn.functional as F
from pykeops.torch import LazyTensor
use_cuda = torch.cuda.is_available()
tensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor

In [363]:
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [376]:
# Data augmentation and normalization for training
# Just normalization for validation
transform = transforms.Compose(
        [transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4,
                                        shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                    download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                            shuffle=False, num_workers=2)


# Detect if we have a GPU available
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Send the model to GPU
model_ft = model_ft.to(device)

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

Files already downloaded and verified
Files already downloaded and verified


In [394]:
x = tensor(trainset.data.astype('float32'))
y = tensor(testset.data.astype('int64'))

In [400]:
D = x.shape[1]
Ntrain, Ntest = (60000, 10000) if use_cuda else (1000, 100)
x_train, y_train = x[:Ntrain,:].contiguous(), y[:Ntrain].contiguous()
x_test,  y_test  = x[Ntrain:Ntrain+Ntest,:].contiguous(), y[Ntrain:Ntrain+Ntest].contiguous()

In [398]:
K = 3  # N.B.: K has very little impact on the running time

start = time.time()    # Benchmark:

X_i = LazyTensor(x_test[:, None, :])  #  test set
X_j = LazyTensor(x_train[None, :, :])  #  train set
D_ij = ((X_i - X_j) ** 2).sum(-1)  # (10000, 60000) symbolic matrix of squared L2 distances

ind_knn = D_ij.argKmin(K, dim=1)  # Samples <-> Dataset, (N_test, K)
lab_knn = y_train[ind_knn]  # (N_test, K) array of integers in [0,9]
y_knn, _ = lab_knn.mode()   # Compute the most likely label

if use_cuda: torch.cuda.synchronize()
end = time.time()

error = (y_knn != y_test).float().mean().item()
time  = end - start

print("{}-NN test error = {:.2f}% in {:.2f}s.".format(K, error*100, time))

ValueError: ignored