In [1]:
import torch
import numpy as np
import torchvision
import torchvision.datasets as datasets
import pandas as pd
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim

In [2]:
# Turn each image into a tensor, then shift/scale every RGB channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training and test splits, stored under ./data (fetched if missing).
trainset = datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform
)
testset = datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform
)

# Human-readable class names, in label order.
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# Sizes of both splits, plus the list of training indices that the
# train/validation split shuffles and slices.
train_len, test_len = len(trainset), len(testset)
index = list(range(train_len))
print(train_len, test_len)

50000 10000


In [4]:
# Construct a validation set from 10 percent of the training images.
np.random.shuffle(index)  # in-place shuffle of the index list built earlier
# number of samples held out for validation
split = int(0.1 * train_len)
train_index = index[split:]
validation_index = index[:split]
# A DataLoader controls the batch size and drives the SGD loop.
# A plain list is accepted as a sampler but replays the exact same order every
# epoch; SubsetRandomSampler draws a fresh permutation of train_index each epoch.
train_loader = torch.utils.data.DataLoader(
    trainset,
    sampler=SubsetRandomSampler(train_index),
    batch_size=4,
    num_workers=10,
)
# Evaluation loaders keep the default batch size of 1; order does not matter
# there, so the fixed index list is used as-is for the validation subset.
validation_loader = torch.utils.data.DataLoader(trainset, sampler=validation_index)
test_loader = torch.utils.data.DataLoader(testset)

In [5]:
# Pull a single mini-batch from the training loader for inspection.
traindataiter = iter(train_loader)
# Use the built-in next(): the iterator's `.next()` method was a Python-2 style
# alias that newer PyTorch releases no longer provide.
trainimages, trainlabels = next(traindataiter)

In [6]:
class ModelA(nn.Module):
    """Two-convolution CNN that returns per-class log-probabilities.

    Shape trace for a 3x32x32 input: conv1 (padding 1) -> 64x32x32,
    conv2 (no padding) -> 128x30x30, 2x2 max-pool -> 128x15x15, which is
    flattened into fc1 (15*15*128 -> 256) and then fc2 (256 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3)
        self.fc1 = nn.Linear(15 * 15 * 128, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 classes for each row of the flattened batch."""
        feats = F.relu(self.conv1(x))
        feats = F.max_pool2d(self.conv2(feats), 2)
        feats = F.relu(feats)
        # Flatten to the width fc1 was built for (15*15*128).
        flat = feats.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


modelA = ModelA()

In [7]:
# SGD over every parameter of modelA with learning rate 0.003; no momentum
# argument is passed.
optimizer = optim.SGD(modelA.parameters(), lr = 0.003)

In [8]:
# Loss object used by the training cells in this notebook.
# NOTE(review): nn.CrossEntropyLoss applies log-softmax internally, while the
# ModelA-D classes already return F.log_softmax outputs. Log-softmax is
# idempotent, so the loss value is unaffected, but nn.NLLLoss would be the
# matching criterion for those models. Left as-is because raw-logit models may
# share this object -- confirm before changing.
criterion = nn.CrossEntropyLoss()

In [9]:
# Train modelA: 40 passes over train_loader using the optimizer and criterion
# defined in the preceding cells.
for epoch in range(40):
    running_loss = 0.0  # loss summed since the last progress line
    for i, (inputs, trainlabels) in enumerate(train_loader):
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = modelA(inputs)
        loss = criterion(outputs, trainlabels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue
        # every 2000 mini-batches: report the mean loss and start a new window
        print('[%d,%5d] loss: %.3f' % (epoch+1, i + 1, running_loss / 2000))
        running_loss = 0.0
print('Finished Training')

[1, 2000] loss: 2.002
[1, 4000] loss: 1.667
[1, 6000] loss: 1.523
[1, 8000] loss: 1.418
[1,10000] loss: 1.357
[2, 2000] loss: 1.262
[2, 4000] loss: 1.193
[2, 6000] loss: 1.155
[2, 8000] loss: 1.103
[2,10000] loss: 1.073
[3, 2000] loss: 1.011
[3, 4000] loss: 0.965
[3, 6000] loss: 0.931
[3, 8000] loss: 0.892
[3,10000] loss: 0.873
[4, 2000] loss: 0.826
[4, 4000] loss: 0.790
[4, 6000] loss: 0.756
[4, 8000] loss: 0.726
[4,10000] loss: 0.706
[5, 2000] loss: 0.662
[5, 4000] loss: 0.628
[5, 6000] loss: 0.591
[5, 8000] loss: 0.564
[5,10000] loss: 0.541
[6, 2000] loss: 0.494
[6, 4000] loss: 0.459
[6, 6000] loss: 0.420
[6, 8000] loss: 0.387
[6,10000] loss: 0.365
[7, 2000] loss: 0.314
[7, 4000] loss: 0.285
[7, 6000] loss: 0.245
[7, 8000] loss: 0.212
[7,10000] loss: 0.202
[8, 2000] loss: 0.160
[8, 4000] loss: 0.143
[8, 6000] loss: 0.135
[8, 8000] loss: 0.112
[8,10000] loss: 0.117
[9, 2000] loss: 0.098
[9, 4000] loss: 0.094
[9, 6000] loss: 0.102
[9, 8000] loss: 0.093
[9,10000] loss: 0.092
[10, 2000]

In [10]:
# Measure modelA's top-1 accuracy on the test set.
# The previous version re-ran the model once per sample and always read the
# prediction from row 0 of the batch while reading the label from row i, which
# is only correct for batch size 1. This version scores whole batches at once.
modelA.eval()  # inference mode; a no-op for layers without dropout/batch-norm
correct_count, all_count = 0, 0
with torch.no_grad():
    for inp, labels in test_loader:
        logps = modelA(inp)                # (batch, classes) log-probabilities
        pred_labels = logps.argmax(dim=1)  # most likely class per sample
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.7152


In [11]:
# Write modelA's state_dict (its parameters and buffers) to disk.
# NOTE(review): absolute, user-specific path -- consider a configurable
# directory so the notebook runs on other machines.
torch.save(modelA.state_dict(), '/home/brian_chen/modelA.pth')

In [12]:
# Build a fresh ModelA and restore its weights from the checkpoint file.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
modelA = ModelA()
modelA.load_state_dict(torch.load('/home/brian_chen/modelA.pth'))

<All keys matched successfully>

In [13]:
class ModelB(nn.Module):
    """Four-convolution CNN that returns per-class log-probabilities.

    Shape trace for a 3x32x32 input: conv1 -> 64x32x32, conv2 -> 128x30x30,
    pool -> 128x15x15, conv3 -> 128x15x15, conv4 (kernel 4, padding 1) ->
    256x14x14, pool -> 256x7x7, flattened into fc1 (7*7*256 -> 256) and
    fc2 (256 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=4, padding=1)
        self.fc1 = nn.Linear(7 * 7 * 256, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 classes for each row of the flattened batch."""
        feats = F.relu(self.conv1(x))
        feats = F.relu(F.max_pool2d(self.conv2(feats), 2))
        feats = F.relu(self.conv3(feats))
        feats = F.relu(F.max_pool2d(self.conv4(feats), 2))
        # Flatten to the width fc1 was built for (7*7*256).
        flat = feats.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


modelB = ModelB()

In [14]:
# Rebind `optimizer` to SGD over modelB's parameters (learning rate 0.003, no
# momentum argument), replacing the optimizer bound to this name before.
optimizer = optim.SGD(modelB.parameters(), lr = 0.003)

In [15]:
# Train modelB: 40 passes over train_loader using the current optimizer and
# the shared criterion.
for epoch in range(40):
    running_loss = 0.0  # loss summed since the last progress line
    for i, (inputs, trainlabels) in enumerate(train_loader):
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = modelB(inputs)
        loss = criterion(outputs, trainlabels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue
        # every 2000 mini-batches: report the mean loss and start a new window
        print('[%d,%5d] loss: %.3f' % (epoch+1, i + 1, running_loss / 2000))
        running_loss = 0.0
print('Finished Training')

[1, 2000] loss: 2.268
[1, 4000] loss: 1.968
[1, 6000] loss: 1.784
[1, 8000] loss: 1.637
[1,10000] loss: 1.557
[2, 2000] loss: 1.448
[2, 4000] loss: 1.368
[2, 6000] loss: 1.320
[2, 8000] loss: 1.256
[2,10000] loss: 1.222
[3, 2000] loss: 1.139
[3, 4000] loss: 1.071
[3, 6000] loss: 1.040
[3, 8000] loss: 0.988
[3,10000] loss: 0.963
[4, 2000] loss: 0.909
[4, 4000] loss: 0.860
[4, 6000] loss: 0.838
[4, 8000] loss: 0.793
[4,10000] loss: 0.780
[5, 2000] loss: 0.736
[5, 4000] loss: 0.698
[5, 6000] loss: 0.672
[5, 8000] loss: 0.630
[5,10000] loss: 0.620
[6, 2000] loss: 0.576
[6, 4000] loss: 0.539
[6, 6000] loss: 0.509
[6, 8000] loss: 0.467
[6,10000] loss: 0.459
[7, 2000] loss: 0.404
[7, 4000] loss: 0.375
[7, 6000] loss: 0.346
[7, 8000] loss: 0.302
[7,10000] loss: 0.294
[8, 2000] loss: 0.242
[8, 4000] loss: 0.230
[8, 6000] loss: 0.226
[8, 8000] loss: 0.198
[8,10000] loss: 0.193
[9, 2000] loss: 0.163
[9, 4000] loss: 0.161
[9, 6000] loss: 0.163
[9, 8000] loss: 0.153
[9,10000] loss: 0.164
[10, 2000]

In [16]:
# Measure modelB's top-1 accuracy on the test set.
# The previous version re-ran the model once per sample and always read the
# prediction from row 0 of the batch while reading the label from row i, which
# is only correct for batch size 1. This version scores whole batches at once.
modelB.eval()  # inference mode; a no-op for layers without dropout/batch-norm
correct_count, all_count = 0, 0
with torch.no_grad():
    for inp, labels in test_loader:
        logps = modelB(inp)                # (batch, classes) log-probabilities
        pred_labels = logps.argmax(dim=1)  # most likely class per sample
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.7672


In [48]:
# Write modelB's state_dict (its parameters and buffers) to disk.
# NOTE(review): absolute, user-specific path -- consider a configurable
# directory so the notebook runs on other machines.
torch.save(modelB.state_dict(), '/home/brian_chen/modelB.pth')

In [49]:
# Build a fresh ModelB and restore its weights from the checkpoint file.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
modelB = ModelB()
modelB.load_state_dict(torch.load('/home/brian_chen/modelB.pth'))

<All keys matched successfully>

In [17]:
class ModelC(nn.Module):
    """Six-convolution CNN that returns per-class log-probabilities.

    Shape trace for a 3x32x32 input: conv1 -> 64x32x32, conv2 -> 128x30x30,
    pool -> 128x15x15, conv3 -> 128x15x15, conv4 (kernel 4, padding 1) ->
    256x14x14, pool -> 256x7x7, conv5 -> 256x7x7, conv6 (kernel 2) ->
    512x6x6, flattened into fc1 (6*6*512 -> 256) and fc2 (256 -> 10).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3)
        self.conv3 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(128, 256, kernel_size=4, padding=1)
        self.conv5 = nn.Conv2d(256, 256, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(256, 512, kernel_size=2)
        self.fc1 = nn.Linear(6 * 6 * 512, 256)
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 classes for each row of the flattened batch."""
        feats = F.relu(self.conv1(x))
        feats = F.relu(F.max_pool2d(self.conv2(feats), 2))
        feats = F.relu(self.conv3(feats))
        feats = F.relu(F.max_pool2d(self.conv4(feats), 2))
        feats = F.relu(self.conv5(feats))
        feats = F.relu(self.conv6(feats))
        # Flatten to the width fc1 was built for (6*6*512).
        flat = feats.view(-1, self.fc1.in_features)
        hidden = F.relu(self.fc1(flat))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)


modelC = ModelC()

In [18]:
# Rebind `optimizer` to SGD over modelC's parameters (learning rate 0.003, no
# momentum argument), replacing the optimizer bound to this name before.
optimizer = optim.SGD(modelC.parameters(), lr = 0.003)

In [None]:
# Train modelC: 40 passes over train_loader using the current optimizer and
# the shared criterion.
for epoch in range(40):
    running_loss = 0.0  # loss summed since the last progress line
    for i, (inputs, trainlabels) in enumerate(train_loader):
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = modelC(inputs)
        loss = criterion(outputs, trainlabels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue
        # every 2000 mini-batches: report the mean loss and start a new window
        print('[%d,%5d] loss: %.3f' % (epoch+1, i + 1, running_loss / 2000))
        running_loss = 0.0
print('Finished Training')

[1, 2000] loss: 2.303
[1, 4000] loss: 2.302
[1, 6000] loss: 2.252
[1, 8000] loss: 2.016
[1,10000] loss: 1.873
[2, 2000] loss: 1.714
[2, 4000] loss: 1.620
[2, 6000] loss: 1.546
[2, 8000] loss: 1.483
[2,10000] loss: 1.456
[3, 2000] loss: 1.381
[3, 4000] loss: 1.326
[3, 6000] loss: 1.283
[3, 8000] loss: 1.243
[3,10000] loss: 1.235


In [58]:
# Measure modelC's top-1 accuracy on the test set.
# The previous version re-ran the model once per sample and always read the
# prediction from row 0 of the batch while reading the label from row i, which
# is only correct for batch size 1. This version scores whole batches at once.
modelC.eval()  # inference mode; a no-op for layers without dropout/batch-norm
correct_count, all_count = 0, 0
with torch.no_grad():
    for inp, labels in test_loader:
        logps = modelC(inp)                # (batch, classes) log-probabilities
        pred_labels = logps.argmax(dim=1)  # most likely class per sample
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))

Number Of Images Tested = 10000

Model Accuracy = 0.6905


In [61]:
# Write modelC's state_dict (its parameters and buffers) to disk.
# NOTE(review): absolute, user-specific path -- consider a configurable
# directory so the notebook runs on other machines.
torch.save(modelC.state_dict(), '/home/brian_chen/modelC.pth')

In [62]:
# Build a fresh ModelC and restore its weights from the checkpoint file.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
modelC = ModelC()
modelC.load_state_dict(torch.load('/home/brian_chen/modelC.pth'))

<All keys matched successfully>

In [69]:
class ModelD(nn.Module):
    """Narrow six-convolution CNN with a four-layer fully connected head.

    Shape trace for a 3x32x32 input: conv1 -> 8x32x32, conv2 -> 16x30x30,
    pool -> 16x15x15, conv3 -> 32x15x15, conv4 (kernel 4, padding 1) ->
    64x14x14, pool -> 64x7x7, conv5 -> 128x7x7, conv6 (kernel 2) -> 256x6x6,
    then 6*6*256 -> 256 -> 128 -> 64 -> 10 through fc1..fc4.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 8, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3)
        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 64, kernel_size=4, padding=1)
        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 256, kernel_size=2)
        self.fc1 = nn.Linear(6 * 6 * 256, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)

    def forward(self, x):
        """Return log-softmax scores over the 10 classes for each row of the flattened batch."""
        feats = F.relu(self.conv1(x))
        feats = F.relu(F.max_pool2d(self.conv2(feats), 2))
        feats = F.relu(self.conv3(feats))
        feats = F.relu(F.max_pool2d(self.conv4(feats), 2))
        feats = F.relu(self.conv5(feats))
        feats = F.relu(self.conv6(feats))
        # Flatten to the width fc1 was built for (6*6*256).
        hidden = feats.view(-1, self.fc1.in_features)
        # Three ReLU-activated hidden layers, then the linear output layer.
        for dense in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(dense(hidden))
        logits = self.fc4(hidden)
        return F.log_softmax(logits, dim=1)


modelD = ModelD()

In [70]:
# Rebind `optimizer` to SGD over modelD's parameters (learning rate 0.003, no
# momentum argument).
# NOTE(review): the training log recorded below this cell stays at 2.303
# (about ln 10, the loss of a uniform guess over 10 classes) for every logged
# step, so this configuration did not train modelD. Consider adding momentum
# or revisiting the learning rate/initialisation -- to be confirmed by a re-run.
optimizer = optim.SGD(modelD.parameters(), lr = 0.003)

In [71]:
# Train modelD: 40 passes over train_loader using the current optimizer and
# the shared criterion.
for epoch in range(40):
    running_loss = 0.0  # loss summed since the last progress line
    for i, (inputs, trainlabels) in enumerate(train_loader):
        # clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward pass, loss, backward pass, parameter update
        outputs = modelD(inputs)
        loss = criterion(outputs, trainlabels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 2000 != 0:
            continue
        # every 2000 mini-batches: report the mean loss and start a new window
        print('[%d,%5d] loss: %.3f' % (epoch+1, i + 1, running_loss / 2000))
        running_loss = 0.0
print('Finished Training')

[1, 2000] loss: 2.303
[1, 4000] loss: 2.304
[1, 6000] loss: 2.303
[1, 8000] loss: 2.303
[1,10000] loss: 2.303
[2, 2000] loss: 2.303
[2, 4000] loss: 2.303
[2, 6000] loss: 2.303
[2, 8000] loss: 2.303
[2,10000] loss: 2.303
[3, 2000] loss: 2.303
[3, 4000] loss: 2.303
[3, 6000] loss: 2.303
[3, 8000] loss: 2.303
[3,10000] loss: 2.303
[4, 2000] loss: 2.303
[4, 4000] loss: 2.303
[4, 6000] loss: 2.303
[4, 8000] loss: 2.303
[4,10000] loss: 2.303
[5, 2000] loss: 2.303
[5, 4000] loss: 2.303
[5, 6000] loss: 2.303
[5, 8000] loss: 2.303
[5,10000] loss: 2.303
[6, 2000] loss: 2.303
[6, 4000] loss: 2.303


KeyboardInterrupt: 

In [None]:
# Measure modelD's top-1 accuracy on the held-out validation split
# (this cell iterates validation_loader, not the test loader).
# The previous version re-ran the model once per sample and always read the
# prediction from row 0 of the batch while reading the label from row i, which
# is only correct for batch size 1. This version scores whole batches at once.
modelD.eval()  # inference mode; a no-op for layers without dropout/batch-norm
correct_count, all_count = 0, 0
with torch.no_grad():
    for inp, labels in validation_loader:
        logps = modelD(inp)                # (batch, classes) log-probabilities
        pred_labels = logps.argmax(dim=1)  # most likely class per sample
        correct_count += (pred_labels == labels).sum().item()
        all_count += labels.size(0)

print("Number Of Images Tested =", all_count)
print("\nModel Accuracy =", (correct_count/all_count))


In [None]:
# Write modelD's state_dict (its parameters and buffers) to disk.
# NOTE(review): absolute, user-specific path -- consider a configurable
# directory so the notebook runs on other machines.
torch.save(modelD.state_dict(), '/home/brian_chen/modelD.pth')

In [None]:
# Build a fresh ModelD and restore its weights from the checkpoint file.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
modelD = ModelD()
modelD.load_state_dict(torch.load('/home/brian_chen/modelD.pth'))

In [None]:
# Freeze the four base models: switch off gradient tracking on every one of
# their parameters so later training leaves them untouched.
for frozen_model in (modelA, modelB, modelC, modelD):
    for param in frozen_model.parameters():
        param.requires_grad_(False)

In [None]:
class MyEnsemble(nn.Module):
    """Feature-level ensemble of four base classifiers.

    Each base model is run on the input and the activations that feed its
    output layer (its last ``nn.Linear``) are captured. The four feature
    vectors are concatenated and passed to a new linear classifier.

    Args:
        modelA, modelB, modelC, modelD: base models; each must contain at
            least one ``nn.Linear`` that is called during ``forward``.
        nb_classes: number of output classes of the new classifier.

    Raises:
        ValueError: if a base model has no ``nn.Linear`` layer.
    """

    def __init__(self, modelA, modelB, modelC, modelD, nb_classes=10):
        super(MyEnsemble, self).__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC
        self.modelD = modelD

        # The base models name their layers fc1, fc2, ... and have no `.fc`
        # attribute, so assigning `model.fc = nn.Identity()` removed nothing:
        # every model kept returning class log-probabilities and the hard-coded
        # 256+256+256 classifier width could not match the concatenated output.
        # Instead, size the classifier from the input width of each model's
        # output layer; those inputs are the features captured in forward().
        feature_dim = sum(
            self._output_layer(member).in_features
            for member in (modelA, modelB, modelC, modelD)
        )

        # Create new classifier
        self.classifier = nn.Linear(feature_dim, nb_classes)

    @staticmethod
    def _output_layer(model):
        """Return the last nn.Linear registered on `model`."""
        # Assumes the last-registered Linear is the model's output layer.
        linear_layers = [m for m in model.modules() if isinstance(m, nn.Linear)]
        if not linear_layers:
            raise ValueError("ensemble members must contain an nn.Linear layer")
        return linear_layers[-1]

    @classmethod
    def _features(cls, model, x):
        """Run `model` on `x` and return the input of its output layer, flattened per sample."""
        captured = []

        def grab(module, inputs):
            # forward pre-hooks receive the positional inputs as a tuple
            captured.append(inputs[0])

        handle = cls._output_layer(model).register_forward_pre_hook(grab)
        try:
            model(x)
        finally:
            # always detach the hook so the base model is left unmodified
            handle.remove()
        if not captured:
            raise RuntimeError("output layer was not called during forward")
        feats = captured[-1]
        return feats.view(feats.size(0), -1)

    def forward(self, x):
        """Return raw class scores of shape (batch, nb_classes)."""
        x1 = self._features(self.modelA, x.clone())  # clone to make sure x is not changed by inplace methods
        x2 = self._features(self.modelB, x)
        x3 = self._features(self.modelC, x)
        x4 = self._features(self.modelD, x)
        x = torch.cat((x1, x2, x3, x4), dim=1)

        x = self.classifier(F.relu(x))
        return x
    
# Build the ensemble from the four base models.
# NOTE(review): this rebinds the name `modelD` from the fourth base model to
# the ensemble that wraps it, so re-running this cell would wrap the previous
# ensemble again. A distinct name would be clearer -- confirm no later cell
# relies on `modelD` being the ensemble before renaming.
modelD = MyEnsemble(modelA, modelB, modelC, modelD)