CIFAR-10 contains 60,000 labeled 32x32 color images in 10 classes; the training set has 50,000 images and the test set has 10,000.

The categories are: airplane, automobile, bird, cat, deer, dog, frog, horse, ship and truck.
More information regarding the CIFAR-10 and CIFAR-100 data sets can be found on the CIFAR homepage: https://www.cs.toronto.edu/~kriz/cifar.html

In [1]:
import os
import random
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as td
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
num_epochs = 30

def cifar_loaders(batch_size, shuffle_test=False, test_batch_size=None):
    """Build the CIFAR-10 training and test data loaders.

    The training set is augmented with a random horizontal flip and a random
    32x32 crop taken after padding each side by 4 pixels; the test set is only
    converted to a tensor. Both are normalized with the same statistics.

    Args:
        batch_size: Mini-batch size of the training loader, and of the test
            loader when ``test_batch_size`` is not given.
        shuffle_test: Whether the test loader shuffles its samples.
        test_batch_size: Optional mini-batch size for the test loader.
            Defaults to ``batch_size``.

    Returns:
        A ``(train_loader, test_loader)`` tuple.
    """
    if test_batch_size is None:
        test_batch_size = batch_size

    # NOTE(review): these means look like the commonly quoted ImageNet channel
    # means rather than CIFAR-10 statistics -- confirm this is intended. They
    # are left unchanged so previously recorded results stay comparable.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.225, 0.225, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        transforms.ToTensor(),
        normalize,
    ])
    test_transform = transforms.Compose([transforms.ToTensor(), normalize])

    # Only the training split triggers the download; the test split is read
    # from the same root directory.
    train = datasets.CIFAR10('./', train=True, download=True,
                             transform=train_transform)
    test = datasets.CIFAR10('./', train=False, transform=test_transform)

    train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size,
        shuffle=True, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test, batch_size=test_batch_size,
        shuffle=shuffle_test, pin_memory=True)
    return train_loader, test_loader

batch_size = 64
test_batch_size = 64
input_size = 3072  # length of one flattened image: 3 channels * 32 * 32 pixels

# Build both loaders in one call so each dataset is constructed only once.
train_loader, test_loader = cifar_loaders(batch_size, test_batch_size=test_batch_size)



Files already downloaded and verified
Files already downloaded and verified


In [2]:
import torch.nn as nn
import torch.nn.functional as F

class SevenLayerFC_Net(nn.Module):
    """Fully connected classifier with six ReLU hidden layers and a linear output layer.

    Args:
        D_in: Number of input features per sample.
        H: Width of every hidden layer.
        D_out: Number of output classes.
    """

    def __init__(self, D_in,H,D_out):
        """
        Instantiate the seven nn.Linear modules and assign them as member
        variables: D_in -> H, five times H -> H, and H -> D_out.
        """
        super(SevenLayerFC_Net, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, H)
        self.linear3 = torch.nn.Linear(H, H)
        self.linear4 = torch.nn.Linear(H, H)
        self.linear5 = torch.nn.Linear(H, H)
        self.linear6 = torch.nn.Linear(H, H)
        self.linear7 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Map a tensor whose last dimension has D_in features to per-class
        log-probabilities over D_out classes (same leading dimensions).
        """
        hidden_layers = (self.linear1, self.linear2, self.linear3,
                         self.linear4, self.linear5, self.linear6)
        for layer in hidden_layers:
            x = F.relu(layer(x))
        x = self.linear7(x)
        # nn.Linear puts the class scores on the last axis. Naming the axis
        # explicitly avoids the deprecated implicit-dim behaviour (and the
        # warning it emits on every forward pass).
        return F.log_softmax(x, dim=-1)
      

In [3]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = batch_size, input_size, 200, 10
num_epochs = 30

# Construct our model by instantiating the class defined above
model = SevenLayerFC_Net(D_in, H, D_out)

# Construct our loss function and an Optimizer. The call to model.parameters()
# hands SGD the weights and biases of all seven nn.Linear layers of the model.
# NOTE(review): the model already returns log-probabilities and
# CrossEntropyLoss applies log-softmax itself, so it is applied twice here.
# That is redundant rather than wrong; NLLLoss is the usual partner.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

model.train()
for epoch in range(num_epochs):
    batch_loss = 0
    total_batches = 0

    for images, labels in train_loader:
        # Flatten every image into a D_in-long vector: (batch, D_in)
        images = images.reshape(-1, D_in)

        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_batches += 1
        batch_loss += loss.item()

    # Mean of the per-batch losses seen during this epoch
    avg_loss_epoch = batch_loss/total_batches
    print ('Epoch [{}/{}], Averge Loss:for epoch[{}, {:.4f}]' 
                   .format(epoch+1, num_epochs, epoch+1, avg_loss_epoch ))





Epoch [1/30], Averge Loss:for epoch[1, 2.1157]
Epoch [2/30], Averge Loss:for epoch[2, 1.8126]
Epoch [3/30], Averge Loss:for epoch[3, 1.6854]
Epoch [4/30], Averge Loss:for epoch[4, 1.6224]
Epoch [5/30], Averge Loss:for epoch[5, 1.5679]
Epoch [6/30], Averge Loss:for epoch[6, 1.5314]
Epoch [7/30], Averge Loss:for epoch[7, 1.5056]
Epoch [8/30], Averge Loss:for epoch[8, 1.4760]
Epoch [9/30], Averge Loss:for epoch[9, 1.4560]
Epoch [10/30], Averge Loss:for epoch[10, 1.4384]
Epoch [11/30], Averge Loss:for epoch[11, 1.4201]
Epoch [12/30], Averge Loss:for epoch[12, 1.4022]
Epoch [13/30], Averge Loss:for epoch[13, 1.3893]
Epoch [14/30], Averge Loss:for epoch[14, 1.3797]
Epoch [15/30], Averge Loss:for epoch[15, 1.3673]
Epoch [16/30], Averge Loss:for epoch[16, 1.3551]
Epoch [17/30], Averge Loss:for epoch[17, 1.3444]
Epoch [18/30], Averge Loss:for epoch[18, 1.3328]
Epoch [19/30], Averge Loss:for epoch[19, 1.3240]
Epoch [20/30], Averge Loss:for epoch[20, 1.3162]
Epoch [21/30], Averge Loss:for epoch[2

In [4]:
# Test the Model
correct = 0.
total = 0.

model.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten every image into a vector, as during training
        images = images.reshape(-1, 3*32*32)
        outputs_test = model(images)
        # Index of the highest-scoring class for each sample
        _, predicted = torch.max(outputs_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
       



Accuracy of the network on the 10000 test images: 53 %


In [16]:
# Count the trainable parameters of `model`. A list (rather than a one-shot
# `filter` iterator) keeps `model_parameters` usable after the sum.
model_parameters = [p for p in model.parameters() if p.requires_grad]
params = sum(p.numel() for p in model_parameters)
print(params)

817610


In [8]:
# Print the module summary of `model` (one line per registered layer).
print(model)

SevenLayerFC_Net(
  (linear1): Linear(in_features=3072, out_features=200, bias=True)
  (linear2): Linear(in_features=200, out_features=200, bias=True)
  (linear3): Linear(in_features=200, out_features=200, bias=True)
  (linear4): Linear(in_features=200, out_features=200, bias=True)
  (linear5): Linear(in_features=200, out_features=200, bias=True)
  (linear6): Linear(in_features=200, out_features=200, bias=True)
  (linear7): Linear(in_features=200, out_features=10, bias=True)
)


In [11]:
class SevenLayerFC_NoRelu_Net(nn.Module):
    """Seven stacked linear layers with no activation in between.

    With no nonlinearity between the layers, the stack composes to a single
    affine map of the input, followed by log-softmax.

    Args:
        D_in: Number of input features per sample.
        H: Width of every hidden layer.
        D_out: Number of output classes.
    """

    def __init__(self, D_in,H,D_out):
        """
        Instantiate the seven nn.Linear modules and assign them as member
        variables: D_in -> H, five times H -> H, and H -> D_out.
        """
        super(SevenLayerFC_NoRelu_Net, self).__init__()
        self.linear1 = torch.nn.Linear(D_in, H)
        self.linear2 = torch.nn.Linear(H, H)
        self.linear3 = torch.nn.Linear(H, H)
        self.linear4 = torch.nn.Linear(H, H)
        self.linear5 = torch.nn.Linear(H, H)
        self.linear6 = torch.nn.Linear(H, H)
        self.linear7 = torch.nn.Linear(H, D_out)

    def forward(self, x):
        """
        Map a tensor whose last dimension has D_in features to per-class
        log-probabilities over D_out classes (same leading dimensions).
        """
        all_layers = (self.linear1, self.linear2, self.linear3, self.linear4,
                      self.linear5, self.linear6, self.linear7)
        for layer in all_layers:
            x = layer(x)
        # nn.Linear puts the class scores on the last axis. Naming the axis
        # explicitly avoids the deprecated implicit-dim behaviour (and the
        # warning it emits on every forward pass).
        return F.log_softmax(x, dim=-1)
  


In [12]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N, D_in, H, D_out = batch_size, input_size, 200, 10
num_epochs = 30

# Construct our model by instantiating the class defined above
model2 = SevenLayerFC_NoRelu_Net(D_in, H, D_out)

# Construct our loss function and an Optimizer. The call to model2.parameters()
# hands SGD the weights and biases of all seven nn.Linear layers of the model.
# NOTE(review): the model already returns log-probabilities and
# CrossEntropyLoss applies log-softmax itself, so it is applied twice here.
# That is redundant rather than wrong; NLLLoss is the usual partner.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)

model2.train()
for epoch in range(num_epochs):
    batch_loss = 0
    total_batches = 0

    for images, labels in train_loader:
        # Flatten every image into a D_in-long vector: (batch, D_in)
        images = images.reshape(-1, D_in)

        outputs = model2(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_batches += 1
        batch_loss += loss.item()

    # Mean of the per-batch losses seen during this epoch
    avg_loss_epoch = batch_loss/total_batches
    print ('Epoch [{}/{}], Averge Loss:for epoch[{}, {:.4f}]' 
                   .format(epoch+1, num_epochs, epoch+1, avg_loss_epoch ))





Epoch [1/30], Averge Loss:for epoch[1, 1.9793]
Epoch [2/30], Averge Loss:for epoch[2, 1.9217]
Epoch [3/30], Averge Loss:for epoch[3, 1.9143]
Epoch [4/30], Averge Loss:for epoch[4, 1.9092]
Epoch [5/30], Averge Loss:for epoch[5, 1.9027]
Epoch [6/30], Averge Loss:for epoch[6, 1.9002]
Epoch [7/30], Averge Loss:for epoch[7, 1.8993]
Epoch [8/30], Averge Loss:for epoch[8, 1.8983]
Epoch [9/30], Averge Loss:for epoch[9, 1.8988]
Epoch [10/30], Averge Loss:for epoch[10, 1.8972]
Epoch [11/30], Averge Loss:for epoch[11, 1.8939]
Epoch [12/30], Averge Loss:for epoch[12, 1.8953]
Epoch [13/30], Averge Loss:for epoch[13, 1.8909]
Epoch [14/30], Averge Loss:for epoch[14, 1.8920]
Epoch [15/30], Averge Loss:for epoch[15, 1.8914]
Epoch [16/30], Averge Loss:for epoch[16, 1.8878]
Epoch [17/30], Averge Loss:for epoch[17, 1.8886]
Epoch [18/30], Averge Loss:for epoch[18, 1.8890]
Epoch [19/30], Averge Loss:for epoch[19, 1.8892]
Epoch [20/30], Averge Loss:for epoch[20, 1.8893]
Epoch [21/30], Averge Loss:for epoch[2

In [13]:
# Test the Model
correct = 0.
total = 0.

model2.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten every image into a vector, as during training
        images = images.reshape(-1, 3*32*32)
        outputs_test = model2(images)
        # Index of the highest-scoring class for each sample
        _, predicted = torch.max(outputs_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
       



Accuracy of the network on the 10000 test images: 36 %


In [2]:
class Conv7Net(nn.Module):
    """Seven-layer CNN: four ReLU convolutions followed by three linear layers.

    For a 3x32x32 input the spatial size shrinks 32 -> 30 -> 28 -> 22 -> 16,
    so the flattened feature vector handed to ``fc1`` has 16*16*16 = 4096
    entries. ``forward`` returns per-class log-probabilities of shape
    (batch, 10).
    """

    def __init__(self):
        super(Conv7Net, self).__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, ...)

        # Layer 1: 3x32x32 -> 64x30x30 (5x5 kernel, padding 1)
        self.conv1 = nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=1)

        # Layer 2: 64x30x30 -> 128x28x28 (5x5 kernel, padding 1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=5, stride=1, padding=1)

        # Layer 3: 128x28x28 -> 64x22x22 (7x7 kernel, no padding)
        self.conv3 = nn.Conv2d(128, 64, kernel_size=7)

        # Layer 4: 64x22x22 -> 16x16x16 (7x7 kernel, no padding)
        self.conv4 = nn.Conv2d(64, 16, kernel_size=7)

        # Layers 5, 6 and 7: fully connected head
        self.fc1 = nn.Linear(16 * 16 * 16, 2048)
        self.fc2 = nn.Linear(2048, 1024)
        self.fc3 = nn.Linear(1024, 10)

    def forward(self, input):
        """Run a (batch, 3, 32, 32) image tensor through the network."""
        x = input
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = F.relu(conv(x))

        # Flatten everything but the batch dimension for the linear head
        x = x.view(x.shape[0], -1)

        # NOTE(review): there is no activation between the three linear
        # layers, so the head is equivalent to a single affine map.
        x = self.fc3(self.fc2(self.fc1(x)))
        return F.log_softmax(x, dim=1)

# Build a throwaway instance just to inspect the architecture.
net = Conv7Net()
print(net)

# Collect the parameter tensors in registration order.
params = [p for p in net.parameters()]
print(len(params))
print(params[0].shape)  # conv1's .weight

Conv7Net(
  (conv1): Conv2d(3, 64, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(128, 64, kernel_size=(7, 7), stride=(1, 1))
  (conv4): Conv2d(64, 16, kernel_size=(7, 7), stride=(1, 1))
  (fc1): Linear(in_features=4096, out_features=2048, bias=True)
  (fc2): Linear(in_features=2048, out_features=1024, bias=True)
  (fc3): Linear(in_features=1024, out_features=10, bias=True)
)
14
torch.Size([64, 3, 5, 5])


In [3]:
# Construct our model by instantiating the class defined above
ConvModel = Conv7Net()
num_epochs = 100

# Construct our loss function and an Optimizer. The call to
# ConvModel.parameters() hands SGD every learnable tensor of the network
# (the convolution and linear weights and biases).
# NOTE(review): the model already returns log-probabilities and
# CrossEntropyLoss applies log-softmax itself, so it is applied twice here.
# That is redundant rather than wrong; NLLLoss is the usual partner.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(ConvModel.parameters(), lr=0.001, momentum=0.9)

ConvModel.train()
for epoch in range(num_epochs):
    batch_loss = 0
    total_batches = 0

    for images, labels in train_loader:
        # The CNN consumes the image batches as loaded -- no flattening.
        outputs = ConvModel(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_batches += 1
        batch_loss += loss.item()

    # Mean of the per-batch losses seen during this epoch
    avg_loss_epoch = batch_loss/total_batches
    print ('Epoch [{}/{}], Averge Loss:for epoch[{}, {:.4f}]' 
                   .format(epoch+1, num_epochs, epoch+1, avg_loss_epoch ))




KeyboardInterrupt: 

In [3]:
# torch.save does not create missing parent directories, so make sure the
# checkpoint folder exists before writing into it.
os.makedirs("./models", exist_ok=True)

# Pickle the whole module object (this ties the file to the Conv7Net class
# definition being importable when it is loaded).
torch.save(ConvModel, "./models/ConvModel74")

# Then later:
#ConvModel = torch.load("./models/ConvModel74")

# Also keep a state-dict checkpoint that allows training to be resumed.
state = {
    # NOTE(review): this records the configured epoch budget, not the number
    # of epochs actually completed -- the two differ if training was
    # interrupted. Confirm which one is wanted.
    'epoch': num_epochs,
    'state_dict': ConvModel.state_dict(),
    'optimizer': optimizer.state_dict(),
   }
torch.save(state, "./models/ConvModelStateDict74")

#To resume training you would do things like: state = torch.load(filepath), and then,
#to restore the state of each individual object, something like this:
#ConvModel.load_state_dict(state['state_dict'])
#optimizer.load_state_dict(state['optimizer'])

NameError: name 'ConvModel' is not defined

In [17]:
# Test the Model
correct = 0.
total = 0.

ConvModel.eval()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    for images, labels in test_loader:
        # The CNN consumes the image batches as loaded -- no flattening.
        outputs_Conv_test = ConvModel(images)
        # Index of the highest-scoring class for each sample
        _, predicted = torch.max(outputs_Conv_test, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))
       

Accuracy of the network on the 10000 test images: 74 %


In [14]:
# Use the first CUDA GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0
