
Convolutional Neural Networks
===============




In [1]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
%matplotlib inline
# Device configuration
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# Hyper parameters
num_epochs = 2
num_classes = 10
batch_size = 100
learning_rate = 0.001

# input images of shape 1 x 28 x 28
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        # self.weights=torch.randn(32,16,5,5)    
        # self.weights.requires_grad=True
        
        self.conv1=nn.Conv2d(1, 16, kernel_size=5, stride=(1,1), padding=2)
        self.layer1 = nn.Sequential(
            self.conv1,
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))
        
        self.fc = nn.Linear(7*7*32, num_classes)
        
    def forward(self, x):
        out = self.layer1(x)
        # out=torch.nn.functional.conv2d(out,self.weights, bias=None, stride=1, padding=2) ##
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out
model = ConvNet(num_classes).to(device)


The learnable parameters of a model are returned by model.parameters()

In [2]:
params = list(model.parameters())
print('number of learnable parameters=', len(params))
print(params[0]) # conv1's .weight

number of learnable parameters= 10
Parameter containing:
tensor([[[[-0.1703, -0.0844, -0.1433,  0.0014,  0.1380],
          [ 0.0358, -0.1088,  0.0381,  0.0743,  0.1498],
          [ 0.0185,  0.0302,  0.0579,  0.1465, -0.1374],
          [-0.1229, -0.0961, -0.0668,  0.0532,  0.0851],
          [ 0.0240,  0.0054,  0.1965,  0.0847,  0.1884]]],


        [[[ 0.1330,  0.1046,  0.1868, -0.1849,  0.1108],
          [ 0.1597, -0.0141, -0.1538,  0.1554, -0.1292],
          [ 0.1856,  0.0433,  0.1666,  0.1571, -0.1783],
          [ 0.0915, -0.1579, -0.1862, -0.1617, -0.0913],
          [ 0.1584, -0.1019, -0.0858, -0.1443, -0.1747]]],


        [[[ 0.0033, -0.0575, -0.1132, -0.1870,  0.0853],
          [ 0.0093, -0.0515, -0.1398, -0.0230, -0.1095],
          [ 0.1656, -0.1975,  0.0288,  0.1076,  0.1007],
          [ 0.0604, -0.1646, -0.0911,  0.0429,  0.1083],
          [-0.1028,  0.1886,  0.1499, -0.0661,  0.0636]]],


        [[[ 0.1489, -0.0294,  0.1929, -0.1604, -0.1208],
          [-0.1812,

Let try a random 28 x 28 input Note: Expected input size to this model is 28 x 28

In [3]:
input = torch.randn(1, 1, 28, 28)
out = model(input)
print(out)

tensor([[-0.5849, -0.0414, -0.0816, -0.4699, -0.0023, -0.1852,  0.1841, -0.2544,
          0.0309, -0.0315]], grad_fn=<ThAddmmBackward>)


In [4]:
output = model(input)
target = torch.randn(10)  # a dummy target, for example
# print(target.shape)
target = target.view(1, -1)  # make it the same shape as output
# print(target.shape)
criterion = nn.MSELoss()

loss = criterion(output, target)
print('loss=  ',loss)
print('conv1.bias.grad before backward')
print(model.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(model.conv1.bias.grad)

loss=   tensor(1.0605, grad_fn=<MseLossBackward>)
conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 4.7221e-09,  4.4936e-09,  4.0705e-08,  6.3246e-09,  2.3683e-09,
         1.1321e-08, -4.4747e-09,  5.3355e-08,  1.6954e-08,  6.0536e-09,
         1.1752e-08, -1.7186e-08, -1.8908e-09, -1.6113e-08,  1.9921e-08,
        -4.2783e-08])


In [7]:
# MNIST dataset
train_dataset = torchvision.datasets.MNIST(root='../../data/',
                                           train=True, 
                                           transform=transforms.ToTensor())

test_dataset = torchvision.datasets.MNIST(root='../../data/',
                                          train=False, 
                                          transform=transforms.ToTensor())
print('train_dataset  \n',train_dataset)
print('test_dataset  \n',test_dataset)
# Data loader
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size, 
                                          shuffle=False)


train_dataset  
 Dataset MNIST
    Number of datapoints: 60000
    Split: train
    Root Location: ../../data/
    Transforms (if any): ToTensor()
    Target Transforms (if any): None
test_dataset  
 Dataset MNIST
    Number of datapoints: 10000
    Split: test
    Root Location: ../../data/
    Transforms (if any): ToTensor()
    Target Transforms (if any): None


In [8]:

model = ConvNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(1):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

# Test the model
model.eval()  # eval mode 
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

Epoch [1/2], Step [100/600], Loss: 0.1371
Epoch [1/2], Step [200/600], Loss: 0.1241
Epoch [1/2], Step [300/600], Loss: 0.0972
Epoch [1/2], Step [400/600], Loss: 0.0606
Epoch [1/2], Step [500/600], Loss: 0.0521
Epoch [1/2], Step [600/600], Loss: 0.0714
Test Accuracy of the model on the 10000 test images: 98.12 %
