In [1]:
import torch 
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms

from copy import deepcopy

In [2]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable
import os



In [3]:
## hyperparameters:
num_epochs = 5
batch_size = 100
learning_rate = 0.001

### Load dataset: MNIST

In [4]:
train_dataset  = torchvision.datasets.MNIST(root='./data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)

test_dataset = torchvision.datasets.MNIST(root='./data/',
                          train=False,
                          transform=transforms.ToTensor())

train_loader  = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                         batch_size=batch_size,
                                         shuffle=False)

## Model

In [5]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, padding=2), #1 * 28 * 28 -> 16 * 28 * 28
            nn.BatchNorm2d(16), #16 * 28 * 28
            nn.ReLU(),                 #16 * 28 * 28
            nn.MaxPool2d(2))       # 16 * 14 * 14
        self.layer2  = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, padding=2), # 16 * 14 * 14 -> 32 * 14 * 14
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(2))       # 32 * 7 * 7
        self.fc = nn.Linear(32 * 7 * 7, 10)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out
    
model = CNN()

## Train the model

In [6]:
criterion = nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = Variable(images)
        labels = Variable(labels)
            
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
            
        if (i+1) % 100 == 0:
            print("epoch, ",epoch,"loss - ", loss, )

epoch,  0 loss -  tensor(0.2129, grad_fn=<NllLossBackward>)
epoch,  0 loss -  tensor(0.1357, grad_fn=<NllLossBackward>)
epoch,  0 loss -  tensor(0.1141, grad_fn=<NllLossBackward>)
epoch,  0 loss -  tensor(0.0289, grad_fn=<NllLossBackward>)
epoch,  0 loss -  tensor(0.1090, grad_fn=<NllLossBackward>)
epoch,  0 loss -  tensor(0.0122, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0192, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0466, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0685, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0942, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0216, grad_fn=<NllLossBackward>)
epoch,  1 loss -  tensor(0.0841, grad_fn=<NllLossBackward>)
epoch,  2 loss -  tensor(0.0079, grad_fn=<NllLossBackward>)
epoch,  2 loss -  tensor(0.0113, grad_fn=<NllLossBackward>)
epoch,  2 loss -  tensor(0.0224, grad_fn=<NllLossBackward>)
epoch,  2 loss -  tensor(0.0685, grad_fn=<NllLossBackward>)
epoch,  2 loss -  tensor(0.0087, grad_fn

In [7]:
print("Model's state_dict:")

for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor].size())

print()

Model's state_dict:
layer1.0.weight 	 torch.Size([16, 1, 5, 5])
layer1.0.bias 	 torch.Size([16])
layer1.1.weight 	 torch.Size([16])
layer1.1.bias 	 torch.Size([16])
layer1.1.running_mean 	 torch.Size([16])
layer1.1.running_var 	 torch.Size([16])
layer1.1.num_batches_tracked 	 torch.Size([])
layer2.0.weight 	 torch.Size([32, 16, 5, 5])
layer2.0.bias 	 torch.Size([32])
layer2.1.weight 	 torch.Size([32])
layer2.1.bias 	 torch.Size([32])
layer2.1.running_mean 	 torch.Size([32])
layer2.1.running_var 	 torch.Size([32])
layer2.1.num_batches_tracked 	 torch.Size([])
fc.weight 	 torch.Size([10, 1568])
fc.bias 	 torch.Size([10])



In [8]:
for param_tensor in model.state_dict():
    print("\n\n\n",  model.state_dict()[param_tensor])





 tensor([[[[-1.9420e-01, -1.6090e-01, -5.9206e-02,  1.1052e-01,  9.3989e-02],
          [-1.4308e-01, -1.9076e-01, -1.8913e-01, -2.2099e-01, -1.3505e-02],
          [ 2.3600e-01,  1.2886e-02,  1.2736e-01,  1.0169e-01, -2.0292e-01],
          [-7.1781e-02,  1.8351e-01, -3.5066e-02, -7.1446e-02, -7.6804e-02],
          [ 2.6572e-02,  1.7802e-01, -6.9484e-02,  5.0888e-02,  1.4504e-01]]],


        [[[-1.9182e-01, -9.4777e-03, -6.2016e-02,  1.0417e-01,  1.7844e-02],
          [-4.5870e-02,  1.5727e-01, -4.4323e-02, -2.2859e-01,  1.4811e-01],
          [ 7.6114e-02,  9.2376e-02,  4.5873e-02, -2.2029e-01,  1.7676e-01],
          [ 1.8385e-01, -1.4945e-02, -1.2280e-01, -2.2663e-01,  8.3318e-02],
          [-1.2067e-01, -1.3029e-01, -1.4485e-01,  4.8450e-02,  9.3317e-02]]],


        [[[-1.9398e-01, -2.5151e-01, -1.2296e-01, -7.9709e-02, -2.3674e-01],
          [-6.5184e-02,  3.9264e-02,  6.8452e-02, -2.7957e-02, -2.1663e-01],
          [-7.6722e-02,  2.1158e-01, -9.1162e-02,  9.3339e-02, -

## Test the model

In [10]:
         
# Test the model
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
       # images = images.to(device)
       # labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

# Save the model checkpoint
torch.save(model.state_dict(), 'model.ckpt')

Test Accuracy of the model on the 10000 test images: 98.74 %


In [None]:
PATH = './model.pth'
torch.save(model.state_dict(), PATH)