In [2]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader

from torchvision.datasets import MNIST
from torchvision.transforms import Compose,ToTensor

import numpy as np
import matplotlib.pyplot as plt
import PIL

#### DNN model

In [3]:
class MNISTDNN(nn.Module):
    def __init__(self,IMG_SIZE=28):
        super(MNISTDNN,self).__init__()
        self.fc1 = nn.Linear(IMG_SIZE*IMG_SIZE,32)
        self.BN1 = torch.nn.BatchNorm1d(32)
        self.fc2 = nn.Linear(32,10)

    def forward(self,x):
        x = self.fc1(x)
        x = F.relu(x)
        x = self.BN1(x)
        x = self.fc2(x)
        x = torch.softmax(x,dim=-1)
        return x

#### CNN model

In [4]:
class MNISTCNN(nn.Module):
    def __init__(self,IMG_SIZE=28):
        super(MNISTCNN,self).__init__()
        self.conv1 = nn.Conv2d(1,8,5,stride=2)
        self.BN1 = torch.nn.BatchNorm2d(8)
        self.conv2 = nn.Conv2d(8,8,5,stride=2)
        self.BN2 = torch.nn.BatchNorm2d(8)
        self.conv3 = nn.Conv2d(8,8,3,stride=1)
        self.fc = nn.Linear(8*2*2,10)

    def forward(self,x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.BN1(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = self.BN2(x)
        x = self.conv3(x)
        x = F.relu(x)
        x = x.view(-1,8*2*2)
        x = self.fc(x)
        x = torch.softmax(x,dim=-1)
        return x

#### Util function for calculating accuracy

In [5]:
def compute_acc(argmax,y):
    count = 0
    for i in range(len(argmax)):
        if argmax[i]==y[i]:
            count+=1
    return count / len(argmax)

#### hyperparameters & datasets

In [6]:
IMG_SIZE = 28
BATCH_SIZE = 256
LEARNING_RATE = 0.001
NUM_EPOCHES = 5

In [7]:
transforms = Compose([
    ToTensor(),
])

trainset = MNIST('./MNIST_models/',train=True,transform=transforms,download=True)
testset = MNIST('./MNIST_models/',train=False,transform=transforms,download=True)

args = {
    'num_workers' : 1,
    'batch_size' : BATCH_SIZE,
    'shuffle' : True,
}

train_loader = DataLoader(trainset,**args)
test_loader = DataLoader(testset,**args)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./MNIST_models/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:03<00:00, 3283723.92it/s]


Extracting ./MNIST_models/MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST_models/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./MNIST_models/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 147502.32it/s]


Extracting ./MNIST_models/MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST_models/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./MNIST_models/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:01<00:00, 1183499.64it/s]


Extracting ./MNIST_models/MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST_models/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST_models/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3883899.85it/s]

Extracting ./MNIST_models/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST_models/MNIST/raw






####Training part(DNN)

In [8]:
model = MNISTDNN(IMG_SIZE).cuda()

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
num_params = sum([np.prod(p.size()) for p in model_parameters])
print("number of parameters : {}".format(num_params))

optimizer = Adam(model.parameters(),lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHES):
    tot_loss = 0.0

    for x,y in train_loader:
        optimizer.zero_grad()
        x = x.cuda().view(-1,IMG_SIZE*IMG_SIZE)
        y_ = model(x)
        loss = loss_fn(y_, y.cuda())
        loss.backward()
        tot_loss+=loss.item()
        optimizer.step()

    print("Epoch {}, Loss(train) : {}".format(epoch+1,tot_loss/BATCH_SIZE))
    if epoch % 2 == 1:
        x,y = next(iter(test_loader))
        x = x.cuda().view(-1,IMG_SIZE*IMG_SIZE)
        y_ = model(x)
        print(y_)
        _, argmax = torch.max(y_,dim=-1)
        test_acc = compute_acc(argmax,y.numpy())

        print("Acc(val) : {}".format(test_acc))

torch.save(model.state_dict(), "./MNIST_models/DNN.pt")

number of parameters : 25514
Epoch 1, Loss(train) : 1.579682664014399
Epoch 2, Loss(train) : 1.437080874107778
tensor([[1.6052e-04, 9.7494e-01, 2.2621e-03,  ..., 1.3216e-02, 1.3025e-03,
         4.4931e-03],
        [6.2200e-04, 1.1849e-03, 4.7773e-04,  ..., 1.8310e-04, 9.7208e-01,
         1.4159e-03],
        [3.3658e-05, 3.3361e-04, 9.9608e-01,  ..., 1.0234e-04, 3.7553e-05,
         2.4834e-05],
        ...,
        [1.7853e-04, 1.4881e-05, 2.3797e-04,  ..., 2.5855e-05, 9.9546e-01,
         3.8620e-05],
        [8.3516e-06, 9.9745e-01, 4.7626e-05,  ..., 2.6870e-04, 2.9431e-04,
         3.5997e-04],
        [7.0744e-04, 2.7514e-04, 2.4541e-03,  ..., 1.4005e-03, 6.0840e-04,
         3.2498e-04]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Acc(val) : 0.95703125
Epoch 3, Loss(train) : 1.4089618208818138
Epoch 4, Loss(train) : 1.3955695256590843
tensor([[2.2614e-06, 6.5287e-09, 1.0239e-04,  ..., 6.9148e-07, 5.5957e-05,
         3.4548e-08],
        [3.7404e-03, 3.5711e-04, 1.8762e-04, 

In [9]:
list(model.parameters())[3].requires_grad

True

In [10]:
list(model.parameters())[0].size()

torch.Size([32, 784])

In [11]:
np.prod(list(model.parameters())[0].size())

25088

In [12]:
# self.fc1(x) 784 * 32
# F.relu(x) 32
# self.BN1(x) 32
# self.fc2(x) 10 * 32
# torch.softmax(x,dim=-1) 10

for i in model.parameters():
  print(i.size())

torch.Size([32, 784])
torch.Size([32])
torch.Size([32])
torch.Size([32])
torch.Size([10, 32])
torch.Size([10])


In [13]:
model_test = MNISTDNN(IMG_SIZE).cuda()
model_test.load_state_dict(torch.load("./MNIST_models/DNN.pt"))
model_test.eval()
x,y = next(iter(test_loader))
x = x.cuda().view(-1,IMG_SIZE*IMG_SIZE)
y_ = model_test(x)
_, argmax = torch.max(y_,dim=-1)
test_acc = compute_acc(argmax,y.numpy())

print("Acc(test) : {}".format(test_acc))

  model_test.load_state_dict(torch.load("./MNIST_models/DNN.pt"))


Acc(test) : 0.95703125


#### Training part(CNN)

In [14]:
model = MNISTCNN(IMG_SIZE).cuda()

model_parameters = filter(lambda p: p.requires_grad, model.parameters())
num_params = sum([np.prod(p.size()) for p in model_parameters])
print("number of parameters : {}".format(num_params))

optimizer = Adam(model.parameters(),lr=LEARNING_RATE)
loss_fn = nn.CrossEntropyLoss()

for epoch in range(NUM_EPOCHES):
    tot_loss = 0.0

    for x,y in train_loader:
        optimizer.zero_grad()
        x = x.cuda()
        y_ = model(x)
        loss = loss_fn(y_, y.cuda())
        loss.backward()
        tot_loss+=loss.item()
        optimizer.step()

    print("Epoch {}, Loss(train) : {}".format(epoch+1,tot_loss/BATCH_SIZE))
    if epoch % 2 == 1:
        model.eval()

        x,y = next(iter(test_loader))
        x = x.cuda()
        y_ = model(x)
        _, argmax = torch.max(y_,dim=-1)
        test_acc = compute_acc(argmax,y.numpy())

        print("Acc(test) : {}".format(test_acc))

        model.train()

torch.save(model.state_dict(), "./MNIST_models/CNN.pt")

number of parameters : 2762
Epoch 1, Loss(train) : 1.6857146392576396
Epoch 2, Loss(train) : 1.4118502708151937
Acc(test) : 0.95703125
Epoch 3, Loss(train) : 1.3880043840035796
Epoch 4, Loss(train) : 1.3786680148914456
Acc(test) : 0.9765625
Epoch 5, Loss(train) : 1.3733413200825453


In [15]:
model_test = MNISTCNN(IMG_SIZE).cuda()
model_test.load_state_dict(torch.load("./MNIST_models/CNN.pt"))
model_test.eval()
x,y = next(iter(test_loader))
x = x.cuda()
y_ = model_test(x)
_, argmax = torch.max(y_,dim=-1)
test_acc = compute_acc(argmax,y.numpy())

print("Acc(test) : {}".format(test_acc))

  model_test.load_state_dict(torch.load("./MNIST_models/CNN.pt"))


Acc(test) : 0.9765625


In [16]:
for i in model.parameters():
  print(i.size())

torch.Size([8, 1, 5, 5])
torch.Size([8])
torch.Size([8])
torch.Size([8])
torch.Size([8, 8, 5, 5])
torch.Size([8])
torch.Size([8])
torch.Size([8])
torch.Size([8, 8, 3, 3])
torch.Size([8])
torch.Size([10, 32])
torch.Size([10])
