In [None]:
import torch
import numpy
import matplotlib.pyplot as plt
import torch.nn as nn 
import torchvision
import torchvision.transforms as transforms
from torchsummary import summary

In [None]:

# Run configuration shared by the cells below.

# Seed PyTorch's global RNG so weight initialisation is repeatable on a fresh kernel.
seed = 0
torch.manual_seed(seed)

batch_size = 64
input_size = 784 # 28x28
imput_size = input_size  # original (misspelled) name, kept so existing references keep working
num_classes = 10
num_epochs = 10
learning_rate = 0.01
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# MNIST training split: fetched into ./data when missing, images converted to tensors.
train_data = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [None]:
# MNIST held-out split (train=False), loaded from the same ./data root with the same tensor transform.
test_data = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

In [None]:
# Reshuffle the training samples every epoch so the batches differ between epochs.
train_data_loader = torch.utils.data.DataLoader(dataset=train_data,batch_size=batch_size,shuffle=True)
# Accuracy does not depend on sample order, so the test loader stays unshuffled.
test_data_loader = torch.utils.data.DataLoader(dataset=test_data,batch_size=batch_size,shuffle=False)


In [None]:
class LeNet(nn.Module):
  """LeNet-style CNN: two conv / batch-norm / ELU / max-pool stages, then three linear layers.

  The first linear layer takes 400 flattened features, which is the 16x5x5
  map the convolutional stages produce for a 1x28x28 input.
  """

  def __init__(self, num_classes):
    """Create the layers; ``num_classes`` is the output width of the last linear layer."""
    super().__init__()

    # Stage 1: 1 -> 6 channels. padding=2 preserves height/width for the 5x5 kernel,
    # then the 2x2 pool halves them.
    self.layer1 = nn.Sequential(
      nn.Conv2d(1, 6, kernel_size=5, stride=1, padding=2),
      nn.BatchNorm2d(6),
      nn.ELU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # Stage 2: 6 -> 16 channels, unpadded 5x5 convolution followed by another 2x2 pool.
    self.layer2 = nn.Sequential(
      nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
      nn.BatchNorm2d(16),
      nn.ELU(),
      nn.MaxPool2d(kernel_size=2, stride=2),
    )

    # Classifier head: 400 -> 120 -> 84 -> num_classes with ELU between the linear layers.
    self.fc1 = nn.Linear(400, 120)
    self.elu = nn.ELU()
    self.fc2 = nn.Linear(120, 84)
    self.elu1 = nn.ELU()
    self.fc3 = nn.Linear(84, num_classes)

  def forward(self, x):
    """Run a batch ``x`` through the network and return the unnormalised class scores."""
    features = self.layer2(self.layer1(x))
    # Collapse everything except the batch dimension before the linear layers.
    flat = features.reshape(features.shape[0], -1)
    hidden = self.elu(self.fc1(flat))
    hidden = self.elu1(self.fc2(hidden))
    return self.fc3(hidden)


In [None]:
# Build the network and move its parameters to the selected device.
model = LeNet(num_classes)
model = model.to(device)

# Cross-entropy on the network's raw scores, optimised with Adam over all model parameters.
cost_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Number of batches per epoch, reported in the training progress message.
total_steps = len(train_data_loader)



In [None]:
#training

log_every = 400  # print the current batch loss once every this many batches

for epoch in range(num_epochs):
  # Put the model in training mode explicitly so this cell does not depend on
  # whatever mode an earlier cell execution left it in (matters for BatchNorm).
  model.train()
  for i, (images, labels) in enumerate(train_data_loader):
    images = images.to(device)
    labels = labels.to(device)

    # Forward pass and loss for this batch.
    outputs = model(images)
    loss = cost_function(outputs, labels)

    # Clear gradients from the previous step, backpropagate, then update the weights.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (i+1) % log_every == 0:
      print("Epoch [{}/{}], Step[{}/{}],Loss:{:.4f}".format(epoch+1, num_epochs,i+1,total_steps , loss.item()))

Epoch [1/10], Step[400/938],Loss:0.3481
Epoch [1/10], Step[800/938],Loss:0.0792
Epoch [2/10], Step[400/938],Loss:0.1938
Epoch [2/10], Step[800/938],Loss:0.1192
Epoch [3/10], Step[400/938],Loss:0.2349
Epoch [3/10], Step[800/938],Loss:0.0195
Epoch [4/10], Step[400/938],Loss:0.3017
Epoch [4/10], Step[800/938],Loss:0.0056
Epoch [5/10], Step[400/938],Loss:0.2916
Epoch [5/10], Step[800/938],Loss:0.0458
Epoch [6/10], Step[400/938],Loss:0.3254
Epoch [6/10], Step[800/938],Loss:0.0000
Epoch [7/10], Step[400/938],Loss:0.3128
Epoch [7/10], Step[800/938],Loss:0.0004
Epoch [8/10], Step[400/938],Loss:0.1164
Epoch [8/10], Step[800/938],Loss:0.0217
Epoch [9/10], Step[400/938],Loss:0.4303
Epoch [9/10], Step[800/938],Loss:0.0003
Epoch [10/10], Step[400/938],Loss:0.3227
Epoch [10/10], Step[800/938],Loss:0.0116


In [None]:
# testing

# The network contains BatchNorm layers. In training mode they normalise each test
# batch with that batch's own statistics and keep updating their running averages,
# even under no_grad. Switch to eval mode for the measurement and restore the
# previous mode afterwards so re-running other cells behaves as before.
was_training = model.training
model.eval()

total , correct = 0 , 0
try:
  with torch.no_grad():  # no gradients are needed for inference
    for images, labels in test_data_loader:
      images = images.to(device)
      labels = labels.to(device)
      outputs = model(images)
      # Predicted class = index of the largest score in each row.
      predicted = outputs.argmax(dim=1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
finally:
  model.train(was_training)

print("Accuracy if model: {} %".format(100*correct/total))





Accuracy if model: 98.03 %


In [None]:
# Summarise a fresh instance rather than the trained `model`: torchsummary runs a
# forward pass on random data, which would otherwise go through the trained network.
# torchsummary's `device` argument defaults to "cuda"; pass the device actually in
# use and place the model on it so the probe input and the weights match.
summary(LeNet(num_classes).to(device), (1,28,28), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
       BatchNorm2d-2            [-1, 6, 28, 28]              12
               ELU-3            [-1, 6, 28, 28]               0
         MaxPool2d-4            [-1, 6, 14, 14]               0
            Conv2d-5           [-1, 16, 10, 10]           2,416
       BatchNorm2d-6           [-1, 16, 10, 10]              32
               ELU-7           [-1, 16, 10, 10]               0
         MaxPool2d-8             [-1, 16, 5, 5]               0
            Linear-9                  [-1, 120]          48,120
              ELU-10                  [-1, 120]               0
           Linear-11                   [-1, 84]          10,164
              ELU-12                   [-1, 84]               0
           Linear-13                   [-1, 10]             850
Total params: 61,750
Trainable params: 