In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable


In [2]:
input_size = 784
hidden_size = 500
num_classes = 10
num_epochs = 5
batch_size = 10
learning_rate = 0.001

In [3]:
train_dataset = dsets.MNIST(root='./data', 
                            train=True, 
                            transform=transforms.ToTensor(),  
                            download=True)

test_dataset = dsets.MNIST(root='./data', 
                           train=False, 
transform=transforms.ToTensor())

In [4]:
# Data Loader (Input Pipeline)
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, 
                                           batch_size=batch_size, 
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset, 
                                          batch_size=batch_size, 
shuffle=False)

In [8]:
class Net(nn.Module):
    def __init__(self,num_classes):
        super(Net, self).__init__()
        self.main= nn.Sequential(nn.Conv2d(1,16,kernel_size=5,padding=2),
                                 nn.ReLU(),
                                 nn.MaxPool2d(2),
                                 nn.Conv2d(16,32,kernel_size=5,padding=2),
                                 nn.ReLU(),
                                 nn.MaxPool2d(2),
                                 Flatten(),
                                 nn.Linear(7*7*32, num_classes),
                                 nn.Sigmoid())
    
    def weight_init(self,):
        for m in self._modules:
            normal_init(self._modules[m],)
            
    
    
    def forward(self, x):
        inp=self.main(x)
        return inp

In [9]:
class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size()[0], -1)

In [10]:
net = Net(num_classes)

In [11]:
def normal_init(m):
    if isinstance(m,nn.Linear):
        
        m.weight.data.xavier_normal()
        m.bias.data.zero_()

In [12]:
net=net.cuda()
net.weight_init()

print (net)

Net (
  (main): Sequential (
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU ()
    (2): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (3): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU ()
    (5): MaxPool2d (size=(2, 2), stride=(2, 2), dilation=(1, 1))
    (6): Flatten (
    )
    (7): Linear (1568 -> 10)
    (8): Sigmoid ()
  )
)


In [13]:
criterion = nn.CrossEntropyLoss()  
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)


In [14]:
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):  
        # Convert torch tensor to Variable
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        
        # Forward + Backward + Optimize
        optimizer.zero_grad()  # zero the gradient buffer
        outputs = net(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if (i+1) % 100 == 0:
            print ('Epoch [%d/%d], Step [%d/%d], Loss: %.4f' 
%(epoch+1, num_epochs, i+1, len(train_dataset)//batch_size, loss.data[0]))

Epoch [1/5], Step [100/6000], Loss: 1.5699
Epoch [1/5], Step [200/6000], Loss: 1.5471
Epoch [1/5], Step [300/6000], Loss: 1.5714
Epoch [1/5], Step [400/6000], Loss: 1.4878
Epoch [1/5], Step [500/6000], Loss: 1.5282
Epoch [1/5], Step [600/6000], Loss: 1.4820
Epoch [1/5], Step [700/6000], Loss: 1.5469
Epoch [1/5], Step [800/6000], Loss: 1.5521
Epoch [1/5], Step [900/6000], Loss: 1.5106
Epoch [1/5], Step [1000/6000], Loss: 1.4723
Epoch [1/5], Step [1100/6000], Loss: 1.6013
Epoch [1/5], Step [1200/6000], Loss: 1.4869
Epoch [1/5], Step [1300/6000], Loss: 1.4973
Epoch [1/5], Step [1400/6000], Loss: 1.4662
Epoch [1/5], Step [1500/6000], Loss: 1.5385
Epoch [1/5], Step [1600/6000], Loss: 1.5380
Epoch [1/5], Step [1700/6000], Loss: 1.5406
Epoch [1/5], Step [1800/6000], Loss: 1.4763
Epoch [1/5], Step [1900/6000], Loss: 1.4782
Epoch [1/5], Step [2000/6000], Loss: 1.4786
Epoch [1/5], Step [2100/6000], Loss: 1.4613
Epoch [1/5], Step [2200/6000], Loss: 1.5623
Epoch [1/5], Step [2300/6000], Loss: 1.66

Epoch [4/5], Step [800/6000], Loss: 1.4621
Epoch [4/5], Step [900/6000], Loss: 1.5748
Epoch [4/5], Step [1000/6000], Loss: 1.4616
Epoch [4/5], Step [1100/6000], Loss: 1.5016
Epoch [4/5], Step [1200/6000], Loss: 1.4612
Epoch [4/5], Step [1300/6000], Loss: 1.4612
Epoch [4/5], Step [1400/6000], Loss: 1.4653
Epoch [4/5], Step [1500/6000], Loss: 1.4612
Epoch [4/5], Step [1600/6000], Loss: 1.4612
Epoch [4/5], Step [1700/6000], Loss: 1.4612
Epoch [4/5], Step [1800/6000], Loss: 1.4620
Epoch [4/5], Step [1900/6000], Loss: 1.5663
Epoch [4/5], Step [2000/6000], Loss: 1.4678
Epoch [4/5], Step [2100/6000], Loss: 1.4614
Epoch [4/5], Step [2200/6000], Loss: 1.4736
Epoch [4/5], Step [2300/6000], Loss: 1.4612
Epoch [4/5], Step [2400/6000], Loss: 1.4785
Epoch [4/5], Step [2500/6000], Loss: 1.4619
Epoch [4/5], Step [2600/6000], Loss: 1.4613
Epoch [4/5], Step [2700/6000], Loss: 1.4612
Epoch [4/5], Step [2800/6000], Loss: 1.4648
Epoch [4/5], Step [2900/6000], Loss: 1.5892
Epoch [4/5], Step [3000/6000], Los

In [15]:
correct = 0
total = 0
for images, labels in test_loader:
    images = Variable(images.cuda())
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)
    correct += (predicted == labels.cuda()).sum()

print('Accuracy of the network on the 10000 test images: %d %%' % (100 * correct / total))

Accuracy of the network on the 10000 test images: 98 %
