In [1]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable

In [2]:
input_size = 28*28      # 784 -> image pixels
hidden_size = 400
out_size = 10   # 10 classes
epochs = 10     # how many times we pass our entire dataset into our network
batch_size = 100
learning_rate = 0.001

In [3]:
train_dataset = datasets.MNIST(root= './data',
                               train= True,
                               transform= transforms.ToTensor(),    # from image to Tensor
                               download= True)
test_dataset = datasets.MNIST(root='./data',
                              train= False,
                              transform=transforms.ToTensor(),
                              download= True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



In [4]:
# make data iterable by loadingit to a Loader
# Shuffle the training data to make it independent of the order
train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size= batch_size,
                                           shuffle= True)

test_loader = torch.utils.data.DataLoader(dataset= test_dataset,
                                          batch_size= batch_size,
                                          shuffle=False)

In [11]:
class Net(nn.Module):

  def __init__(self, _input_size, _hidden_size, _out_size):
    super(Net, self).__init__()
    self.fc1 = nn.Linear(_input_size, _hidden_size)
    self.relu = nn.ReLU()
    self.fc2 = nn.Linear(_hidden_size, _hidden_size)
    self.fc3 = nn.Linear(_hidden_size, _out_size)

  def forward(self, x):
    out = self.fc1(x)
    out = self.relu(out)
    out = self.fc2(out)
    out = self.relu(out)
    out = self.fc3(out)
    return out

In [13]:
net = Net(input_size, hidden_size, out_size)
CUDA = torch.cuda.is_available()
if CUDA:
  print(CUDA)
  net = net.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr= learning_rate)

In [17]:
net.parameters

<bound method Module.parameters of Net(
  (fc1): Linear(in_features=784, out_features=400, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=400, out_features=400, bias=True)
  (fc3): Linear(in_features=400, out_features=10, bias=True)
)>

In [None]:
# Visualize 
for i, (image, labels) in enumerate(train_loader):
  print(image.size())
  image = image.view(-1, 784) # reshape
  print(image.size())
  

In [23]:
# Train the Network
correct_train = 0
total_train = 0

for epoch in range(epochs):
  for i, (image, labels) in enumerate(train_loader):
    images = Variable(image.view(-1, 28*28))
    labels = Variable(labels)

    if CUDA:
      images = images.cuda()
      labels = labels.cuda()
    
    optimizer.zero_grad()
    outputs = net(images)
    _, predicted = torch.max(outputs.data, 1)

    total_train += labels.size(0)
    if CUDA:
      correct_train += (predicted.cpu() == labels.cpu()).sum()
    else:
      correct_train += (predicted == labels).sum()
    
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    if(i+1) % 100 == 0:
      print('Epoch [{}/{}], Iteration [{}/{}], Training Loss: {}, Training Accuracy: {}'.format(epoch+1, epochs, i+1, len(train_dataset)//batch_size, loss.data, (100*correct_train/total_train)))
print("DONE")

Epoch [1/10], Iteration [100/600], Training Loss: 0.3961848020553589, Training Accuracy: 91.7300033569336
Epoch [1/10], Iteration [200/600], Training Loss: 0.23302489519119263, Training Accuracy: 92.9749984741211
Epoch [1/10], Iteration [300/600], Training Loss: 0.1857720911502838, Training Accuracy: 93.66666412353516
Epoch [1/10], Iteration [400/600], Training Loss: 0.2854788899421692, Training Accuracy: 94.1624984741211
Epoch [1/10], Iteration [500/600], Training Loss: 0.1553056389093399, Training Accuracy: 94.49400329589844
Epoch [1/10], Iteration [600/600], Training Loss: 0.14030049741268158, Training Accuracy: 94.76000213623047
Epoch [2/10], Iteration [100/600], Training Loss: 0.17832380533218384, Training Accuracy: 95.08285522460938
Epoch [2/10], Iteration [200/600], Training Loss: 0.09228362143039703, Training Accuracy: 95.3949966430664
Epoch [2/10], Iteration [300/600], Training Loss: 0.06529790163040161, Training Accuracy: 95.58000183105469
Epoch [2/10], Iteration [400/600], T

In [25]:
# Test the Network
correct = 0
total = 0

for images, labels in test_loader:
  images = Variable(images.view(-1, 784))
  if CUDA:
    images = images.cuda()

  outputs = net(images)

  _, predicted = torch.max(outputs.data, 1)
  total += labels.size(0)

  if CUDA:
    correct += (predicted.cpu() == labels.cpu()).sum()
  else:
    correct += (predicted == labels).sum()
  
print("Final Test Accuracy: %d %%" % (100*correct/total))

Final Test Accuracy: 97 %
