### Importing Libraries

In [1]:
import torch

In [2]:
import torchvision

In [3]:
from torchvision.datasets import MNIST

In [6]:
from torch.utils.data import DataLoader

In [7]:
from torchvision import transforms

In [28]:
import matplotlib.pyplot as plt

In [32]:
from torch import nn

In [33]:
import torch.nn.functional as F

In [37]:
import torch.optim as optim

In [100]:
import time

### Loading data

In [8]:
# Preprocessing applied to every MNIST image.
# ToTensor converts the PIL image to a float tensor scaled to [0, 1];
# Normalize with mean 0.5 and std 0.5 then maps the values to [-1, 1].
# MNIST images are single-channel, so exactly one mean and one std are given:
# three-element tuples describe an RGB image and are rejected by current
# torchvision releases with a broadcast shape error.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

In [9]:
# Training split of MNIST, downloaded into ./dataset when it is not already
# there; every image goes through `transform` when it is accessed.
train_data = MNIST(
    root='./dataset',
    train=True,
    download=True,
    transform=transform,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Processing...
Done!


In [10]:
# Serve the training set in shuffled mini-batches of 32 samples.
train_loader = DataLoader(dataset=train_data, batch_size=32, shuffle=True)

In [13]:
# Held-out test split of MNIST, using the same preprocessing as the training data.
test_data = MNIST(
    root='./dataset',
    train=False,
    download=True,
    transform=transform,
)

In [14]:
# Serve the test set in fixed order (no shuffling) in mini-batches of 32 samples.
test_loader = DataLoader(dataset=test_data, batch_size=32, shuffle=False)

### Creating Network Class

In [34]:
class Network(nn.Module):
    """Fully connected classifier with three hidden ReLU layers.

    Layer widths are ``num_inputs -> 512 -> 512 -> 256 -> num_outputs``.

    Args:
        num_inputs: Number of features in each (already flattened) input sample.
        num_outputs: Number of output units, i.e. one raw score per class.
    """

    def __init__(self, num_inputs, num_outputs):
        super(Network, self).__init__()
        self.num_inputs = num_inputs
        self.num_outputs = num_outputs
        self.linear_1 = nn.Linear(self.num_inputs, 512, bias = True)
        self.linear_2 = nn.Linear(512, 512)
        self.linear_3 = nn.Linear(512, 256)
        # Size the output layer from num_outputs instead of a hard-coded 10 so
        # the constructor argument is actually honoured.
        self.output = nn.Linear(256, self.num_outputs)

    def forward(self, x):
        """Compute raw class scores for a batch of flattened inputs.

        Args:
            x: Tensor whose last dimension has ``num_inputs`` features.

        Returns:
            Tensor with ``num_outputs`` raw scores per sample. No softmax is
            applied here.
        """
        for hidden in (self.linear_1, self.linear_2, self.linear_3):
            x = F.relu(hidden(x))
        return self.output(x)

In [36]:
# Build the CPU model for 784 input features and 10 output classes, then echo
# it so the cell output lists its layers.
net = Network(num_inputs=784, num_outputs=10)
net

Network(
  (linear_1): Linear(in_features=784, out_features=512, bias=True)
  (linear_2): Linear(in_features=512, out_features=512, bias=True)
  (linear_3): Linear(in_features=512, out_features=256, bias=True)
  (output): Linear(in_features=256, out_features=10, bias=True)
)

In [38]:
# Loss function and optimizer for the CPU model
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=net.parameters(), lr=0.001)

In [None]:
# Train the CPU model for a fixed number of epochs. After every 200 batches,
# print the mean loss over those batches and the wall-clock time they took.
start_time = time.time()
epochs = 10
for e in range(epochs):
    temp_time = time.time()
    running_loss = 0.0
    for i, (X, y) in enumerate(train_loader):
        # Flatten every image in the batch into a 784-element row vector.
        X = X.view(-1, 784)

        # Forward pass and loss.
        pred = net(X)
        loss = criterion(pred, y)

        # Clear the gradients left over from the previous step, then update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            # Report and reset the 200-batch running average and timer.
            current_time = time.time()
            print('[{}, {}] - loss = {} - time = {}sec'.format(
                e + 1, i + 1, running_loss / 200.0, current_time - temp_time))
            temp_time = current_time
            running_loss = 0.0
end_time = time.time()
print('Training Finished')
print('total time taken : {}s'.format(end_time - start_time))

[1, 200] - loss = 0.16793319454420272 - time = 3.2657690048217773sec
[1, 400] - loss = 0.092325203398741 - time = 3.4397964477539062sec
[1, 600] - loss = 0.07637373230540107 - time = 3.1547653675079346sec
[1, 800] - loss = 0.07129379412621346 - time = 3.15456485748291sec
[1, 1000] - loss = 0.06167441463418072 - time = 3.0880486965179443sec
[1, 1200] - loss = 0.0578938189028122 - time = 3.1827282905578613sec
[1, 1400] - loss = 0.06317535292835601 - time = 3.1503076553344727sec
[1, 1600] - loss = 0.058467808939603856 - time = 3.145587682723999sec
[1, 1800] - loss = 0.07261471884528874 - time = 3.1576807498931885sec
[2, 200] - loss = 0.03949392190054368 - time = 3.142097234725952sec
[2, 400] - loss = 0.04247501629341059 - time = 3.179394006729126sec
[2, 600] - loss = 0.04118446313662389 - time = 3.1731443405151367sec
[2, 800] - loss = 0.05601288870140706 - time = 3.2772364616394043sec
[2, 1000] - loss = 0.06030740708798476 - time = 3.3797154426574707sec
[2, 1200] - loss = 0.05333562532965

In [65]:
# Testing model: count how many test images the CPU model misclassifies.
# Inference needs no gradients, so the loop runs under torch.no_grad() to avoid
# recording an autograd graph for every batch. Predictions and labels are
# compared as whole tensors rather than element by element in Python.
wrong_predictions = 0
with torch.no_grad():
    for images, labels in test_loader:
        # Flatten every image in the batch into a 784-element row vector.
        images = images.view(-1, 784)
        # Index of the highest score along dim 1 is the predicted class.
        pred = torch.max(net(images), 1)[1]
        wrong_predictions += (pred != labels).sum().item()

In [66]:
# Display the number of misclassified test images counted by the CPU model's test loop.
wrong_predictions

297

### Training the model on GPU

In [95]:
# Use the first CUDA device when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [96]:
# Display the selected device.
device

device(type='cuda', index=0)

In [97]:
# Network
gpu_net = Network(784, 10)
gpu_net = gpu_net.to(device)

In [98]:
# Optimzer and loss
gpu_optimizer = optim.Adam(gpu_net.parameters(), lr = 0.001)
gpu_criterion = nn.CrossEntropyLoss()

In [106]:
# Train the model that lives on `device` for a fixed number of epochs. After
# every 200 batches, print the mean loss over those batches.
start_time = time.time()
epochs = 10
for e in range(epochs):
    running_loss = 0.0
    for i, (X, y) in enumerate(train_loader):
        # Flatten every image into a 784-element row vector and move the batch
        # to the same device as the model.
        X = X.view(-1, 784).to(device)
        y = y.to(device)

        # Forward pass and loss.
        pred = gpu_net(X)
        loss = gpu_criterion(pred, y)

        # Clear the gradients left over from the previous step, then update.
        gpu_optimizer.zero_grad()
        loss.backward()
        gpu_optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 200 == 0:
            # Report and reset the 200-batch running average.
            print('[{}, {}] : loss = {}'.format(e + 1, i + 1, running_loss / 200.0))
            running_loss = 0.0
end_time = time.time()
print('Training Finished')
print('total time taken : {}s'.format(end_time - start_time))

[1, 200] : loss = 0.07696884298697114
[1, 400] : loss = 0.054524553585797546
[1, 600] : loss = 0.07331012662500143
[1, 800] : loss = 0.07188409036025405
[1, 1000] : loss = 0.07094056952744722
[1, 1200] : loss = 0.07267890341579913
[1, 1400] : loss = 0.07108556963503361
[1, 1600] : loss = 0.0754977654106915
[1, 1800] : loss = 0.08181849589571356
[2, 200] : loss = 0.0526874057110399
[2, 400] : loss = 0.05894044144079089
[2, 600] : loss = 0.055674785003066066
[2, 800] : loss = 0.06491296039894223
[2, 1000] : loss = 0.06255984475836157
[2, 1200] : loss = 0.06726265283301473
[2, 1400] : loss = 0.05991062329150736
[2, 1600] : loss = 0.06864715244621039
[2, 1800] : loss = 0.07819341415539384
[3, 200] : loss = 0.042140512503683565
[3, 400] : loss = 0.0751821862347424
[3, 600] : loss = 0.051086862850934264
[3, 800] : loss = 0.06155350729823113
[3, 1000] : loss = 0.05811148563399911
[3, 1200] : loss = 0.07065003241412342
[3, 1400] : loss = 0.05912720158696175
[3, 1600] : loss = 0.072044005990028

In [108]:
# Testing model
wrong_predictions = 0
for i, data in enumerate(test_loader):
    images, labels = data
    labels = labels.detach().numpy()
    images = images.view(-1, 784)
    pred = gpu_net.to('cpu')(images)
    pred = torch.max(pred, 1)[1]
    pred = pred.detach().numpy()
    for p, l in zip(pred, labels):
        if p != l:
            wrong_predictions += 1

In [109]:
# Display the number of misclassified test images counted by the GPU model's test loop.
wrong_predictions

243