# PyTorch Deep Net

In [10]:
import torch
import torch.nn as nn

from torchvision.datasets import MNIST
from torchvision import transforms

from torchsummary import summary
# Pick the compute device: the first CUDA GPU if PyTorch can see one, else the CPU.
# NOTE(review): none of the cells visible here move the model or the batches to
# `device`, so everything appears to run on the CPU regardless — confirm intent.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [11]:
# MNIST train/test splits stored under ./data (only the training call requests a download).
# ToTensor() rescales pixel values from [0, 255] to [0, 1]; the transform is attached
# here but is only applied later, when individual samples are read from the dataset.
train = MNIST(root='data', train=True, download=True, transform=transforms.ToTensor())
test = MNIST(root='data', train=False, transform=transforms.ToTensor())

In [12]:
# Mini-batch iterators over both splits (128 samples per batch).
# Only the training loader shuffles; the test loader keeps the dataset order.
train_loader = torch.utils.data.DataLoader(
    dataset=train,
    batch_size=128,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test,
    batch_size=128,
)

In [13]:
# Layer widths of the fully connected network.
n_input = 784   # length of one flattened input image
n_dense1 = 64   # first hidden layer
n_dense2 = 64   # second hidden layer
n_dense3 = 64   # third hidden layer
n_out = 10      # one output score per class

In [15]:
# Fully connected classifier: three ReLU hidden layers, dropout applied only
# after the third one, and a linear output layer that emits raw class scores
# (no activation is applied here).
layers = [
    # First hidden layer
    nn.Linear(n_input, n_dense1), nn.ReLU(),
    # Second hidden layer
    nn.Linear(n_dense1, n_dense2), nn.ReLU(),
    # Third hidden layer, followed by dropout (p=0.5 is the nn.Dropout default)
    nn.Linear(n_dense2, n_dense3), nn.ReLU(), nn.Dropout(p=0.5),
    # Output layer
    nn.Linear(n_dense3, n_out),
]
model = nn.Sequential(*layers)
model

Sequential(
  (0): Linear(in_features=784, out_features=64, bias=True)
  (1): ReLU()
  (2): Linear(in_features=64, out_features=64, bias=True)
  (3): ReLU()
  (4): Linear(in_features=64, out_features=64, bias=True)
  (5): ReLU()
  (6): Dropout(p=0.5, inplace=False)
  (7): Linear(in_features=64, out_features=10, bias=True)
)

In [21]:
# CrossEntropyLoss takes the network's raw scores and applies the (log-)softmax
# itself, which is why the model has no softmax layer of its own.
cost_function = nn.CrossEntropyLoss()

# Adam over every trainable parameter of the model, learning rate 0.01.
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [22]:
def accuracy_pct(pred_y: torch.Tensor, true_y: torch.Tensor) -> float:
    """Return the percentage of predictions that match the true labels.

    Args:
        pred_y: Per-class scores with the classes along dimension 1;
            the predicted class is the index of the largest score.
        true_y: True class indices, one per row of ``pred_y``.

    Returns:
        Accuracy as a percentage in the range [0, 100].

    Raises:
        ZeroDivisionError: If ``true_y`` has length 0 along dimension 0.
    """
    predicted_class = pred_y.argmax(dim=1)
    n_correct = (predicted_class == true_y).sum().item()
    n_total = true_y.shape[0]
    return (n_correct / n_total) * 100

In [26]:
# Number of full passes over the training data.
n_epochs = 10

# Number of mini-batches the training loader yields per pass;
# the bare name on the last line displays it as the cell output.
n_batches = len(train_loader)
n_batches

469

In [28]:
# Training loop: one optimizer step per mini-batch, with the epoch's mean cost
# and accuracy reported at the end of every epoch.
for epoch in range(n_epochs):

  # Make sure dropout is active. The evaluation cell leaves the model in eval
  # mode, so re-running this cell afterwards would otherwise silently train
  # with dropout switched off.
  model.train()

  # Sample-weighted running totals for this epoch. They are plain Python
  # floats: adding the `cost` tensor itself would keep every batch's autograd
  # history reachable until the end of the epoch.
  cost_sum = 0.0
  accuracy_sum = 0.0
  n_seen = 0

  for i, (X, y) in enumerate(train_loader): # enumerate() provides count of iterations

    n_in_batch = X.shape[0]

    # forward propagation:
    X_flat = X.view(n_in_batch, -1) # flatten everything after the batch dimension
    y_hat = model(X_flat)
    cost = cost_function(y_hat, y)

    # backprop and optimization via gradient descent:
    optimizer.zero_grad() # set gradients to zero; .backward() accumulates them in buffers
    cost.backward()
    optimizer.step()

    # Accumulate metrics. `cost` is the batch mean, so weight it (and the
    # batch accuracy) by the batch size; the final batch can be smaller than
    # the others and must not count as much as a full one.
    cost_sum += cost.item() * n_in_batch
    accuracy_sum += accuracy_pct(y_hat, y) * n_in_batch
    n_seen += n_in_batch

    if (i + 1) % 100 == 0:
      print('Step {}'.format(i + 1))

  # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
  avg_cost = cost_sum / max(n_seen, 1)
  avg_accuracy = accuracy_sum / max(n_seen, 1)

  print('Epoch {}/{} complete: Cost: {:.3f}, Accuracy: {:.1f}% \n'
        .format(epoch + 1, n_epochs, avg_cost, avg_accuracy))

print('Training complete.')

Step 100
Step 200
Step 300
Step 400
Epoch 1/10 complete: Cost: 0.106, Accuracy: 97.3% 

Step 100
Step 200
Step 300
Step 400
Epoch 2/10 complete: Cost: 0.101, Accuracy: 97.4% 

Step 100
Step 200
Step 300
Step 400
Epoch 3/10 complete: Cost: 0.104, Accuracy: 97.4% 

Step 100
Step 200
Step 300
Step 400
Epoch 4/10 complete: Cost: 0.101, Accuracy: 97.5% 

Step 100
Step 200
Step 300
Step 400
Epoch 5/10 complete: Cost: 0.110, Accuracy: 97.3% 

Step 100
Step 200
Step 300
Step 400
Epoch 6/10 complete: Cost: 0.102, Accuracy: 97.4% 

Step 100
Step 200
Step 300
Step 400
Epoch 7/10 complete: Cost: 0.097, Accuracy: 97.5% 

Step 100
Step 200
Step 300
Step 400
Epoch 8/10 complete: Cost: 0.093, Accuracy: 97.6% 

Step 100
Step 200
Step 300
Step 400
Epoch 9/10 complete: Cost: 0.107, Accuracy: 97.4% 

Step 100
Step 200
Step 300
Step 400
Epoch 10/10 complete: Cost: 0.107, Accuracy: 97.3% 

Training complete.


### Testing the model

In [29]:
# Number of mini-batches in one pass over the test loader;
# the bare name on the last line displays it as the cell output.
n_test_batches = len(test_loader)
n_test_batches

79

In [31]:
# Evaluate the trained model on the test set and report mean cost and accuracy.
model.eval() # inference mode: dropout becomes a no-op (batch-norm layers, if any, would use running statistics)

# Sample-weighted running totals, kept as plain Python floats.
test_cost_sum = 0.0
test_acc_sum = 0.0
n_test_samples = 0

with torch.no_grad(): # disables autograd, reducing memory consumption

  for X, y in test_loader:

    n_in_batch = X.shape[0]

    # make predictions:
    X_flat = X.view(n_in_batch, -1) # flatten everything after the batch dimension
    y_hat = model(X_flat)

    # `cost_function` returns the batch mean, so weight it (and the batch
    # accuracy) by the batch size: the last test batch is smaller than the
    # rest, and a plain mean of batch means would over-weight its samples.
    test_cost_sum += cost_function(y_hat, y).item() * n_in_batch
    test_acc_sum += accuracy_pct(y_hat, y) * n_in_batch
    n_test_samples += n_in_batch

# max(..., 1) keeps an empty loader from raising ZeroDivisionError.
avg_test_cost = test_cost_sum / max(n_test_samples, 1)
avg_test_acc = test_acc_sum / max(n_test_samples, 1)

print(f'Test cost: {avg_test_cost:.3f}, Test accuracy: {avg_test_acc:.1f}')

# NOTE: the model is left in eval mode after this cell. Call model.train()
# before doing any further training, otherwise dropout stays disabled.

Test cost: 0.257, Test accuracy: 96.2
