In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


In [8]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten, then Linear-ReLU-Linear-ReLU-Linear.

    With the default arguments this is a 784 -> 512 -> 512 -> 10 network,
    i.e. it takes 28x28 inputs and produces 10 logits per sample.

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of both hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Attribute names are kept as-is so parameter names such as
        # "linear_relu_stack.0.weight" (and saved state dicts) stay valid.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Compute raw class scores for a batch.

        Args:
            x: Batched input. Every dimension after the first is flattened,
                so their product must equal ``in_features``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding logits; no
            softmax is applied here.
        """
        flattened = self.flatten(x)
        return self.linear_relu_stack(flattened)

In [9]:
# Build the network, then move its parameters to the selected device.
# nn.Module.to() hands back the module itself, so `model` is the same object.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [10]:
# Push one random 28x28 input through the model and pick the likeliest class.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Softmax over dim=1 normalises each row of logits so that it sums to 1.
# https://pytorch.org/docs/stable/generated/torch.nn.Softmax.html
pred_probab = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2], device='cuda:0')


In [11]:
# model layers
input_image = torch.rand(3, 28, 28)
print(input_image.size())

torch.Size([3, 28, 28])


In [12]:
# nn.Flatten
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

torch.Size([3, 784])


In [13]:
# nn.Linear
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

torch.Size([3, 20])


In [15]:
# nn.ReLU
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.0088, -0.0321,  0.2508,  0.0856, -0.0330, -0.2658, -0.8396, -0.2361,
          0.2046,  0.3034, -0.1936, -0.3110, -0.1889, -0.0126,  0.1477, -0.5605,
         -0.1865,  0.4032, -0.2217, -0.3386],
        [-0.2866,  0.0239,  0.3479, -0.0628, -0.1986, -0.0583, -1.1124, -0.2655,
          0.1446,  0.1150, -0.3092, -0.0495, -0.3419,  0.5696,  0.0024, -0.3524,
          0.0647,  0.7077, -0.1835, -0.1974],
        [-0.0353, -0.1539, -0.1744, -0.0967, -0.1905,  0.0293, -0.7111, -0.1914,
          0.3506,  0.5363, -0.5544,  0.2104, -0.1805,  0.0980,  0.3908, -0.4590,
          0.2534,  0.3029, -0.2452, -0.1870]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0088, 0.0000, 0.2508, 0.0856, 0.0000, 0.0000, 0.0000, 0.0000, 0.2046,
         0.3034, 0.0000, 0.0000, 0.0000, 0.0000, 0.1477, 0.0000, 0.0000, 0.4032,
         0.0000, 0.0000],
        [0.0000, 0.0239, 0.3479, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1446,
         0.1150, 0.0000, 0.0000, 0.0000, 0.5696, 0.00

In [16]:
# nn.Sequential
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

In [21]:
# nn.Softmax: convert the logits into per-row class probabilities.
"""
The last linear layer of the neural network returns logits - raw values in [-infty, infty]
which are passed to the nn.Softmax module. The logits are scaled to values [0, 1] representing 
the model’s predicted probabilities for each class.
dim parameter indicates the dimension along which the values must sum to 1.
"""
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab)
# Index of the highest probability in each row, i.e. the predicted class.
print(torch.argmax(pred_probab, dim=1))

tensor([[0.0962, 0.0822, 0.1068, 0.1060, 0.0931, 0.1085, 0.1096, 0.1046, 0.1107,
         0.0823],
        [0.1000, 0.0896, 0.1254, 0.0933, 0.1146, 0.1046, 0.0962, 0.0972, 0.1037,
         0.0755],
        [0.0940, 0.0809, 0.1052, 0.1055, 0.1035, 0.1029, 0.1043, 0.1152, 0.1016,
         0.0868]], grad_fn=<SoftmaxBackward0>)
tensor([8, 2, 7])


In [18]:
# Model parameters: list every learnable tensor registered on the model.
"""
Many layers inside a neural network are parameterized, 
i.e. have associated weights and biases that are optimized during training. 
Subclassing nn.Module automatically tracks all fields defined inside your model object, 
and makes all parameters accessible using your model’s parameters() or named_parameters() methods.
"""
print("Model structure: ", model, "\n\n")

# param[:2] keeps the output short: only the first two entries along dim 0.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Model structure:  NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
) 


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0143, -0.0194,  0.0307,  ...,  0.0261, -0.0078,  0.0301],
        [ 0.0051,  0.0336, -0.0338,  ..., -0.0037,  0.0120,  0.0048]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0206,  0.0221], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0182,  0.0069,  0.0342,  ..., -0.0190,  0.0398, -0.0149],
        [-0.0152,  0.0415,  0.0420,  ..., -0.0279, -0.0264, -0.0218]],
       device='cuda:0', grad_fn=<