# Building the Neural Network

Imports

In [None]:
import os

import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

Get Device for Training

In [None]:
# Choose the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cuda device


Define the class

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

    With the default arguments the architecture (and the parameter names in
    the state dict) is identical to the previously hard-coded network:
    784 -> 512 -> 512 -> 10.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of both hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # Collapses every dimension after the first into a single one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Compute raw (un-normalized) logits for `x`.

        Args:
            x: Tensor whose dimensions after the first multiply out to
                `in_features`.

        Returns:
            Tensor with `num_classes` logits for each entry along the first
            dimension of `x`.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Build the network first, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
# One random 28x28 input, created directly on the selected device.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)

# Normalize the logits along dim 1, then pick the index of the largest value.
softmax_layer = nn.Softmax(dim=1)
pred_probab = softmax_layer(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([0], device='cuda:0')


# Model Layers

In [None]:
# Random tensor of shape (3, 28, 28), moved to the selected device.
input_image = torch.rand(3, 28, 28)
input_image = input_image.to(device)
input_image.size()

torch.Size([3, 28, 28])

nn.Flatten

In [None]:
# Flatten everything after the first dimension (the arguments shown are the defaults).
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
flat_image.size()

torch.Size([3, 784])

nn.Linear

In [None]:
# Linear layer mapping 28*28 = 784 input features to 20 output features.
layer1 = nn.Linear(in_features=28 * 28, out_features=20)
layer1 = layer1.to(device)
hidden1 = layer1(flat_image)
hidden1.size()

torch.Size([3, 20])

nn.ReLU

In [None]:
# Recompute the pre-activation values from `layer1` rather than reading the
# current `hidden1`: this cell rebinds `hidden1`, so re-running the old version
# printed already-rectified values under "Before ReLU".
pre_activation = layer1(flat_image)
print(f"Before ReLU: {pre_activation}\n\n")

# ReLU replaces negative entries with zero and leaves the others unchanged.
hidden1 = nn.ReLU()(pre_activation)

print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.4185,  0.4960,  0.1616, -0.1474,  0.4158,  0.8370, -0.6490, -0.5628,
          0.7104,  0.0267,  0.4604, -0.2891, -0.0854, -0.2335,  0.4901, -0.3287,
          0.4095, -0.1295,  0.1027,  0.3758],
        [ 0.1450,  0.4776, -0.0187,  0.2577,  0.0584,  0.6567, -0.3282, -0.3056,
          0.5562, -0.2168,  0.5527, -0.3961, -0.1897, -0.2760,  0.6786, -0.5368,
          0.3929,  0.0111, -0.0268, -0.0957],
        [-0.4294,  0.3412,  0.0177, -0.0206,  0.0946,  0.8043, -0.2534, -0.5857,
          0.4991, -0.1990,  0.1432, -0.5639, -0.1105, -0.6588,  0.5177, -0.2287,
          0.1861,  0.2886, -0.0171,  0.4434]], device='cuda:0',
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.4185, 0.4960, 0.1616, 0.0000, 0.4158, 0.8370, 0.0000, 0.0000, 0.7104,
         0.0267, 0.4604, 0.0000, 0.0000, 0.0000, 0.4901, 0.0000, 0.4095, 0.0000,
         0.1027, 0.3758],
        [0.1450, 0.4776, 0.0000, 0.2577, 0.0584, 0.6567, 0.0000, 0.0000, 0.5562,
         0.0000, 0.5527, 0.00

nn.Sequential

In [None]:
# Chain the modules defined in earlier cells with a fresh ReLU and a 20 -> 10
# linear layer; nn.Sequential applies them in the order given.
output_layer = nn.Linear(20, 10)
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), output_layer).to(device)

# New random input of shape (3, 28, 28), run through the whole stack.
input_image = torch.rand(3, 28, 28).to(device)
logits = seq_modules(input_image)

logits

tensor([[-0.0924, -0.1904,  0.5192,  0.5090,  0.1671,  0.0703,  0.3863, -0.4230,
         -0.1213, -0.3671],
        [-0.0654, -0.2836,  0.4985,  0.6372,  0.2440,  0.0568,  0.3282, -0.3646,
         -0.0862, -0.4271],
        [-0.0093, -0.2767,  0.3949,  0.3830,  0.1240,  0.2335,  0.2560, -0.2090,
         -0.0793, -0.3858]], device='cuda:0', grad_fn=<AddmmBackward0>)

nn.Softmax

In [None]:
# Softmax along dim 1 rescales each row of logits to values in [0, 1] that sum to 1.
# No `.to(device)` is needed: nn.Softmax has no parameters or buffers to move,
# and its output is computed from `logits` wherever that tensor already lives.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

pred_probab

tensor([[0.0826, 0.0749, 0.1522, 0.1507, 0.1070, 0.0972, 0.1333, 0.0593, 0.0802,
         0.0627],
        [0.0835, 0.0671, 0.1468, 0.1686, 0.1138, 0.0944, 0.1238, 0.0619, 0.0818,
         0.0582],
        [0.0917, 0.0702, 0.1374, 0.1358, 0.1048, 0.1169, 0.1196, 0.0751, 0.0855,
         0.0629]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

# Model Parameters

In [None]:
print(f"Model structure: {model}\n\n")

# Walk every registered parameter; show its name, full size, and only the
# first two entries along dim 0 to keep the output short.
named_params = model.named_parameters()
for name, param in named_params:
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0087,  0.0117, -0.0273,  ...,  0.0275, -0.0261, -0.0139],
        [-0.0115, -0.0328,  0.0076,  ..., -0.0243,  0.0032, -0.0124]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0202,  0.0026], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0027,  0.0310, -0.0245,  ...,  0.0394, -0.0345, -0.0436],
        [ 0.0216,  0.0343,  0.0395,  ...,  0.0091,  0.0193, -0.0168]],
       device='cuda:0', grad_fn=<Slice