# Building the Neural Network

In [4]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### Get Device for Training

In [5]:
# Choose where tensors and the model will live: prefer CUDA, then MPS,
# and fall back to the CPU when neither backend reports itself available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

print(f"Using {device} device")

Using cpu device


### Defining the Class

In [7]:
class NeuralNetwork(nn.Module):
  """Fully connected classifier: flatten the input, then apply three linear
  layers with a ReLU after each of the first two.

  The defaults reproduce the original fixed architecture (784 -> 512 -> 512 -> 10),
  so ``NeuralNetwork()`` behaves exactly as before.

  Args:
    in_features: Number of values per sample once flattened (default ``28*28``).
    hidden_features: Width of both hidden linear layers (default ``512``).
    num_classes: Number of logits produced per sample (default ``10``).
  """

  def __init__(self, in_features=28*28, hidden_features=512, num_classes=10):
    super().__init__()
    # nn.Flatten keeps dim 0 and collapses every remaining dim into one.
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
      nn.Linear(in_features, hidden_features),
      nn.ReLU(),
      nn.Linear(hidden_features, hidden_features),
      nn.ReLU(),
      nn.Linear(hidden_features, num_classes),
    )

  def forward(self, x):
    """Return the raw logits for ``x``.

    ``x`` is flattened from dim 1 onward, so its trailing dims must multiply
    to ``in_features``. No softmax is applied here; the result has one row per
    sample and ``num_classes`` columns.
    """
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

In [8]:
# Build the network first, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)

print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [9]:
# Push one random 28x28 input (created on the same device as the model)
# through the network, turn the logits into probabilities along dim 1,
# and report the index of the largest probability.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([4])


### Model Layers

In [11]:
# Random tensor of shape (3, 28, 28), used below to trace each layer's effect.
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


#### nn.Flatten

In [12]:
# nn.Flatten keeps dim 0 and merges the remaining dims: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)

print(flat_image.shape)

torch.Size([3, 784])


#### nn.Linear

In [13]:
# A linear layer mapping each 784-value row to 20 features.
layer1 = nn.Linear(in_features=28 * 28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


#### nn.ReLU

In [14]:
# Show the linear layer's output, apply ReLU, and show it again so the
# effect of the activation (negative entries become 0) can be compared.
print(f"Before ReLU: {hidden1}\n\n")
# NOTE: `hidden1` is overwritten with its activated version; the pre-ReLU
# values are only recoverable by re-running the nn.Linear cell.
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.3014,  0.3142,  0.2015,  0.1847, -0.5056,  0.4410, -0.1729,  0.1386,
         -0.2414,  0.3319, -0.1646, -0.1264,  0.0933, -0.2703,  0.2437,  0.3254,
         -0.2469,  0.4109,  0.0400,  0.3164],
        [ 0.1090,  0.4150, -0.0785,  0.1174, -0.3066,  0.1365, -0.1369, -0.3825,
         -0.1310, -0.0031, -0.1913, -0.2858, -0.3301,  0.1953, -0.1730, -0.0991,
         -0.1091,  0.3606, -0.0347,  0.3382],
        [ 0.2264,  0.4255, -0.2299,  0.3342, -0.1717,  0.3099, -0.2914,  0.0380,
         -0.1796,  0.3442,  0.0191, -0.0120, -0.0294,  0.2803, -0.0608,  0.0409,
         -0.3275,  0.4263, -0.4107,  0.1195]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.3014, 0.3142, 0.2015, 0.1847, 0.0000, 0.4410, 0.0000, 0.1386, 0.0000,
         0.3319, 0.0000, 0.0000, 0.0933, 0.0000, 0.2437, 0.3254, 0.0000, 0.4109,
         0.0400, 0.3164],
        [0.1090, 0.4150, 0.0000, 0.1174, 0.0000, 0.1365, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000, 0.0000, 0.1953, 0.00

#### nn.Sequential

In [15]:
# Chain the modules so data flows through them in the order listed:
# flatten -> layer1 -> ReLU -> a new 20-to-10 linear layer.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10),
)
# Fresh random input; this rebinds `input_image` from the earlier cell.
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)

#### nn.Softmax

In [17]:
# Normalise the logits along dim 1 so that each row of `pred_probab` sums to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

### Model Parameters

In [18]:
# NOTE(review): despite the "Model Parameters" label, this line prints the
# module's repr (its layer structure); the parameter values come from the loop.
print(f"Model Parameters: {model}\n\n")

# Walk every registered parameter and print its qualified name, its full size,
# and only its first two entries along dim 0 to keep the output short.
for name, param in model.named_parameters():
  print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

Model Parameters: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[ 0.0114, -0.0281, -0.0062,  ...,  0.0107,  0.0130, -0.0035],
        [ 0.0069,  0.0013,  0.0169,  ...,  0.0134, -0.0049,  0.0035]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([-0.0153, -0.0292], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0220,  0.0263, -0.0405,  ...,  0.0301, -0.0362, -0.0173],
        [ 0.0432, -0.0306,  0.0173,  ...,  0.0313, -0.0098, -0.0240]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Si