<a href="https://colab.research.google.com/github/RanaweeraHK/Pytorch-Zero-to-Master/blob/main/Build_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
# Pick the device that tensors and the model will live on.
# `torch.accelerator` is only present in recent PyTorch releases, so guard the
# lookup and fall back to the CUDA check on older installs instead of raising
# AttributeError; with no accelerator at all, run on the CPU.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

In [6]:
# Define the neural network by subclassing nn.Module.

class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

    The defaults reproduce the original fixed architecture (784 -> 512 -> 512 -> 10),
    so ``NeuralNetwork()`` builds exactly the same layers as before.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of each of the two hidden layers.
        num_classes: Number of output logits produced per sample.
    """

    def __init__(self, in_features: int = 28 * 28, hidden_features: int = 512,
                 num_classes: int = 10):
        super().__init__()
        # Collapses every dimension after the first into a single one.
        self.flatten = nn.Flatten()
        # Layers are registered positionally, so parameter names stay
        # ``linear_relu_stack.<index>.weight`` / ``.bias``.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Describe how input data flows through the network to produce an output.

        Args:
            x: Input tensor; all dimensions after the first are flattened and
                must multiply out to ``in_features``.

        Returns:
            The output of the final linear layer (raw logits; no softmax is
            applied here).
        """
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)



In [7]:
# Build the network, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [8]:
# Push one random 28x28 input through the model, convert the logits to
# probabilities along dim 1, and report the index of the largest one.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = torch.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([4])


# Model Layers

In [9]:
# A mini-batch of three random 28x28 inputs to trace through each layer below.
input_image = torch.rand((3, 28, 28))
input_image.shape

torch.Size([3, 28, 28])

In [10]:
# Keep dim 0 and collapse the remaining dims, so each 28x28 input becomes
# a single row of 784 values (these are nn.Flatten's default arguments).
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)
flat_image.shape

torch.Size([3, 784])

In [11]:
# A linear layer mapping the 784 flattened values to 20 hidden features.
layer1 = nn.Linear(in_features=28 * 28, out_features=20)
hidden1 = layer1(flat_image)
hidden1.shape

torch.Size([3, 20])

In [12]:
# Print the activations, apply ReLU (negative entries become zero), and
# print them again to compare.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[ 0.3496,  0.6913,  0.1103, -0.6083, -0.2282, -0.3035, -0.1308, -0.0410,
         -0.2210,  0.0039,  0.0946,  0.4425,  0.2179, -0.0845,  0.2614, -0.1824,
          0.0398, -0.2629,  0.2354,  0.1400],
        [ 0.1226,  0.2799, -0.0451, -0.3118, -0.0156, -0.4869, -0.0266, -0.1657,
         -0.1760,  0.3327, -0.3663,  0.1167,  0.2244,  0.0533,  0.2598, -0.0761,
          0.2566, -0.4776,  0.0223,  0.1246],
        [ 0.3065,  0.6466,  0.1653, -0.5874, -0.1088, -0.3348, -0.0451, -0.1959,
         -0.1721,  0.2549,  0.3756,  0.3112,  0.2755, -0.0131,  0.3397,  0.0549,
          0.5235, -0.4119, -0.0959,  0.3300]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.3496, 0.6913, 0.1103, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0039, 0.0946, 0.4425, 0.2179, 0.0000, 0.2614, 0.0000, 0.0398, 0.0000,
         0.2354, 0.1400],
        [0.1226, 0.2799, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
         0.3327, 0.0000, 0.1167, 0.2244, 0.0533, 0.25

In [13]:
# nn.Sequential passes data through its modules in the order given; the
# flatten and layer1 modules created earlier are reused as the first stages.
seq_model = nn.Sequential(
    flatten, layer1, nn.ReLU(), nn.Linear(in_features=20, out_features=10)
)
input_image = torch.rand((3, 28, 28))
logits = seq_model(input_image)

In [15]:
# Softmax along dim=1 rescales each row of logits into values that sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)

# Display the resulting probabilities.
pred_probab

tensor([[0.0786, 0.0906, 0.0900, 0.0909, 0.1377, 0.0801, 0.1166, 0.1185, 0.1003,
         0.0967],
        [0.0711, 0.0799, 0.0997, 0.0866, 0.1081, 0.1040, 0.1144, 0.1264, 0.1019,
         0.1079],
        [0.0736, 0.0870, 0.0996, 0.0932, 0.1174, 0.0953, 0.1194, 0.1070, 0.1065,
         0.1009]], grad_fn=<SoftmaxBackward0>)

Model parameters are the values inside the model that are learned during training.

For a neural network, the parameters are the weights and biases of its layers.

In [17]:
# Show the module tree, then walk every named parameter of the model.
print(f"Model structure: {model}\n\n")

# param[:2] prints only the first two entries along dim 0 to keep the output short.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0339,  0.0295,  0.0241,  ..., -0.0156, -0.0120, -0.0148],
        [ 0.0254, -0.0026, -0.0171,  ..., -0.0082, -0.0230,  0.0209]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0126,  0.0175], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0156,  0.0205, -0.0044,  ...,  0.0011,  0.0401,  0.0191],
        [ 0.0114,  0.0108, -0.0027,  ..., -0.0254, -0.0029, -0.0240]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | 