# PyTorch Neural Networks Components

The building blocks of deep learning models in PyTorch, from simple to complex neural networks:
how to define, train, and optimize models.

In [1]:
'''
Breakdown of simple Neural Network 

X  : input 
Wi : weights i
bi : bias i
A  : Activation function (provides non-linearity)
Y  : output

To create a forward propagation path 

( Two layers with one neuron each )
Z = W1.X + b1
Z' = A(Z)
Y = W2.Z' + b2

Once the forward propagation has finished...

Loss function   : measures the error that the weights are optimized to reduce
Backpropagation : computes the gradients of the loss with respect to the weights
Optimizer       : uses the gradients to compute the new weights

The model input, weights, biases and activations define the forward propagation flow that gives Y.
Then the loss is computed as a numerical value, the gradients are computed by backpropagation, and the optimizer changes the weights.

'''
import torch

## Components of PyTorch

In [None]:
# Base class for defining custom models (architecture): torch.nn.Module

# Fully connected (dense) layers, which connect every input to every output : torch.nn.Linear

# Activation function (here focus on ReLU) : torch.nn.ReLU

# Optimizers : torch.optim (provides many different optimizers)

# Loss function: torch.nn.CrossEntropyLoss (example of possible loss)

# To load data in batches, which is useful e.g. when working with image data : torch.utils.data.DataLoader
# (DataLoader helps to load data efficiently, e.g. in mini-batches, and with pinned memory for faster transfer to the GPU)

# .... There are different ways of creating a NN
# 1. Functional : (more flexible, but harder to interpret)
# Flexible way of building a NN by directly applying operations on tensors (this allows complex architectures and custom operations)
# 2. Sequential : (simpler to interpret)
# More structured approach: layers are stacked in linear order inside a single nn.Sequential call
# Used to define basic models with a clear layer-by-layer progression


In [14]:
# Building a NN 

import torch.nn as nn # allow to acces the above mentioned components 
import torch.optim as optim # optimizers

In [15]:
# Simple NN with Functional API 

# Whenever creating a new PyTorch custom model it must inherit from nn.Module
class SimpleNN(nn.Module):
    """Two-layer fully connected network written in the functional style.

    Every custom PyTorch model must inherit from ``nn.Module``. The layers
    are created as separate attributes in ``__init__`` and are wired
    together explicitly in ``forward``, so the data flow is not constrained
    to any fixed order.

    Args:
        input_size: number of features of each input sample (inputs of the
            first layer).
        hidden_size: number of neurons in the intermediate (hidden) layer.
        output_size: number of neurons in the final layer, i.e. the size of
            the model output.
    """

    def __init__(self, input_size, hidden_size, output_size):
        # Call nn.Module's initializer before assigning any sub-module.
        super().__init__()

        # Only the components are created here, not the architecture.
        # nn.Linear fixes the (input, output) shape of each dense layer.
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()  # activation applied between the two layers
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Run the forward pass: x -> fc1 -> ReLU -> fc2 -> output.

        ``x`` must have ``input_size`` features in its last dimension.
        """
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)
    
# With this functional API I can define the components as I want and then call them in any order in the forward pass; I'm not constrained by a fixed order

In [16]:
# Simple NN with Sequential API

class SimpleNNSequential(nn.Module):
    """Two-layer fully connected network built with ``nn.Sequential``.

    The layers are not stored as separate attributes. They are stacked, in
    execution order, inside a single ``nn.Sequential`` container, so the
    data flow is constrained to that linear order.

    Args:
        input_size: number of features of each input sample.
        hidden_size: number of neurons in the hidden layer.
        output_size: number of neurons in the output layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        # Zero-argument super() always refers to this class. The previous
        # super(SimpleNN, self) named a different class and made
        # instantiation fail.
        super().__init__()

        # In Sequential the single components are not defined one by one in
        # __init__: the whole pipeline is declared as one sequential object.
        self.network = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),
        )

    def forward(self, x):
        """Pass ``x`` (last dimension of size ``input_size``) through the stack."""
        return self.network(x)

In [17]:
# When working with CNNs or other complex architectures, a purely sequential flow is not always used...
# sometimes additional flexibility is required, with outputs skipping layers or inputs being reused in later layers...
# Combining multiple outputs as the input of later layers, or other custom operations, is possible...
# For example ResNet relies on residual blocks, whose skip connections cannot be expressed with a plain Sequential!

# I require separate components to be used properly inside of the network definition 
# Use sequential for very simple architecture, but not when complex variables pass is required...

# Try to use the network for dummy training !

In [18]:
# First create the model: 4 input features, 8 hidden neurons, 3 output classes

model_func = SimpleNN(
    input_size=4,
    hidden_size=8,
    output_size=3,
)
print(model_func)

SimpleNN(
  (fc1): Linear(in_features=4, out_features=8, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=8, out_features=3, bias=True)
)


In [19]:
# Create dummy data for training
X = torch.randn(size=(10, 4))  # 10 samples with 4 features per sample
Y = torch.randint(low=0, high=3, size=(10,))  # 10 random integer class labels in {0, 1, 2}

# Loss of the network (the training criterion).
# nn.CrossEntropyLoss takes the raw outputs (logits) of the network and applies
# log-softmax internally, so the model itself needs no final softmax layer.
criterion = nn.CrossEntropyLoss()

# Adam optimizes the parameters of the model with a learning rate of 0.01
optimizer = optim.Adam(params=model_func.parameters(), lr=1e-2)


In [20]:
# Inspect the dummy features X and the dummy class labels Y.
print("Dataset X: ", X)
print("Dataset Y: ", Y)

# Remember that the NN model has output_size=3: it outputs one raw score (logit) per class and has no softmax layer of its own.
# nn.CrossEntropyLoss applies the (log-)softmax to these scores internally; softmax turns them into class probabilities,
# and the predicted class is the one with the maximum probability (argmax).
# Each neuron of the output layer is responsible for one class
 

Dataset X:  tensor([[-0.3689,  1.4739,  0.4202, -0.9198],
        [ 0.8597,  1.2753, -1.5125,  1.8800],
        [-1.0656, -0.5052,  1.0023, -1.2221],
        [-0.5868,  0.3564,  1.6431, -0.6329],
        [ 0.5844,  2.1104,  1.0745, -0.3469],
        [-0.4041, -0.6631,  0.4523,  0.1185],
        [-0.0886,  0.6753,  1.2095,  0.4182],
        [-0.3485,  0.1890, -0.0694,  0.3447],
        [ 0.9207, -1.2917,  0.0984, -1.6139],
        [ 1.0118, -0.3596, -0.7790,  0.1209]])
Dataset Y:  tensor([2, 1, 1, 2, 0, 1, 0, 2, 1, 2])


In [None]:
# Define the training loop

epoch = 150  # total number of full passes over the dummy dataset

for e in range(epoch):
    # First clear the gradients left on the model parameters by the previous
    # iteration; otherwise backward() would add the new gradients to them.
    optimizer.zero_grad()
    outputs = model_func(X)  # forward pass on the whole dataset at once
    # Compute the loss between the predicted outputs and the real labels Y
    loss = criterion(outputs, Y)

    # Backpropagation: compute the gradient of the loss for every parameter
    loss.backward()
    # Optimization step: update all the weights using the gradients just computed
    optimizer.step()

    if (e + 1) % 10 == 0:  # print the current loss every 10th epoch
        # Report the real total number of epochs instead of a hard-coded 50
        print(f"Epoch [{e+1}]/{epoch}, Loss : {loss.item() :.4f}")

# The model learns from the data: the loss decreases over the epochs

Epoch [10]/50, Loss : 0.2576
Epoch [20]/50, Loss : 0.1688
Epoch [30]/50, Loss : 0.1104
Epoch [40]/50, Loss : 0.0754
Epoch [50]/50, Loss : 0.0543
Epoch [60]/50, Loss : 0.0410
Epoch [70]/50, Loss : 0.0323
Epoch [80]/50, Loss : 0.0264
Epoch [90]/50, Loss : 0.0220
Epoch [100]/50, Loss : 0.0188
Epoch [110]/50, Loss : 0.0162
Epoch [120]/50, Loss : 0.0142
Epoch [130]/50, Loss : 0.0126
Epoch [140]/50, Loss : 0.0112
Epoch [150]/50, Loss : 0.0101
