In [51]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

### In this part we will see how input data passes through the different layers of a neural network (forward propagation only).

#### The design of our network is:
- Linear input layer (if you want to know why the input layer is linear, [check this thread](https://datascience.stackexchange.com/questions/73324/why-is-the-input-to-an-activation-function-a-linear-combination-of-the-input-fea))
- 1 hidden layer with a ReLU activation function ([check this link about activation functions, where you will also find very informative blog posts](https://www.datacamp.com/tutorial/introduction-to-activation-functions-in-neural-networks))
- Linear Layer
- Softmax


For a better overall understanding:
[ML_google_tutorial](https://developers.google.com/machine-learning/crash-course/neural-networks/nodes-hidden-layers)

<b>Step-0:</b> Flattening before input

***Randomly generated input***

In [52]:
## How Flatten works?
# Seed the global RNG so the random input below (and every layer initialised
# after this point) is identical on each Restart & Run All. The value 0 is
# arbitrary; any fixed integer works.
torch.manual_seed(0)

# X is treated as a batch of 3 samples, each a 12x12 grid of values in [0, 1).
X = torch.rand(3, 12, 12)
print("before flattening:\n", X)
print(X.size())

## After Flattening
# nn.Flatten() keeps dim 0 (the batch) and collapses the remaining dims,
# so (3, 12, 12) becomes (3, 144).
flatten = nn.Flatten()
flat_x = flatten(X)
print("after flattening:\n", flat_x)
print(flat_x.size())

before flattening:
 tensor([[[1.2036e-01, 3.5857e-01, 3.0722e-01, 6.0852e-01, 9.4305e-01,
          2.7720e-01, 3.9667e-01, 4.7321e-01, 2.1927e-01, 9.6870e-01,
          7.8115e-01, 9.9759e-01],
         [7.1010e-01, 6.7218e-01, 7.5192e-01, 4.6336e-02, 2.4287e-01,
          1.6409e-01, 8.3532e-01, 8.4018e-02, 1.8201e-01, 5.0194e-01,
          2.3946e-01, 4.7157e-01],
         [8.4941e-01, 8.7402e-01, 7.2627e-02, 3.0818e-02, 8.5465e-01,
          6.8261e-01, 7.5643e-01, 2.6868e-01, 5.5701e-02, 3.1213e-01,
          6.5913e-01, 6.4715e-01],
         [1.4572e-01, 5.0676e-01, 9.9729e-01, 8.4374e-01, 5.6391e-01,
          9.9580e-01, 2.0392e-01, 9.2289e-01, 7.8074e-01, 7.0441e-01,
          9.6406e-01, 5.1472e-01],
         [9.4192e-01, 1.2049e-01, 6.5736e-01, 2.2819e-02, 6.0878e-01,
          3.2500e-01, 1.3713e-01, 6.0726e-01, 8.3132e-02, 1.7710e-01,
          3.1849e-01, 3.0600e-01],
         [8.2168e-01, 5.6157e-02, 9.0684e-01, 1.5835e-01, 9.7183e-01,
          8.5267e-02, 6.8410e-01, 8

<b>Step-1:</b> input layer

In [53]:
## Input layer: a fully connected (linear) map from the 144 flattened features to 5 units

layer1 = nn.Linear(144, 5)
hidden1_input = layer1(flat_x)  # pre-activation values that feed the ReLU step
print(hidden1_input, hidden1_input.shape, sep="\n")

tensor([[-0.3726,  0.3136, -0.7488,  0.4268,  0.0794],
        [-0.2579,  0.0447, -0.4766,  0.3809,  0.1297],
        [-0.1521, -0.1533, -0.4644,  0.0299, -0.2617]],
       grad_fn=<AddmmBackward0>)
torch.Size([3, 5])


In [54]:
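# ## For a better understanding of the above cell (illustrative sketch, left commented out):
# ## nn.Linear computes flat_x @ weight.T + bias, with weight stored as (out_features, in_features).
# ## The sketch below skips the bias and multiplies by an (in_features, out_features) matrix directly.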
# 
# weights = torch.rand(144, 5)
# print(
#     f"input matrix * random weights: {flat_x.size()}*{weights.size()}"
# )
# hidden_layer_input = torch.matmul(input=flat_x, other=weights)
# print("after mat_mul:\n", hidden_layer_input)

<b>Step-2:</b> Hidden layer 1

In [55]:
# ReLU zeroes every negative entry and passes non-negative entries through unchanged.
hidden1 = nn.ReLU()
hidden1_output = hidden1(hidden1_input)

# nn.ReLU() defaults to inplace=False, so hidden1_input still holds the
# pre-activation values when it is printed here.
print(f"Before ReLU: {hidden1_input}\n\n")
print(f"After ReLU: {hidden1_output}\n\n")

Before ReLU: tensor([[-0.3726,  0.3136, -0.7488,  0.4268,  0.0794],
        [-0.2579,  0.0447, -0.4766,  0.3809,  0.1297],
        [-0.1521, -0.1533, -0.4644,  0.0299, -0.2617]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.3136, 0.0000, 0.4268, 0.0794],
        [0.0000, 0.0447, 0.0000, 0.3809, 0.1297],
        [0.0000, 0.0000, 0.0000, 0.0299, 0.0000]], grad_fn=<ReluBackward0>)




<b>Step-3:</b> Second linear layer (named `hidden2` in the code; this is the final linear layer, and its outputs are the logits)

In [56]:
# Final linear layer: projects the 5 ReLU activations onto 3 output scores

hidden2 = nn.Linear(5, 3)
hidden2_output = hidden2(hidden1_output)
print(hidden2_output.shape)
print(f"output matrix: {hidden2_output}\n\n")

torch.Size([3, 3])
output matrix: tensor([[ 0.4140,  0.2501, -0.1245],
        [ 0.4198,  0.1970, -0.0360],
        [ 0.3581,  0.0234, -0.1719]], grad_fn=<AddmmBackward0>)




<b>Step-3.5:</b> Logits 

In [57]:
# "Logits" is simply the name for the raw scores produced by the final linear
# layer, i.e. the values before the last activation function is applied.
logits = hidden2_output
print(logits.shape, logits, sep="\n")

torch.Size([3, 3])
tensor([[ 0.4140,  0.2501, -0.1245],
        [ 0.4198,  0.1970, -0.0360],
        [ 0.3581,  0.0234, -0.1719]], grad_fn=<AddmmBackward0>)


<b>Step-4:</b> Output Layer with softmax activation 

In [58]:
### Last activation: Softmax
# Why softmax?
# Because the last linear layer of the network returns unbounded logits in
# (-inf, inf); softmax rescales them to values in [0, 1]. With dim=1 the
# normalisation runs across the 3 scores of each row, so every row sums to 1.

softmax = nn.Softmax(dim=1)
soft_out = softmax(logits)

print(f"Before softmax: {logits}\nsize: {logits.shape}\n")
print(f"after softmax: {soft_out}\nsize: {soft_out.shape}\n")

Before softmax: tensor([[ 0.4140,  0.2501, -0.1245],
        [ 0.4198,  0.1970, -0.0360],
        [ 0.3581,  0.0234, -0.1719]], grad_fn=<AddmmBackward0>)
size: torch.Size([3, 3])

after softmax: tensor([[0.4111, 0.3490, 0.2399],
        [0.4108, 0.3288, 0.2604],
        [0.4340, 0.3105, 0.2555]], grad_fn=<SoftmaxBackward0>)
size: torch.Size([3, 3])



### All together:

In [59]:
# The same pipeline as the step-by-step cells, chained into a single module.
# The Linear layers are constructed anew here (fresh random weights), so the
# printed probabilities differ from the step-by-step softmax output.
seq_modules = nn.Sequential(
    nn.Flatten(),                              # (3, 12, 12) -> (3, 144)
    nn.Linear(144, 5),                         # input layer
    nn.ReLU(),                                 # hidden activation
    nn.Linear(in_features=5, out_features=3),  # final linear layer -> logits
    nn.Softmax(dim=1),                         # logits -> per-row probabilities
)

output = seq_modules(X)
print(output.shape, output, sep="\n")

torch.Size([3, 3])
tensor([[0.3864, 0.2482, 0.3654],
        [0.3831, 0.2565, 0.3604],
        [0.3935, 0.2610, 0.3455]], grad_fn=<SoftmaxBackward0>)
