In [2]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier for 28x28 inputs producing 10 logits.

    The input is flattened per sample and passed through three linear
    layers (784 -> 512 -> 512 -> 10) with a ReLU after each of the first
    two. No softmax is applied; ``forward`` returns raw logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        # Layers are created in the same order they are applied.
        layers = [
            nn.Linear(28 * 28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` per sample and return the stack's output logits."""
        return self.linear_relu_stack(self.flatten(x))

In [8]:
# Build the network on the CPU and show its layer structure.
device = torch.device("cpu")
model = NeuralNetwork()
model = model.to(device)
print(model)


NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [9]:
# Push one random 28x28 input through the model, convert the logits to
# probabilities, and report the index of the most probable class.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")


Predicted class: tensor([2])


We will take a sample minibatch of 3 images of size 28x28 and see what happens to it as we pass it through the network.

In [10]:
# Random minibatch: 3 samples, each 28x28.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


## nn.Flatten
We initialize the nn.Flatten layer to convert each 2D 28x28 image into a contiguous array of 784 pixel values (the minibatch dimension, at dim=0, is maintained).

In [11]:
# Flatten each sample of the minibatch and show the resulting size.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


## nn.Linear
The linear layer is a module that applies a linear transformation on the input using its stored weights and biases.


In [12]:
# Project the 784 flattened features of each sample down to 20.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)


torch.Size([3, 20])


## nn.ReLU
Non-linear activations are what create the complex mappings between the model’s inputs and outputs. They are applied after linear transformations to introduce nonlinearity.

In [13]:
# Print the layer output, apply ReLU, then print it again for comparison.
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")
# Negative values are wiped out (replaced by 0); non-negative values pass through unchanged.

Before ReLU: tensor([[ 0.5079,  0.1229,  0.6149, -0.2806, -0.1924,  0.0588,  0.0757,  0.0448,
         -0.0826,  0.0303,  0.1865, -0.5686,  0.0963, -0.4637,  0.2210, -0.5742,
         -0.2139,  0.0178, -0.2172, -0.0247],
        [ 0.5984, -0.2755,  0.5180,  0.1904, -0.1377,  0.1958,  0.5939, -0.1214,
          0.3412, -0.1079,  0.3435, -0.6904, -0.4994, -0.2509,  0.6898, -0.3310,
         -0.2819,  0.3719, -0.4006, -0.4906],
        [ 0.2627, -0.0422,  0.5321, -0.1013, -0.4908,  0.0263,  0.4892,  0.1293,
         -0.0230, -0.2083,  0.5317, -0.5872, -0.3247, -0.6283,  0.2043, -0.3535,
         -0.4203,  0.5153, -0.2848,  0.0361]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.5079, 0.1229, 0.6149, 0.0000, 0.0000, 0.0588, 0.0757, 0.0448, 0.0000,
         0.0303, 0.1865, 0.0000, 0.0963, 0.0000, 0.2210, 0.0000, 0.0000, 0.0178,
         0.0000, 0.0000],
        [0.5984, 0.0000, 0.5180, 0.1904, 0.0000, 0.1958, 0.5939, 0.0000, 0.3412,
         0.0000, 0.3435, 0.0000, 0.0000, 0.0000, 0.68

## nn.Sequential
nn.Sequential is an ordered container of modules. The data is passed through all the modules in the same order as defined. You can use sequential containers to put together a quick network like seq_modules.


In [14]:
# Chain the previously created flatten and linear layers with a ReLU and a
# new 20 -> 10 linear layer, then run a fresh random minibatch through them.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)


## nn.Softmax
The last linear layer of the neural network returns logits - raw values in $(-\infty, \infty)$ - which are passed to the nn.Softmax module. The logits are scaled to values in $[0, 1]$ representing the model’s predicted probabilities for each class. The `dim` parameter indicates the dimension along which the values must sum to 1.


In [18]:
# Softmax over dim=1 so that each row (one sample) sums to 1.
softmax = nn.Softmax(dim=1)
print(logits.shape, logits, sep="\n")
pred_probab = softmax(logits)
print(pred_probab)

torch.Size([3, 10])
tensor([[ 0.1346, -0.0827,  0.4219,  0.0521,  0.2393, -0.3767,  0.1364,  0.1110,
         -0.0614,  0.1177],
        [ 0.0200, -0.0054,  0.3861,  0.1703,  0.1177, -0.2686,  0.1571,  0.2017,
         -0.2281,  0.0653],
        [ 0.1122, -0.0452,  0.4417,  0.0712,  0.2580, -0.4598,  0.1525,  0.2620,
         -0.3048,  0.2278]], grad_fn=<AddmmBackward0>)
tensor([[0.1047, 0.0842, 0.1395, 0.0964, 0.1162, 0.0628, 0.1049, 0.1022, 0.0861,
         0.1029],
        [0.0943, 0.0919, 0.1360, 0.1096, 0.1040, 0.0707, 0.1082, 0.1131, 0.0736,
         0.0987],
        [0.1009, 0.0862, 0.1403, 0.0969, 0.1167, 0.0570, 0.1051, 0.1172, 0.0665,
         0.1133]], grad_fn=<SoftmaxBackward0>)


## Model Parameters
Many layers inside a neural network are parameterized, i.e. they have associated weights and biases that are optimized during training.
Subclassing nn.Module automatically tracks all fields defined inside your model object, and makes all parameters accessible using your model’s parameters() or named_parameters() methods.

In this example, we iterate over each parameter, and print its size and a preview of its values.

In [19]:
# Show the model layout, then each parameter's name, size, and first two entries.
print(f"Model structure: {model}\n\n")

for layer_name, tensor in model.named_parameters():
    preview = tensor[:2]
    print(f"Layer: {layer_name} | Size: {tensor.shape} | Values : {preview} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0217,  0.0075, -0.0008,  ..., -0.0133,  0.0120, -0.0097],
        [-0.0148, -0.0008,  0.0315,  ..., -0.0151,  0.0065,  0.0179]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([0.0100, 0.0043], grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0034, -0.0139,  0.0072,  ..., -0.0183, -0.0248, -0.0077],
        [-0.0052, -0.0255, -0.0069,  ...,  0.0068,  0.0298,  0.0417]],
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Si