### Neural Networks

Neural networks are computational models inspired by the human brain, designed to recognize patterns and
make decisions based on data. They consist of interconnected layers of nodes, or "neurons," which process
and transform input information. Through training, neural networks learn to improve their accuracy in tasks like image recognition, language processing, and more. Neural networks are composed of layers that perform operations on data.

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Choose the compute backend in order of preference:
# CUDA GPU first, then Apple's MPS backend, and finally the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
class NeuralNetwork(nn.Module):
    """Small fully connected classifier: Flatten -> (Linear + ReLU) x 2 -> Linear.

    With no arguments the network is sized for 28x28 inputs and 10 output
    classes, exactly as before. The sizes can be overridden to reuse the
    same architecture on other input shapes or label sets.

    Args:
        in_features: Number of values per sample once flattened
            (defaults to ``28 * 28``).
        hidden_features: Width of both hidden layers (defaults to ``512``).
        num_classes: Number of logits produced per sample (defaults to ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten keeps dim 0 and collapses every remaining dim into one.
        self.flatten = nn.Flatten()
        # Attribute names are kept so existing state_dict keys stay valid.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` and return the raw (un-normalised) logits.

        Args:
            x: Input tensor whose non-leading dimensions multiply out to
                ``in_features``.

        Returns:
            The output of the final linear layer; no softmax is applied here.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network, move it to the selected device, then print its layer summary.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Feed one random 1x28x28 input through the model; the output is a row of raw logits.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
# Softmax over dim=1 turns the logits into probabilities; argmax picks the top index.
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2], device='cuda:0')


In [6]:
# A random tensor of shape (3, 28, 28), used below as a sample minibatch.
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [7]:
# nn.Flatten keeps dim 0 and merges the remaining dims: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [8]:
# Linear layer mapping each flattened 784-value row to 20 features.
layer1 = nn.Linear(in_features=28 * 28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [9]:
# Print the linear layer's output, apply ReLU, then print the result.
# NOTE: `hidden1` is rebound here, so re-running this cell without re-running
# the previous one prints already-activated values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
relu = nn.ReLU()
hidden1 = relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-0.4461, -0.2075, -0.3369, -0.0779,  0.0479, -0.2148,  0.2286, -0.4109,
          0.0833,  0.4271,  0.1005,  0.3416,  0.0602,  0.4678, -0.5186, -0.3187,
         -0.5346, -0.1660,  0.1838,  0.4557],
        [-0.4497, -0.2121, -0.5537, -0.4902, -0.0048, -0.2151,  0.4252, -0.2756,
          0.2001,  0.4588,  0.2643,  0.0833, -0.0289,  0.1683, -0.8469, -0.5544,
         -0.2643, -0.0629,  0.4188,  0.4206],
        [-0.7544, -0.1338, -0.3325, -0.2390,  0.0297,  0.0525,  0.3409, -0.1381,
          0.0188,  0.2462,  0.2281, -0.0818, -0.2079,  0.0519, -0.5573, -0.3495,
         -0.7980, -0.2120,  0.3498,  0.2777]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.0000, 0.0000, 0.0479, 0.0000, 0.2286, 0.0000, 0.0833,
         0.4271, 0.1005, 0.3416, 0.0602, 0.4678, 0.0000, 0.0000, 0.0000, 0.0000,
         0.1838, 0.4557],
        [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.4252, 0.0000, 0.2001,
         0.4588, 0.2643, 0.0833, 0.0000, 0.1683, 0.00

In [10]:
# Chain the modules built above with a fresh ReLU and a 20 -> 10 output layer;
# nn.Sequential passes data through them in the order given.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))
# New random minibatch (rebinds `input_image` and `logits` from earlier cells).
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [11]:
# Softmax along dim=1 rescales each row of `logits` to non-negative values that sum to 1.
softmax = nn.Softmax(dim=1)
# `logits` is whatever the most recently executed cell bound to that name.
pred_probab = softmax(logits)

In [12]:
print(f"Model structure: {model}\n\n")

# named_parameters() yields (name, tensor) pairs for every learnable tensor in
# the model; print each one's name, shape, and a slice of its first two entries.
for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.shape} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0335,  0.0187, -0.0201,  ..., -0.0250, -0.0295, -0.0334],
        [ 0.0306,  0.0265,  0.0043,  ...,  0.0131, -0.0296,  0.0282]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([ 0.0231, -0.0353], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0035,  0.0097, -0.0305,  ..., -0.0363,  0.0157, -0.0259],
        [-0.0118, -0.0065, -0.0381,  ...,  0.0411, -0.0046,  0.0346]],
       device='cuda:0', grad_fn=<Sl