In [3]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Choose the compute backend in priority order: CUDA first, then Apple MPS,
# and finally plain CPU when neither accelerator reports itself as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(device)

cuda


In [4]:
class MyNN(nn.Module):
    """Small fully connected network producing 10 raw logits per sample.

    The input is flattened (every dimension after the first is merged) into
    28*28 features and passed through Linear(784, 512) -> ReLU ->
    Linear(512, 512) -> ReLU -> Linear(512, 10).
    """

    def __init__(self, *args, **kwargs):
        """Create the flatten step and the Linear/ReLU stack.

        Positional and keyword arguments are forwarded unchanged to
        ``nn.Module.__init__``.
        """
        super().__init__(*args, **kwargs)
        in_features, hidden_features, out_features = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, out_features),
        )

    def forward(self, x):
        """Flatten ``x`` and return the stack's output (logits, no softmax applied)."""
        return self.linear_relu_stack(self.flatten(x))
    
# Build the network, then move its parameters to the selected device.
mdl = MyNN()
mdl.to(device)  # nn.Module.to works in place and returns the same module object
print(mdl)

MyNN(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# Push a single random 1x28x28 input through the model and report the index
# of the largest class probability.
X = torch.rand((1, 28, 28), device=device)
logits = mdl(X)
pred_prob = torch.softmax(logits, dim=1)  # normalise the logits along dim 1
y_pred = pred_prob.argmax(dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([6], device='cuda:0')


In [6]:
# A random tensor of size 3 x 28 x 28, reused by the following cells.
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


In [7]:
# nn.Flatten with its defaults keeps dim 0 and merges the remaining dims,
# so the 3 x 28 x 28 tensor becomes 3 x 784.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [19]:
# Linear map from the 784 flattened features down to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [20]:
# Recompute the pre-activation from layer1 instead of reading `hidden1` back.
# This cell rebinds `hidden1` to the rectified tensor, so on a re-run the
# "Before" print would otherwise show values that already went through ReLU.
pre_activation = layer1(flat_image)
print(f"Before ReLu: {pre_activation} \n")
hidden1 = nn.ReLU()(pre_activation)  # negative entries are clamped to zero
print(f"After ReLu: {hidden1}")

Before ReLu: tensor([[ 0.3961, -0.0841, -0.1740, -0.2941,  0.3812,  0.4041, -0.0271,  0.0737,
          0.1949,  0.3117, -0.5616, -0.1617,  0.1671,  0.4034, -0.0678,  0.2882,
         -0.4800,  0.2789, -0.1058, -0.0517],
        [ 0.0505, -0.3894,  0.2720, -0.3516,  0.5594,  0.5060,  0.0286,  0.4855,
          0.2121,  0.0374, -0.4559,  0.0194,  0.2821,  0.3406, -0.1020,  0.1546,
         -0.3211,  0.2990, -0.0525, -0.2150],
        [ 0.2951, -0.1493,  0.2069, -0.6948,  0.4129,  0.0528,  0.0973,  0.3086,
          0.2495, -0.0786, -0.2183, -0.2233,  0.2052,  0.2084, -0.2835,  0.5842,
         -0.4503,  0.2257, -0.0205, -0.3017]], grad_fn=<AddmmBackward0>) 

After ReLu: tensor([[0.3961, 0.0000, 0.0000, 0.0000, 0.3812, 0.4041, 0.0000, 0.0737, 0.1949,
         0.3117, 0.0000, 0.0000, 0.1671, 0.4034, 0.0000, 0.2882, 0.0000, 0.2789,
         0.0000, 0.0000],
        [0.0505, 0.0000, 0.2720, 0.0000, 0.5594, 0.5060, 0.0286, 0.4855, 0.2121,
         0.0374, 0.0000, 0.0194, 0.2821, 0.3406, 0.00

In [23]:
# Chain the existing flatten and layer1 modules with a ReLU and a new
# 20 -> 10 linear layer; nn.Sequential applies them in the order given.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

# Fresh random 3 x 28 x 28 input for the composed pipeline.
input_image = torch.rand(3, 28, 28)
logits = seq_modules(input_image)
logits

tensor([[-0.1069,  0.1316, -0.1199,  0.2955, -0.1782,  0.2152,  0.2238, -0.2441,
         -0.3225,  0.0766],
        [-0.1052,  0.0214, -0.1426,  0.1970, -0.1733,  0.1306,  0.2449, -0.2566,
         -0.2846,  0.1321],
        [-0.1108,  0.0015, -0.1552,  0.1265, -0.2141,  0.2909,  0.3231, -0.2002,
         -0.2085,  0.1339]], grad_fn=<AddmmBackward0>)

In [24]:
# Size of the pipeline output, displayed as the cell's result.
logits.shape

torch.Size([3, 10])

In [25]:
# Apply softmax along dim=1, i.e. across the values within each row of `logits`.
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)
# Bare trailing expression so the notebook displays the resulting tensor.
pred_prob

tensor([[0.0882, 0.1120, 0.0871, 0.1319, 0.0822, 0.1218, 0.1228, 0.0769, 0.0711,
         0.1060],
        [0.0906, 0.1029, 0.0873, 0.1226, 0.0847, 0.1147, 0.1286, 0.0779, 0.0757,
         0.1149],
        [0.0879, 0.0983, 0.0840, 0.1114, 0.0792, 0.1313, 0.1356, 0.0803, 0.0797,
         0.1122]], grad_fn=<SoftmaxBackward0>)

In [26]:
# Iterate over the (name, parameter) pairs reported by `mdl.named_parameters()`
# and print each parameter's name, its full size, and the slice `param[:5]`
# (the first five entries along dim 0) as a preview of its values.
for name, param in mdl.named_parameters():
    print(f"Layer: {name} | size: {param.size()} | value: {param[:5]} \n")

Layer: linear_relu_stack.0.weight | size: torch.Size([512, 784]) | value: tensor([[ 0.0214,  0.0337, -0.0333,  ...,  0.0275,  0.0243, -0.0080],
        [ 0.0245,  0.0241, -0.0202,  ...,  0.0098,  0.0324,  0.0004],
        [-0.0325, -0.0290,  0.0342,  ...,  0.0310,  0.0320,  0.0114],
        [-0.0201, -0.0062,  0.0033,  ...,  0.0028,  0.0029, -0.0028],
        [ 0.0304,  0.0126,  0.0007,  ...,  0.0176,  0.0064,  0.0246]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | size: torch.Size([512]) | value: tensor([ 0.0168,  0.0080,  0.0040,  0.0311, -0.0143], device='cuda:0',
       grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | size: torch.Size([512, 512]) | value: tensor([[ 0.0136, -0.0192, -0.0039,  ...,  0.0289, -0.0375,  0.0129],
        [ 0.0221,  0.0095, -0.0153,  ...,  0.0012, -0.0393, -0.0228],
        [-0.0056,  0.0257,  0.0008,  ...,  0.0315, -0.0127,  0.0327],
        [-0.0300,  0.0061,  0.0221,  ..., -0.0201,  0.0100, -0.043