In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
# Choose the compute backend in priority order: CUDA GPU, then Apple MPS,
# and finally plain CPU when neither accelerator is reported as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
device

'mps'

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> Linear/ReLU -> Linear/ReLU -> Linear.

    Calling ``NeuralNetwork()`` with no arguments builds the original
    784 -> 512 -> 512 -> 10 network; the keyword arguments only exist so the
    same class can be reused for other input sizes or class counts.

    Args:
        in_features: Number of values per sample after flattening
            (default ``28 * 28``).
        hidden_features: Width of both hidden layers (default ``512``).
        num_classes: Number of output logits per sample (default ``10``).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten keeps dim 0 (the batch) and merges all remaining dims.
        self.flatten = nn.Flatten()
        # Layer order and attribute names are unchanged so parameter names
        # such as ``linear_relu_stack.0.weight`` stay stable.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return raw (unnormalized) class scores for a batch ``x``.

        Args:
            x: Tensor whose first dim is the batch and whose remaining dims
                multiply out to ``in_features``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding the logits; no
            softmax is applied here.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [5]:
# Build the network, then move its parameters onto the selected device.
# nn.Module.to() returns the module itself, so `model` is the same object.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
# Feed a single random 28x28 input (batch of 1) through the model, turn the
# logits into probabilities along the class axis, and report the top class.
X = torch.rand((1, 28, 28), device=device)
logits = model(X)
pred_prob = torch.softmax(logits, dim=1)
y_pred = torch.argmax(pred_prob, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([8], device='mps:0')


In [8]:
# Display the raw, unnormalized class scores returned by model(X).
logits

tensor([[-0.0908, -0.0356,  0.0735, -0.0848, -0.0020,  0.0652, -0.0049,  0.0414,
          0.0820,  0.0477]], device='mps:0', grad_fn=<LinearBackward0>)

In [7]:
# Display the softmax (dim=1) of the logits: per-class probabilities for the input.
pred_prob

tensor([[0.0903, 0.0955, 0.1065, 0.0909, 0.0987, 0.1056, 0.0984, 0.1031, 0.1074,
         0.1037]], device='mps:0', grad_fn=<SoftmaxBackward0>)

In [10]:
# A mini-batch of three random 28x28 "images" with values drawn from [0, 1).
input_image = torch.rand((3, 28, 28))
print(input_image.shape)

torch.Size([3, 28, 28])


In [13]:
# nn.Flatten leaves dim 0 (the batch) alone and merges the remaining dims,
# so each 28x28 image becomes one row of 784 values.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [40]:
# Linear layer mapping the 784 flattened values of each image to 20 features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [41]:
# Display layer1's output for the flattened batch (before any activation is applied).
hidden1

tensor([[-2.8618e-01, -2.7003e-01,  3.8036e-01, -4.0436e-01,  1.2111e-01,
         -1.6538e-01, -1.7341e-03,  2.8336e-01, -8.2987e-01, -3.7235e-01,
          5.8963e-02,  6.4610e-01, -7.8755e-03, -2.7777e-01, -4.0547e-02,
         -8.5318e-01,  7.0742e-01,  6.9333e-02,  7.3088e-01, -4.0225e-01],
        [ 1.6365e-02, -1.2298e-01, -1.3899e-01, -6.3293e-01,  3.3174e-01,
         -1.2811e-01,  3.8430e-01, -6.0251e-03, -6.3923e-01, -3.7502e-01,
         -2.9661e-01,  4.4947e-01, -1.0839e-01, -7.6786e-02, -2.7290e-01,
         -5.8058e-01,  6.4755e-01,  4.7897e-01,  1.0033e+00, -3.7310e-01],
        [-1.4886e-01, -2.8638e-01,  3.4937e-01, -1.8374e-01,  3.5742e-01,
         -3.9600e-01,  3.9610e-02,  2.8279e-01, -5.2613e-01, -3.4500e-01,
          1.3639e-01,  9.3165e-01, -4.1353e-04, -1.0312e-01, -1.4611e-01,
         -4.8992e-01,  3.0751e-01,  3.3861e-01,  8.2630e-01, -5.9179e-02]],
       grad_fn=<AddmmBackward0>)

In [28]:
# Materialize layer1's parameter iterator so the notebook displays every tensor.
list(layer1.parameters())

[Parameter containing:
 tensor([[ 0.0117, -0.0289, -0.0284,  ...,  0.0186,  0.0177,  0.0188],
         [ 0.0234,  0.0264, -0.0233,  ..., -0.0149,  0.0061,  0.0126],
         [-0.0323, -0.0253,  0.0350,  ..., -0.0293, -0.0092,  0.0177],
         ...,
         [ 0.0127, -0.0138, -0.0182,  ...,  0.0097, -0.0026, -0.0020],
         [-0.0344, -0.0202,  0.0137,  ..., -0.0217,  0.0306, -0.0282],
         [-0.0038,  0.0305, -0.0242,  ...,  0.0276,  0.0007, -0.0141]],
        requires_grad=True),
 Parameter containing:
 tensor([ 0.0266, -0.0011, -0.0158,  0.0011,  0.0258, -0.0218,  0.0126, -0.0224,
          0.0013,  0.0207,  0.0163, -0.0345, -0.0051, -0.0071, -0.0155, -0.0299,
          0.0023, -0.0311, -0.0121,  0.0317], requires_grad=True)]

In [42]:
# Print the activations, clamp negatives to zero, and print them again.
# NOTE: hidden1 is overwritten here, so re-running this cell on its own will
# show already-rectified values under "Before ReLU".
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = torch.relu(hidden1)
print(f"After ReLU: {hidden1}")

Before ReLU: tensor([[-2.8618e-01, -2.7003e-01,  3.8036e-01, -4.0436e-01,  1.2111e-01,
         -1.6538e-01, -1.7341e-03,  2.8336e-01, -8.2987e-01, -3.7235e-01,
          5.8963e-02,  6.4610e-01, -7.8755e-03, -2.7777e-01, -4.0547e-02,
         -8.5318e-01,  7.0742e-01,  6.9333e-02,  7.3088e-01, -4.0225e-01],
        [ 1.6365e-02, -1.2298e-01, -1.3899e-01, -6.3293e-01,  3.3174e-01,
         -1.2811e-01,  3.8430e-01, -6.0251e-03, -6.3923e-01, -3.7502e-01,
         -2.9661e-01,  4.4947e-01, -1.0839e-01, -7.6786e-02, -2.7290e-01,
         -5.8058e-01,  6.4755e-01,  4.7897e-01,  1.0033e+00, -3.7310e-01],
        [-1.4886e-01, -2.8638e-01,  3.4937e-01, -1.8374e-01,  3.5742e-01,
         -3.9600e-01,  3.9610e-02,  2.8279e-01, -5.2613e-01, -3.4500e-01,
          1.3639e-01,  9.3165e-01, -4.1353e-04, -1.0312e-01, -1.4611e-01,
         -4.8992e-01,  3.0751e-01,  3.3861e-01,  8.2630e-01, -5.9179e-02]],
       grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0000, 0.0000, 0.3804, 0.0000, 0.1211,

In [44]:
# Chain the existing flatten and layer1 objects with a ReLU and a fresh
# 20 -> 10 linear layer; data passes through the modules in the order listed.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)

# Both input_image and logits are rebound here, replacing the earlier values.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [50]:
# Softmax module that normalizes along dim=1, i.e. across the entries of each row.
softmax=nn.Softmax(dim=1)

In [46]:
# Display the logits returned by seq_modules for the three random images.
logits

tensor([[-0.0253,  0.1728,  0.1363,  0.0151,  0.0356, -0.2704,  0.1745,  0.2337,
         -0.0165, -0.1377],
        [-0.0253,  0.1775,  0.1645, -0.1324, -0.0298, -0.1365,  0.2853,  0.2371,
         -0.0812, -0.1252],
        [ 0.0672,  0.1545,  0.1518, -0.1277, -0.0408, -0.1183,  0.2242,  0.2354,
         -0.0464, -0.1653]], grad_fn=<AddmmBackward0>)

In [51]:
# Apply the dim=1 softmax so every row of logits becomes a probability
# distribution over the classes, then display the result.
pred_prob=softmax(logits)
pred_prob

tensor([[0.0935, 0.1139, 0.1098, 0.0973, 0.0993, 0.0731, 0.1141, 0.1211, 0.0943,
         0.0835],
        [0.0931, 0.1141, 0.1126, 0.0837, 0.0927, 0.0833, 0.1271, 0.1211, 0.0881,
         0.0843],
        [0.1024, 0.1117, 0.1114, 0.0842, 0.0919, 0.0850, 0.1198, 0.1211, 0.0914,
         0.0811]], grad_fn=<SoftmaxBackward0>)

In [52]:
# Print the module tree, then walk every named parameter of the model.
print(f"Model structure: {model}\n\n")
for name, param in model.named_parameters():
    # param[:2] previews only the first two entries along dim 0 to keep the output short.
    print(f"Layer: {name} | Size: {param.size()} | Values: {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values: tensor([[-0.0174, -0.0087,  0.0137,  ...,  0.0152, -0.0202,  0.0120],
        [ 0.0290, -0.0308, -0.0035,  ...,  0.0256, -0.0310,  0.0319]],
       device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values: tensor([ 0.0226, -0.0118], device='mps:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values: tensor([[ 0.0364,  0.0008, -0.0325,  ...,  0.0219,  0.0010,  0.0169],
        [ 0.0145, -0.0047,  0.0214,  ...,  0.0250,  0.0383, -0.0092]],
       device='mps:0', grad_fn=<SliceBac