In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Choose the compute backend in priority order: CUDA GPU, then Apple MPS,
# and finally the CPU when neither accelerator is reported as available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> (Linear + ReLU) x 2 -> Linear.

    The default sizes reproduce the original fixed architecture
    (784 -> 512 -> 512 -> 10); pass other values to reuse the same layout
    for inputs or label sets of a different size.

    Args:
        in_features: Number of values per sample once every non-batch
            dimension has been flattened. Defaults to 28 * 28.
        hidden_features: Width of both hidden layers. Defaults to 512.
        num_classes: Number of logits produced per sample. Defaults to 10.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses all dimensions after the batch dimension into a single one.
        self.flatten = nn.Flatten()
        # Attribute name and layer order are kept so that state_dict keys
        # ("linear_relu_stack.0.weight", ...) stay the same as before.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Flatten ``x`` per sample and return the raw class scores (logits).

        No softmax is applied here; the result has one row per sample and
        ``num_classes`` columns.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network first, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [5]:
# A batch holding one random 28x28 input, created on the same device as the model.
X = torch.rand((1, 28, 28), device=device)
# Calling the module produces one row of raw class scores for the batch.
logits = model(X)
logits

tensor([[-0.0361,  0.0953,  0.0263, -0.0426,  0.0713,  0.0991,  0.0623, -0.0437,
         -0.0719,  0.0313]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [6]:
# Expect one row per sample and one column per class.
logits.size()

torch.Size([1, 10])

In [9]:
# Softmax over dim 1 rescales each row of logits into probabilities that sum to 1.
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
pred_probab

tensor([[0.0945, 0.1077, 0.1005, 0.0938, 0.1052, 0.1081, 0.1042, 0.0937, 0.0911,
         0.1010]], device='cuda:0', grad_fn=<SoftmaxBackward0>)

In [14]:
# The predicted class is the column index of the largest probability in each row.
y_pred = torch.argmax(pred_probab, dim=1)
y_pred

tensor([5], device='cuda:0')

In [15]:
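# Model layers: pass a sample minibatch of three 28x28 inputs through Flatten, Linear, ReLU and Sequential, one step at a time.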

In [21]:
# Sample minibatch: three random 28x28 inputs with values drawn uniformly from [0, 1).
input_image = torch.rand((3, 28, 28))
(input_image, input_image.size())

(tensor([[[0.5630, 0.8198, 0.9073,  ..., 0.6414, 0.1240, 0.2174],
          [0.3765, 0.0208, 0.2588,  ..., 0.7522, 0.0853, 0.8802],
          [0.7448, 0.7512, 0.8299,  ..., 0.1951, 0.5421, 0.9208],
          ...,
          [0.7374, 0.0806, 0.8148,  ..., 0.5743, 0.6956, 0.3558],
          [0.1547, 0.1559, 0.2966,  ..., 0.9012, 0.8821, 0.4616],
          [0.5801, 0.8329, 0.8312,  ..., 0.9971, 0.7949, 0.5000]],
 
         [[0.7294, 0.4325, 0.1240,  ..., 0.8375, 0.2004, 0.3430],
          [0.7431, 0.3354, 0.5874,  ..., 0.9683, 0.1034, 0.0388],
          [0.1005, 0.5159, 0.6566,  ..., 0.9845, 0.9911, 0.3242],
          ...,
          [0.8680, 0.7550, 0.8398,  ..., 0.2784, 0.1457, 0.4207],
          [0.1418, 0.7858, 0.6439,  ..., 0.7723, 0.4803, 0.4035],
          [0.9709, 0.0960, 0.5466,  ..., 0.2018, 0.6717, 0.7489]],
 
         [[0.2750, 0.5491, 0.5737,  ..., 0.7552, 0.5674, 0.9555],
          [0.7860, 0.3282, 0.2888,  ..., 0.3503, 0.7162, 0.9761],
          [0.0231, 0.6836, 0.7094,  ...,

In [22]:
# nn.Flatten keeps dim 0 and merges the remaining dims: (3, 28, 28) -> (3, 784).
flatten = nn.Flatten()
flat_image = flatten(input_image)
(flat_image, flat_image.size())

(tensor([[0.5630, 0.8198, 0.9073,  ..., 0.9971, 0.7949, 0.5000],
         [0.7294, 0.4325, 0.1240,  ..., 0.2018, 0.6717, 0.7489],
         [0.2750, 0.5491, 0.5737,  ..., 0.0397, 0.4197, 0.3737]]),
 torch.Size([3, 784]))

In [28]:
# Linear layer that maps each flattened 784-value row to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
(hidden1, hidden1.size())

(tensor([[ 0.5468, -0.1186,  0.0274,  0.2584, -0.1175, -0.0012,  0.1755, -0.2618,
          -0.3716, -0.4418,  0.0825,  0.6282,  0.5345, -0.2590,  0.6252, -0.0263,
           0.6365,  0.5201, -0.1029,  0.1213],
         [ 0.4532,  0.2686, -0.3085,  0.5251,  0.2064,  0.2917,  0.4025,  0.0355,
          -0.3103, -0.3376,  0.0282,  0.4535,  0.3486,  0.3498,  0.5596,  0.1356,
           0.5259,  0.5987,  0.2027, -0.0675],
         [ 0.0500, -0.2093, -0.2276,  0.0787,  0.0162,  0.3636,  0.2403, -0.4670,
          -0.2788, -0.5375,  0.0479,  0.2528,  0.5486,  0.0649,  0.7061,  0.0588,
           0.4527,  0.4405,  0.5737, -0.1102]], grad_fn=<AddmmBackward0>),
 torch.Size([3, 20]))

In [29]:
# ReLU zeroes the negative activations and leaves the positive ones unchanged.
# Note: this rebinds `hidden1` to the activated values.
relu = nn.ReLU()
hidden1 = relu(hidden1)


In [33]:
# Show the activations after the ReLU step; negative entries now read 0.
print("After ReLu: {}".format(hidden1))

After ReLu: tensor([[0.5468, 0.0000, 0.0274, 0.2584, 0.0000, 0.0000, 0.1755, 0.0000, 0.0000,
         0.0000, 0.0825, 0.6282, 0.5345, 0.0000, 0.6252, 0.0000, 0.6365, 0.5201,
         0.0000, 0.1213],
        [0.4532, 0.2686, 0.0000, 0.5251, 0.2064, 0.2917, 0.4025, 0.0355, 0.0000,
         0.0000, 0.0282, 0.4535, 0.3486, 0.3498, 0.5596, 0.1356, 0.5259, 0.5987,
         0.2027, 0.0000],
        [0.0500, 0.0000, 0.0000, 0.0787, 0.0162, 0.3636, 0.2403, 0.0000, 0.0000,
         0.0000, 0.0479, 0.2528, 0.5486, 0.0649, 0.7061, 0.0588, 0.4527, 0.4405,
         0.5737, 0.0000]], grad_fn=<ReluBackward0>)


In [34]:
# Chain the layers built in the earlier cells with a fresh ReLU and a
# 20 -> 10 output layer; data flows through the modules in the order listed.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

# A new random minibatch of three 28x28 inputs. Note that this rebinds both
# `input_image` and `logits` from the earlier cells.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)

In [35]:
# Convert the Sequential model's logits into per-sample class probabilities.
softmax = nn.Softmax(
    dim=1,  # normalize across the class dimension, separately for every row
)
pred_probab = softmax(logits)

In [36]:
# Print the module tree first, followed by two blank lines.
print(f"Model structure: {model}\n\n")

# named_parameters() yields (qualified name, tensor) pairs; only the first two
# entries along dim 0 of each tensor are printed to keep the output short.
named_params = model.named_parameters()
for name, param in named_params:
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

Model structure: NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[ 0.0164, -0.0099, -0.0259,  ..., -0.0203, -0.0339, -0.0179],
        [ 0.0035, -0.0268, -0.0086,  ...,  0.0254, -0.0008, -0.0020]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0356,  0.0352], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[-0.0072, -0.0056,  0.0426,  ...,  0.0295, -0.0042, -0.0073],
        [-0.0114, -0.0351, -0.0402,  ...,  0.0111, -0.0003,  0.0392]],
       device='cuda:0', grad_fn=<Sl