# Build the Neural Network

In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Seed PyTorch's global RNG before anything random happens. Later cells draw
# random inputs (torch.rand) and randomly initialised layer weights, so without
# a fixed seed a Restart & Run All produces different numbers every time.
SEED = 0
torch.manual_seed(SEED)

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Current torch device: {device}")

Current torch device: cpu


In [3]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: Flatten -> Linear -> ReLU -> Linear -> ReLU -> Linear.

    The default sizes reproduce the original hard-coded network
    (28 * 28 inputs, two hidden layers of 512 units, 10 output logits).

    Args:
        in_features: Number of values per sample once flattened.
        hidden_features: Width of each of the two hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden_features: int = 512,
        num_classes: int = 10,
    ) -> None:
        super().__init__()
        # Collapses every dimension after the first into a single one.
        self.flatten = nn.Flatten()
        # Attribute names and layer order are kept so that printed structure
        # and state_dict keys stay the same as before.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Flatten ``x`` and return the raw logits (no softmax is applied here)."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [4]:
# Build the network with its default sizes, then move its parameters to the
# selected device before showing the layer structure.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
# One random 28x28 sample, created directly on the model's device.
X = torch.rand(1, 28, 28, device=device)

# Calling the model runs forward(); the result holds one row of 10 raw scores.
logits = model(X)

# Normalise the scores across the class dimension, then pick the most
# probable class index for each row.
pred_probab = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([9])


In [12]:
# Apply a standalone nn.Flatten to X and report the shape before and after.
# (The printed message previously misspelled "Flatten" as "Flattem".)
f = nn.Flatten()
f_result = f(X)
print(f"Flatten from X's shape: {X.shape} to shape: {f_result.shape}")

Flattem from X's shape: torch.Size([1, 28, 28]) to shape: torch.Size([1, 784])


In [17]:
# A mini-batch of three random 28x28 images.
input_image = torch.rand(3, 28, 28)
print(input_image.shape)

torch.Size([3, 28, 28])


In [None]:
# Flatten each image into a single vector; the leading (batch) dimension is kept.
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.shape)

torch.Size([3, 784])


In [20]:
# Linear layer mapping each 784-value flattened image to 20 hidden features.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
print(hidden1.shape)

torch.Size([3, 20])


In [21]:
# Show the linear layer's output, which still contains negative values.
print(f"before apply ReLU: {hidden1}\n\n")

# ReLU zeroes every negative entry and leaves the rest unchanged.
# `hidden1` is rebound to the activated tensor.
hidden1 = nn.functional.relu(hidden1)
print(f"after apply ReLU: {hidden1}")

before apply ReLU: tensor([[ 0.5359,  0.1777,  0.1312, -0.3059, -0.2453,  0.0270, -0.0304, -0.1297,
         -0.4231,  0.1303, -0.1953, -0.0333,  0.5598, -0.1212,  0.3054,  0.7382,
         -0.5515,  0.1466,  0.7173, -0.3539],
        [ 0.3187,  0.0273,  0.2602, -0.5207, -0.3706,  0.0222, -0.1907, -0.2105,
         -0.4947,  0.1181, -0.1125, -0.1096,  0.7371,  0.3558,  0.2109,  0.7764,
         -0.6327,  0.2779,  0.5369, -0.4863],
        [ 0.4717,  0.1693,  0.2080, -0.2838, -0.3052,  0.0221,  0.0120, -0.1295,
         -0.3825,  0.3050, -0.0710,  0.1620,  0.5099, -0.0907, -0.1004,  0.6280,
         -0.7514,  0.2595,  0.3311, -0.1165]], grad_fn=<AddmmBackward0>)


after apply ReLU: tensor([[0.5359, 0.1777, 0.1312, 0.0000, 0.0000, 0.0270, 0.0000, 0.0000, 0.0000,
         0.1303, 0.0000, 0.0000, 0.5598, 0.0000, 0.3054, 0.7382, 0.0000, 0.1466,
         0.7173, 0.0000],
        [0.3187, 0.0273, 0.2602, 0.0000, 0.0000, 0.0222, 0.0000, 0.0000, 0.0000,
         0.1181, 0.0000, 0.0000, 0.7371, 

In [23]:
# Chain the modules built above into one pipeline:
#   flatten   -> 784 values per image
#   layer1    -> 784 in, 20 out
#   ReLU      -> element-wise, size unchanged
#   Linear    -> 20 in, 10 out
# `flatten` and `layer1` are the same instances created in earlier cells.
seq_modules = nn.Sequential(flatten, layer1, nn.ReLU(), nn.Linear(20, 10))

# Fresh random batch of three 28x28 images, pushed through the whole pipeline.
input_image = torch.rand(3, 28, 28)
# NOTE(review): the result is bound to `logit` (singular), not `logits` —
# confirm which name later cells are expected to read.
logit = seq_modules(input_image)

In [24]:
# Echo the probability tensor currently bound to `pred_probab`.
# NOTE(review): this cell does not assign `pred_probab`; it shows whatever an
# earlier cell left in the kernel — confirm it was not meant to be recomputed
# from `logit`.
pred_probab

tensor([[0.0942, 0.1018, 0.1036, 0.1007, 0.1051, 0.0937, 0.0980, 0.0967, 0.1006,
         0.1055]], grad_fn=<SoftmaxBackward0>)

In [27]:
# Index of the largest probability in each row (dimension 1).
torch.argmax(pred_probab, dim=1)

tensor([9])

In [None]:
from transform