# Build Model

In [1]:
import torch
import os
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Lambda, Compose

In [2]:
# get device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Current device: {device}")

Current device: cuda


In [3]:
## import datasets
train_data = datasets.FashionMNIST(
    root="../StartGuide/data",
    train=True,
    download=False,
    transform=ToTensor()
)
test_data = datasets.FashionMNIST(
    root="../StartGuide/data",
    train=False,
    download=False,
    transform=ToTensor()
)

In [4]:
test_data[0][0].shape

torch.Size([1, 28, 28])

In [5]:
## define model

class NNModel(nn.Module):
    def __init__(self):
        super(NNModel, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(784, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
            nn.ReLU()
        )
    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [6]:
model = NNModel().to(device)
print(model)

NNModel(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
    (5): ReLU()
  )
)


## Model Layers

- `nn.Flatten`
- `nn.Linear`
- `nn.ReLU`
- `nn.Sequential`
- `nn.Softmax`

In [7]:
# test data
tmp = torch.rand(3, 28, 28)
print(tmp.shape)

torch.Size([3, 28, 28])


### Flatten Layer

Flattens a contiguous range of dims into a tensor

In [8]:
flatten_layer = nn.Flatten(start_dim=1, end_dim=-1)
flattened_data = flatten_layer(tmp)
print(f"Flattened data shape: {flattened_data.shape}")

Flattened data shape: torch.Size([3, 784])


### Linear Layer

Applies a linear transformation on the input using its weights and biases

In [9]:
linear_layer = nn.Linear(in_features=784, out_features=20)
hidden_layer = linear_layer(flattened_data)
print(f"Hidden layer shape: {hidden_layer.shape}")

Hidden layer shape: torch.Size([3, 20])


In [29]:
print(f"Hidden data: \n{hidden_layer}")
print(f"Weight: \n{linear_layer.weight.shape}\nBias: \n{linear_layer.bias.shape}")
print(f"{flattened_data @ linear_layer.weight.T + linear_layer.bias}") # TODO float comparation?

Hidden data: 
tensor([[-0.0187,  0.6563,  0.1137,  0.2563, -0.3326,  0.1804,  0.4862,  0.1541,
         -0.0388,  0.3862, -0.2314,  0.3201,  0.2664,  0.4718,  0.2916, -0.2339,
          0.0493,  0.0479,  0.4649,  0.1129],
        [ 0.0721,  0.3049,  0.1401,  0.4665, -0.2594, -0.0516,  0.2249, -0.1089,
         -0.1758,  0.5431, -0.4015,  0.5934, -0.2683,  0.1119,  0.3522,  0.0236,
          0.0497, -0.3324,  0.6946, -0.1029],
        [-0.1265,  0.6517,  0.0087,  0.0375, -0.2542, -0.0066,  0.6874,  0.1504,
         -0.2874,  0.3833, -0.3095,  0.5220,  0.1034,  0.0204,  0.6119, -0.2515,
          0.4521, -0.3070,  0.6022, -0.3936]], grad_fn=<AddmmBackward>)
Weight: 
torch.Size([20, 784])
Bias: 
torch.Size([20])
tensor([[-0.0187,  0.6563,  0.1137,  0.2563, -0.3326,  0.1804,  0.4862,  0.1541,
         -0.0388,  0.3862, -0.2314,  0.3201,  0.2664,  0.4718,  0.2916, -0.2339,
          0.0493,  0.0479,  0.4649,  0.1129],
        [ 0.0721,  0.3049,  0.1401,  0.4665, -0.2594, -0.0516,  0.2249, -