In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Get Device for Training

In [2]:
# Pick the best available compute backend, in order of preference:
#   cuda : https://pytorch.org/docs/stable/cuda.html
#          https://pytorch.org/docs/stable/notes/cuda.html
#   mps  : https://pytorch.org/docs/stable/notes/mps.html#mps-backend
#   cpu  : fallback when no accelerator is available
#
# The MPS check lives under `torch.backends` (a misspelled attribute here would
# only surface on machines without CUDA, because the CUDA branch short-circuits).
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


In [3]:
# Report why MPS is unavailable, or run a small smoke test on the MPS device.
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")

else:
    mps_device = torch.device("mps")

    # Create a Tensor directly on the mps device
    x = torch.ones(5, device=mps_device)
    # Or
    x = torch.ones(5, device="mps")

    # Any operation happens on the GPU
    y = x * 2

    # Move your model to mps just like any other device.
    # A tiny linear layer stands in for "your favorite net" so this branch
    # actually runs: its 5 input features match the 5-element tensor `x`.
    model = nn.Linear(5, 1)
    model.to(mps_device)

    # Now every call runs on the GPU
    pred = model(x)

MPS not available because the current PyTorch install was not built with MPS enabled.


# Define the Class

In [4]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: 28*28 inputs -> 512 -> 512 -> 10 logits.

    The input is flattened from dim 1 onwards, then passed through two
    Linear+ReLU stages and a final Linear layer that emits 10 raw scores.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        # Layers are created in the order they are applied.
        n_inputs = 28 * 28
        n_hidden = 512
        n_classes = 10
        stages = [
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_classes),
        ]
        self.linear_relu_stack = nn.Sequential(*stages)

    def forward(self, x):
        """Return the raw class scores (logits) for a batch `x`."""
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)

In [5]:
# Build the network, then move its parameters to the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


Calling the model on the input returns a 2-dimensional tensor: dim=0 indexes the examples in the batch (one row of 10 raw predicted values, i.e. logits, per example), <br>
and dim=1 indexes the individual class values within each row. <br>
We get the prediction probabilities by passing the logits through an instance of the nn.Softmax module.<br>

-> model()'s input : e.g. (#examples, 1, 28, 28), flattened inside the model to $(m, N_x)$ = (#examples, #features)<br>
-> model()'s output : (#examples, #logits)

In [6]:
# Three random single-channel 28x28 "images", created on the model's device.
X = torch.rand((3, 1, 28, 28), device=device)
logits = model(X)
print(f"logits.shape : {logits.shape}") 

# Normalise along dim=1 so each row of logits becomes a probability distribution.
pred_prob = nn.Softmax(dim=1)(logits)
print(f"pred_prob : {pred_prob}")

# Index of the largest probability per row, via the Tensor.argmax method.
# Equivalent functional form: torch.argmax(pred_prob, dim=1, keepdim=True)
y_pred = pred_prob.argmax(dim=1, keepdim=True)
print(f"Predicted class : {y_pred}")

logits.shape : torch.Size([3, 10])
pred_prob : tensor([[0.0968, 0.0965, 0.1153, 0.0989, 0.1041, 0.0919, 0.1027, 0.0976, 0.0976,
         0.0987],
        [0.0944, 0.0938, 0.1187, 0.0957, 0.0992, 0.0957, 0.1010, 0.0966, 0.0959,
         0.1089],
        [0.0930, 0.1019, 0.1093, 0.0973, 0.1051, 0.0908, 0.1009, 0.0999, 0.1047,
         0.0972]], device='cuda:0', grad_fn=<SoftmaxBackward0>)
Predicted class : tensor([[2],
        [2],
        [2]], device='cuda:0')


# Model Layers

In [7]:
# A batch of three random 28x28 images; .size() and .shape report the same value.
input_image = torch.rand((3, 28, 28))
image_dims = input_image.size()
print(f"input_image.size : {image_dims}")
print(f"input_image.shape : {input_image.shape}")

input_image.size : torch.Size([3, 28, 28])
input_image.shape : torch.Size([3, 28, 28])


# nn.Flatten

In [8]:
# nn.Flatten keeps dim 0 and collapses the rest: (3, 28, 28) --> (3, 784)
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(f"flat_image.size : {flat_image.shape}") 

flat_image.size : torch.Size([3, 784])


# nn.Linear

In [18]:
# Shape bookkeeping for this cell:
#   flat_image    = A0, shape (3, 784)
#   layer1.weight = W1, shape (20, 784)  -- stored as (out_features, in_features)
#   affine1       = Z1 = A0 @ W1.T + b1, shape (3, 20)
#   activation1   = A1 = ReLU(Z1), shape (3, 20)

# Affine map from the 784 flattened pixels to 20 features.
layer1 = nn.Linear(28 * 28, 20)
print(f"layer1.weight.size : {layer1.weight.shape}")

affine1 = layer1(flat_image)
print(f"affine1.size : {affine1.shape}")
print(f"affine1 : {affine1}")

# ReLU zeroes the negative entries and leaves the shape unchanged.
activation1 = nn.ReLU()(affine1)
print(f"activation1.size with ReLU: {activation1.shape}")
print(f"activation1 : {activation1}")

layer1.weight.size : torch.Size([20, 784])
affine1.size : torch.Size([3, 20])
affine1 : tensor([[-0.3228, -0.1165,  0.0370,  0.3691,  0.7701, -0.6598,  0.1387,  0.1862,
          0.1999, -0.0622,  0.0410, -0.0116,  0.5156,  0.2619,  0.1465, -0.0186,
         -0.2221, -0.2543, -0.3593, -0.4873],
        [-0.0516,  0.2041, -0.2395,  0.4122,  0.3267, -0.6876,  0.0794,  0.5118,
         -0.1548,  0.5206,  0.3185, -0.1339,  0.4395,  0.1453,  0.1193,  0.2637,
         -0.3898, -0.2251, -0.1281, -0.3925],
        [-0.0721, -0.0810, -0.0568,  0.3889,  0.4611, -0.5054, -0.1218,  0.3241,
          0.1564,  0.1093,  0.2117, -0.1033,  0.7011,  0.0804,  0.1403,  0.3528,
         -0.6268, -0.3513, -0.3963, -0.2239]], grad_fn=<AddmmBackward0>)
activation1.size with ReLU: torch.Size([3, 20])
activation1 : tensor([[0.0000, 0.0000, 0.0370, 0.3691, 0.7701, 0.0000, 0.1387, 0.1862, 0.1999,
         0.0000, 0.0410, 0.0000, 0.5156, 0.2619, 0.1465, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000],
        [0.

# nn.Sequential

In [22]:
# Data flows through the modules in the order listed:
#   flatten    : (3, 28, 28) --> (3, 784)
#   layer1     : (3, 784)    --> (3, 20)
#   ReLU       : (3, 20)     --> (3, 20)
#   last layer : (3, 20)     --> (3, 10)
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)

# Fresh random batch pushed through the whole pipeline.
input_image = torch.rand((3, 28, 28))
logits = seq_modules(input_image)
print(f"logits.size : {logits.shape}")

logits.size : torch.Size([3, 10])


# nn.Softmax

In [26]:
# Normalise along dim=1: the values in each row sum to 1.
softmax = nn.Softmax(dim=1)
pred_prob = softmax(logits)
print(f"pred_prob.size : {pred_prob.shape}")
print(f"pred_prob : {pred_prob}")

print()

# Normalise along dim=0 instead: the values in each column sum to 1.
softmax = nn.Softmax(dim=0)
pred_prob = softmax(logits)
print(f"pred_prob.size : {pred_prob.shape}")
print(f"pred_prob : {pred_prob}")

pred_prob.size : torch.Size([3, 10])
pred_prob : tensor([[0.0719, 0.1059, 0.1227, 0.1010, 0.1222, 0.1094, 0.0932, 0.0924, 0.0974,
         0.0838],
        [0.0882, 0.1046, 0.0995, 0.0920, 0.1056, 0.1124, 0.0871, 0.1089, 0.1001,
         0.1015],
        [0.0802, 0.1014, 0.1288, 0.0884, 0.1235, 0.1103, 0.0845, 0.1047, 0.0937,
         0.0846]], grad_fn=<SoftmaxBackward0>)

pred_prob.size : torch.Size([3, 10])
pred_prob : tensor([[0.3001, 0.3404, 0.3500, 0.3596, 0.3484, 0.3302, 0.3527, 0.3028, 0.3354,
         0.3115],
        [0.3614, 0.3301, 0.2786, 0.3219, 0.2955, 0.3330, 0.3238, 0.3502, 0.3383,
         0.3704],
        [0.3385, 0.3295, 0.3714, 0.3185, 0.3561, 0.3368, 0.3235, 0.3470, 0.3264,
         0.3181]], grad_fn=<SoftmaxBackward0>)


# Model Parameters

In [28]:
print(f"Model structure : {model}")

# For every learnable tensor, show its name, its full shape,
# and its first two entries along dim 0.
for name, param in model.named_parameters():
    print(f"Layer : {name} | Size : {param.shape} | Values : {param[:2]} \n")

Model structure : NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)
Layer : linear_relu_stack.0.weight | Size : torch.Size([512, 784]) | Values : tensor([[ 0.0090,  0.0008,  0.0309,  ..., -0.0298,  0.0097, -0.0284],
        [-0.0215,  0.0191,  0.0116,  ...,  0.0250,  0.0236, -0.0118]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer : linear_relu_stack.0.bias | Size : torch.Size([512]) | Values : tensor([ 0.0065, -0.0335], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer : linear_relu_stack.2.weight | Size : torch.Size([512, 512]) | Values : tensor([[ 0.0412, -0.0058, -0.0211,  ..., -0.0380, -0.0230, -0.0007],
        [-0.0231,  0.0331, -0.0067,  ...,  0.0153,  0.0271,  0.0223]],
       device='cuda:0', grad_f

# Further Reading, torch.nn : https://pytorch.org/docs/stable/nn.html