In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [3]:
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [4]:
# Display the selected device string as the cell's output.
device

'cuda'

In [5]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, two hidden ReLU layers, linear output.

    With the default arguments this builds a 784 -> 512 -> 512 -> 10 network
    for 28x28 inputs.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_features: Width of both hidden layers.
        num_classes: Number of logits produced per sample.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # nn.Flatten defaults to start_dim=1, end_dim=-1: dim 0 (the batch) is
        # kept and all later dims are merged, giving a 2-D (batch, features)
        # tensor rather than a 1-D one.
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        Args:
            x: Tensor whose dims after the first flatten to ``in_features``
                values per sample, e.g. (batch, 28, 28) with the defaults.

        Returns:
            Unnormalized logits; no softmax is applied here.
        """
        flat = self.flatten(x)
        return self.linear_relu_stack(flat)
        
        

In [6]:
# Build the network, then move its parameters to the selected device.
# nn.Module.to() returns the module itself, so `model` is the same object.
model = NeuralNetwork()
model = model.to(device)
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [7]:
# Push one random 28x28 input (batch of 1) through the model, turn the
# logits into per-class probabilities, and take the most likely class index.
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.functional.softmax(logits, dim=1)
y_pred = torch.argmax(pred_probab, dim=1)
y_pred

tensor([4], device='cuda:0')

In [8]:
# A mini-batch of three random 28x28 "images" with values drawn from [0, 1).
input_image = torch.rand(size=(3, 28, 28))

In [10]:
# nn.Flatten with its defaults written out: keep dim 0, merge every later dim
# into one, so each sample becomes a single row of values.
flatten = nn.Flatten(start_dim=1, end_dim=-1)
flat_image = flatten(input_image)

In [12]:
# Show the flattened shape (a torch.Size).
flat_image.shape

torch.Size([3, 784])

In [13]:
# One linear layer mapping each 784-value row to 20 features; show the
# resulting shape.
layer1 = nn.Linear(28 * 28, 20)
hidden1 = layer1(flat_image)
hidden1.shape

torch.Size([3, 20])

In [14]:
# Display the linear layer's output before any activation is applied.
hidden1

tensor([[ 0.7813, -0.0109, -0.3009, -0.1772,  0.0075,  0.1917, -0.2727,  0.3089,
         -0.3267,  0.2577,  0.1391, -0.3267, -0.2309,  0.1797,  0.1829, -0.2247,
         -0.5332,  0.9296, -0.5203, -0.1793],
        [ 0.9275, -0.0492, -0.0188, -0.3519, -0.1292,  0.1534, -0.1091, -0.0127,
         -0.2173,  0.0867,  0.2852, -0.0727, -0.1840,  0.2838,  0.0091, -0.5561,
         -0.4566,  0.7620, -0.4660,  0.1218],
        [ 0.5505,  0.0734,  0.1540,  0.0242,  0.0023, -0.0682, -0.4043, -0.2651,
         -0.1139,  0.3381,  0.1207, -0.4564, -0.3164,  0.0700, -0.1543, -0.0711,
         -0.2271,  0.6964, -0.4854,  0.2805]], grad_fn=<AddmmBackward0>)

In [15]:
# Apply ReLU element-wise: negative entries become 0, the rest pass through.
# Note that this rebinds `hidden1` to the activated tensor.
hidden1 = nn.functional.relu(hidden1)
hidden1

tensor([[0.7813, 0.0000, 0.0000, 0.0000, 0.0075, 0.1917, 0.0000, 0.3089, 0.0000,
         0.2577, 0.1391, 0.0000, 0.0000, 0.1797, 0.1829, 0.0000, 0.0000, 0.9296,
         0.0000, 0.0000],
        [0.9275, 0.0000, 0.0000, 0.0000, 0.0000, 0.1534, 0.0000, 0.0000, 0.0000,
         0.0867, 0.2852, 0.0000, 0.0000, 0.2838, 0.0091, 0.0000, 0.0000, 0.7620,
         0.0000, 0.1218],
        [0.5505, 0.0734, 0.1540, 0.0242, 0.0023, 0.0000, 0.0000, 0.0000, 0.0000,
         0.3381, 0.1207, 0.0000, 0.0000, 0.0700, 0.0000, 0.0000, 0.0000, 0.6964,
         0.0000, 0.2805]], grad_fn=<ReluBackward0>)

In [18]:
# nn.Sequential: an ordered container of modules; the input flows through
# them in the order given. `flatten` and `layer1` are the instances created
# in earlier cells, so the container reuses layer1's weights.
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(in_features=20, out_features=10),
)

# Rebinds `input_image` and `logits` to a fresh batch of two random images
# and the container's output for it.
input_image = torch.rand(size=(2, 28, 28))
logits = seq_modules(input_image)

In [19]:
# Softmax over dim 1 rescales each row of `logits` into non-negative values
# that sum to 1.
# NOTE(review): `pre_probab` looks like a shortened `pred_probab` (the name
# used in an earlier cell); the name is kept as-is in case later cells use it.
softmax = nn.Softmax(dim=1)
pre_probab = softmax(logits)

In [20]:
# Display the model's module structure again for reference.
model

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)

In [24]:
# Walk every learnable parameter of the model and print its qualified name,
# its full size, and a preview. `param[:2]` slices along dim 0, so weights
# show their first two rows and biases their first two entries.
for name, param in model.named_parameters():
    print(f"Layer: {name}, Size: {param.size()}, values: {param[:2]}")

Layer: linear_relu_stack.0.weight, Size: torch.Size([512, 784]), values: tensor([[ 0.0037,  0.0056,  0.0024,  ..., -0.0332, -0.0268,  0.0115],
        [ 0.0223, -0.0281,  0.0275,  ...,  0.0273,  0.0068,  0.0131]],
       device='cuda:0', grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.0.bias, Size: torch.Size([512]), values: tensor([-0.0056, -0.0297], device='cuda:0', grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.weight, Size: torch.Size([512, 512]), values: tensor([[-0.0391, -0.0197, -0.0266,  ..., -0.0205,  0.0031, -0.0023],
        [-0.0105, -0.0152, -0.0332,  ..., -0.0169, -0.0081, -0.0155]],
       device='cuda:0', grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.2.bias, Size: torch.Size([512]), values: tensor([-0.0437,  0.0375], device='cuda:0', grad_fn=<SliceBackward0>)
Layer: linear_relu_stack.4.weight, Size: torch.Size([10, 512]), values: tensor([[ 0.0357,  0.0194,  0.0359,  ..., -0.0076,  0.0201, -0.0002],
        [-0.0323, -0.0054,  0.0035,  ..., -0.0024, -0.0099