# Convolutional Neural Network 모델 정의

In [7]:
import torch
import torch.nn as nn

# Report whether torch sees the MPS backend as available on this machine.
device = "mps" if torch.backends.mps.is_available() else "cpu"
print(device)
# NOTE(review): the detected value is discarded and the device is forced to CPU,
# so the value printed above is not the one in effect afterwards — confirm this is intended.
device = 'cpu'

mps


In [11]:
# Small convolution layer used to inspect how Conv2d computes its output.
layer = nn.Conv2d(in_channels=4,  # number of channels in the input data
                  out_channels=2, # number of filters (kernels)
                  kernel_size=3,  # filter size (3: height, 3: width)
                  # stride=1,  # step size => the filter moves 1 cell at a time horizontally and vertically
                  # padding=0  # padding size
                  # padding='same'
                 )
print(layer)

Conv2d(4, 2, kernel_size=(3, 3), stride=(1, 1))


In [10]:
# Feed an all-ones input exactly as large as the 3 x 3 filter, so each filter yields one value.
input_data = torch.ones(1, 4, 3, 3)  # (number of samples, channel, height, width)
feature_map = layer(input_data)
print(feature_map.shape)
# [1: number of samples, 2: out_channels, 1: height, 1: width]
feature_map

torch.Size([1, 2, 1, 1])


tensor([[[[ 0.2992]],

         [[-0.8028]]]], grad_fn=<ConvolutionBackward0>)

In [14]:
# Parameter: weight
weight = layer.weight  # read the Conv2d weight => these are the filters
weight.shape
# [2: number of filters, 4: channels per filter, 3: filter height, 3: filter width]

torch.Size([2, 4, 3, 3])

In [17]:
# Parameter: bias
bias = layer.bias
bias.shape  # [2: number of filters]; not displayed because it is not the last expression of the cell
bias

Parameter containing:
tensor([0.0547, 0.0255], requires_grad=True)

In [19]:
# Computation for the first filter: compare the input shape with the shape of filter 0.
input_data.shape, weight[0].shape

(torch.Size([1, 4, 3, 3]), torch.Size([4, 3, 3]))

In [21]:
# Reproduce the first filter's output by hand: multiply every input channel element-wise with the
# matching channel of filter 0, sum each product, then add the four sums and the filter's bias.
# NOTE(review): in the saved outputs this value does not equal feature_map[0, 0]; presumably `layer`
# was re-created between the two cells (execution counts are out of order) — re-run top to bottom to confirm.
ch1 = torch.sum(input_data[0, 0] * weight[0, 0])  # [0: first filter, 0: first channel]
ch2 = torch.sum(input_data[0, 1] * weight[0, 1])  # [0: first filter, 1: second channel]
ch3 = torch.sum(input_data[0, 2] * weight[0, 2])  # [0: first filter, 2: third channel]
ch4 = torch.sum(input_data[0, 3] * weight[0, 3])  # [0: first filter, 3: fourth channel]
result = ch1 + ch2 + ch3 + ch4 + bias[0]
result

tensor(-0.4290, grad_fn=<AddBackward0>)

In [22]:
# Same manual computation for the second filter (index 1), using its own weights and bias.
ch1 = torch.sum(input_data[0, 0] * weight[1, 0])  # [1: second filter, 0: first channel]
ch2 = torch.sum(input_data[0, 1] * weight[1, 1])  # [1: second filter, 1: second channel]
ch3 = torch.sum(input_data[0, 2] * weight[1, 2])  # [1: second filter, 2: third channel]
ch4 = torch.sum(input_data[0, 3] * weight[1, 3])  # [1: second filter, 3: fourth channel]
result = ch1 + ch2 + ch3 + ch4 + bias[1]
result

tensor(-0.2111, grad_fn=<AddBackward0>)

In [24]:
# Max-pooling layer demo on a random 4 x 4 input.
p_layer= nn.MaxPool2d(kernel_size=2,  # size of the window the maximum is taken from
                      stride=2        # step size => kernel_size and stride are set to the same value so windows do not overlap
                     )
# Areas smaller than kernel_size yield no maximum => specify padding to add zero padding so those areas are pooled as well
input_data2 = torch.rand(1, 4, 4)
input_data2

tensor([[[0.5867, 0.7289, 0.4778, 0.0056],
         [0.1397, 0.0298, 0.4811, 0.2341],
         [0.5840, 0.4845, 0.5535, 0.7211],
         [0.3153, 0.3078, 0.4132, 0.3489]]])

In [29]:
# Apply the pooling layer and check the shape of its output.
result2 = p_layer(input_data2)
result2.shape

torch.Size([1, 2, 2])

In [30]:
# Show the pooled values produced by p_layer.
result2

tensor([[[0.7289, 0.4811],
         [0.5840, 0.7211]]])

# MNIST

In [33]:
import os 
# NOTE(review): presumably this directory is the project root that holds the local
# `module` package and the dataset folder — confirm. The location can be overridden
# through the PROJECT_ROOT environment variable so the notebook is not tied to a
# single machine; when the variable is unset the original path is used.
os.chdir(os.environ.get('PROJECT_ROOT', '/Users/hongdagyeong/Documents/Pd'))

In [34]:
import os

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import models, datasets, transforms
from torchinfo import summary

import matplotlib.pyplot as plt
import numpy as np

from module.data import load_mnist_dataset, load_fashion_mnist_dataset
from module.train import fit

# Use the MPS backend when torch reports it as available, otherwise fall back to the CPU.
device = "mps" if torch.backends.mps.is_available() else "cpu"


In [35]:
# Hyperparameters
N_EPOCH = 1        # number of epochs handed to fit()
BATCH_SIZE = 256   # batch size handed to the dataset loaders
LR = 1e-3          # learning rate handed to the optimizer

## Data 준비

In [36]:
# MNIST
# NOTE(review): train_loader/test_loader are rebound by the Fashion MNIST cell that assigns the same names.
train_loader = load_mnist_dataset('08_datasets', BATCH_SIZE, True)  # save path, batch size, whether it is the train set
test_loader = load_mnist_dataset('08_datasets', BATCH_SIZE, False)

In [52]:
# Fashion MNIST
# NOTE(review): rebinds train_loader/test_loader, replacing the MNIST loaders created under the same names.
train_loader = load_fashion_mnist_dataset('08_datasets', BATCH_SIZE, True)  # save path, batch size, whether it is the train set
test_loader = load_fashion_mnist_dataset('08_datasets', BATCH_SIZE, False)

In [53]:
# Inspect the dataset wrapped by the training loader.
train_loader.dataset

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: 08_datasets
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [54]:
# Inspect the dataset wrapped by the test loader.
test_loader.dataset

Dataset FashionMNIST
    Number of datapoints: 10000
    Root location: 08_datasets
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
           )

## CNN 모델 정의

In [55]:
# CNN -> Convolution layers: the number of filters (set via out_channels) grows toward the later layers.
#        Max-pooling layers shrink the output (feature map) size (height, width), usually halving it each time.

# Typical conv block layouts
## 1. Conv + ReLU + MaxPooling
## 2. Conv + BatchNorm + ReLU + MaxPooling
## 3. Conv + BatchNorm + ReLU + Dropout + MaxPooling

class MNISTCNNMODEL(nn.Module):
    """CNN classifier for 28 x 28 images such as MNIST / Fashion MNIST.

    Three convolution blocks (Conv -> BatchNorm -> ReLU -> Dropout2d -> MaxPool)
    widen the channels 32 -> 64 -> 128 while shrinking a 28 x 28 feature map to
    14 x 14, 7 x 7 and finally 4 x 4. A two-layer fully connected head then
    produces one score per class.

    The head expects the third block to output 4 x 4 feature maps, which is
    what 28 x 28 inputs yield.

    Args:
        in_channels: Number of channels of the input images.
        n_classes: Number of output classes.
        dropout: Drop probability used by every dropout layer.
    """

    def __init__(self, in_channels=1, n_classes=10, dropout=0.3):
        super().__init__()
        self.b1 = self._conv_block(in_channels, 32, dropout)  # 28 x 28 -> 14 x 14
        self.b2 = self._conv_block(32, 64, dropout)           # 14 x 14 -> 7 x 7
        # The input here is 7 x 7: halving gives 3.5, so pad the pooling by 1 to
        # keep the last row/column and end up with 4 x 4.
        self.b3 = self._conv_block(64, 128, dropout, pool_padding=1)

        # Output layers: flatten the (batch, ch, h, w) pooling result per sample
        # and classify it with fully connected layers.
        self.output_block = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=128 * 4 * 4, out_features=512),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(512, n_classes),  # out => number of classes
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, dropout, pool_padding=0):
        """Build one Conv -> BatchNorm -> ReLU -> Dropout2d -> MaxPool block."""
        return nn.Sequential(
            # 3 x 3 filter, stride 1, 'same' padding => output size equals input size
            # (for this kernel/stride it is equivalent to padding=1).
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding='same'),
            nn.BatchNorm2d(out_channels),  # normalizes per channel -> takes the channel count
            nn.ReLU(),
            nn.Dropout2d(p=dropout),
            # stride is omitted because it defaults to kernel_size.
            nn.MaxPool2d(kernel_size=2, padding=pool_padding),
        )

    def forward(self, X):
        """Return the raw class scores for a batch X of shape (batch, in_channels, 28, 28)."""
        features = self.b3(self.b2(self.b1(X)))
        return self.output_block(features)

In [56]:
# Build the model and print a layer-by-layer summary for a single 1 x 28 x 28 input.
model = MNISTCNNMODEL()
summary(model, (1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
MNISTCNNMODEL                            [1, 10]                   --
├─Sequential: 1-1                        [1, 32, 14, 14]           --
│    └─Conv2d: 2-1                       [1, 32, 28, 28]           320
│    └─BatchNorm2d: 2-2                  [1, 32, 28, 28]           64
│    └─ReLU: 2-3                         [1, 32, 28, 28]           --
│    └─Dropout2d: 2-4                    [1, 32, 28, 28]           --
│    └─MaxPool2d: 2-5                    [1, 32, 14, 14]           --
├─Sequential: 1-2                        [1, 64, 7, 7]             --
│    └─Conv2d: 2-6                       [1, 64, 14, 14]           18,496
│    └─BatchNorm2d: 2-7                  [1, 64, 14, 14]           128
│    └─ReLU: 2-8                         [1, 64, 14, 14]           --
│    └─Dropout2d: 2-9                    [1, 64, 14, 14]           --
│    └─MaxPool2d: 2-10                   [1, 64, 7, 7]             --
├─Sequent

## Train

In [57]:
# Fashion MNIST
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
# NOTE(review): forces CPU and discards the device detected in the import cell — confirm this is intended.
device = 'cpu'

# `fit` comes from the project's module.train; what its keyword arguments do is not visible here.
# NOTE(review): `result` is also the name used for the manual convolution result in earlier cells.
result = fit(train_loader, test_loader, model, loss_fn, optimizer, N_EPOCH,
             save_best_model=False, early_stopping=True, device=device, mode='multi')

Epoch[1/1] - Train loss: 0.33341 Train Accucracy: 0.87657 || Validation Loss: 0.35633 Validation Accuracy: 0.86810
116.07930612564087 초
