참고한 사이트
- https://github.com/dansuh17/alexnet-pytorch/blob/master/model.py
- https://ingu627.github.io/code/alexnet_pytorch/

In [12]:
import numpy as np
import pandas as pd

from torchvision import datasets
from torchvision import transforms

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [13]:
# Prefer the first CUDA GPU; fall back to the CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [14]:
# Download Fashion-MNIST and attach the preprocessing pipelines.
# Training: resize to 256x256, then take a random 227x227 crop (the network's
# input size) as augmentation.
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    # The crop size must be a single argument: the second positional parameter
    # of RandomCrop is `padding`, so RandomCrop(227, 227) would pad every side
    # with 227 px of zeros before cropping.
    transforms.RandomCrop((227, 227)),
    transforms.ToTensor(),
])
# Evaluation: same resize, then a deterministic centre crop so test inputs have
# the same 227x227 size as the training inputs.
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.CenterCrop((227, 227)),
    transforms.ToTensor(),
])

train_dataset = datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=train_transform)
test_dataset = datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=test_transform)

In [15]:
# Inspect the Python type of a single sample returned by the training dataset.
type(train_dataset[0])

tuple

In [29]:
# The first element of a sample is the image tensor; show its shape.
train_dataset[0][0].shape

torch.Size([1, 227, 227])

In [None]:
# Data loaders
batch_size = 16
# Training batches are reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
# Evaluation does not benefit from shuffling; a fixed order keeps test runs
# reproducible and lets predictions be matched back to dataset indices.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## 모델 생성
- Feature extraction을 거친 후 out_size 계산
    
    <img src="https://drive.google.com/uc?id=1upDstrlLWjurddlbs5hzBTf3TvoIUxk2&export=download" width="300"/>
- padding 계산(아래 사진 참고)
    - Convolution layer 1,2 후에는 MaxPooling layer가 있음 -> MaxPooling layer를 생각하면, padding은 논문에 나와있지 않지만, 역으로 계산 가능하다. 

    <img src="https://drive.google.com/uc?id=199Lq3fl_EcueOo0PxpocRkWyVRXH2i2I&export=download" width="500"/>

- MaxPooling layer : kernel_size=3, stride=2 / 본문 3.4에 써있음
- 마지막 Convolution layer 후에 Maxpooling을 한 번 더 하는데, 그 결과로 나오는 Feature map 크기는 6x6 => 따라서 256 * 6 * 6으로 flatten 


In [None]:
class Alexnet_FashionMNIST(nn.Module):
    """AlexNet-style CNN for single-channel Fashion-MNIST images.

    ``net`` is the convolutional feature extractor (five convolution layers;
    the first two are followed by local response normalisation and max
    pooling, the last one by max pooling). ``classifier`` is the fully
    connected head (two dropout + linear + ReLU stages and a final linear
    layer producing the class scores).

    Args:
        num_class: Number of output classes (width of the final linear layer).
    """

    def __init__(self, num_class: int = 10):
        super().__init__()

        # Convolutional feature extractor.
        # Spatial sizes in the comments below are for a 227x227 input.
        self.net = nn.Sequential(
            # conv layer 1 / Fashion-MNIST is grayscale, so in_channels=1 -> 55x55
            nn.Conv2d(in_channels=1, out_channels=96, kernel_size=11, stride=4, padding=0),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),  # see paper section 3.3
            nn.MaxPool2d(kernel_size=3, stride=2),  # see paper section 3.4 -> 27x27

            # conv layer 2 / padding=2 keeps the 27x27 size with a 5x5 kernel
            nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.LocalResponseNorm(size=5, alpha=0.0001, beta=0.75, k=2),
            nn.MaxPool2d(kernel_size=3, stride=2),  # -> 13x13

            # conv layer 3
            nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            # conv layer 4
            nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),

            # conv layer 5
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2),  # -> 6x6
        )

        # Fully connected classifier head.
        # Dropout is deliberately NOT in-place: in-place dropout overwrites the
        # tensor produced by the preceding layer, which autograd may still need
        # for the backward pass.
        self.classifier = nn.Sequential(
            # fc layer 1
            nn.Dropout(p=0.5),  # drop probability = 50%
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(),

            # fc layer 2
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),

            # class scores (logits)
            nn.Linear(in_features=4096, out_features=num_class),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the forward pass.

        Args:
            x: Image batch of shape (N, 1, H, W). A 227x227 input yields the
                256x6x6 feature maps the classifier expects.

        Returns:
            Tensor of shape (N, num_class) with unnormalised class scores.
        """
        out = self.net(x)
        # Flatten everything except the batch dimension. Unlike view(-1, ...),
        # this never regroups samples when the feature-map size is unexpected;
        # the following Linear layer raises a clear shape error instead.
        out = out.flatten(start_dim=1)
        return self.classifier(out)



In [None]:
# Instantiate the network, then move it to the selected device (GPU when available).
model = Alexnet_FashionMNIST()
model = model.to(device)

In [None]:
# ! pip install torchsummary

In [None]:
from torchsummary import summary

In [None]:
# Print a per-layer table of output shapes and parameter counts for a
# single-channel 227x227 input at the configured batch size.
summary(model, input_size=(1, 227, 227), batch_size=batch_size)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [16, 96, 55, 55]          11,712
              ReLU-2           [16, 96, 55, 55]               0
 LocalResponseNorm-3           [16, 96, 55, 55]               0
         MaxPool2d-4           [16, 96, 27, 27]               0
            Conv2d-5          [16, 256, 27, 27]         614,656
              ReLU-6          [16, 256, 27, 27]               0
 LocalResponseNorm-7          [16, 256, 27, 27]               0
         MaxPool2d-8          [16, 256, 13, 13]               0
            Conv2d-9          [16, 384, 13, 13]         885,120
             ReLU-10          [16, 384, 13, 13]               0
           Conv2d-11          [16, 384, 13, 13]       1,327,488
             ReLU-12          [16, 384, 13, 13]               0
           Conv2d-13          [16, 256, 13, 13]         884,992
             ReLU-14          [16, 256,

In [None]:
# Training setup: loss function and optimizer.
loss = nn.CrossEntropyLoss()
# The optimizer receives the parameters it should update.
# NOTE: this assignment rebinds the name `optim` (the `torch.optim` alias
# imported at the top). Adam is therefore looked up through `torch.optim`, so
# re-running this cell keeps working instead of raising AttributeError on the
# already-rebound name. The variable name is kept so later cells that use
# `optim` are unaffected.
optim = torch.optim.Adam(model.parameters())