In [95]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torchvision import models

In [96]:
# CIFAR-10 데이터셋의 변환 설정
transform = transforms.Compose(
    [transforms.Resize((224, 224)),  # VGG16은 224x224 입력을 기대합니다.
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406),(0.229, 0.224, 0.225))])

# 학습 및 테스트 데이터셋 로드
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                        shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                    download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32,
                                        shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


# 모듈 구현

In [97]:
# BasicConv2d 구현
#   Inception 모듈로 들어가기 전 까지

class BasicConv2d(nn.Module):

    def __init__(self, in_channels, out_channels, **kwargs):
        super().__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                bias = False,
                **kwargs # **{ a : 1, b : 2} -> a=1, b=2
                ),
            nn.BatchNorm2d(
                out_channels, 
                eps=0.001
            ),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.conv_block(x)
        return x

In [98]:
# Inception 모듈 구현
class Inception(nn.Module):

  def __init__(self, in_channels, # 공통적으로 필요한 입력 특징 맵의 채널
                     ch1x1,  # 1x1 Conv의 필터 개수
                     ch3x3_bottleneck, # 3x3 Conv를 하기 전의 BottleNeck 채널 수(필터 개수)
                     ch3x3, # 3x3 Conv의 필터 개수
                     ch5x5_bottleneck, # 5x5 Conv를 하기 전의 BottleNeck 채널 수(필터 개수)
                     ch5x5, # 5x5 Conv의 필터 개수
                     pool_proj # MaxPooling의 결과물을 1x1 Conv할 때의 채널 수
               ):
    super().__init__()

    self.branch1 = BasicConv2d(in_channels, ch1x1, kernel_size=1)

    self.branch2 = nn.Sequential(
        BasicConv2d(in_channels, ch3x3_bottleneck, kernel_size=1),
        BasicConv2d(ch3x3_bottleneck, ch3x3, kernel_size=3, padding=1)
    )

    self.branch3 = nn.Sequential(
        BasicConv2d(in_channels, ch5x5_bottleneck, kernel_size=1),
        BasicConv2d(ch5x5_bottleneck, ch5x5, kernel_size=5, padding=2)
    )

    self.branch4 = nn.Sequential(
        nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
        BasicConv2d(in_channels, pool_proj, kernel_size=1)
    )

  def forward(self, x):
    branch1_out = self.branch1(x) #(N, ch1x1, H_out, W_out)
    branch2_out = self.branch2(x) #(N, ch3x3, H_out, W_out)
    branch3_out = self.branch3(x) #(N, ch4x4, H_out, W_out)
    branch4_out = self.branch4(x) #(N, pool_proj, H_out, W_out)

    # dim=1로 설정해서 채널 방향으로 Output Feature Map을 이어준다.
    outputs = torch.cat([branch1_out, branch2_out, branch3_out, branch4_out], dim=1)

    return outputs

In [99]:
# 보조 분류기 (Aux Classifier)

class InceptionAux(nn.Module):

    def __init__(self, in_channels, num_classes, dropout_p = 0.7):
        super().__init__()

        # Feature Extraction

        self.avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.conv = BasicConv2d(in_channels=in_channels, out_channels=128, kernel_size=1)

        # Fully Conneceted Layer
    
        # Hidden Layer
        self_fc1 = nn.Linear(2048, 1024)
        self_activation = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_p)

        # Output Layer
        self.fc2 = nn.Linear(1024, num_classes)

    def forward(self, x):

        # aux1 : N x 512 x 14 x 14
        # aux2 : N x 528 x 14 x 14
        
        x = self.avgpool(x)
        # -> aux1 : N x 512 x 4 x 4
        # -> aux2 : N x 528 x 4 x 4

        x = self.conv(x)
        # -> N x 128 x 4 x 4
        
        x = torch.flatten(x, 1)
        # -> N x 2048

        x = self.fc1(x)
        x = self.activation(x)
        x = self.dropout(x)
        # -> N x 1024

        y = self.fc2(x)

        # N x num_classes

        return y

# 모델 구현

In [100]:
class Inception_V1(nn.Module):

    def __init__(self, num_classes=1000, use_aux=True, init_weights=None, drop_p=0.4, drop_p_aux=0.7):
        super().__init__()

        self.use_aux = use_aux

        # Inception Module에 들어가기 전 작업
        self.conv1 = BasicConv2d(in_channels=3, out_channels=64, kernel_size=7, stride=2, padding=3)
        self.maxpool1 = nn.MaxPool2d(3, stride=2, padding=1)

        self.conv2_bottleneck = BasicConv2d(64, 64, kernel_size=1)
        self.conv2 = BasicConv2d(64, 192, kernel_size=3, padding=1)
        self.maxpool2 = nn.MaxPool2d(3, stride=2, padding=1)

        # Incpetion 정의
        self.inception_3a = Inception(192, 64, 96, 128, 16, 32, 32)
        self.inception_3b = Inception(256, 128, 128, 192, 32, 96, 64)
        self.maxpool3 = nn.MaxPool2d(3, stride=2, padding=1)

        self.inception_4a = Inception(480, 192, 96, 208, 16, 48, 64)
        self.inception_4b = Inception(512, 160, 112, 224, 24, 64, 64)
        self.inception_4c = Inception(512, 128, 128, 256, 24, 64, 64)
        self.inception_4d = Inception(512, 112, 144, 288, 32, 64, 64)
        self.inception_4e = Inception(528, 256, 160, 320, 32, 128, 128)

        self.maxpool4 = nn.MaxPool2d(3, stride=2, padding=1)
        self.inception_5a = Inception(832, 256, 160, 320, 32, 128, 128)
        self.inception_5b = Inception(832, 384, 192, 384, 48, 128, 128)

        # Aux Classifier
        if use_aux:
            self.aux1 = InceptionAux(512, num_classes, dropout_p=drop_p_aux)
            self.aux2 = InceptionAux(528, num_classes, dropout_p=drop_p_aux)
        else:
            self.aux1 = None
            self.aux2 = None

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) # Global Average Pooling. 평탄화를 위해
        self.dropout = nn.Dropout(p=drop_p)
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        # N x 3 x 224 x 224
        x = self.conv1(x)
        # N x 64 x 112 x 112
        x = self.maxpool1(x)
        # N x 64 x 56 x 56
        x = self.conv2_bottleneck(x)
        # N x 64 x 56 x 56
        x = self.conv2(x)
        # N x 192 x 56 x 56
        x = self.maxpool2(x)

        # N x 192 x 28 x 28
        x = self.inception_3a(x)
        # N x 256 x 28 x 28
        x = self.inception_3b(x)
        # N x 480 x 28 x 28
        x = self.maxpool3(x)
        # N x 480 x 14 x 14
        x = self.inception_4a(x)
        # N x 512 x 14 x 14
        if self.aux1 is not None and self.training:
            aux1 = self.aux1(x)
        else:
            aux1 = None  # 뭐라도 넣어놔야 not defined error 안 뜸

        x = self.inception_4b(x)
        # N x 512 x 14 x 14
        x = self.inception_4c(x)
        # N x 512 x 14 x 14
        x = self.inception_4d(x)
        # N x 528 x 14 x 14
        if self.aux2 is not None and self.training:
            aux2 = self.aux2(x)
        else:
            aux2 = None

        x = self.inception_4e(x)
        # N x 832 x 14 x 14
        x = self.maxpool4(x)
        # N x 832 x 7 x 7
        x = self.inception_5a(x)
        # N x 832 x 7 x 7
        x = self.inception_5b(x)
        # N x 1024 x 7 x 7

        x = self.avgpool(x)
        # N x 1024 x 1 x 1
        x = torch.flatten(x, 1)
        # N x 1024
        x = self.dropout(x)
        x = self.fc(x)
        # N x 1000 (num_classes)

        return x, aux2, aux1

In [101]:
model = Inception_V1()

In [102]:
!pip install torchinfo



In [103]:
from torchinfo import summary
summary(model, input_size=(2, 3, 224, 224))

Layer (type:depth-idx)                        Output Shape              Param #
Inception_V1                                  [2, 1000]                 2,183,632
├─BasicConv2d: 1-1                            [2, 64, 112, 112]         --
│    └─Sequential: 2-1                        [2, 64, 112, 112]         --
│    │    └─Conv2d: 3-1                       [2, 64, 112, 112]         9,408
│    │    └─BatchNorm2d: 3-2                  [2, 64, 112, 112]         128
│    │    └─ReLU: 3-3                         [2, 64, 112, 112]         --
├─MaxPool2d: 1-2                              [2, 64, 56, 56]           --
├─BasicConv2d: 1-3                            [2, 64, 56, 56]           --
│    └─Sequential: 2-2                        [2, 64, 56, 56]           --
│    │    └─Conv2d: 3-4                       [2, 64, 56, 56]           4,096
│    │    └─BatchNorm2d: 3-5                  [2, 64, 56, 56]           128
│    │    └─ReLU: 3-6                         [2, 64, 56, 56]           --
├─Bas