In [None]:
import torch
import torch.nn as nn

# VGG implementation

In [None]:
class VGG16(nn.Module):
    """
    VGG16 image classifier.

    The network is split into two parts: ``features`` holds the convolutional
    layers and ``classifier`` holds the fully connected layers. An adaptive
    average pool between them fixes the feature map at 7x7, so inputs are not
    restricted to 224x224 (for 224x224 inputs the pool is an identity).

    The order and indices of the layers inside ``features`` are relied upon by
    code that slices them (e.g. ``FCN8s``), so they must stay stable.

    Args:
        num_classes: Number of logits produced by the last linear layer.
    """
    def __init__(self, num_classes=1000):
        super(VGG16, self).__init__()

        # Feature extractor: a sequence of convolutional stages.
        # nn.Sequential chains the layers in the order they are listed.
        self.features = nn.Sequential(
            # Stage 1: 64 channels, 2 convolutions
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),  # inplace=True for memory efficiency
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 2: 128 channels, 2 convolutions
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 3: 256 channels, 3 convolutions
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 4: 512 channels, 3 convolutions
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Stage 5: 512 channels, 3 convolutions
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )

        # Pool the final feature map to the 7x7 grid the classifier expects.
        # This layer has no parameters, so state_dict keys are unaffected.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))

        # Classifier: a sequence of fully connected layers.
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_classes)
        )

        # Weight initialization
        self._initialize_weights()

    def forward(self, x):
        """
        Compute class logits for a batch of images.

        Args:
            x: Tensor of shape (N, 3, H, W).

        Returns:
            Tensor of shape (N, num_classes).
        """
        # Feature extraction
        x = self.features(x)
        # Normalize the spatial size to 7x7 regardless of the input resolution
        x = self.avgpool(x)
        # Flatten everything but the batch dimension; unlike .view, torch.flatten
        # also copes with non-contiguous activations (e.g. channels_last).
        x = torch.flatten(x, 1)
        # Classification
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """
        Initialize the weights of every Conv2d and Linear submodule.

        Conv2d: He (Kaiming) normal initialization with ``mode='fan_out'``.
        Linear: normal distribution with mean 0 and std 0.01.
        Biases, when present, are set to 0.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

In [None]:
# Instantiate VGG16 with its default arguments and show the layers of its feature extractor.
model = VGG16()
[*model.features.children()]

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
 Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
 ReLU(inplace=True),
 Conv2d(512, 512, kernel_size=(3, 3), stride=(1

In [None]:
# Print every layer of the feature extractor together with its index in the Sequential.
print("Features의 children 확인:")
for i, layer in enumerate(model.features):
    print(f"Layer {i}: {layer}")

Features의 children 확인:
Layer 0: Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 1: ReLU(inplace=True)
Layer 2: Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 3: ReLU(inplace=True)
Layer 4: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Layer 5: Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 6: ReLU(inplace=True)
Layer 7: Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 8: ReLU(inplace=True)
Layer 9: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
Layer 10: Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 11: ReLU(inplace=True)
Layer 12: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 13: ReLU(inplace=True)
Layer 14: Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
Layer 15: ReLU(inplace=True)
Layer 16: MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_m

In [None]:
# Wrap the first five feature layers (conv-relu-conv-relu-pool) in their own Sequential.
features = [*model.features.children()]
enc1 = nn.Sequential(*features[0:5])
enc1

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)

# FCN implementation

In [None]:
class FCN8s(nn.Module):
    """
    FCN-8s implementation for semantic segmentation.

    The encoder reuses the convolutional layers of ``VGG16``. The fully
    connected layers fc6/fc7 are expressed as convolutions, and class scores
    from pool5, pool4 and pool3 are fused through two learned 2x upsampling
    steps followed by a fixed 8x bilinear upsampling.

    Args:
        num_classes: Number of segmentation classes (channels of the output).

    Shape:
        Input:  (N, 3, H, W) where H and W are multiples of 32.
        Output: (N, num_classes, H, W).
    """

    def __init__(self, num_classes):
        super(FCN8s, self).__init__()

        # Only the convolutional part of VGG16 is reused here.
        vgg = VGG16()
        features = list(vgg.features.children())

        # Encoder: each slice ends on a max-pool, halving the resolution.
        self.enc1 = nn.Sequential(*features[0:5])    # -> pool1 (1/2)
        self.enc2 = nn.Sequential(*features[5:10])   # -> pool2 (1/4)
        self.enc3 = nn.Sequential(*features[10:17])  # -> pool3 (1/8)
        self.enc4 = nn.Sequential(*features[17:24])  # -> pool4 (1/16)
        self.enc5 = nn.Sequential(*features[24:])    # -> pool5 (1/32)

        # fc6, fc7 as convolutions followed by ReLU.
        # padding=3 makes the 7x7 convolution keep pool5's spatial size, so the
        # 2x-upsampled scores line up with pool4 for the skip connection.
        self.fc6 = nn.Sequential(
            nn.Conv2d(512, 4096, kernel_size=7, padding=3),
            nn.ReLU(inplace=True),
        )
        self.fc7 = nn.Sequential(
            nn.Conv2d(4096, 4096, kernel_size=1),
            nn.ReLU(inplace=True),
        )

        # Final scoring layer
        self.score = nn.Conv2d(4096, num_classes, kernel_size=1)

        # Additional scoring layers for the skip connections
        self.score_pool4 = nn.Conv2d(512, num_classes, kernel_size=1)
        self.score_pool3 = nn.Conv2d(256, num_classes, kernel_size=1)

        # Deconvolution: learned 2x upsampling (1/32 -> 1/16).
        # kernel_size=4, stride=2, padding=1 exactly doubles the spatial size.
        self.upsample_2x = nn.ConvTranspose2d(
            num_classes,
            num_classes,
            kernel_size=4,
            stride=2,
            padding=1,
            bias=False
        )

        # Second learned 2x upsampling (1/16 -> 1/8). It is a separate layer so
        # the two stages do not share weights.
        self.upsample_pool4_2x = nn.ConvTranspose2d(
            num_classes,
            num_classes,
            kernel_size=4,
            stride=2,
            padding=1,
            bias=False
        )

        # Interpolation: fixed 8x upsampling, chosen for computational efficiency.
        self.upsample_8x = nn.Upsample(
            scale_factor=8,
            mode='bilinear',
            align_corners=True
        )

    def forward(self, x):
        """
        Compute per-pixel class scores.

        Args:
            x: Tensor of shape (N, 3, H, W) with H and W multiples of 32.

        Returns:
            Tensor of shape (N, num_classes, H, W).

        Raises:
            ValueError: If H or W is not a multiple of 32.
        """
        # Five 2x poolings followed by 2x, 2x and 8x upsampling only reproduce
        # the input size (and matching skip sizes) for multiples of 32.
        height, width = x.shape[-2:]
        if height % 32 != 0 or width % 32 != 0:
            raise ValueError(
                f"FCN8s expects input height and width to be multiples of 32, "
                f"got {height}x{width}"
            )

        pool3 = self.enc3(self.enc2(self.enc1(x)))
        pool4 = self.enc4(pool3)
        pool5 = self.enc5(pool4)

        conv6 = self.fc6(pool5)
        conv7 = self.fc7(conv6)

        score = self.score(conv7)

        # First 2x upsampling (ConvTranspose2d), fused with the pool4 scores
        score2 = self.upsample_2x(score)
        score_pool4 = self.score_pool4(pool4)
        score2 = score2 + score_pool4

        # Second 2x upsampling (ConvTranspose2d), fused with the pool3 scores
        score3 = self.upsample_pool4_2x(score2)
        score_pool3 = self.score_pool3(pool3)
        score4 = score3 + score_pool3

        # Final 8x upsampling (interpolation) back to the input resolution
        out = self.upsample_8x(score4)

        return out