# ResNet

In [2]:
import torch
from torch import Tensor
import torch.nn as nn
import torch.nn.functional as F

In [3]:
class BasicBlock(nn.Module):
    expansion_factor = 1
    def __init__(self, in_channels: int, out_channels: int, stride: int = 1):
        # 여기에 코드를 작성해주세요
        super(BasicBlock, self).__init__()
        self.conv1= nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1= nn.BatchNorm2d(out_channels)
        self.conv2= nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2= nn.BatchNorm2d(out_channels)

        #Shortcut (stride가 1이 아니거나 채널 다를 때 맞춰줌)
        self.shortcut= nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut= nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )


    def forward(self, x: Tensor) -> Tensor:
        residual=x # 잔차
        x= F.relu(self.bn1(self.conv1(x)))
        x= self.bn2(self.conv2(x))
        x += self.shortcut(residual) # 지름길 더하기
        x= F.relu(x)

        # 여기에 코드를 작성해주세요
        return x


In [4]:
class BottleNeck(nn.Module):
    expansion_factor = 4 # 숫자를 작성해주세요
    def __init__(self, in_channels: int, out_channels: int, stride: int = 1):
        # 여기에 코드를 작성해주세요
        super(BottleNeck, self).__init__()
        mid_channels= out_channels
        self.conv1= nn.Conv2d(in_channels, mid_channels, kernel_size=1, stride=1, bias=False)
        self.bn1= nn.BatchNorm2d(mid_channels)
        self.conv2= nn.Conv2d(mid_channels, mid_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn2= nn.BatchNorm2d(mid_channels)
        self.conv3= nn.Conv2d(mid_channels, out_channels*self.expansion_factor, kernel_size=1, stride=1, bias=False)
        self.bn3= nn.BatchNorm2d(out_channels*self.expansion_factor)

        self.shortcut= nn.Sequential()
        if stride != 1 or in_channels != out_channels*self.expansion_factor:
            self.shortcut= nn.Sequential(
                nn.Conv2d(in_channels, out_channels*self.expansion_factor, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels*self.expansion_factor)
            )

    def forward(self, x:Tensor) -> Tensor:
        residual= x
        x= F.relu(self.bn1(self.conv1(x)))
        x= F.relu(self.bn2(self.conv2(x)))
        x= self.bn3(self.conv3(x))
        x += self.shortcut(residual)
        x= F.relu(x)
        # 여기에 코드를 작성해주세요
        return x


In [5]:
class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=10):
        # 여기에 코드를 작성해주세요
        super(ResNet, self).__init__()
        self.in_channels=64

        #초기 layer
        self.conv1= nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1= nn.BatchNorm2d(64)
        self.maxpool= nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        #ResNet의 4개 stage
        self.layer1= self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2= self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3= self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4= self._make_layer(block, 512, num_blocks[3], stride=2)

        self.avgpool= nn.AdaptiveAvgPool2d((1, 1))
        self.fc= nn.Linear(512*block.expansion_factor, num_classes)
        self._init_layer()

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks-1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion_factor
        return nn.Sequential(*layers)

    def _init_layer(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def forward(self, x: Tensor) -> Tensor:
        x= F.relu(self.bn1(self.conv1(x)))
        x= self.maxpool(x)

        x= self.layer1(x)
        x= self.layer2(x)
        x= self.layer3(x)
        x= self.layer4(x)
        x= self.avgpool(x)
        x= torch.flatten(x, 1)
        x= self.fc(x)

        # 여기에 코드를 작성해주세요
        return x


In [6]:
class Model:
    def resnet18(self):
        res18= ResNet(BasicBlock, [2, 2, 2, 2])

        return res18
        # 여기에 코드를 작성해주세요

    def resnet34(self):
        res34= ResNet(BasicBlock, [3, 4, 6, 3])

        return res34
        # 여기에 코드를 작성해주세요

    def resnet50(self):
        res50= ResNet(BottleNeck, [3, 4, 6, 3])

        return res50
        # 여기에 코드를 작성해주세요

    def resnet101(self):
        res101= ResNet(BottleNeck, [3, 4, 23, 3])

        return res101
        # 여기에 코드를 작성해주세요

    def resnet152(self):
        res152= ResNet(BottleNeck, [3, 8, 36, 3])
        return res152
        # 여기에 코드를 작성해주세요


In [7]:
model = Model().resnet152()
y = model(torch.randn(1, 3, 224, 224))
print(y.size())


torch.Size([1, 10])


In [8]:
import os
import torch
import torch.nn as nn
import numpy as np
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# hyper-parameters
learning_rate = 0.01
weight_decay = 1e-4
momentum = 0.9
batch_size = 128
num_epochs = 1

In [9]:
def do_transform(train_mean, train_std, test_mean, test_std):
    train_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std),
    ])

    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize(224),
        transforms.Normalize(test_mean, test_std),
    ])

    return train_transform, test_transform

def do_mean_std(train_data, test_data):
    train_mean_rgb = [np.mean(x.numpy(), axis=(1,2)) for x, _ in train_data]
    train_std_rgb  = [np.std(x.numpy(), axis=(1,2)) for x, _ in train_data]

    test_mean_rgb = [np.mean(x.numpy(), axis=(1,2)) for x, _ in test_data]
    test_std_rgb  = [np.std(x.numpy(), axis=(1,2)) for x, _ in test_data]

    train_mean = np.mean(train_mean_rgb, axis=0).tolist()
    train_std  = np.mean(train_std_rgb, axis=0).tolist()
    test_mean  = np.mean(test_mean_rgb, axis=0).tolist()
    test_std   = np.mean(test_std_rgb, axis=0).tolist()

    return train_mean, train_std, test_mean, test_std


In [10]:
def get_dataloader(train_data, test_data):
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # 여기에 코드를 작성해주세요
    test_loader  = DataLoader(test_data, batch_size=batch_size, shuffle=False)
    # 여기에 코드를 작성해주세요
    return train_loader, test_loader

In [11]:
train_data = datasets.STL10(
    root='./data',
    split='train',
    download=True,
    transform=transforms.ToTensor()
)

test_data = datasets.STL10(
    root='./data',
    split='test',
    download=True,
    transform=transforms.ToTensor()
)


100%|██████████| 2.64G/2.64G [02:46<00:00, 15.8MB/s]


In [12]:
train_mean, train_std, test_mean, test_std = do_mean_std(train_data, test_data)
train_transform, test_transform = do_transform(train_mean, train_std, test_mean, test_std)

train_data.transform = train_transform
test_data.transform  = test_transform

train_loader, test_loader = get_dataloader(train_data, test_data)

In [13]:
np.random.seed(123)
torch.manual_seed(123)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [14]:
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

model = Model().resnet152().to(device)
# 여기에 코드를 작성해주세요
criterion = nn.CrossEntropyLoss()
# 여기에 코드를 작성해주세요

optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
# 여기에 코드를 작성해주세요

scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)
# 여기에 코드를 작성해주세요


In [16]:
for epoch in range(num_epochs):
    # -------- Train --------
    model.train()
    correct, count = 0, 0
    train_loss = 0.0

    for step, (images, labels) in enumerate(train_loader, start=1):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        train_loss= criterion(model(images), labels)
        train_loss.backward()
        optimizer.step()

        count += len(labels)
        correct += (model(images).argmax(dim=1) == labels).sum().item()

        # 여기에 코드를 작성해주세요

        print(
            f"[Train] Epoch {epoch} "
            f"Step {step}/{len(train_loader)} "
            f"Acc {(correct/count)*100:.2f}% "
            f"Loss {(train_loss/count):.4f}"
        )

    # -------- Validation --------
    model.eval()
    correct, count = 0, 0
    valid_loss = 0.0

    with torch.no_grad():
        for step, (images, labels) in enumerate(test_loader, start=1):
            images, labels = images.to(device), labels.to(device)
            # 여기에 코드를 작성해주세요

            outputs=model(images)

            loss= criterion(outputs, labels)
            valid_loss += loss.item()

            count+= labels.size(0)
            correct += (outputs.argmax(dim=1) == labels).sum().item()


            print(
                f"[Valid] Step {step}/{len(test_loader)} "
                f"Acc {(correct/count)*100:.2f}% "
                f"Loss {(valid_loss/count):.4f}"
            )

    scheduler.step(valid_loss)

[Train] Epoch 0 Step 1/40 Acc 20.31% Loss 0.0188
[Train] Epoch 0 Step 2/40 Acc 18.75% Loss 0.0098
[Train] Epoch 0 Step 3/40 Acc 17.19% Loss 0.0062
[Train] Epoch 0 Step 4/40 Acc 16.21% Loss 0.0047
[Train] Epoch 0 Step 5/40 Acc 15.94% Loss 0.0038
[Train] Epoch 0 Step 6/40 Acc 15.23% Loss 0.0029
[Train] Epoch 0 Step 7/40 Acc 14.96% Loss 0.0027
[Train] Epoch 0 Step 8/40 Acc 14.65% Loss 0.0022
[Train] Epoch 0 Step 9/40 Acc 14.06% Loss 0.0020
[Train] Epoch 0 Step 10/40 Acc 13.91% Loss 0.0019
[Train] Epoch 0 Step 11/40 Acc 14.28% Loss 0.0016
[Train] Epoch 0 Step 12/40 Acc 13.93% Loss 0.0018
[Train] Epoch 0 Step 13/40 Acc 13.88% Loss 0.0016
[Train] Epoch 0 Step 14/40 Acc 13.95% Loss 0.0015
[Train] Epoch 0 Step 15/40 Acc 14.17% Loss 0.0011
[Train] Epoch 0 Step 16/40 Acc 14.16% Loss 0.0013
[Train] Epoch 0 Step 17/40 Acc 14.34% Loss 0.0010
[Train] Epoch 0 Step 18/40 Acc 14.41% Loss 0.0010
[Train] Epoch 0 Step 19/40 Acc 14.47% Loss 0.0009
[Train] Epoch 0 Step 20/40 Acc 14.57% Loss 0.0009
[Train] E