필요한 도구를 임포트하자.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from tqdm.auto import tqdm

초기 transform을 설정하고 데이터를 불러오자.

In [None]:
# Initial preprocessing pipeline: a single step that converts each image to a
# tensor.
transform = transforms.Compose([transforms.ToTensor()])

In [None]:
# Build the CIFAR-10 train and test splits (in that order) with identical
# settings; only the `train` flag differs between the two datasets.
train_data, test_data = (
    torchvision.datasets.CIFAR10('./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


불러온 데이터를 확인해보자.

In [None]:
# Report how many samples each split contains, one line per split.
print(
    f'train 데이터 개수: {len(train_data)}',
    f'test 데이터 개수: {len(test_data)}',
    sep='\n',
)

train 데이터 개수: 50000
test 데이터 개수: 10000


In [None]:
# Each dataset item is an (image, label) pair; inspect the image tensor of
# the first training sample: its size first, then its raw values.
first_image = train_data[0][0]
print('이미지 구성')
print(first_image.size())
print(first_image)

이미지 구성
torch.Size([3, 32, 32])
tensor([[[0.2314, 0.1686, 0.1961,  ..., 0.6196, 0.5961, 0.5804],
         [0.0627, 0.0000, 0.0706,  ..., 0.4824, 0.4667, 0.4784],
         [0.0980, 0.0627, 0.1922,  ..., 0.4627, 0.4706, 0.4275],
         ...,
         [0.8157, 0.7882, 0.7765,  ..., 0.6275, 0.2196, 0.2078],
         [0.7059, 0.6784, 0.7294,  ..., 0.7216, 0.3804, 0.3255],
         [0.6941, 0.6588, 0.7020,  ..., 0.8471, 0.5922, 0.4824]],

        [[0.2431, 0.1804, 0.1882,  ..., 0.5176, 0.4902, 0.4863],
         [0.0784, 0.0000, 0.0314,  ..., 0.3451, 0.3255, 0.3412],
         [0.0941, 0.0275, 0.1059,  ..., 0.3294, 0.3294, 0.2863],
         ...,
         [0.6667, 0.6000, 0.6314,  ..., 0.5216, 0.1216, 0.1333],
         [0.5451, 0.4824, 0.5647,  ..., 0.5804, 0.2431, 0.2078],
         [0.5647, 0.5059, 0.5569,  ..., 0.7216, 0.4627, 0.3608]],

        [[0.2471, 0.1765, 0.1686,  ..., 0.4235, 0.4000, 0.4039],
         [0.0784, 0.0000, 0.0000,  ..., 0.2157, 0.1961, 0.2235],
         [0.0824, 0.0000, 0

이미지 데이터를 normalize하기 위해 train 데이터 이미지의 RGB 채널별 평균과 표준편차를 구해보자.

In [None]:
# One row per training image, one column per channel. Reducing over axes
# (1, 2) of each image array leaves a single value per leading-axis entry
# (the image tensors print as torch.Size([3, 32, 32]) above).
channel_names = ['R', 'G', 'B']

# Per-image channel means.
train_meanRGB = pd.DataFrame(
    [image.numpy().mean(axis=(1, 2)) for image, _ in train_data],
    columns=channel_names,
)

# Per-image channel standard deviations.
train_stdRGB = pd.DataFrame(
    [image.numpy().std(axis=(1, 2)) for image, _ in train_data],
    columns=channel_names,
)

In [None]:
# Preview the first five rows of the per-image channel means.
train_meanRGB.head(n=5)

Unnamed: 0,R,G,B
0,0.553745,0.412155,0.251126
1,0.510585,0.511236,0.511677
2,0.523468,0.530009,0.519941
3,0.392076,0.326585,0.225153
4,0.360869,0.40157,0.454913


In [None]:
# Collapse the per-image statistics into one scalar per channel by averaging
# over all training images. Note that the "std" values are therefore the
# average of the per-image standard deviations, not the standard deviation
# taken over every pixel of the dataset at once.
train_meanR, train_meanG, train_meanB = (
    np.mean(train_meanRGB[channel]) for channel in ('R', 'G', 'B')
)
train_stdR, train_stdG, train_stdB = (
    np.mean(train_stdRGB[channel]) for channel in ('R', 'G', 'B')
)

새로운 transform을 정의하고 train, test의 transform으로 지정해주자.

In [None]:
# Channel statistics measured on the training split; both pipelines
# normalize with these same values.
channel_mean = [train_meanR, train_meanG, train_meanB]
channel_std = [train_stdR, train_stdG, train_stdB]

# Training pipeline: tensor conversion, resize to 224, normalization, and a
# random horizontal flip as the only augmentation.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(224),
    transforms.Normalize(channel_mean, channel_std),
    transforms.RandomHorizontalFlip(),
])

# Test pipeline: the same steps without the random flip.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize(224),
    transforms.Normalize(channel_mean, channel_std),
])

In [None]:
# Swap the per-image preprocessing on the already-loaded datasets.
# The image pipeline that CIFAR10 applies when an item is fetched lives in the
# singular `transform` attribute (the one set through the constructor's
# `transform=` argument). Assigning to `transforms` (plural) does not change
# what `dataset[i]` returns, so the resize / normalize / flip steps would
# silently never run.
train_data.transform = train_transform
test_data.transform = test_transform

DataLoader를 생성하자.

In [None]:
# Mini-batches of 128 samples. Only the training loader reshuffles every
# epoch; the evaluation loader keeps a fixed order because the accuracy
# computed over the whole test split does not depend on sample order.
train_loader = DataLoader(train_data, batch_size=128, shuffle=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False)

아래는 ResNet18, 34를 구성하는데 사용되는 block이다.

In [None]:
class BasicBlock(nn.Module):
    """Two-convolution residual block (the block type used for ResNet-18/34).

    The residual branch is conv3x3 -> BatchNorm -> ReLU -> conv3x3 -> BatchNorm.
    Its output is added to the shortcut branch and the sum goes through a
    final ReLU.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Number of channels the block produces.
        stride: Stride of the first 3x3 convolution and, when a projection
            shortcut is needed, of the 1x1 shortcut convolution.
    """

    # Factor between `out_channels` and the block's real output channels.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.residual_function = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            # nn.ReLU's only constructor argument is the `inplace` flag, so it
            # is passed by keyword rather than as a channel count.
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(out_channels)
        )

        # Identity shortcut unless the residual branch changes the shape.
        self.shortcut = nn.Sequential()
        self.relu = nn.ReLU()

        # A 1x1 projection is required whenever the residual output differs
        # from the input in spatial size (stride) OR in channel count;
        # otherwise the element-wise addition in forward() cannot be done.
        if stride != 1 or in_channels != out_channels * BasicBlock.expansion:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * BasicBlock.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * BasicBlock.expansion)
            )

    def forward(self, x):
        """Return ReLU(residual_function(x) + shortcut(x))."""
        out = self.residual_function(x)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

아래는 50, 101, 152 등 더 깊은 구조에 사용되는 BottleNeck block을 정의한 코드다.

In [None]:
class BottleNeck(nn.Module):
    """Three-convolution bottleneck residual block (1x1 -> 3x3 -> 1x1).

    The first 1x1 convolution maps the input to `out_channels`, the 3x3
    convolution carries the stride, and the last 1x1 convolution widens the
    result to `out_channels * expansion`. Every convolution is followed by
    BatchNorm; ReLU follows the first two, and a final ReLU is applied after
    the shortcut has been added.

    Args:
        in_channels: Number of channels of the incoming feature map.
        out_channels: Width of the two inner convolutions; the block outputs
            `out_channels * expansion` channels.
        stride: Stride of the 3x3 convolution and of the projection shortcut.
    """

    # Factor between `out_channels` and the block's real output channels.
    expansion = 4

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        widened = out_channels * BottleNeck.expansion

        squeeze = [
            nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        spatial = [
            nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]
        expand = [
            nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(widened),
        ]
        self.residual_function = nn.Sequential(*squeeze, *spatial, *expand)

        # Identity shortcut when the residual branch keeps the input shape;
        # otherwise a strided 1x1 projection so both branches can be summed.
        shape_preserved = stride == 1 and in_channels == widened
        if shape_preserved:
            self.shortcut = nn.Sequential()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, widened, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(widened),
            )

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ReLU(residual_function(x) + shortcut(x))."""
        residual = self.residual_function(x)
        skipped = self.shortcut(x)
        return self.relu(residual + skipped)

위에서 정의한 두 개의 block을 사용해 구현한 ResNet 코드다.

In [None]:
class ResNet(nn.Module):
    """ResNet classifier assembled from a configurable residual block type.

    Structure: a stem (`conv0`: 7x7 stride-2 convolution, BatchNorm, ReLU,
    3x3 stride-2 max-pool), four residual stages (`layer1`..`layer4`) with
    base widths 64/128/256/512, global average pooling and one linear layer.

    Args:
        block_class: Block type to stack. It must expose an `expansion`
            class attribute and accept `(in_channels, out_channels, stride)`.
        num_blocks: Indexable with the number of blocks for each of the four
            stages.
        num_classes: Size of the output of the final linear layer.
        init_weights: When true, `initialize_weights()` is run at the end of
            construction.
    """

    def __init__(self, block_class, num_blocks, num_classes=10, init_weights=True):
        super(ResNet, self).__init__()
        # Channel count fed to the next block; make_layer() keeps it updated.
        self.in_channels = 64

        stem_conv = nn.Conv2d(3, self.in_channels, kernel_size=7, stride=2, padding=3)
        stem_norm = nn.BatchNorm2d(self.in_channels)
        stem_pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv0 = nn.Sequential(stem_conv, stem_norm, nn.ReLU(), stem_pool)

        # (base width, stride of the first block) for layer1..layer4.
        stage_plan = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (width, first_stride) in enumerate(stage_plan):
            stage = self.make_layer(block_class, width, num_blocks[position], stride=first_stride)
            setattr(self, 'layer{}'.format(position + 1), stage)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.linear = nn.Linear(512 * block_class.expansion, num_classes)

        if init_weights:
            self.initialize_weights()

    def make_layer(self, block_class, out_channels, num_blocks, stride):
        """Stack `num_blocks` blocks into one stage.

        Only the first block receives `stride`; the remaining blocks use
        stride 1. `self.in_channels` is advanced to
        `out_channels * block_class.expansion` after every block.
        """
        blocks = []
        for position in range(num_blocks):
            block_stride = stride if position == 0 else 1
            blocks.append(block_class(self.in_channels, out_channels, block_stride))
            self.in_channels = out_channels * block_class.expansion
        return nn.Sequential(*blocks)

    def initialize_weights(self):
        """Re-initialize every Conv2d, BatchNorm2d and Linear submodule.

        Conv2d: Kaiming-normal weights (fan_in, relu), zero bias if present.
        BatchNorm2d: weight 1, bias 0.
        Linear: weights from N(0, 0.01), zero bias.
        """
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_in', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

    def forward(self, x):
        """Run the stem, the four stages and the classifier head on `x`."""
        features = x
        for stage in (self.conv0, self.layer1, self.layer2, self.layer3, self.layer4, self.avgpool):
            features = stage(features)
        # Collapse everything except the leading (batch) dimension.
        flat = features.view(features.size(0), -1)
        return self.linear(flat)

ResNet 모델은 다음과 같이 함수를 이용해 정의할 수 있다.

In [None]:
def resnet18():
    """Build the ResNet-18 variant: BasicBlock stages of depth 2, 2, 2, 2."""
    stage_depths = [2, 2, 2, 2]
    return ResNet(block_class=BasicBlock, num_blocks=stage_depths)

def resnet34():
    """Build the ResNet-34 variant: BasicBlock stages of depth 3, 4, 6, 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block_class=BasicBlock, num_blocks=stage_depths)

def resnet50():
    """Build the ResNet-50 variant: BottleNeck stages of depth 3, 4, 6, 3."""
    stage_depths = [3, 4, 6, 3]
    return ResNet(block_class=BottleNeck, num_blocks=stage_depths)

def resnet101():
    """Build the ResNet-101 variant: BottleNeck stages of depth 3, 4, 23, 3."""
    stage_depths = [3, 4, 23, 3]
    return ResNet(block_class=BottleNeck, num_blocks=stage_depths)

def resnet152():
    """Build the ResNet-152 variant: BottleNeck stages of depth 3, 8, 36, 3."""
    stage_depths = [3, 8, 36, 3]
    return ResNet(block_class=BottleNeck, num_blocks=stage_depths)

이제 ResNet 모델을 사용해 CIFAR10을 학습시켜보자. (아래 코드에서는 resnet18을 사용한다.)

먼저 GPU를 설정해주자.

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Fix the random seed (42) for reproducibility; the CUDA generators are
# seeded explicitly as well when running on the GPU.
torch.manual_seed(42)
if device == 'cuda':
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

모델을 선언하고 모델의 conv구조를 확인해보자.

In [None]:
# Instantiate ResNet-18, then move its parameters to the selected device.
model = resnet18()
model = model.to(device)

In [None]:
# Walk every submodule of the model and print only the convolution layers.
for layer in model.modules():
    if not isinstance(layer, nn.Conv2d):
        continue
    print(layer)

Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2), bias=False)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2), bias=False)
Conv2d(256, 256, 

optimizer와 criterion을 정의하자.

In [None]:
# Loss: cross-entropy between the model outputs and the class labels.
criterion = nn.CrossEntropyLoss()

# Optimizer: Adam over all model parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

모델을 학습시켜보자.

In [None]:
# Train for a fixed number of epochs, reporting the mean batch loss and the
# training accuracy after every epoch.
epochs = 20
total_image = len(train_data)    # number of training samples
total_batch = len(train_loader)  # number of mini-batches per epoch
for epoch in tqdm(range(epochs)):
    # Set training mode at the start of every epoch so the loop stays correct
    # even if the model was switched to eval mode in between.
    model.train()

    # Running totals are plain Python numbers: accumulating the loss tensor
    # itself would keep a reference to autograd history for every batch of
    # the epoch.
    correct = 0
    avg_cost = 0.0
    for x, y in train_loader:
        x = x.to(device)
        y = y.to(device)

        hypothesis = model(x)
        cost = criterion(hypothesis, y)

        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # Predicted class = index of the largest output along dimension 1.
        _, prediction = torch.max(hypothesis, 1)
        correct += (prediction == y).sum().item()
        avg_cost += cost.item() / total_batch

    # Exact fraction of correctly classified training samples this epoch.
    accuracy = correct / total_image
    print('[Epoch: {:>4}] cost = {:>.9} / accuracy = {:>.9}'.format(epoch + 1, avg_cost, accuracy))

  0%|          | 0/20 [00:00<?, ?it/s]

[Epoch:    1] cost = 1.3344568 / accuracy = 0.519079924
[Epoch:    2] cost = 0.974424124 / accuracy = 0.655119896
[Epoch:    3] cost = 0.799245358 / accuracy = 0.720419884
[Epoch:    4] cost = 0.670898616 / accuracy = 0.764020324
[Epoch:    5] cost = 0.570317447 / accuracy = 0.80241996
[Epoch:    6] cost = 0.476588219 / accuracy = 0.832140565
[Epoch:    7] cost = 0.38901794 / accuracy = 0.863079607
[Epoch:    8] cost = 0.320282996 / accuracy = 0.886760414
[Epoch:    9] cost = 0.254639179 / accuracy = 0.909620464
[Epoch:   10] cost = 0.202904463 / accuracy = 0.927700341
[Epoch:   11] cost = 0.170034647 / accuracy = 0.939359963
[Epoch:   12] cost = 0.141937166 / accuracy = 0.951259553
[Epoch:   13] cost = 0.128294945 / accuracy = 0.955278993
[Epoch:   14] cost = 0.109717488 / accuracy = 0.961738944
[Epoch:   15] cost = 0.0996840298 / accuracy = 0.96439898
[Epoch:   16] cost = 0.0853468478 / accuracy = 0.970439136
[Epoch:   17] cost = 0.0887302458 / accuracy = 0.968378961
[Epoch:   18] co

모델의 성능을 확인해보자.

In [None]:
# Evaluate on the test split: switch the model to eval mode, disable
# gradient tracking and accumulate each batch's share of correct predictions.
model.eval()

total_image = len(test_data)
accuracy = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        # torch.max along dimension 1 returns (values, indices); the indices
        # are the predicted classes.
        prediction = torch.max(logits, 1)[1]
        hits = (prediction == labels).sum()
        accuracy += hits / total_image

print('Accuracy:', accuracy.item())

Accuracy: 0.7408000826835632


resnet50의 경우 데이터에 비해 모델이 너무 복잡해 학습이 잘 되지 않았다. <br>
resnet18은 epoch가 15회인 경우 train 데이터에 대해 loss는 0.096, accuracy는 0.96에 도달했지만 test 데이터에 대해서는 0.72의 정확도밖에 내지 못했다. <br>
과적합 문제인가 해서 epoch 10회 수행한 결과 0.73의 정확도를 보였다. <br>
학습이 덜 됐다고 판단하여 20회를 수행해준 결과 0.74의 정확도를 보였고 epoch를 30까지 늘려보았다. <br>
하지만 20번 이후에는 loss 및 accuracy 값이 발산하는 모습을 보여 학습의 성과가 좋진 않았다.

참고 출처: [github.com/weiaicunzai/pytorch-cifar100/blob/master/models/resnet.py](https://github.com/weiaicunzai/pytorch-cifar100/blob/master/models/resnet.py)