Basic Block 정의

In [None]:
import torch
import torch.nn as nn

In [None]:
class BasicBlock(nn.Module):
  """Two 3x3 convolutions, each followed by ReLU, then a 2x2 max-pool.

  Both convolutions use padding=1 with a 3x3 kernel, so they keep the spatial
  size of their input. The stride-2 pooling then halves height and width
  (rounding down). A (batch, in_channel, H, W) input therefore comes out as
  (batch, out_channel, H/2, W/2), e.g. 32x32 -> 16x16.

  Args:
    in_channel: number of channels of the incoming feature map.
    out_channel: number of channels produced by the second convolution.
    hidden_dim: number of channels between the two convolutions.
  """

  def __init__(self, in_channel, out_channel, hidden_dim) -> None:
    super().__init__()
    # Shared settings that make a convolution size-preserving.
    same_size = dict(kernel_size=3, padding=1)
    # (batch, in_channel, H, W) -> (batch, hidden_dim, H, W)
    self.conv1 = nn.Conv2d(in_channel, hidden_dim, **same_size)
    # (batch, hidden_dim, H, W) -> (batch, out_channel, H, W)
    self.conv2 = nn.Conv2d(hidden_dim, out_channel, **same_size)

    self.relu = nn.ReLU()
    # (batch, out_channel, H, W) -> (batch, out_channel, H/2, W/2)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

  def forward(self, x):
    """Apply conv -> ReLU twice, then down-sample with max pooling."""
    for conv in (self.conv1, self.conv2):
      x = self.relu(conv(x))
    return self.pool(x)

VGG 모델 정의 - CNN

In [None]:
class CNN(nn.Module):
  """Small VGG-style classifier: three BasicBlocks followed by three linear layers.

  The first linear layer expects 256*4*4 features, which is what the three
  blocks produce for a 3x32x32 input (each block halves height and width:
  32 -> 16 -> 8 -> 4, ending with 256 channels).

  Args:
    num_class: number of output classes (size of the returned logits).
  """

  def __init__(self, num_class):
    super().__init__()
    # Feature extractor; shapes in comments are for a (3, 32, 32) input.
    self.block1 = BasicBlock(in_channel=3, out_channel=32, hidden_dim=16)      # -> (32, 16, 16)
    self.block2 = BasicBlock(in_channel=32, out_channel=128, hidden_dim=64)    # -> (128, 8, 8)
    self.block3 = BasicBlock(in_channel=128, out_channel=256, hidden_dim=128)  # -> (256, 4, 4)

    # Classifier: 4096 -> 2048 -> 1024 -> num_class
    self.fc1 = nn.Linear(256 * 4 * 4, 2048)
    self.fc2 = nn.Linear(2048, 1024)
    self.fc3 = nn.Linear(1024, num_class)
    self.relu = nn.ReLU()

  def forward(self, x):
    """Return unnormalized class scores (logits) of shape (batch, num_class)."""
    for block in (self.block1, self.block2, self.block3):
      x = block(x)

    # (batch, 256, 4, 4) -> (batch, 4096); the batch dimension is kept.
    x = x.flatten(start_dim=1)

    for hidden_layer in (self.fc1, self.fc2):
      x = self.relu(hidden_layer(x))

    # No activation on the last layer: CrossEntropyLoss consumes raw logits.
    return self.fc3(x)

이미지 데이터 로드

In [None]:
from torch.utils.data.dataloader import DataLoader
from torchvision.datasets.cifar import CIFAR10
from torchvision.transforms import Compose,RandomCrop, RandomHorizontalFlip
from torchvision.transforms import ToTensor,RandomVerticalFlip,Resize,Normalize
from torch.optim.adam import Adam

In [None]:
# See CNN_VGB.pptx
# Training-time preprocessing: pad-and-crop plus random flips for augmentation,
# then tensor conversion and per-channel standardization.
#
# ToTensor() rescales uint8 pixels to floats in [0, 1], so Normalize must be
# given statistics on that same scale. These are the CIFAR-10 training-set
# per-channel mean/std divided by 255 (raw values are ~125/123/114 and
# ~63/62/67). Passing the raw 0-255 statistics here squashes every input to
# roughly -2 and the network cannot learn.
transforms = Compose([
 RandomCrop((32,32),padding=4),
 RandomHorizontalFlip(p=0.5),
 RandomVerticalFlip(p=0.5),
 ToTensor(),
 Normalize(mean=(0.4914,0.4822,0.4465),std=(0.247,0.243,0.261))
])

In [None]:
# Evaluation should be deterministic, so the test split reuses the training
# pipeline minus its random augmentation steps. The remaining steps (tensor
# conversion, normalization) are kept so both splits share the same input scale.
_random_augmentations = (RandomCrop, RandomHorizontalFlip, RandomVerticalFlip)
test_transforms = Compose(
    [t for t in transforms.transforms if not isinstance(t, _random_augmentations)]
)

train_dataset = CIFAR10(root="./",train=True, download=True,transform=transforms)
test_dataset = CIFAR10(root="./",train=False, download=True,transform=test_transforms)
# Shuffle only the training data; keep the test order fixed.
train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=32,shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 45124917.57it/s]


Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [None]:
# Per-channel mean and standard deviation of the raw training images, reduced
# over the sample, height and width axes (every axis except the last, channel,
# axis). These are computed on the stored pixel values without rescaling, so
# they must be divided by 255 before being used with Normalize after ToTensor.
pixels = train_dataset.data
rgb_mean = pixels.mean(axis=(0, 1, 2))
rgb_std = pixels.std(axis=(0, 1, 2))
(pixels.shape, rgb_mean, rgb_std)

((50000, 32, 32, 3),
 array([125.30691805, 122.95039414, 113.86538318]),
 array([62.99321928, 62.08870764, 66.70489964]))

In [None]:
# First (index 0) channel of every training image, rescaled by 1/255.
train_dataset.data[..., 0] / 255.0

array([[[0.23137255, 0.16862745, 0.19607843, ..., 0.61960784,
         0.59607843, 0.58039216],
        [0.0627451 , 0.        , 0.07058824, ..., 0.48235294,
         0.46666667, 0.47843137],
        [0.09803922, 0.0627451 , 0.19215686, ..., 0.4627451 ,
         0.47058824, 0.42745098],
        ...,
        [0.81568627, 0.78823529, 0.77647059, ..., 0.62745098,
         0.21960784, 0.20784314],
        [0.70588235, 0.67843137, 0.72941176, ..., 0.72156863,
         0.38039216, 0.3254902 ],
        [0.69411765, 0.65882353, 0.70196078, ..., 0.84705882,
         0.59215686, 0.48235294]],

       [[0.60392157, 0.49411765, 0.41176471, ..., 0.35686275,
         0.34117647, 0.30980392],
        [0.54901961, 0.56862745, 0.49019608, ..., 0.37647059,
         0.30196078, 0.27843137],
        [0.54901961, 0.54509804, 0.45098039, ..., 0.30980392,
         0.26666667, 0.2627451 ],
        ...,
        [0.68627451, 0.61176471, 0.60392157, ..., 0.16470588,
         0.23921569, 0.36470588],
        [0.6

In [None]:
import torch
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

In [None]:
from tqdm import tqdm

# Train the CNN defined above with 10 output classes for 5 epochs using Adam.
model = CNN(10)
model.to(device)
model.train()  # make training mode explicit in case the model was switched to eval earlier

lr = 1e-3
optim = Adam(model.parameters(), lr=lr)
# The loss module is stateless, so build it once instead of once per batch.
criterion = nn.CrossEntropyLoss()

for epoch in range(5):
  iterator = tqdm(train_loader)
  for data, label in iterator:
    data, label = data.to(device), label.to(device)

    optim.zero_grad()           # clear gradients left over from the previous step
    pred = model(data)          # logits, one row per sample
    loss = criterion(pred, label)
    loss.backward()
    optim.step()

    # Show the most recent batch loss in the progress bar.
    iterator.set_description(f'epoch:{epoch+1}  loss:{loss.item()} ')

epoch:1  loss:2.300086498260498 : 100%|██████████| 1563/1563 [00:52<00:00, 29.75it/s]
epoch:2  loss:2.303483486175537 : 100%|██████████| 1563/1563 [00:52<00:00, 29.88it/s]
epoch:3  loss:2.301692247390747 : 100%|██████████| 1563/1563 [00:52<00:00, 30.04it/s]
epoch:4  loss:2.299988269805908 : 100%|██████████| 1563/1563 [00:53<00:00, 29.42it/s]
epoch:5  loss:2.2998297214508057 : 100%|██████████| 1563/1563 [00:49<00:00, 31.83it/s]


평가

In [None]:
# Top-1 accuracy over the test set.
num_corr = 0
# Switch layers such as Dropout to inference behaviour before scoring.
model.eval()
with torch.no_grad():  # no gradients are needed for evaluation
  for data, label in test_loader:
    output = model(data.to(device))
    # Predicted class = index of the largest logit in each row.
    preds = output.argmax(dim=1)
    num_corr += (preds == label.to(device)).sum().item()
print(f"accuracy : {num_corr / len(test_dataset)}")

accuracy : 0.0971


VGG 전이학습

In [None]:
import torch
import torch.nn as nn
from torchvision.models.vgg import vgg16,VGG16_Weights

# Load torchvision's VGG16 with its default pretrained weights.
# NOTE: this rebinds `model`, which previously referred to the small CNN.
model = vgg16(weights=VGG16_Weights.DEFAULT)
# Bare last expression so the notebook displays the module structure.
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# New classification head for the 10 target classes:
# 25088 -> 4096 -> 4096 -> 10, with ReLU and Dropout(0.5) after each hidden layer.
head_layers = [
    nn.Linear(25088, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 4096),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5),
    nn.Linear(4096, 10),
]
fc = nn.Sequential(*head_layers)

# Swap the head into the model, then move the whole model to the selected device.
model.classifier = fc
model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [None]:
# Load data

# Training pipeline: upsample the 32x32 images to 224x224 and augment at that
# resolution. The random crop must be 224x224 as well; cropping 32x32 after
# the resize would feed the network only a small patch of each upsampled image.
transforms = Compose([
    Resize(224),
    RandomCrop((224,224),padding=4),
    RandomHorizontalFlip(p=0.5),
    ToTensor(),
    Normalize(mean=(0.4914,0.4822,0.4465),std=(0.247,0.243,0.261))
])

# Evaluation pipeline: same size and normalization, but no random augmentation,
# so that test results are deterministic.
test_transforms = Compose([
    Resize(224),
    ToTensor(),
    Normalize(mean=(0.4914,0.4822,0.4465),std=(0.247,0.243,0.261))
])

train_dataset = CIFAR10(root="./",train=True, download=True,transform=transforms)
test_dataset = CIFAR10(root="./",train=False, download=True,transform=test_transforms)
# Shuffle only the training data; keep the test order fixed.
train_loader = DataLoader(train_dataset,batch_size=64,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=64,shuffle=False)


# Train the model
from tqdm import tqdm

# All parameters (pretrained backbone included) are being updated, so use a
# small learning rate: with 1e-3 the loss stayed at ~2.30 (= ln 10, i.e. chance
# level for 10 classes) for every epoch.
lr = 1e-4
optim = Adam(model.parameters(), lr=lr)
# The loss module is stateless, so build it once instead of once per batch.
criterion = nn.CrossEntropyLoss()

model.train()  # the classifier head contains Dropout, so training mode matters
for epoch in range(5):
  iterator = tqdm(train_loader)
  for data, label in iterator:
    data, label = data.to(device), label.to(device)

    optim.zero_grad()           # clear gradients left over from the previous step
    pred = model(data)          # logits, one row per sample
    loss = criterion(pred, label)
    loss.backward()
    optim.step()

    # Show the most recent batch loss in the progress bar.
    iterator.set_description(f'epoch:{epoch+1}  loss:{loss.item()} ')

Files already downloaded and verified
Files already downloaded and verified


epoch:1  loss:2.302002429962158 : 100%|██████████| 782/782 [01:57<00:00,  6.68it/s]
epoch:2  loss:2.2970173358917236 : 100%|██████████| 782/782 [01:57<00:00,  6.68it/s]
epoch:3  loss:2.303152084350586 : 100%|██████████| 782/782 [01:57<00:00,  6.67it/s]
epoch:4  loss:2.306368589401245 : 100%|██████████| 782/782 [01:56<00:00,  6.69it/s]
epoch:5  loss:2.3011648654937744 : 100%|██████████| 782/782 [01:57<00:00,  6.66it/s]
