In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets, models

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
USE_CUDA = torch.cuda.is_available()
DEVICE = torch.device("cuda") if USE_CUDA else torch.device("cpu")

# Training hyperparameters shared by the cells below.
EPOCHS = 300      # number of full passes over the training set
BATCH_SIZE = 128  # samples per mini-batch for both data loaders

In [3]:
# Training-time preprocessing. RandomCrop and RandomHorizontalFlip add noise
# to the inputs to help prevent overfitting; Normalize then shifts and scales
# each of the three channels with mean 0.5 and std 0.5.
train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# CIFAR-10 training split, downloaded into ./.data when not already present.
train_dataset = datasets.CIFAR10(
    './.data',
    train=True,
    download=True,
    transform=train_transform,
)

# Reshuffle the training samples every epoch.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
)
# Evaluation data: the CIFAR-10 test split with no augmentation, only tensor
# conversion and the same per-channel normalization (mean 0.5, std 0.5) that
# the training pipeline applies.
test_loader=torch.utils.data.DataLoader(
    datasets.CIFAR10('./.data',
                     train=False,
                     # Lets this statement run on its own; torchvision skips
                     # the download when the files are already in ./.data.
                     download=True,
                     transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize((0.5,0.5,0.5),
                                              (0.5,0.5,0.5))
                     ])),
    batch_size=BATCH_SIZE,
    # Evaluation only sums metrics over the whole split, so sample order is
    # irrelevant; keep it fixed instead of reshuffling on every pass.
    shuffle=False)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./.data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./.data/cifar-10-python.tar.gz to ./.data


In [4]:
# As data passes through many stacked layers, information about the original
# input image is gradually lost.
# The core idea of ResNet is to split the network into small units called
# residual blocks.
# Adding the block's input x back onto its output allows much deeper models
# (learning the difference between input and output separately works better).
class BasicBlock(nn.Module):
  """Residual block: two 3x3 conv + batch-norm stages plus a shortcut branch.

  Args:
    in_planes: number of channels of the incoming feature map.
    planes: number of channels produced by the block.
    stride: stride of the first 3x3 convolution and of the projection
      shortcut, when one is used (default 1).
  """

  def __init__(self,in_planes,planes,stride=1):
    super().__init__()

    # Main branch: conv -> bn -> (relu in forward) -> conv -> bn.
    self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3,
                           stride=stride, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(planes)
    self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                           stride=1, padding=1, bias=False)
    self.bn2 = nn.BatchNorm2d(planes)

    # Shortcut branch: an empty Sequential passes x through unchanged. When
    # the main branch changes the stride or channel count, a 1x1 conv +
    # batch-norm projection is used instead so both branches can be summed.
    shape_preserved = (stride == 1 and in_planes == planes)
    if shape_preserved:
      self.shortcut = nn.Sequential()
    else:
      self.shortcut = nn.Sequential(
          nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
          nn.BatchNorm2d(planes),
      )

  def forward(self,x):
    """Return relu(main_branch(x) + shortcut(x))."""
    branch = self.conv1(x)
    branch = F.relu(self.bn1(branch))
    branch = self.conv2(branch)
    branch = self.bn2(branch)
    # Residual connection: add the (possibly projected) input in place.
    branch += self.shortcut(x)
    return F.relu(branch)

    

In [5]:
class ResNet(nn.Module):
  """Small ResNet classifier built from three stages of BasicBlock.

  Layout: a 3x3 stem convolution (3 -> 16 channels) with batch norm, then
  three stages of two blocks each with 16, 32 and 64 channels (the second and
  third stages start with stride 2), global average pooling, and a linear
  classifier.

  Args:
    num_classes: number of output logits (default 10).
  """

  def __init__(self,num_classes=10):
    super().__init__()
    # Channel count entering the next block; updated by _make_layer.
    self.in_planes=16

    self.conv1=nn.Conv2d(3,16,kernel_size=3,
                         stride=1,padding=1,bias=False)
    self.bn1=nn.BatchNorm2d(16)
    self.layer1=self._make_layer(16,2,stride=1)
    self.layer2=self._make_layer(32,2,stride=2)
    self.layer3=self._make_layer(64,2,stride=2)
    self.linear=nn.Linear(64,num_classes)

  def _make_layer(self,planes,num_blocks,stride):
    """Build one stage of blocks with `planes` output channels.

    Only the first block uses `stride`; the remaining blocks use stride 1.
    Updates self.in_planes so the next stage sees the right input width.
    """
    strides=[stride]+[1]*(num_blocks-1)
    layers=[]
    # A distinct loop name avoids shadowing the `stride` parameter.
    for block_stride in strides:
      layers.append(BasicBlock(self.in_planes,planes,block_stride))
      self.in_planes=planes
    return nn.Sequential(*layers)

  def forward(self,x):
    """Map a batch of 3-channel images to class logits.

    Returns a tensor with one row per input sample and one column per class.
    """
    out=F.relu(self.bn1(self.conv1(x)))
    out=self.layer1(out)
    out=self.layer2(out)
    out=self.layer3(out)
    # Global average pooling. For 32x32 inputs the feature map here is 8x8,
    # so this matches the previous fixed avg_pool2d(out, 8); unlike the fixed
    # window it also keeps the flattened width at 64 for other input sizes.
    out=F.adaptive_avg_pool2d(out,1)
    out=torch.flatten(out,1)
    out=self.linear(out)
    return out

In [6]:
# Build the network and move its parameters to the selected device.
model = ResNet()
model = model.to(DEVICE)

# SGD with momentum and weight decay.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=0.005,
)

# Multiply the learning rate by 0.1 after every 50 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=50,
    gamma=0.1,
)

In [7]:
def train(model,train_loader,optimizer,epoch):
  """Run one optimization pass over every batch yielded by train_loader.

  Args:
    model: network to train; switched to training mode.
    train_loader: iterable of (inputs, labels) batches.
    optimizer: optimizer that updates the model parameters.
    epoch: current epoch number (accepted but not used here).
  """
  model.train()
  for inputs, labels in train_loader:
    inputs = inputs.to(DEVICE)
    labels = labels.to(DEVICE)

    # Clear gradients left over from the previous batch.
    optimizer.zero_grad()
    logits = model(inputs)
    batch_loss = F.cross_entropy(logits, labels)
    batch_loss.backward()
    optimizer.step()

def evaluate(model,test_loader):
  """Compute mean cross-entropy loss and accuracy over test_loader.

  Args:
    model: network to evaluate; switched to evaluation mode.
    test_loader: iterable of (inputs, labels) batches that exposes a
      `dataset` attribute whose length is the number of samples.

  Returns:
    (test_loss, test_accuracy): loss averaged per sample, and accuracy as a
    percentage.
  """
  model.eval()
  loss_sum = 0
  num_correct = 0
  with torch.no_grad():
    for inputs, labels in test_loader:
      inputs = inputs.to(DEVICE)
      labels = labels.to(DEVICE)
      logits = model(inputs)

      # Accumulate the summed (not averaged) loss of this batch.
      batch_loss = F.cross_entropy(logits, labels, reduction='sum')
      loss_sum += batch_loss.item()

      # The index with the highest score is the predicted class.
      predicted = logits.argmax(dim=1, keepdim=True)
      hits = predicted.eq(labels.view_as(predicted))
      num_correct += hits.sum().item()

  num_samples = len(test_loader.dataset)
  test_loss = loss_sum / num_samples
  test_accuracy = 100. * num_correct / num_samples
  return test_loss, test_accuracy



In [8]:
# Full training run: each epoch does one training pass, one evaluation pass on
# the test split, prints the metrics, and then advances the LR schedule.
for epoch in range(1,EPOCHS+1):
  train(model,train_loader,optimizer,epoch)
  test_loss,test_accuracy=evaluate(model,test_loader)

  print('[{}] Test Loss: {:.4f}, Accuracy: {:.2f}%'.format(epoch,test_loss,test_accuracy))

  # Step the scheduler only after this epoch's optimizer updates. Calling it
  # before optimizer.step() (as was done previously) skips the first value of
  # the learning-rate schedule and triggers a PyTorch warning since 1.1.0.
  scheduler.step()

KeyboardInterrupt: ignored