In [1]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import torch.nn.init
import torch.nn as nn
import torch.optim as optim

In [2]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 랜덤 시드 고정
torch.manual_seed(777)

# GPU 사용 가능일 경우 랜덤 시드 고정
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [3]:
epochs = 30
learning_rate=0.001
batch_size=256

In [4]:
cifar10_train=datasets.CIFAR10(root='data/cifar10_data/',
                            train=True,
                            transform=transforms.ToTensor(),
                            download=True)
cifar10_test=datasets.CIFAR10(root='data/cifar10_data/',
                           train=False,
                           transform=transforms.ToTensor(),
                           download=True)

Files already downloaded and verified
Files already downloaded and verified


In [5]:
print(cifar10_train)
print(cifar10_test)

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: data/cifar10_data/
    Split: Train
    StandardTransform
Transform: ToTensor()
Dataset CIFAR10
    Number of datapoints: 10000
    Root location: data/cifar10_data/
    Split: Test
    StandardTransform
Transform: ToTensor()


In [6]:
train_loader=DataLoader(dataset=cifar10_train, 
                       batch_size=batch_size,
                       shuffle=True, 
                       drop_last=True)

test_loader=DataLoader(dataset=cifar10_test, 
                       batch_size=batch_size,
                       shuffle=True, 
                       drop_last=True)

In [7]:
for X, Y in train_loader:
    print(X.size())
    print(Y.size())
    break

for X, Y in test_loader:
    print(X.size())
    print(Y.size())
    break

torch.Size([256, 3, 32, 32])
torch.Size([256])
torch.Size([256, 3, 32, 32])
torch.Size([256])


In [8]:
class CNN(nn.Module):
  def __init__(self):
    super(CNN, self).__init__()

    self.layer1=nn.Sequential(
      nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2) #(32,16,16)
    )
    self.layer2=nn.Sequential(
      nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2) #(64,8,8)
    )
    self.layer3=nn.Sequential(
      nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
      nn.ReLU(),
      nn.MaxPool2d(kernel_size=2, stride=2) #(128,4,4)
    )

    self.fc1=nn.Linear(4*4*128,128, bias=True)
    self.fc2=nn.Linear(128, 64)
    self.fc3=nn.Linear(64,10)
    #nn.init.uniform_(self.fc1.weight)

  def forward(self, x):
    out=self.layer1(x)
    out=self.layer2(out)
    out=self.layer3(out)
    out=out.view(out.size(0), -1) #[256, 128,4,4] => [256, 128*4*4]
    out=self.fc1(out)
    out=self.fc2(out)
    out=self.fc3(out)
    return out

In [9]:
model=CNN().to(device)
crit=nn.CrossEntropyLoss().to(device)
optimizer=optim.Adam(model.parameters(), lr=learning_rate)

In [10]:
cnt_train_batch=len(train_loader)
cnt_test_batch=len(test_loader)
print(cnt_train_batch, cnt_test_batch)

195 39


In [11]:
for epoch in range(epochs):
  avg_cost=0

  for X, Y in train_loader:
    X=X.to(device)
    Y=Y.to(device)

    y_hat=model(X)
    cost=crit(y_hat, Y)
    
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    avg_cost+=cost/cnt_train_batch
  print('epoch:{}, cost:{}'.format(epoch, avg_cost.item()))

epoch:0, cost:1.7127306461334229
epoch:1, cost:1.3015518188476562
epoch:2, cost:1.1125946044921875
epoch:3, cost:0.9720243811607361
epoch:4, cost:0.8836045265197754
epoch:5, cost:0.8035370111465454
epoch:6, cost:0.7481309175491333
epoch:7, cost:0.6990618705749512
epoch:8, cost:0.6563477516174316
epoch:9, cost:0.5969754457473755
epoch:10, cost:0.5704761147499084
epoch:11, cost:0.5278379321098328
epoch:12, cost:0.483417272567749
epoch:13, cost:0.4427681267261505
epoch:14, cost:0.4257531464099884
epoch:15, cost:0.38029295206069946
epoch:16, cost:0.3520752191543579
epoch:17, cost:0.3307543694972992
epoch:18, cost:0.2998584508895874
epoch:19, cost:0.2724444270133972
epoch:20, cost:0.2565607726573944
epoch:21, cost:0.2289971113204956
epoch:22, cost:0.21110428869724274
epoch:23, cost:0.18857702612876892
epoch:24, cost:0.1870376318693161
epoch:25, cost:0.16421228647232056
epoch:26, cost:0.15190686285495758
epoch:27, cost:0.13823173940181732
epoch:28, cost:0.13128876686096191
epoch:29, cost:0.1

In [12]:
with torch.no_grad():
  accuracys=0
  for X, Y in test_loader:
    X=X.to(device)
    Y=Y.to(device)
    pred=model(X)
    accuracy=(pred.argmax(dim=1)==Y).sum()
    accuracys+=accuracy
    print(pred.argmax(dim=1))
    print(Y)
    print(accuracy)
print(accuracys/(cnt_test_batch*batch_size))

tensor([1, 5, 2, 8, 3, 3, 0, 2, 7, 8, 5, 0, 7, 3, 9, 2, 4, 1, 2, 4, 8, 2, 4, 9,
        7, 3, 3, 4, 4, 7, 6, 3, 9, 4, 3, 0, 3, 0, 8, 5, 5, 9, 3, 9, 8, 0, 4, 7,
        3, 9, 2, 7, 0, 5, 7, 8, 7, 9, 5, 3, 3, 0, 5, 7, 1, 6, 4, 7, 7, 9, 8, 8,
        0, 3, 8, 5, 0, 1, 0, 4, 6, 6, 2, 9, 5, 9, 4, 3, 7, 6, 8, 0, 1, 5, 6, 6,
        7, 7, 5, 5, 0, 8, 7, 9, 5, 9, 2, 4, 3, 9, 0, 2, 4, 4, 0, 1, 1, 9, 9, 2,
        2, 9, 1, 8, 4, 9, 3, 6, 7, 5, 6, 9, 5, 0, 5, 3, 0, 6, 5, 3, 7, 4, 9, 5,
        2, 2, 9, 3, 7, 1, 2, 5, 9, 2, 6, 5, 5, 6, 4, 6, 5, 8, 9, 3, 0, 3, 4, 2,
        5, 9, 5, 2, 7, 9, 9, 3, 3, 1, 9, 5, 2, 5, 7, 1, 7, 4, 0, 0, 6, 5, 8, 2,
        0, 1, 1, 9, 1, 8, 1, 4, 7, 3, 5, 8, 1, 8, 1, 4, 0, 9, 9, 6, 9, 8, 2, 2,
        6, 5, 3, 2, 6, 9, 0, 8, 5, 6, 9, 9, 2, 5, 6, 2, 6, 6, 1, 4, 8, 6, 4, 5,
        2, 9, 2, 9, 5, 5, 8, 1, 9, 2, 0, 5, 2, 2, 0, 7])
tensor([1, 5, 2, 8, 3, 7, 0, 2, 5, 8, 3, 7, 7, 7, 9, 4, 4, 1, 6, 2, 8, 4, 4, 9,
        5, 3, 3, 4, 4, 7, 6, 3, 9, 4, 4, 0, 3, 0, 8, 5, 5, 9, 3