## 오늘 할 내용

- cifar10 이미지 데이터를 사용한 모델링
   - convolution layer (cnn : 합성곱 신경망)
   - 직접 모델링 & 학습 




In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

In [2]:
# Preprocessing applied to every image: convert it to a tensor, then
# standardize each of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [3]:
# Build the CIFAR-10 train and test splits under ./data (downloading them
# when they are not present yet), with `transform` attached to both.
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

Files already downloaded and verified
Files already downloaded and verified


In [4]:
type(testset.data)

numpy.ndarray

In [5]:
# DataLoader takes care of batching (and optionally shuffling) a dataset.
# batch_size: how many samples are fed through the network at once
#   (ref: "Deep Learning from Scratch", Korean edition, p.115, p.239).
# shuffle: whether the samples are visited in a newly randomized order;
#   turned on for the training set, off for the test set.
trainloader = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=4,
    shuffle=True,
)
testloader = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=4,
    shuffle=False,
)

In [6]:
trainloader.batch_sampler.sampler.data_source

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
           )

In [7]:
import matplotlib.pyplot as plt
import numpy as np

def imshow(img):
    """Draw the image array `img` with matplotlib and show the figure."""
    plt.imshow(img)
    plt.show()


# Preview the first five raw training images (channel-last NumPy arrays).
for sample in trainset.data[:5]:
    imshow(sample)


<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

<Figure size 640x480 with 1 Axes>

In [8]:
trainset.data[0]

array([[[ 59,  62,  63],
        [ 43,  46,  45],
        [ 50,  48,  43],
        ...,
        [158, 132, 108],
        [152, 125, 102],
        [148, 124, 103]],

       [[ 16,  20,  20],
        [  0,   0,   0],
        [ 18,   8,   0],
        ...,
        [123,  88,  55],
        [119,  83,  50],
        [122,  87,  57]],

       [[ 25,  24,  21],
        [ 16,   7,   0],
        [ 49,  27,   8],
        ...,
        [118,  84,  50],
        [120,  84,  50],
        [109,  73,  42]],

       ...,

       [[208, 170,  96],
        [201, 153,  34],
        [198, 161,  26],
        ...,
        [160, 133,  70],
        [ 56,  31,   7],
        [ 53,  34,  20]],

       [[180, 139,  96],
        [173, 123,  42],
        [186, 144,  30],
        ...,
        [184, 148,  94],
        [ 97,  62,  34],
        [ 83,  53,  34]],

       [[177, 144, 116],
        [168, 129,  94],
        [179, 142,  87],
        ...,
        [216, 184, 140],
        [151, 118,  84],
        [123,  92,  72]]

## Training

In [9]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Small CNN mapping batches of 3x32x32 images to 10 class scores.

    Layer stack: conv(3->6, 5x5) -> ReLU -> 2x2 max-pool ->
    conv(6->16, 3x3) -> ReLU -> 2x2 max-pool -> flatten ->
    fc(576->100) -> ReLU -> fc(100->60) -> ReLU -> fc(60->10).
    """

    def __init__(self):
        super().__init__()
        # 3 input channels (RGB), 6 convolution filters, 5x5 kernel.
        self.conv1 = nn.Conv2d(in_channels=3,
                               out_channels=6,
                               kernel_size=5)  # equivalent to kernel_size=(5, 5)
        # 2x2 max pooling with stride 2; reused after both convolutions.
        self.pool = nn.MaxPool2d(2, 2)
        # 6 input channels, 16 output channels, 3x3 kernel.
        self.conv2 = nn.Conv2d(6, 16, 3)

        # For a 32x32 input the spatial size goes 32 -> 28 -> 14 -> 12 -> 6,
        # so every sample flattens to 16 * 6 * 6 = 576 features.
        self.fc1 = nn.Linear(16*6*6, 100)
        self.fc2 = nn.Linear(100, 60)
        self.fc3 = nn.Linear(60, 10)

    def forward(self, x):
        """Return raw class scores (logits) for a batch of images.

        Args:
            x: float tensor of shape (batch, 3, 32, 32).

        Returns:
            Tensor of shape (batch, 10). No softmax is applied.

        Raises:
            RuntimeError: if the flattened feature size is not 576, i.e.
                the input images are not 32x32.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten everything except the batch axis. Unlike view(-1, 576),
        # this never folds several samples into one row when the spatial
        # size is unexpected; the mismatch surfaces as an error in fc1.
        x = x.flatten(start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # No activation on the last layer: the loss expects raw scores.
        x = self.fc3(x)

        return x

### 네트워크가 어떻게 진행되는지 하나씩 실행해보기
- .size()를 통해서 어떤 방식으로 dimension이 변화하는지 check
- cifar10 network
![image](https://miro.medium.com/max/3294/1*vkQ0hXDaQv57sALXAJquxA.jpeg)

In [None]:
x = trainset.data[:4]

print(x.shape)
# plt.imshow(x)
# plt.show()


In [None]:
# PyTorch conv layers expect (batch, channel, height, width), but
# trainset.data stores images channel-last: (batch, height, width, channel).
# reshape() would keep the memory order and scramble pixels across channels,
# so the channel axis is moved with transpose instead.
x = x.transpose(0, 3, 1, 2)

# Convert the NumPy array to a float torch tensor.
x = torch.tensor(x).float()
x.size()

In [None]:
# The input carries 3 channels (RGB).
# out_channels: how many channels (filters) the layer outputs.
# kernel_size: height and width of the filter window.
conv1 = nn.Conv2d(
    in_channels=3,
    out_channels=6,
    kernel_size=(5, 5),
)
conv1


In [None]:
x = conv1(x)
x.size()

In [None]:
# Pooling reduces the spatial size of the feature map
# (ref: "Deep Learning from Scratch", Korean edition, p.240).
# Here: 2x2 max pooling with stride 2.
pool = nn.MaxPool2d(kernel_size=2, stride=2)
pool

In [None]:
x = pool(x)
x.size()

In [None]:
conv2 = nn.Conv2d(6, 16, 3)
conv2

In [None]:
x = conv2(x)
x.size()

In [None]:
x = pool(x)
x.size()

In [None]:
# Flatten: reshape the pooled feature maps into rows of 16*6*6 = 576 values
# so they match the input size of the fully connected layer that follows.
x = x.view(-1, 16*6*6)
x.size()

In [None]:
fc1 = nn.Linear(16*6*6, 100)
fc1

In [None]:
x = fc1(x)
x.size()

In [None]:

fc2 = nn.Linear(100, 60)
fc3 = nn.Linear(60, 10)

In [None]:
x = fc2(x)
x = fc3(x)


In [None]:
x.size()
x

In [None]:
F.softmax(x, dim=1)

### 실제 만든 network에 데이터 태우기

In [10]:
net = Net()

In [None]:
# Take the first four raw training images (channel-last NumPy array).
test = trainset.data[:4]

# Move the channel axis to position 1 to get the (batch, channel, height,
# width) layout Conv2d expects; reshape() alone would scramble pixels
# across channels because it keeps the original memory order.
test = test.transpose(0, 3, 1, 2)
test = torch.tensor(test).float()

In [None]:
test.size()

In [None]:
# NOTE: despite the name, `probs` holds the network's raw output scores
# (logits); Net.forward applies no softmax after its last layer.
probs = net(test)

In [None]:
probs.size()

In [None]:
probs

In [26]:
import torch.optim as optim


# Loss function: cross-entropy error
# (ref: "Deep Learning from Scratch", Korean edition, p.113).
criterion = nn.CrossEntropyLoss()

# Optimizer: decides how the parameters are updated during training.
# SGD = Stochastic Gradient Descent (see p.189 of the same book).
# lr is the learning rate (p.131); tune it when the loss jumps around too
# much or barely moves during training.
# https://copycode.tistory.com/166

# optimizer = optim.Adam(net.parameters(), lr=0.001)
optimizer = optim.SGD(params=net.parameters(), lr=0.001)


In [25]:
trainloader

<torch.utils.data.dataloader.DataLoader at 0x7fa377ca7a90>

In [27]:
# One pass over the training loader, printing the mean loss of every
# 1000 mini-batches.
running_loss = 0.0
for i, (inputs, labels) in enumerate(trainloader):
    # Reset the gradients accumulated in the previous iteration.
    optimizer.zero_grad()

    # Forward pass, then cross-entropy loss against the labels.
    outputs = net(inputs)
    loss = criterion(outputs, labels)

    # Backpropagation: compute gradients for all trainable parameters.
    loss.backward()
    # Update the parameters using the gradients just computed.
    optimizer.step()

    running_loss += loss.item()
    if (i + 1) % 1000 == 0:
        print(f'[{i+1}] loss : {running_loss / 1000}')
        running_loss = 0.0

[1000] loss : 2.3027240235805513
[2000] loss : 2.300682072877884
[3000] loss : 2.29876478433609
[4000] loss : 2.2957096543312074
[5000] loss : 2.2921512386798857
[6000] loss : 2.284779597759247
[7000] loss : 2.271058345556259
[8000] loss : 2.248588036894798
[9000] loss : 2.198488947272301
[10000] loss : 2.1421441687345504
[11000] loss : 2.1028348878622056
[12000] loss : 2.063841153383255


In [15]:
# Pull a single mini-batch from the loader for step-by-step inspection.
inputs, labels = next(iter(trainloader))

In [16]:
inputs[0]

tensor([[[ 0.0510, -0.1922, -0.0588,  ...,  0.3333, -0.0275,  0.0588],
         [-0.0510, -0.4510, -0.4118,  ...,  0.0667, -0.0431, -0.0667],
         [-0.3804, -0.7490, -0.7020,  ..., -0.1137,  0.3020, -0.0353],
         ...,
         [ 0.1765,  0.2784,  0.2784,  ..., -0.2157, -0.3098, -0.2471],
         [ 0.2706,  0.3647,  0.3255,  ...,  0.0667, -0.0196, -0.1922],
         [ 0.3412,  0.3176,  0.2784,  ...,  0.2314, -0.0196, -0.0667]],

        [[ 0.2549,  0.0118,  0.1373,  ...,  0.3569,  0.2392,  0.3569],
         [ 0.0745, -0.4431, -0.3961,  ...,  0.0039,  0.1451,  0.2627],
         [-0.3569, -0.7490, -0.7098,  ..., -0.0824,  0.3333,  0.2314],
         ...,
         [ 0.4510,  0.5059,  0.4980,  ..., -0.3098, -0.4196, -0.3490],
         [ 0.4980,  0.5686,  0.5216,  ..., -0.0824, -0.1686, -0.3569],
         [ 0.4980,  0.5137,  0.4745,  ...,  0.1216, -0.1922, -0.1843]],

        [[-0.1843, -0.4667, -0.2471,  ...,  0.0667, -0.4588, -0.3804],
         [-0.3098, -0.7098, -0.5451,  ..., -0

In [17]:
outputs = net(inputs)
outputs

tensor([[ 0.0152, -0.0907, -0.0668, -0.0836, -0.0629, -0.0522, -0.1290, -0.1026,
         -0.0321, -0.0910],
        [ 0.0245, -0.0798, -0.0692, -0.0951, -0.0696, -0.0477, -0.1089, -0.1036,
         -0.0227, -0.0878],
        [ 0.0169, -0.0786, -0.0664, -0.0889, -0.0648, -0.0409, -0.1148, -0.0943,
         -0.0288, -0.0820],
        [ 0.0094, -0.0960, -0.0595, -0.0872, -0.0694, -0.0488, -0.1341, -0.0977,
         -0.0376, -0.0955]], grad_fn=<AddmmBackward>)

In [18]:
labels

tensor([6, 4, 4, 1])

In [19]:
loss = criterion(outputs, labels)
loss

tensor(2.3253, grad_fn=<NllLossBackward>)

In [20]:
loss.backward()

In [24]:
net.parameters

<bound method Module.parameters of Net(
  (conv1): Conv2d(3, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=60, bias=True)
  (fc3): Linear(in_features=60, out_features=10, bias=True)
)>