<a href="https://colab.research.google.com/github/Deep-of-Machine/AI_Academy/blob/main/5_4_mlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## GPU로 학습
런타임 - 런타임 유형 변경 - GPU    
이후 신경망을 학습할 때는 항상 런타임 유형을 GPU로 설정해서 사용합니다.

In [1]:
import os
import time

import numpy as np
import matplotlib.pyplot as plt

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim

In [2]:
# Report whether PyTorch can see a CUDA device in this runtime.
cuda_available = torch.cuda.is_available()
print(cuda_available)

True


In [3]:
# Prefer the GPU, but fall back to the CPU so the rest of the notebook still
# runs on a runtime without CUDA instead of failing at the first .to(device).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [5]:
# ToTensor converts each image to a float tensor; Normalize then applies
# (x - mean) / std with mean = std = 0.5.
# mean/std are passed as one-element tuples (one entry per channel).
# `(0.5)` without the trailing comma is just the float 0.5, not a sequence.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Download MNIST into ./data (if not already there) and build the train/test splits.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [6]:
# Mini-batches of 64 samples: the training split is reshuffled on every pass,
# the test split is read in a fixed order.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [7]:
class MyNet(nn.Module):
    """Three-layer fully connected network: 784 -> 128 -> 32 -> 10.

    ReLU follows the first two linear layers; the output of the last layer
    is returned as-is (no activation applied).
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(in_features=784, out_features=128)
        self.layer2 = nn.Linear(in_features=128, out_features=32)
        self.layer3 = nn.Linear(in_features=32, out_features=10)

    def forward(self, x):
        """Map a tensor whose last dimension is 784 to 10 output values."""
        hidden = F.relu(self.layer1(x))
        hidden = F.relu(self.layer2(hidden))
        return self.layer3(hidden)


.to(device)를 사용해서 모델, 손실 함수, 데이터를 GPU에 할당

In [8]:
# Build the network, then move its parameters onto the selected device.
model = MyNet()
model = model.to(device)

print(model)

MyNet(
  (layer1): Linear(in_features=784, out_features=128, bias=True)
  (layer2): Linear(in_features=128, out_features=32, bias=True)
  (layer3): Linear(in_features=32, out_features=10, bias=True)
)


In [9]:
# Cross-entropy loss on the network outputs, optimised with plain SGD
# over every parameter of `model`.
loss_func = nn.CrossEntropyLoss()
loss_func = loss_func.to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.001)

In [13]:
epochs = 20

# Make the training mode explicit: an evaluation cell may have switched the
# module to eval mode before this cell is (re-)run.
model.train()

for epoch in range(epochs):
    running_loss = 0.0
    # perf_counter is a monotonic clock meant for measuring elapsed time;
    # time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()

    for inputs, labels in train_loader:
        # Flatten each image to a vector before feeding the linear layers.
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        loss = loss_func(outputs, labels)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    end_time = time.perf_counter()

    # Reported loss is the mean of the per-batch losses for this epoch.
    print('Epoch : {}, Loss : {}, time : {}'.format(epoch + 1, running_loss/len(train_loader), end_time - start_time))


Epoch : 1, Loss : 0.4245168231030517, time : 15.054803133010864
Epoch : 2, Loss : 0.40886643970571857, time : 14.90153455734253
Epoch : 3, Loss : 0.3960065868522313, time : 14.810235261917114
Epoch : 4, Loss : 0.3854086296136445, time : 14.796237707138062
Epoch : 5, Loss : 0.3764024623898047, time : 14.84169626235962
Epoch : 6, Loss : 0.3683288246552065, time : 14.849615573883057
Epoch : 7, Loss : 0.3612538923078509, time : 14.828006982803345
Epoch : 8, Loss : 0.3546877834779113, time : 14.819313049316406
Epoch : 9, Loss : 0.34868133504158144, time : 14.897125482559204
Epoch : 10, Loss : 0.3435527858640085, time : 14.978192806243896
Epoch : 11, Loss : 0.33847475675409283, time : 14.748807430267334
Epoch : 12, Loss : 0.33412315308857066, time : 14.836584568023682
Epoch : 13, Loss : 0.3296830621498353, time : 14.745555639266968
Epoch : 14, Loss : 0.3256849224077486, time : 14.87225890159607
Epoch : 15, Loss : 0.3218736618455412, time : 14.666774988174438
Epoch : 16, Loss : 0.318292004808

In [14]:
# Measure classification accuracy of `model` on the test set.
correct = 0
total = 0

# Switch to evaluation mode before inference. MyNet has no dropout or
# batch-norm layers, so this does not change its outputs, but it keeps the
# cell correct if such layers are added later.
model.eval()

with torch.no_grad():  # gradients are not needed for evaluation
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim = 1)  # index of the largest output per sample
        total += labels.size(0)
        # (predicted == labels) is a boolean tensor; sum() counts the matches
        # and item() converts that count to a plain Python number.
        correct += (predicted == labels).sum().item()

print(total)
print(100*correct / total)

10000
91.56


## 모델 저장과 불러오기
연동한 구글 드라이브 경로에 저장하면, 이후에 학습된 모델을 불러와서 사용 가능

In [4]:
# Mount Google Drive at /content/drive so files can be saved to and read
# from it.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


In [15]:
# Destination file for the trained weights, inside the mounted Drive.
save_dir = '/content/drive/MyDrive/day_5'
PATH = save_dir + '/Mnist_models.pth'
print(PATH)

/content/drive/MyDrive/day_5/Mnist_models.pth


In [16]:
# torch.save does not create missing folders, so make sure the target
# directory exists before writing.
checkpoint_dir = os.path.dirname(PATH)
if checkpoint_dir:
    os.makedirs(checkpoint_dir, exist_ok=True)

# Store only the parameter tensors (state_dict), not the module object itself.
torch.save(model.state_dict(), PATH)

model2를 새로 생성하고, model과 비교




In [17]:
# Create a second, freshly initialised network and print the first 10 entries
# of the second parameter tensor for both networks:
# first the trained `model`, then the new `model2`.
model2 = MyNet()
for net in (model, model2):
    second_param_head = list(net.parameters())[1][:10]
    print(second_param_head)

tensor([ 0.0391,  0.0099,  0.0136,  0.0273, -0.0233,  0.0291,  0.0323, -0.0026,
        -0.0011, -0.0178], device='cuda:0', grad_fn=<SliceBackward>)
tensor([-0.0308, -0.0029, -0.0232,  0.0174, -0.0251,  0.0156,  0.0244,  0.0175,
        -0.0282, -0.0191], grad_fn=<SliceBackward>)


In [20]:
# Move model2's parameters to the same device as the data batches.
model2 = model2.to(device=device)

In [21]:
# Score model2 on the test set: print the sample count and the accuracy in %.
correct, total = 0, 0

with torch.no_grad():  # no gradients are needed for scoring
    for images, targets in test_loader:
        flat_images = images.reshape(images.shape[0], -1).to(device)
        targets = targets.to(device)
        predicted = model2(flat_images).argmax(dim=1)
        total += targets.shape[0]
        # eq() marks the matching predictions; sum().item() counts them.
        correct += predicted.eq(targets).sum().item()

print(total)
print(100 * correct / total)

10000
7.23


PATH에 저장된 가중치를 model2에 로드

In [22]:
# map_location remaps the stored tensors onto the device in use, so a
# checkpoint saved from a CUDA model also loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file, so only load checkpoints you trust.
# Newer PyTorch releases offer weights_only=True for this; confirm that your
# version supports it before relying on it.
state_dict = torch.load(PATH, map_location=device)
model2.load_state_dict(state_dict)

# Put the restored network in evaluation mode before using it for inference.
model2.eval()

print(list(model2.parameters())[1][:10])

tensor([ 0.0391,  0.0099,  0.0136,  0.0273, -0.0233,  0.0291,  0.0323, -0.0026,
        -0.0011, -0.0178], device='cuda:0', grad_fn=<SliceBackward>)


In [23]:
# Re-run the test-set accuracy measurement for model2.
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        n_samples = inputs.shape[0]
        inputs = inputs.view(n_samples, -1).to(device)
        labels = labels.to(device)
        scores = model2(inputs)
        hits = scores.argmax(dim=1) == labels  # True where the prediction matches
        total += labels.shape[0]
        correct += hits.sum().item()

print(total)
print(100 * correct / total)

10000
91.56


## 학습 가능한 파라미터 수 계산

In [24]:
# parameters() returns a generator, so the cell output shows only the
# generator object, not the tensors.
model.parameters(recurse=True)

<generator object Module.parameters at 0x7fa6bf24fe50>

In [25]:
# Pull only the first parameter tensor out of the generator and show it.
# `i` is kept as the name because the following cells inspect it.
i = next(iter(model.parameters()))
print(i)

Parameter containing:
tensor([[ 0.0203,  0.0161,  0.0051,  ...,  0.0233,  0.0130,  0.0025],
        [-0.0202,  0.0307,  0.0271,  ..., -0.0331, -0.0319,  0.0152],
        [ 0.0015, -0.0115, -0.0267,  ...,  0.0234,  0.0164, -0.0153],
        ...,
        [ 0.0218, -0.0197, -0.0231,  ...,  0.0265,  0.0114, -0.0140],
        [-0.0271, -0.0251,  0.0068,  ..., -0.0216,  0.0216,  0.0022],
        [ 0.0327,  0.0228,  0.0061,  ...,  0.0132, -0.0330, -0.0232]],
       device='cuda:0', requires_grad=True)


In [26]:
# requires_grad is already a bool: True means the tensor is trainable.
print(i.requires_grad)

True


In [27]:
# Shape of the parameter tensor picked above.
print(i.size())

torch.Size([128, 784])


In [28]:
# Element count computed by hand: size of dim 0 times size of dim 1.
print(i.size(0) * i.size(1))

100352


In [29]:
# Same element count, obtained directly from PyTorch.
print(torch.numel(i))

100352


In [30]:
# Total number of trainable scalars: sum numel() over every parameter
# tensor that has requires_grad set.
n = sum(param.numel() for param in model.parameters() if param.requires_grad)

print(n)

104938


직접 계산

In [31]:
# Per linear layer: out_features * in_features weights plus out_features biases.
sum(n_out * n_in + n_out for n_in, n_out in ((784, 128), (128, 32), (32, 10)))

104938

## 모델 평가

In [None]:
# Take only the first mini-batch from the test loader.
first_batch = next(iter(test_loader))

inputs = first_batch[0]   # images, left un-flattened so they can be plotted
labels = first_batch[1]

# Inference only: run the forward pass without building an autograd graph,
# as the other evaluation cells do.
with torch.no_grad():
    outputs = model(inputs.view(inputs.shape[0], -1).to(device))

In [None]:
# Show one image from the batch next to the model's prediction and the true label.
sample_number = 0

image = inputs[sample_number].squeeze().numpy()   # drop size-1 dims for imshow
predicted_digit = torch.argmax(outputs[sample_number]).item()
true_digit = labels[sample_number].item()

plt.imshow(image, cmap='gray')
plt.show()
print('예측 : {}'.format(predicted_digit))
print('라벨 : {}'.format(true_digit))

In [None]:
# Accuracy of `model` over the whole test set.
total = 0
correct = 0

with torch.no_grad():  # evaluation only, so skip gradient tracking
    for batch_inputs, batch_labels in test_loader:
        flat_inputs = batch_inputs.view(batch_inputs.size(0), -1).to(device)
        batch_labels = batch_labels.to(device)
        predicted = model(flat_inputs).argmax(dim=1)
        matches = torch.eq(predicted, batch_labels)
        total += batch_labels.size(0)
        correct += matches.sum().item()

print(total)
print(100 * correct / total)