<a href="https://colab.research.google.com/github/Deep-of-Machine/AI_Academy/blob/main/Deep_or_wide.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## GPU로 학습
런타임 - 런타임 유형 변경 - GPU    
이후 신경망을 학습할 때는 전부 런타임 유형을 GPU를 사용합니다.

In [16]:
import numpy as np
import matplotlib.pyplot as plt
import time

import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from torch import nn
from torch.nn import functional as F
from torch import optim

In [17]:
print(torch.cuda.is_available())

True


In [18]:
device = torch.device('cuda')
print(device)

cuda


In [19]:
transform = transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.5), (0.5))] )
train_dataset = datasets.MNIST(root = './data', train = True, download = True, transform = transform)
test_dataset = datasets.MNIST(root = './data', train = False, download = True, transform = transform)

In [20]:
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: ./data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=0.5, std=0.5)
           )

In [21]:
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

In [22]:
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [33]:
class Deep_net(nn.Module):
    def __init__(self):
        super(Deep_net, self).__init__()
        self.layer1 = nn.Linear(784, 200)
        self.layer2 = nn.Linear(200, 200)
        self.layer5 = nn.Linear(200, 200)
        self.layer6 = nn.Linear(200, 100)
        self.layer9 = nn.Linear(100, 100)
        self.layer10 = nn.Linear(100, 10)
       
    
    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        x = self.layer2(x)
        x = F.relu(x)
        x = F.relu(x)
        x = self.layer5(x)
        x = F.relu(x)
        x = self.layer6(x)
        x = F.relu(x)
        x = self.layer9(x)
        x = F.relu(x)
        out = self.layer10(x)
        return out


.to(device)를 사용해서 모델, 손실 함수, 데이터를 GPU에 할당

In [34]:
model = Deep_net().to(device)

print(model)

Deep_net(
  (layer1): Linear(in_features=784, out_features=200, bias=True)
  (layer2): Linear(in_features=200, out_features=200, bias=True)
  (layer5): Linear(in_features=200, out_features=200, bias=True)
  (layer6): Linear(in_features=200, out_features=100, bias=True)
  (layer9): Linear(in_features=100, out_features=100, bias=True)
  (layer10): Linear(in_features=100, out_features=10, bias=True)
)


In [35]:
loss_func = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [36]:
epochs = 20

for epoch in range(epochs):
    running_loss = 0.0
    start_time = time.time()

    for inputs, labels in train_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        loss = loss_func(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    end_time = time.time()

    print('Epoch : {}, Loss : {}, time : {}'.format(epoch + 1, running_loss/len(train_loader), end_time - start_time))


Epoch : 1, Loss : 0.6319234291595945, time : 15.11596941947937
Epoch : 2, Loss : 0.4105009633833284, time : 15.066384315490723
Epoch : 3, Loss : 0.37137569620538113, time : 14.862892866134644
Epoch : 4, Loss : 0.3530712251120539, time : 14.874088525772095
Epoch : 5, Loss : 0.35126022899201687, time : 14.898571252822876
Epoch : 6, Loss : 0.32834353202632244, time : 15.020339965820312
Epoch : 7, Loss : 0.33492428227178833, time : 15.000990152359009
Epoch : 8, Loss : 0.35445038131527556, time : 14.805065155029297
Epoch : 9, Loss : 0.3473804171548596, time : 14.935123920440674
Epoch : 10, Loss : 0.34905021613848997, time : 14.883912324905396
Epoch : 11, Loss : 0.34872392777091404, time : 15.095586776733398
Epoch : 12, Loss : 0.37169171903909903, time : 14.836329936981201
Epoch : 13, Loss : 0.36016530942703995, time : 14.78528118133545
Epoch : 14, Loss : 0.602942379124002, time : 14.671258926391602
Epoch : 15, Loss : 0.9938453582049941, time : 14.787569999694824
Epoch : 16, Loss : 0.5921468

In [37]:
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim = 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # 아래에 추가 설명

    print(total)
    print(100*correct / total)

10000
88.26


In [38]:
class WIde_net(nn.Module):
    def __init__(self):
        super(WIde_net, self).__init__()
        self.layer1 = nn.Linear(784, 3000)
        self.layer3 = nn.Linear(3000, 10)
    
    def forward(self, x):
        x = self.layer1(x)
        x = F.relu(x)
        out = self.layer3(x)

        return out

In [39]:
model = WIde_net().to(device)

print(model)

WIde_net(
  (layer1): Linear(in_features=784, out_features=3000, bias=True)
  (layer3): Linear(in_features=3000, out_features=10, bias=True)
)


In [40]:
loss_func = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = 0.01)

In [41]:
epochs = 20

for epoch in range(epochs):
    running_loss = 0.0
    start_time = time.time()

    for inputs, labels in train_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)

        loss = loss_func(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    end_time = time.time()

    print('Epoch : {}, Loss : {}, time : {}'.format(epoch + 1, running_loss/len(train_loader), end_time - start_time))

Epoch : 1, Loss : 0.6603460444379717, time : 15.174087047576904
Epoch : 2, Loss : 0.26054990094607827, time : 15.232318878173828
Epoch : 3, Loss : 0.24336057305554432, time : 15.22301459312439
Epoch : 4, Loss : 0.23815322038072195, time : 15.159773588180542
Epoch : 5, Loss : 0.22388558222382055, time : 15.26331877708435
Epoch : 6, Loss : 0.219382869030065, time : 15.350952625274658
Epoch : 7, Loss : 0.21693759791052608, time : 15.368610620498657
Epoch : 8, Loss : 0.21212219063446783, time : 15.395427227020264
Epoch : 9, Loss : 0.21347045892877364, time : 15.380370616912842
Epoch : 10, Loss : 0.2152462761614845, time : 15.214811086654663
Epoch : 11, Loss : 0.2084739117849388, time : 15.238700151443481
Epoch : 12, Loss : 0.2077753596466535, time : 15.246835470199585
Epoch : 13, Loss : 0.20622095010920502, time : 15.195438623428345
Epoch : 14, Loss : 0.19324525080319407, time : 15.285901069641113
Epoch : 15, Loss : 0.20177922773409263, time : 15.271769285202026
Epoch : 16, Loss : 0.202591

In [42]:
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim = 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # 아래에 추가 설명

    print(total)
    print(100*correct / total)

10000
93.75


In [None]:
PATH = '/content/drive/MyDrive/day_5/Mnist_models.pth'
print(PATH)

/content/drive/MyDrive/day_5/Mnist_models.pth


In [None]:
torch.save(model.state_dict(), PATH)

model2를 새로 생성하고, model과 비교




In [None]:
model2 = MyNet()
print(list(model.parameters())[1][:10])  # 위에서 학습한 model의 파라미터 일부
print(list(model2.parameters())[1][:10]) # 새로 생성한 model2의 파라미터 일부

tensor([ 0.0391,  0.0099,  0.0136,  0.0273, -0.0233,  0.0291,  0.0323, -0.0026,
        -0.0011, -0.0178], device='cuda:0', grad_fn=<SliceBackward>)
tensor([-0.0308, -0.0029, -0.0232,  0.0174, -0.0251,  0.0156,  0.0244,  0.0175,
        -0.0282, -0.0191], grad_fn=<SliceBackward>)


In [None]:
model2 = model2.to(device)

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model2(inputs)
        predicted = torch.argmax(outputs, dim = 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # 아래에 추가 설명

    print(total)
    print(100*correct / total)

10000
7.23


PATH에 저장된 가중치를 model2에 로드

In [None]:
model2.load_state_dict(torch.load(PATH))
print(list(model2.parameters())[1][:10])

tensor([ 0.0391,  0.0099,  0.0136,  0.0273, -0.0233,  0.0291,  0.0323, -0.0026,
        -0.0011, -0.0178], device='cuda:0', grad_fn=<SliceBackward>)


In [None]:
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model2(inputs)
        predicted = torch.argmax(outputs, dim = 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()  # 아래에 추가 설명

    print(total)
    print(100*correct / total)

10000
91.56


## 학습 가능한 파라미터 수 계산

In [None]:
model.parameters()

<generator object Module.parameters at 0x7fa6bf24fe50>

In [None]:
for i in model.parameters():
    print(i)
    break

Parameter containing:
tensor([[ 0.0203,  0.0161,  0.0051,  ...,  0.0233,  0.0130,  0.0025],
        [-0.0202,  0.0307,  0.0271,  ..., -0.0331, -0.0319,  0.0152],
        [ 0.0015, -0.0115, -0.0267,  ...,  0.0234,  0.0164, -0.0153],
        ...,
        [ 0.0218, -0.0197, -0.0231,  ...,  0.0265,  0.0114, -0.0140],
        [-0.0271, -0.0251,  0.0068,  ..., -0.0216,  0.0216,  0.0022],
        [ 0.0327,  0.0228,  0.0061,  ...,  0.0132, -0.0330, -0.0232]],
       device='cuda:0', requires_grad=True)


In [None]:
print(i.requires_grad == True)

True


In [None]:
print(i.shape)

torch.Size([128, 784])


In [None]:
print(i.shape[0] * i.shape[1])

100352


In [None]:
print(i.numel())

100352


In [None]:
n = 0
for i in model.parameters():
    if i.requires_grad == True:
        n += i.numel()

print(n)

104938


직접 계산

In [None]:
128*784 + 128 + 32*128 + 32 + 10*32 + 10

104938

## 모델 평가

In [None]:
for mini_batch in test_loader:
    first_batch = mini_batch
    break

inputs = first_batch[0]
labels = first_batch[1]
outputs = model(inputs.view(inputs.shape[0], -1).to(device))

In [None]:
sample_number = 0
plt.imshow(inputs[sample_number].squeeze().numpy(), cmap = 'gray')
plt.show()
print('예측 : {}'.format(torch.argmax(outputs[sample_number]).item()))
print('라벨 : {}'.format(labels[sample_number].item()))

In [None]:
correct = 0
total = 0

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs = inputs.view(inputs.shape[0], -1).to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        predicted = torch.argmax(outputs, dim = 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print(total)
    print(100*correct / total)