<a href="https://colab.research.google.com/github/ElaYJ/Study_Deep_Learning/blob/main/Framework/2_PyTorch/24_%EB%AA%A8%EB%8D%B8%20%ED%95%99%EC%8A%B5%20%EB%B0%8F%20%EC%A0%80%EC%9E%A5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 모델 학습 model train

In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F

from torchvision import datasets, transforms

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
batch_size = 32

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('dataset/', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0.5,), std=(0.5,))
                   ])),
    batch_size=batch_size,
    shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 16337137.44it/s]


Extracting dataset/MNIST/raw/train-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 470808.35it/s]


Extracting dataset/MNIST/raw/train-labels-idx1-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 4469937.26it/s]


Extracting dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 18695317.73it/s]

Extracting dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to dataset/MNIST/raw






In [10]:
x, y = next(iter(train_loader))
x.shape, y.shape

(torch.Size([32, 1, 28, 28]), torch.Size([32]))

In [4]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(ResidualBlock, self).__init__()

        self.in_channel, self.out_channel = in_channel, out_channel

        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(out_channel, out_channel, kernel_size=1, padding=0)

        if in_channel != out_channel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        out = out + self.shortcut(x)
        return out

class ResNet(nn.Module):
    def __init__(self, color='gray'):
        super(ResNet, self).__init__()
        if color == "gray":
            self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        elif color == "rgb":
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)

        self.resblock1 = ResidualBlock(32, 64)
        self.resblock2 = ResidualBlock(64, 64)

        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(64, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.resblock1(x)
        x = self.resblock2(x)
        x = self.avgpool(x)
        x = torch.flatten(x,1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [5]:
model = ResNet().to(device)

## Training Logic

### - Learning Rate Scheduler

- Train Loop을 epoch 만큼 도는 동안 loss 값이 변동이 없다고 판단되면

    scheduler가 learning_rate를 일정 비율로 줄여 준다.

In [6]:
optimizer = optim.SGD(model.parameters(), lr=0.003)

In [7]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# scheduler = ReduceLROnPlateau(optimizer, mode='min', verbose=True)
# UserWarning: The verbose parameter is deprecated.
# Please use get_last_lr() to access the learning rate.


# Loss를 사용할 경우, mode='min'
# Accuracy를 사용할 경우, mode='max'
scheduler = ReduceLROnPlateau(optimizer, mode='min')
scheduler

<torch.optim.lr_scheduler.ReduceLROnPlateau at 0x7c5f7e89c7f0>

In [8]:
def train_loop(dataloader, model, loss_fn, optimizer, scheduler, epoch):
    model.train()
    size = len(dataloader)
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)

        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 500 == 0:
            print(f"Epoch {epoch+1} : [{batch}/{size}] loss : {loss.item()}")

    scheduler.step(loss)

    return loss.item()

In [9]:
for epoch in range(10):
    loss = train_loop(train_loader, model, F.nll_loss, optimizer, scheduler, epoch)
    print(f"epoch:{epoch+1} --> loss:{loss}" )


Epoch 1 : [0/1875] loss : 2.3129355907440186
Epoch 1 : [100/1875] loss : 2.3093440532684326
Epoch 1 : [200/1875] loss : 2.2986299991607666
Epoch 1 : [300/1875] loss : 2.310380697250366
Epoch 1 : [400/1875] loss : 2.306818962097168
Epoch 1 : [500/1875] loss : 2.293048858642578
Epoch 1 : [600/1875] loss : 2.305344581604004
Epoch 1 : [700/1875] loss : 2.2863781452178955
Epoch 1 : [800/1875] loss : 2.293748378753662
Epoch 1 : [900/1875] loss : 2.3091046810150146
Epoch 1 : [1000/1875] loss : 2.3005316257476807
Epoch 1 : [1100/1875] loss : 2.2996487617492676
Epoch 1 : [1200/1875] loss : 2.288085699081421
Epoch 1 : [1300/1875] loss : 2.3020105361938477
Epoch 1 : [1400/1875] loss : 2.3154802322387695
Epoch 1 : [1500/1875] loss : 2.2936341762542725
Epoch 1 : [1600/1875] loss : 2.2988884449005127
Epoch 1 : [1700/1875] loss : 2.3006489276885986
Epoch 1 : [1800/1875] loss : 2.29475998878479
epoch:1 --> loss:2.304104804992676
Epoch 2 : [0/1875] loss : 2.29803729057312
Epoch 2 : [100/1875] loss : 2.

# 모델 저장 model save

### - weights만 저장

In [12]:
torch.save(model.state_dict(), 'model_weights.pth')

In [13]:
model.load_state_dict(torch.load('model_weights.pth'))

<All keys matched successfully>

### - 구조도 함께 저장

In [14]:
torch.save(model, 'model.pth')

In [15]:
model = torch.load('model.pth')

In [16]:
import torchsummary

torchsummary.summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 14, 14]           2,112
            Conv2d-3           [-1, 64, 14, 14]          36,928
            Conv2d-4           [-1, 64, 14, 14]           4,160
            Conv2d-5           [-1, 64, 14, 14]           2,112
     ResidualBlock-6           [-1, 64, 14, 14]               0
            Conv2d-7           [-1, 64, 14, 14]           4,160
            Conv2d-8           [-1, 64, 14, 14]          36,928
            Conv2d-9           [-1, 64, 14, 14]           4,160
    ResidualBlock-10           [-1, 64, 14, 14]               0
AdaptiveAvgPool2d-11             [-1, 64, 1, 1]               0
           Linear-12                   [-1, 64]           4,160
           Linear-13                   [-1, 10]             650
Total params: 95,690
Trainable params: 

### - Training

In [None]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

optimizer = optim.SGD(model.parameters(), lr=0.003)
scheduler = ReduceLROnPlateau(optimizer, mode='min')

def train_loop(dataloader, model, loss_fn, optimizer, scheduler, epoch):
    model.train()
    size = len(dataloader)
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)

        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 500 == 0:
            print(f"Epoch {epoch+1} : [{batch}/{size}] loss : {loss.item()}")

    scheduler.step(loss)

    return loss.item()

# 학습
for epoch in range(1):
    loss = train_loop(train_loader, model, F.nll_loss, optimizer, scheduler, epoch)
    print(f"epoch:{epoch+1} --> loss:{loss}" )

## Save, Load and Resuming Training

In [17]:
checkpoint_path = 'checkpoint.pth'

In [18]:
torch.save(obj={
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': loss
}, f=checkpoint_path)

In [19]:
model = ResNet().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=0.003)

In [20]:
checkpoint = torch.load(checkpoint_path)
checkpoint #--> dict type

{'epoch': 0,
 'model_state_dict': OrderedDict([('conv1.weight',
               tensor([[[[-0.2972, -0.1145, -0.1495],
                         [ 0.1710, -0.1052,  0.1524],
                         [-0.3200, -0.1475, -0.3139]]],
               
               
                       [[[ 0.0175,  0.1060, -0.1014],
                         [-0.3445,  0.1334,  0.3303],
                         [-0.0819, -0.1433, -0.2550]]],
               
               
                       [[[-0.0597,  0.1087, -0.4019],
                         [ 0.0588, -0.0049, -0.1224],
                         [-0.0538,  0.3970,  0.0740]]],
               
               
                       [[[-0.2486,  0.3459,  0.2764],
                         [ 0.1423, -0.2708, -0.2418],
                         [-0.0896,  0.0239, -0.0816]]],
               
               
                       [[[-0.2803, -0.2782, -0.4065],
                         [ 0.0997, -0.1734,  0.2472],
                         [ 0.4975,  0.0396, 

In [21]:
checkpoint.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

In [22]:
model.load_state_dict(checkpoint['model_state_dict'])
model

ResNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (resblock1): ResidualBlock(
    (conv1): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (shortcut): Sequential(
      (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (resblock2): ResidualBlock(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (shortcut): Sequential()
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)

In [23]:
epoch = checkpoint['epoch']
epoch

0

In [24]:
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.003
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

In [25]:
loss = checkpoint['loss']
loss

1.7948617935180664

👆 위 정보를 가지고 이어서 학습 시킬 수 있다.