# Model Save

In [1]:
import torch
from torch import nn 
from torch import optim
import torch.nn.functional as F

from torchvision import datasets, transforms

##### 디바이스 설정

In [2]:
# Select the compute device: the GPU when CUDA is usable, the CPU otherwise.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
device

device(type='cuda')

##### 데이터 로더 설정

In [3]:
# Number of samples per mini-batch.
batch_size = 32

# Preprocessing: convert each image to a tensor, then normalize with a
# one-element mean/std of 0.5.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# MNIST training split rooted at 'dataset/' (download=True fetches it if needed).
train_dataset = datasets.MNIST(
    'dataset/',
    train=True,
    download=True,
    transform=mnist_transform,
)

# Shuffled mini-batch iterator over the training split.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

##### 딥러닝 모델 구축

In [4]:
class ResidualBlock(nn.Module):
    """Three-convolution residual block (1x1 -> 3x3 -> 1x1) with a shortcut path.

    Every convolution is followed by a ReLU, and the shortcut output is added
    after the last ReLU. When ``in_channel`` differs from ``out_channel`` the
    shortcut is a 1x1 convolution that projects the input to ``out_channel``
    channels; otherwise it is an empty ``nn.Sequential`` that passes the input
    through unchanged.

    Args:
        in_channel: Number of channels of the input feature map.
        out_channel: Number of channels produced by the block.
    """

    def __init__(self, in_channel, out_channel):
        super().__init__()

        self.in_channel = in_channel
        self.out_channel = out_channel

        # Main path. All three convolutions keep the spatial size
        # (1x1 kernels need no padding, the 3x3 kernel uses padding=1).
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(out_channel, out_channel, kernel_size=1, padding=0)

        # Shortcut path: project only when the channel counts disagree.
        projection = []
        if in_channel != out_channel:
            projection.append(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
            )
        self.shortcut = nn.Sequential(*projection)

    def forward(self, x):
        """Return ``relu(conv3(relu(conv2(relu(conv1(x)))))) + shortcut(x)``."""
        residual = self.shortcut(x)

        features = x
        for conv in (self.conv1, self.conv2, self.conv3):
            features = F.relu(conv(features))

        return features + residual

class ResNet(nn.Module):
    """Small residual CNN that returns log-probabilities over 10 classes.

    Pipeline: 3x3 convolution + ReLU -> 2x2 max pooling -> two residual
    blocks (32->64 and 64->64 channels) -> global average pooling ->
    two fully connected layers -> ``log_softmax``.

    Args:
        color: ``"gray"`` for 1-channel input or ``"rgb"`` for 3-channel input.

    Raises:
        ValueError: If ``color`` is not one of the supported modes. Without
            this check ``conv1`` would never be created and the failure would
            only surface later, inside ``forward``.
    """

    # Input channel count for each supported color mode.
    _IN_CHANNELS = {"gray": 1, "rgb": 3}

    def __init__(self, color='gray'):
        super().__init__()
        if color not in self._IN_CHANNELS:
            supported = ", ".join(repr(mode) for mode in self._IN_CHANNELS)
            raise ValueError(
                f"Unsupported color {color!r}; expected one of: {supported}"
            )

        self.conv1 = nn.Conv2d(
            self._IN_CHANNELS[color], 32, kernel_size=3, stride=1, padding=1
        )

        self.resblock1 = ResidualBlock(32, 64)
        self.resblock2 = ResidualBlock(64, 64)

        # Collapse each feature map to a single value so the classifier head
        # does not depend on the spatial size of the input.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(64, 64)
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Return per-class log-probabilities (``log_softmax`` over dim 1)."""
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.resblock1(x)
        x = self.resblock2(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep dim 0, flatten the remaining dims
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        # log_softmax output is what F.nll_loss expects as its input.
        x = F.log_softmax(x, dim=1)
        return x

##### 모델 생성

In [5]:
# Build the network with its default arguments and move it to the selected device.
model = ResNet().to(device)

In [6]:
# Show the module hierarchy (layer names and their configuration).
print(model)

ResNet(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (resblock1): ResidualBlock(
    (conv1): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (shortcut): Sequential(
      (0): Conv2d(32, 64, kernel_size=(1, 1), stride=(1, 1))
    )
  )
  (resblock2): ResidualBlock(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (conv3): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (shortcut): Sequential()
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc1): Linear(in_features=64, out_features=64, bias=True)
  (fc2): Linear(in_features=64, out_features=10, bias=True)
)


### weight만 저장

In [7]:
# Save only the state_dict (parameters and buffers), not the model class itself.
torch.save(model.state_dict(), 'model_weights.pth')

불러오기

In [9]:
# Load the saved state_dict into the existing model instance.
# map_location remaps the stored tensors onto the current device, so a file
# written on a GPU machine can still be loaded on a CPU-only one.
model.load_state_dict(torch.load('model_weights.pth', map_location=device))

<All keys matched successfully>

### 구조도 함께 저장

In [10]:
# Save the whole model object (structure and weights) rather than just the state_dict.
torch.save(model, 'model.pth')

불러오기

In [11]:
# Load the whole pickled model object.
# SECURITY: this unpickles the file, so only load files from a trusted source.
# map_location places the model on the current device even if it was saved
# on a different one (e.g. saved on GPU, loaded on a CPU-only machine).
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects a fully pickled module; pass
# weights_only=False there if the file is trusted. Confirm against the
# installed version.
model = torch.load('model.pth', map_location=device)

### 불러온 모델로 Training

In [12]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

# Plain SGD over all parameters of the loaded model.
optimizer = optim.SGD(model.parameters(), lr=0.003)
# Adjust the learning rate based on the metric passed to scheduler.step(); 'min' mode treats lower as better.
scheduler = ReduceLROnPlateau(optimizer, mode='min', verbose=True)

In [13]:
def train_loop(dataloader, model, loss_fn, optimizer, scheduler, epoch):
    """Train ``model`` for one epoch and return the loss of the last batch.

    Args:
        dataloader: Sized iterable yielding ``(x, y)`` tensor pairs.
        model: Module to train; batches are moved to the device of its
            parameters before the forward pass.
        loss_fn: Callable invoked as ``loss_fn(pred, y)`` returning a scalar
            tensor.
        optimizer: Optimizer that updates the parameters of ``model``.
        scheduler: Scheduler whose ``step`` accepts a metric; it is stepped
            once per epoch with the last batch's loss.
        epoch: Zero-based epoch index, used only for the progress messages.

    Returns:
        The loss of the final batch as a Python float.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    model.train()
    size = len(dataloader)
    # Follow the model's own device instead of relying on a module-level
    # global, so the data always ends up where the parameters are.
    target_device = next(model.parameters()).device

    # Stays None when the loader is empty, so that case can be reported clearly.
    loss = None
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(target_device), y.to(target_device)

        pred = model(x)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()  # reset accumulated gradients
        loss.backward()        # compute gradients
        optimizer.step()       # update the weights

        if batch % 100 == 0:  # report progress every 100 batches
            # Do not rebind `loss` to a float here: it must remain a tensor
            # so the final `.item()` below works even when the last batch
            # index is a multiple of 100.
            print(f"epoch {epoch+1} : [{batch}/{size}] loss : {loss.item()}")

    if loss is None:
        raise ValueError("dataloader yielded no batches; nothing to train on")

    last_loss = loss.item()
    scheduler.step(last_loss)  # one scheduler step per epoch

    return last_loss

In [14]:
# Train for three epochs, printing a short summary after each one.
for epoch in range(3):
    epoch_loss = train_loop(train_loader, model, F.nll_loss, optimizer, scheduler, epoch)
    print('                                              ')
    print(f"Epoch:{epoch+1} Total loss:{epoch_loss}" ) # Is "Total" the right word? This is the last loss, so "Last" may fit better.
    print('------------------------------------------------')
    

epoch 1 : [0/1875] loss : 2.3070342540740967
epoch 1 : [100/1875] loss : 2.3304343223571777
epoch 1 : [200/1875] loss : 2.307098865509033
epoch 1 : [300/1875] loss : 2.2731873989105225
epoch 1 : [400/1875] loss : 2.31030011177063
epoch 1 : [500/1875] loss : 2.330646514892578
epoch 1 : [600/1875] loss : 2.314789295196533
epoch 1 : [700/1875] loss : 2.3190481662750244
epoch 1 : [800/1875] loss : 2.316894292831421
epoch 1 : [900/1875] loss : 2.3055002689361572
epoch 1 : [1000/1875] loss : 2.287764549255371
epoch 1 : [1100/1875] loss : 2.3079030513763428
epoch 1 : [1200/1875] loss : 2.300863027572632
epoch 1 : [1300/1875] loss : 2.2941646575927734
epoch 1 : [1400/1875] loss : 2.289180278778076
epoch 1 : [1500/1875] loss : 2.30958890914917
epoch 1 : [1600/1875] loss : 2.312286853790283
epoch 1 : [1700/1875] loss : 2.2961041927337646
epoch 1 : [1800/1875] loss : 2.3020691871643066
                                              
Epoch:1 Total loss:2.2981860637664795
---------------------------

### Save/load and Resuming Training

학습을 하다 보면 중간에 끊기는 등 학습이 중단되는 경우가 종종 있다.<br>
이처럼 학습을 중단했다가 다시 시작할 때, epoch를 함께 저장해 두면 그 지점부터 바로 이어서 학습할 수 있다.

In [18]:
# File that the training checkpoint is written to and later read from.
checkpoint_path = 'checkpoint.pth'

epoch, weight, optimizer, loss 등등 딕셔너리 형태로 저장 가능

In [19]:
# Bundle everything stored for resuming into one dictionary and save it:
# the current value of `epoch`, the model weights, the optimizer state and
# the most recent epoch loss.
# NOTE(review): the scheduler state is not part of this checkpoint.
torch.save({
    'epoch' : epoch,
    'model_state_dict' : model.state_dict(),
    'optimizer_state_dict' : optimizer.state_dict(),
    'loss' : epoch_loss
}, checkpoint_path)

In [20]:
# Start from a freshly initialized model and optimizer, to be filled from the checkpoint.
model = ResNet().to(device)
# NOTE(review): lr here (0.0003) differs from the 0.003 used earlier; presumably
# optimizer.load_state_dict() replaces it with the saved value - confirm.
optimizer = optim.SGD(model.parameters(), lr=0.0003)

In [22]:
# Read the checkpoint dictionary back from disk.
# map_location remaps stored tensors onto the current device, so a checkpoint
# written on a GPU machine can be resumed on a CPU-only one.
checkpoint = torch.load(checkpoint_path, map_location=device)
checkpoint.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

불러온 모델 적용 방법

In [24]:
# Restore the weights and the optimizer state saved in the checkpoint.
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

# Saved value of the training loop's `epoch` variable and the loss stored with it.
epoch = checkpoint['epoch']
loss = checkpoint['loss']