# 1. Modeling 

 - `nn.Sequential`
 - Sub-class of `nn.Module`

In [1]:
import torch
from torch import nn
import torch.nn.functional as F

## nn의 모듈

In [2]:
nn.Conv2d(
    in_channels=3,
    out_channels=32,
    kernel_size=3,
    stride=1,
    padding=1,
    bias=False
)

Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)

In [3]:
nn.Linear(
    in_features=784,
    out_features=500,
    bias=False
)

Linear(in_features=784, out_features=500, bias=False)

### `nn.Sequential`


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
model = nn.Sequential(
    nn.Linear(784, 15),
    nn.Sigmoid(),
    nn.Linear(15, 10),
    nn.Sigmoid()
)

In [6]:
print(model)

Sequential(
  (0): Linear(in_features=784, out_features=15, bias=True)
  (1): Sigmoid()
  (2): Linear(in_features=15, out_features=10, bias=True)
  (3): Sigmoid()
)


In [8]:
import torchsummary

In [10]:
torchsummary.summary(model, (784,))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 15]          11,775
           Sigmoid-2                   [-1, 15]               0
            Linear-3                   [-1, 10]             160
           Sigmoid-4                   [-1, 10]               0
Total params: 11,935
Trainable params: 11,935
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.05
Estimated Total Size (MB): 0.05
----------------------------------------------------------------


### `nn.module` sub class

- `__init__()` 에서 Layers를 초기화 함. 
- `forward` 함수를 구현

In [11]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(20, 50, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(4900, 500)
        self.fc2 = nn.Linear(500, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4900)
        x = F.relu(self.fc1(x))
        x = F.log_softmax(self.fc2(x), dim=1)
        
        return x

In [12]:
model = Net()

In [13]:
torchsummary.summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 20, 28, 28]             200
            Conv2d-2           [-1, 50, 14, 14]           9,050
            Linear-3                  [-1, 500]       2,450,500
            Linear-4                   [-1, 10]           5,010
Total params: 2,464,760
Trainable params: 2,464,760
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.20
Params size (MB): 9.40
Estimated Total Size (MB): 9.60
----------------------------------------------------------------


### 간단한 ResNet 구현

In [18]:
class ResidulBlock(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(ResidulBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(out_channel, out_channel, kernel_size=1, padding=0)
        
        if in_channel != out_channel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
            )
        else:
            self.shortcut = nn.Sequential()
            
    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        out += self.shortcut(x)
        return out  
        

In [19]:
class ResNet(nn.Module):
    
    def __init__(self, color="gray"):
        super(ResNet, self).__init__()
        if color == "gray":
            self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        elif color == "rgb":
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
            
        self.resblock1 = ResidulBlock(32, 64)
        self.resblock2 = ResidulBlock(64, 64)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc1 = nn.Linear(64, 64)
        self.fc2 = nn.Linear(64, 10)
        
    def forward(self, x):
        
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.resblock1(x)   
        x = self.resblock2(x)   
        x = self.avgpool(x)   
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x
        

In [22]:
model = ResNet().to(device)

In [24]:
torchsummary.summary(model, (1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
            Conv2d-2           [-1, 64, 14, 14]           2,112
            Conv2d-3           [-1, 64, 14, 14]          36,928
            Conv2d-4           [-1, 64, 14, 14]           4,160
            Conv2d-5           [-1, 64, 14, 14]           2,112
      ResidulBlock-6           [-1, 64, 14, 14]               0
            Conv2d-7           [-1, 64, 14, 14]           4,160
            Conv2d-8           [-1, 64, 14, 14]          36,928
            Conv2d-9           [-1, 64, 14, 14]           4,160
     ResidulBlock-10           [-1, 64, 14, 14]               0
AdaptiveAvgPool2d-11             [-1, 64, 1, 1]               0
           Linear-12                   [-1, 64]           4,160
           Linear-13                   [-1, 10]             650
Total params: 95,690
Trainable params: 

# 2. Training logic

이전까지 진행 내용

In [30]:
import torch
from torch import nn 
from torch import optim
import torch.nn.functional as F

from torchvision import datasets, transforms

In [31]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [32]:
batch_size = 32

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('dataset/', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize(mean=(0.5,), std=(0.5,))
                   ])),
    batch_size=batch_size,
    shuffle=True)

In [33]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channel, out_channel):
        super(ResidualBlock, self).__init__()
        
        self.in_channel, self.out_channel = in_channel, out_channel
        
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(out_channel, out_channel, kernel_size=1, padding=0)
        
        if in_channel != out_channel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, padding=0)
            )
        else:
            self.shortcut = nn.Sequential()
    
    def forward(self, x):
        out = F.relu(self.conv1(x))
        out = F.relu(self.conv2(out))
        out = F.relu(self.conv3(out))
        out = out + self.shortcut(x)
        return out

class ResNet(nn.Module):
    def __init__(self, color='gray'):
        super(ResNet, self).__init__()
        if color == "gray":
            self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        elif color == "rgb":
            self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
            
        self.resblock1 = ResidualBlock(32, 64)
        self.resblock2 = ResidualBlock(64, 64)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1,1))
        self.fc1 = nn.Linear(64, 64)
        self.fc2 = nn.Linear(64, 10)
        
    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = self.resblock1(x)
        x = self.resblock2(x)
        x = self.avgpool(x)
        x = torch.flatten(x,1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [35]:
model = ResNet().to(device)

In [36]:
optimizer = optim.Adam(model.parameters(), lr=0.03)

In [37]:
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [38]:
scheduler = ReduceLROnPlateau(optimizer, mode="min", verbose=True)

In [41]:
def train_loop(dataloader, model, loss_fn, optimizer, scheduler, epoch):
    # 학습
    model.train()
    size = len(dataloader)
    for batch, (x, y) in enumerate(dataloader):
        x, y = x.to(device), y.to(device)
        
        pred = model(x)
        loss = loss_fn(pred, y)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if batch%100==0:
            loss = loss.item()
            print("Epoch : {} : [{}/{}] loss : {}".format(epoch, batch, size, loss))
            
    scheduler.step(loss)
    
    return loss.item()

In [42]:
for epoch in range(10):
    loss = train_loop(train_loader, model, F.nll_loss, optimizer, scheduler, epoch)
    print(f"Epoch : {epoch} loss : {loss}")
# epoch 4 진행도중 정지, 아래 실습 진행

Epoch : 0 : [0/1875] loss : 8.253190994262695
Epoch : 0 : [100/1875] loss : 2.2681257724761963
Epoch : 0 : [200/1875] loss : 1.6840119361877441
Epoch : 0 : [300/1875] loss : 1.3234138488769531
Epoch : 0 : [400/1875] loss : 0.8275289535522461
Epoch : 0 : [500/1875] loss : 1.2665024995803833
Epoch : 0 : [600/1875] loss : 1.1125831604003906
Epoch : 0 : [700/1875] loss : 1.1044458150863647
Epoch : 0 : [800/1875] loss : 0.6034008860588074
Epoch : 0 : [900/1875] loss : 0.7538595795631409
Epoch : 0 : [1000/1875] loss : 0.538875937461853
Epoch : 0 : [1100/1875] loss : 1.2849065065383911
Epoch : 0 : [1200/1875] loss : 0.5863266587257385
Epoch : 0 : [1300/1875] loss : 0.9200574159622192
Epoch : 0 : [1400/1875] loss : 0.5049909353256226
Epoch : 0 : [1500/1875] loss : 0.6024105548858643
Epoch : 0 : [1600/1875] loss : 0.5764040946960449
Epoch : 0 : [1700/1875] loss : 0.49787086248397827
Epoch : 0 : [1800/1875] loss : 0.36794930696487427
Epoch : 0 loss : 0.7851536273956299
Epoch : 1 : [0/1875] loss 

KeyboardInterrupt: 

# 3. Model Save

## weight만 저장

In [49]:
torch.save(model.state_dict(), "./checkpoint/model_weights.pth")

In [51]:
model.load_state_dict(torch.load("./checkpoint/model_weights.pth"))

<All keys matched successfully>

### 구조도 함께 저장

In [52]:
torch.save(model, "./checkpoint/model.pth")

In [53]:
model = torch.load("./checkpoint/model.pth")

그리고 불러온 모델 또는 가중치로 학습 ㄱㄱ

## Save, Load and Resuming Training
- 학습 중 끊어야 되는 상황 또는 불시의 상황을 대비하여
- checkpoint 생성가능

In [55]:
checkpoint_path = "./checkpoint/checkpoint.pth"

In [56]:
torch.save({
    "epoch":epoch,
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
    "loss": loss
}, checkpoint_path)

In [70]:
model = ResNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.03)

In [71]:
checkpoint = torch.load(checkpoint_path)
checkpoint

{'epoch': 4,
 'model_state_dict': OrderedDict([('conv1.weight',
               tensor([[[[-4.8147e-01, -1.7826e+00,  1.5993e+00],
                         [ 1.4017e-01,  6.2568e-01,  1.3641e+00],
                         [ 2.3104e+00, -5.8971e-01, -1.1125e+00]]],
               
               
                       [[[-1.0574e-01,  1.3818e-01,  7.1724e-01],
                         [ 7.2353e-01,  6.1679e-01,  9.6337e-01],
                         [-2.4174e+00, -4.9676e-01, -1.1158e+00]]],
               
               
                       [[[ 1.8327e-01,  8.2706e-02,  4.9558e-02],
                         [-4.0852e-02, -9.9535e-03, -3.6118e-02],
                         [ 9.4717e-02, -1.7774e-02, -6.6554e-02]]],
               
               
                       [[[ 1.8812e+00,  2.5469e-01, -8.8553e-01],
                         [-1.9690e-01,  3.0844e-01, -8.4311e-01],
                         [ 1.7081e+00,  2.4167e-01, -7.5510e-01]]],
               
               
        

In [72]:
checkpoint.keys()

dict_keys(['epoch', 'model_state_dict', 'optimizer_state_dict', 'loss'])

In [73]:
model.load_state_dict(checkpoint["model_state_dict"])
save_epoch = checkpoint["epoch"]
optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
loss = checkpoint["loss"]

In [74]:
print("Load epoch : {}".format(save_epoch))

Load epoch : 4


In [76]:
# 정보를 불러와서 다시 학습시작
for epoch in range(save_epoch, 5):
    loss = train_loop(train_loader, model, F.nll_loss, optimizer, scheduler, epoch)
    print(f"Epoch : {epoch} loss : {loss}")

Epoch : 4 : [0/1875] loss : 0.4260527491569519
Epoch : 4 : [100/1875] loss : 0.4304354190826416
Epoch : 4 : [200/1875] loss : 0.13532590866088867
Epoch : 4 : [300/1875] loss : 0.20264580845832825
Epoch : 4 : [400/1875] loss : 0.39990535378456116
Epoch : 4 : [500/1875] loss : 0.8675734400749207
Epoch : 4 : [600/1875] loss : 0.6058720350265503
Epoch : 4 : [700/1875] loss : 0.28290966153144836
Epoch : 4 : [800/1875] loss : 0.6826493740081787
Epoch : 4 : [900/1875] loss : 0.2445315569639206
Epoch : 4 : [1000/1875] loss : 0.5505619645118713
Epoch : 4 : [1100/1875] loss : 0.45937439799308777
Epoch : 4 : [1200/1875] loss : 0.5163819789886475
Epoch : 4 : [1300/1875] loss : 0.46145346760749817
Epoch : 4 : [1400/1875] loss : 0.41588208079338074
Epoch : 4 : [1500/1875] loss : 0.29518380761146545
Epoch : 4 : [1600/1875] loss : 0.5683276653289795
Epoch : 4 : [1700/1875] loss : 0.4063260555267334
Epoch : 4 : [1800/1875] loss : 0.5476012229919434
Epoch : 4 loss : 0.2227802574634552
