# Model Save & Load Practice

In [1]:
import os
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torchsummary import summary

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Define VGG11

In [5]:
import torchvision
vgg = torchvision.models.vgg11(pretrained=False).to(device)
summary (vgg, batch_size = -1, input_size = (3,32,32), device = 'cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
              ReLU-2           [-1, 64, 32, 32]               0
         MaxPool2d-3           [-1, 64, 16, 16]               0
            Conv2d-4          [-1, 128, 16, 16]          73,856
              ReLU-5          [-1, 128, 16, 16]               0
         MaxPool2d-6            [-1, 128, 8, 8]               0
            Conv2d-7            [-1, 256, 8, 8]         295,168
              ReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 256, 8, 8]         590,080
             ReLU-10            [-1, 256, 8, 8]               0
        MaxPool2d-11            [-1, 256, 4, 4]               0
           Conv2d-12            [-1, 512, 4, 4]       1,180,160
             ReLU-13            [-1, 512, 4, 4]               0
           Conv2d-14            [-1, 51

In [6]:
class VGG11(nn.Module):
    
    def __init__(self, num_classes=1000):
        super().__init__()
        self.conv1_1 = self.make_conv_relu(3, 64)
        
        self.conv2_1 = self.make_conv_relu(64, 128)

        self.conv3_1 = self.make_conv_relu(128, 256)
        self.conv3_2 = self.make_conv_relu(256, 256)

        self.conv4_1 = self.make_conv_relu(256, 512)
        self.conv4_2 = self.make_conv_relu(512, 512)

        self.conv5_1 = self.make_conv_relu(512, 512)
        self.conv5_2 = self.make_conv_relu(512, 512)
        
        self.maxpool = nn.MaxPool2d(kernel_size=2, stride=2)
        
        self.relu = nn.ReLU()
        
        self.linear_layers = nn.Sequential(
            nn.Linear(512, 4096),
            nn.ReLU(),
            nn.Dropout2d(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout2d(0.5),
            nn.Linear(4096, num_classes)
        )

    def make_conv_relu(self, in_channels, out_channel):
        layers = []
        layers += [nn.Conv2d(in_channels, out_channel, kernel_size=3, padding=1),  #using kernel size 3, padding 1 -> keep the spatial dimension of tensor
                   nn.ReLU()]
        return nn.Sequential(*layers)
    
        
    def forward(self, x):
        # x shape: [Batchsize, 3, 32, 32]
        
        x = self.conv1_1(x) # [Batchsize, 64, 32, 32]
        x = self.maxpool(x) # [Batchsize, 64, 16, 16]
        
        x = self.conv2_1(x) # [Batchsize, 128, 16, 16]
        x = self.maxpool(x) # [Batchsize, 128, 8, 8]
        
        x = self.conv3_1(x) # [Batchsize, 256, 8, 8]
        x = self.conv3_2(x) # [Batchsize, 256, 8, 8]
        x = self.maxpool(x) # [Batchsize, 256, 4, 4]
        
        x = self.conv4_1(x) # [Batchsize, 512, 4, 4]
        x = self.conv4_2(x) # [Batchsize, 512, 4, 4]
        x = self.maxpool(x) # [Batchsize, 512, 2, 2]
        
        x = self.conv5_1(x) # [Batchsize, 512, 2, 2]
        x = self.conv5_2(x) # [Batchsize, 512, 2, 2]
        x = self.maxpool(x) # [Batchsize, 512, 1, 1]

        x = x.view(x.size(0), -1) # [Batchsize, 512]
        
        x = self.linear_layers(x) # [Batchsize, num_classes]

        
        return x

In [10]:
vgg = VGG11(10).to(device)
summary(vgg, batch_size=-1, input_size=(3, 32, 32), device='cuda')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
              ReLU-2           [-1, 64, 32, 32]               0
         MaxPool2d-3           [-1, 64, 16, 16]               0
            Conv2d-4          [-1, 128, 16, 16]          73,856
              ReLU-5          [-1, 128, 16, 16]               0
         MaxPool2d-6            [-1, 128, 8, 8]               0
            Conv2d-7            [-1, 256, 8, 8]         295,168
              ReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 256, 8, 8]         590,080
             ReLU-10            [-1, 256, 8, 8]               0
        MaxPool2d-11            [-1, 256, 4, 4]               0
           Conv2d-12            [-1, 512, 4, 4]       1,180,160
             ReLU-13            [-1, 512, 4, 4]               0
           Conv2d-14            [-1, 51

## python dict type
- key 와 value로 이루어진 datatype
- torch의 weight는 key : layer 이름 과 value : weights 의 형태로 구성

In [16]:
# dict_ex, print dict, show state_dict, weights for layer
a = {'name': 'dongmin', 'age' : 26, 'list' : [] }
a['name'] = 'kwonyoung'
print(a['name'])

kwonyoung


In [18]:
for k, v in a.items():
    print(k, v)

name kwonyoung
age 26
list []


In [24]:
state_dict = vgg.state_dict()
print(state_dict.keys())
weight = state_dict['conv1_1.0.weight']
print(weight.shape)

odict_keys(['conv1_1.0.weight', 'conv1_1.0.bias', 'conv2_1.0.weight', 'conv2_1.0.bias', 'conv3_1.0.weight', 'conv3_1.0.bias', 'conv3_2.0.weight', 'conv3_2.0.bias', 'conv4_1.0.weight', 'conv4_1.0.bias', 'conv4_2.0.weight', 'conv4_2.0.bias', 'conv5_1.0.weight', 'conv5_1.0.bias', 'conv5_2.0.weight', 'conv5_2.0.bias', 'linear_layers.0.weight', 'linear_layers.0.bias', 'linear_layers.3.weight', 'linear_layers.3.bias', 'linear_layers.6.weight', 'linear_layers.6.bias'])
torch.Size([64, 3, 3, 3])


In [26]:
vgg = vgg.to(device)
vgg.train()

In [36]:
# make checkpoints dir, save & load epoch 0 state_dict
import os
if not os.path.exists('check'):
    os.makedirs('check')
torch.save(vgg.state_dict(), 'check/vgg_epoch0.pth')
dictionary = {'state_dict' : vgg.state_dict(),
             'loss' : 1.0,
             'epoch' : 10}
torch.save(dictionary, 'check/test.pth')
state_dict0 = torch.load('check/vgg_epoch0.pth')
state_dict_test = torch.load('check/test.pth')

In [38]:
state_dict_test.keys()

dict_keys(['state_dict', 'loss', 'epoch'])

In [39]:
# Loading and normalizing CIFAR-10
import torch
import torchvision
import torchvision.transforms as transforms

import torch.optim as optim


transform = transforms.Compose([
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) #normalization value for cifar10
    ])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                      download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                         shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                      download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4,
                                        shuffle=False, num_workers=2)


classes = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(vgg.parameters(), lr=0.001)

print(len(trainloader))
print(len(testloader))

Files already downloaded and verified
Files already downloaded and verified
782
2500


## Save models during training

In [43]:
### Train the network
vgg.to(device)
vgg.train()
epochs = 3
for epoch in range(1, epochs+1):
    running_loss = 0.0
    correct = 0
    total = 0
    for i, data in enumerate(trainloader):
        # get the inputs
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        # implement
        # forward + backward + optimize + loss aggregate
        optimizer.zero_grad()
        outputs = vgg(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        
        # print statistics
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                 (epoch, i + 1, running_loss / 100))
            running_loss = 0.0
    
    #save models
    if epoch % 3 == 0:
        torch.save(
            {
                'state_dict': vgg.state_dict(),
                'epoch': epoch,
                'running_loss' : running_loss
            },
            f'./check/vgg_epoch{epoch}.pth')
    torch.save(vgg.state_dict(), f'./check/vgg_latest.pth')
    
print('Finished Training')

[1,   100] loss: 1.177
[1,   200] loss: 1.175
[1,   300] loss: 1.175
[1,   400] loss: 1.117
[1,   500] loss: 1.095
[1,   600] loss: 1.080
[1,   700] loss: 1.047
[2,   100] loss: 0.978
[2,   200] loss: 0.979
[2,   300] loss: 0.995
[2,   400] loss: 0.982
[2,   500] loss: 0.980
[2,   600] loss: 0.945
[2,   700] loss: 0.905
[3,   100] loss: 0.878
[3,   200] loss: 0.837
[3,   300] loss: 0.874
[3,   400] loss: 0.845
[3,   500] loss: 0.847
[3,   600] loss: 0.845
[3,   700] loss: 0.834
Finished Training


In [44]:
#load model
vgg_before = VGG11(10)
checkpoint = torch.load("./check/vgg_epoch1.pth")

In [45]:
vgg_before.load_state_dict(checkpoint['state_dict'])

<All keys matched successfully>

In [46]:
state_dict_before = vgg_before.state_dict()
state_dict_after = vgg.state_dict()

print(state_dict_before['conv1_1.0.weight'][0,0])
print(state_dict_after['conv1_1.0.weight'][0,0])

tensor([[ 0.1272, -0.0377, -0.0771],
        [-0.0828, -0.1401,  0.0353],
        [-0.1506,  0.1945,  0.1298]])
tensor([[ 0.1399, -0.0530, -0.0927],
        [-0.0767, -0.1372,  0.0416],
        [-0.1508,  0.2058,  0.1322]], device='cuda:0')
