[View in Colaboratory](https://colab.research.google.com/github/Hyunjulie/KR-Reading-Image-Segmentation-Papers/blob/master/Pytorch_Using_Pretrained.ipynb)

In [1]:
#Load Pytorch 
!pip3 install http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
!pip3 install torchvision


Collecting torch==0.3.0.post4 from http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl
[?25l  Downloading http://download.pytorch.org/whl/cu80/torch-0.3.0.post4-cp36-cp36m-linux_x86_64.whl (592.3MB)
[K    100% |████████████████████████████████| 592.3MB 117.6MB/s 
Installing collected packages: torch
Successfully installed torch-0.3.0.post4
Collecting torchvision
[?25l  Downloading https://files.pythonhosted.org/packages/ca/0d/f00b2885711e08bd71242ebe7b96561e6f6d01fdb4b9dcf4d37e2e13c5e1/torchvision-0.2.1-py2.py3-none-any.whl (54kB)
[K    100% |████████████████████████████████| 61kB 2.3MB/s 
Collecting pillow>=4.1.1 (from torchvision)
[?25l  Downloading https://files.pythonhosted.org/packages/62/94/5430ebaa83f91cc7a9f687ff5238e26164a779cca2ef9903232268b0a318/Pillow-5.3.0-cp36-cp36m-manylinux1_x86_64.whl (2.0MB)
[K    100% |████████████████████████████████| 2.0MB 11.0MB/s 
Installing collected packages: pillow, torchvision
  Found existing installation

In [0]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler 
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time 
import os
import copy

In [0]:
# Examples of models supported by torchvision.models.
# No `pretrained` argument is passed here, so no pretrained weights are requested.
# Note that `vgg16` holds the batch-norm variant (vgg16_bn).
vgg16 = models.vgg16_bn()
resnet50 = models.resnet50()

In [11]:
# Exploration of the ResNet50 model.
# When reusing a pretrained model (ResNet in this example), the output size of the
# last FC layer has to be changed to match the classes of your own data.
# Start by exploring what the ResNet looks like.

# Simplest option: print the names of the top-level child modules.
# (the loop variable `params` actually holds each child module, not its parameters)
for name, params in resnet50.named_children():
  print(name)
  
# # To also list every parameter inside each child module:
# for name, child in resnet50.named_children():
#   for name2, params in child.named_parameters():
#     print(name, name2)
    
# The listing shows that the last child is `fc`, so inspect it directly.
print(resnet50.fc)


conv1
bn1
relu
maxpool
layer1
layer2
layer3
layer4
avgpool
fc
Linear(in_features=2048, out_features=1000)


In [15]:
# Or, more simply: read the sizes of the current final layer and swap it out.
num_ftrs = resnet50.fc.in_features
out_ftrs = resnet50.fc.out_features  # only read for reference; not used when building the new layer
resnet50.fc = nn.Linear(num_ftrs,10) 
# The fc output size is set to the number of output classes needed (10 here).
print(resnet50.fc) # the layer has been replaced

Linear(in_features=2048, out_features=10)


### 원하는Layer 를 Freeze 해서 쓰기

In [0]:
# When params.requires_grad is False, that weight is not learned during back propagation.

# Option A: freeze every layer.
for params in resnet50.parameters():
  params.requires_grad = False

# Option B: freeze only the leading layers.
# `ct` is incremented before the check, so `ct < 7` covers the first 6 top-level
# children (conv1 ... layer2 in the listing printed earlier), not the first 3.
# NOTE(review): when run right after Option A everything is already frozen,
# so this loop changes nothing; use one option or the other.
ct = 0 
for name, child in resnet50.named_children():
  ct += 1 
  if ct < 7: 
    for name2, params in child.named_parameters():
      params.requires_grad = False 


## 다른 예시 - SqueezeNet
- Final layer 가 sequential 컨테이너 안에 있음 
- 안에 있는 모든 child layer를 보고 나서 out 개수 재지정 하고 다시 컨테이너 안에 넣어야 한다 


In [17]:
# Build SqueezeNet 1.1 (no `pretrained` argument is passed) and list its top-level children.
squeeze = torchvision.models.squeezenet1_1()
for name, params in squeeze.named_children():
  print(name)
  
# To see which child layers sit inside `features` and `classifier`:
print(list(squeeze.features.children()))
print(list(squeeze.classifier.children()))

features
classifier
[Conv2d (3, 64, kernel_size=(3, 3), stride=(2, 2)), ReLU(inplace), MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)), Fire(
  (squeeze): Conv2d (64, 16, kernel_size=(1, 1), stride=(1, 1))
  (squeeze_activation): ReLU(inplace)
  (expand1x1): Conv2d (16, 64, kernel_size=(1, 1), stride=(1, 1))
  (expand1x1_activation): ReLU(inplace)
  (expand3x3): Conv2d (16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (expand3x3_activation): ReLU(inplace)
), Fire(
  (squeeze): Conv2d (128, 16, kernel_size=(1, 1), stride=(1, 1))
  (squeeze_activation): ReLU(inplace)
  (expand1x1): Conv2d (16, 64, kernel_size=(1, 1), stride=(1, 1))
  (expand1x1_activation): ReLU(inplace)
  (expand3x3): Conv2d (16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (expand3x3_activation): ReLU(inplace)
), MaxPool2d(kernel_size=(3, 3), stride=(2, 2), dilation=(1, 1)), Fire(
  (squeeze): Conv2d (128, 32, kernel_size=(1, 1), stride=(1, 1))
  (squeeze_activation): ReLU(inplace)

In [19]:
# How many in_channels does the classifier's conv layer (index 1) have?
in_ftrs = squeeze.classifier[1].in_channels
print(in_ftrs)
# (This can also be read off the shape of the last convolution layer printed in the cell above.)

# Number of output channels of the same conv layer.
out_ftrs = squeeze.classifier[1].out_channels
print(out_ftrs)


512
1000


마지막 convolutional layer 의 output 사이즈를 바꾸기 


squeeze net 은  마지막 layer 가 그냥 fc layer 로 되어 있는게 아니라 
- Classifier라고 불리는 컨테이너 안에 마지막 layer가 포함되어 있기 때문에 
- 컨테이너를 리스트로 풀고 
- 마지막 layer를 바꾸고 
- 다시 리스트를 컨테이너에 넣어서 
- 연결해야한다 

In [20]:
# Convert the classifier's layers into a plain Python list.
# NOTE: despite its name, `features` holds the *classifier* layers here.
features = list(squeeze.classifier.children())
print(features)

[Dropout(p=0.5), Conv2d (512, 1000, kernel_size=(1, 1), stride=(1, 1)), ReLU(inplace), AvgPool2d(kernel_size=13, stride=1, padding=0, ceil_mode=False, count_include_pad=True)]


In [0]:
# Replace the last conv layer so that its output channels equal the number of target classes.
features[1] = nn.Conv2d(in_ftrs, 100, kernel_size=(1,1), stride=(1,1))
# NOTE(review): the stock classifier pools with kernel_size=13 (see the list printed above);
# 12 presumably matches a smaller input resolution -- confirm against your input size.
features[3] = nn.AvgPool2d(12, stride=1)
# Older torchvision releases (including the 0.2.1 installed above) reshape the output to
# (batch, self.num_classes) inside SqueezeNet.forward(), so the attribute must follow
# the new conv width or the forward pass fails.
squeeze.num_classes = features[1].out_channels


In [0]:
# Put the edited list of layers back into a Sequential container.
squeeze.classifier = nn.Sequential(*features)

# Confirm that the classifier has changed.
print(list(squeeze.classifier.children()))

## 또 다른 예시 - VGG Net 
- 얘 또한 fc layer 가 컨테이너 안에 들어있음. 
- 컨테이너를 읽은 후에 마지막 fc layer 를 우리의 데이터셋에 맞춰서 바꾸면 된다 

In [25]:
# `pretrained` is a boolean flag; the string 'imagenet' only worked because
# any non-empty string is truthy.
vgg = torchvision.models.vgg19(pretrained=True)

print(list(vgg.classifier.children()))
# The printed list shows the last linear (fc) layer takes 4096 inputs and produces 1000 outputs.

Downloading: "https://download.pytorch.org/models/vgg19-dcbb9e9d.pth" to /root/.torch/models/vgg19-dcbb9e9d.pth
100%|██████████| 574673361/574673361 [00:10<00:00, 55789494.88it/s]


[Linear(in_features=25088, out_features=4096), ReLU(inplace), Dropout(p=0.5), Linear(in_features=4096, out_features=4096), ReLU(inplace), Dropout(p=0.5), Linear(in_features=4096, out_features=1000)]


In [27]:
# Alternative way to find out how many input features the last layer has.
num_ftrs = vgg.classifier[-1].in_features
# Index -1 picks the last layer without needing to know how many layers there are.
print(num_ftrs)

# Turn the layers into a list and drop only the last one.
features = list(vgg.classifier.children())[:-1]

# Append a new final layer whose output size is the desired number of classes (100 here).
features.extend([nn.Linear(num_ftrs, 100)])

# Put the layers back into a Sequential container.
vgg.classifier = nn.Sequential(*features)
print(list(vgg.classifier.children()))



4096
[Linear(in_features=25088, out_features=4096), ReLU(inplace), Dropout(p=0.5), Linear(in_features=4096, out_features=4096), ReLU(inplace), Dropout(p=0.5), Linear(in_features=4096, out_features=100)]


### 또 다른 예시 ~ Inception V3


In [28]:
# `pretrained` is a boolean flag (the string 'imagenet' was merely truthy).
inception = torchvision.models.inception_v3(pretrained=True)

Downloading: "https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth" to /root/.torch/models/inception_v3_google-1a9a5a14.pth
100%|██████████| 108857766/108857766 [00:13<00:00, 7813116.32it/s] 


In [29]:
# How to train only the layers you want.
# Done in two steps: freeze everything first, then unfreeze only the layers you want.

# 1) Freeze all layers.
for i, param in inception.named_parameters():
  param.requires_grad = False 

# The model was pretrained on ImageNet, so it ends with a 1000-class output.
# Change the output size of the last layer to the number you want.
num_ftrs = inception.fc.in_features # read how many inputs fc takes -> the new layer must take the same
inception.fc = nn.Linear(num_ftrs, 10) # replace with the desired number of outputs

#print(inception.fc)
# (uncomment to confirm the change)


# 2) Unfreeze every top-level child that comes AFTER "Conv2d_4a_3x3".
# `ct` accumulates the names already visited and the name is appended only after the
# check, so the condition becomes true for the children iterated after that layer
# (the layer itself and everything before it stay frozen).
ct = []
for name, child in inception.named_children():
  if "Conv2d_4a_3x3" in ct:
    for params in child.parameters():
      params.requires_grad = True
  ct.append(name)
# Check which parameters are frozen (only the name inside each child is printed, not the child name).
for name, child in inception.named_children():
  for name2, params in child.named_parameters():
    print(name2, params.requires_grad)


conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
conv.weight False
bn.weight False
bn.bias False
branch1x1.conv.weight True
branch1x1.bn.weight True
branch1x1.bn.bias True
branch5x5_1.conv.weight True
branch5x5_1.bn.weight True
branch5x5_1.bn.bias True
branch5x5_2.conv.weight True
branch5x5_2.bn.weight True
branch5x5_2.bn.bias True
branch3x3dbl_1.conv.weight True
branch3x3dbl_1.bn.weight True
branch3x3dbl_1.bn.bias True
branch3x3dbl_2.conv.weight True
branch3x3dbl_2.bn.weight True
branch3x3dbl_2.bn.bias True
branch3x3dbl_3.conv.weight True
branch3x3dbl_3.bn.weight True
branch3x3dbl_3.bn.bias True
branch_pool.conv.weight True
branch_pool.bn.weight True
branch_pool.bn.bias True
branch1x1.conv.weight True
branch1x1.bn.weight True
branch1x1.bn.bias True
branch5x5_1.conv.weight True
branch5x5_1.bn.weight True
branch5x5_1.bn.bias True
branch5x5_2.conv

In [31]:
# If doing it in two passes feels like unnecessary work,
# here is a tidier way that freezes only up to the layer you want.
# Top-level children 0-5 are frozen completely; inside child 6 only its first
# sub-child (index 0) is frozen.
# NOTE: the "was not frozen" branches only print -- they never set requires_grad,
# so those parameters keep whatever state earlier cells left them in.
child_counter = 0
for child in inception.children():
    if child_counter < 6:
        print("child ",child_counter," was frozen")
        for param in child.parameters():
            param.requires_grad = False
    elif child_counter == 6:
        # Descend one level so that child 6 can be frozen partially.
        children_of_child_counter = 0
        for children_of_child in child.children():
            if children_of_child_counter < 1:
                for param in children_of_child.parameters():
                    param.requires_grad = False
                print('child ', children_of_child_counter, 'of child',child_counter,' was frozen')
            else:
                print('child ', children_of_child_counter, 'of child',child_counter,' was not frozen')
            children_of_child_counter += 1

    else:
        print("child ",child_counter," was not frozen")
    child_counter += 1
    

child  0  was frozen
child  1  was frozen
child  2  was frozen
child  3  was frozen
child  4  was frozen
child  5  was frozen
child  0 of child 6  was frozen
child  1 of child 6  was not frozen
child  2 of child 6  was not frozen
child  3 of child 6  was not frozen
child  4 of child 6  was not frozen
child  5 of child 6  was not frozen
child  6 of child 6  was not frozen
child  7  was not frozen
child  8  was not frozen
child  9  was not frozen
child  10  was not frozen
child  11  was not frozen
child  12  was not frozen
child  13  was not frozen
child  14  was not frozen
child  15  was not frozen
child  16  was not frozen
child  17  was not frozen


In [0]:
# Simple end-to-end example that uses a pretrained model.
## Loading the dataloaders -- make sure that the data is saved in the following way
"""
data/
  - train/
      - class_1 folder/
          - img1.png
          - img2.png
      - class_2 folder/
      .....
      - class_n folder/
  - val/
      - class_1 folder/
      - class_2 folder/
      ......
      - class_n folder/
"""

# --- Configuration -----------------------------------------------------------
data_dir = "data/"
input_shape = 299        # side length of the crop fed to the network
batch_size = 32
mean = [0.5, 0.5, 0.5]   # per-channel values passed to transforms.Normalize
std = [0.5, 0.5, 0.5]
scale = 360              # size passed to transforms.Resize before cropping
use_parallel = True      # wrap the model in DataParallel when more than one GPU is visible
use_gpu = True
epochs = 100

# --- Transforms: random augmentation for training, deterministic resize/crop for validation
data_transforms = {
        'train': transforms.Compose([
        transforms.Resize(scale),
        transforms.RandomResizedCrop(input_shape),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomRotation(degrees=90),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)]),
        'val': transforms.Compose([
        transforms.Resize(scale),
        transforms.CenterCrop(input_shape),
        transforms.ToTensor(),
        transforms.Normalize(mean, std)]),}



# --- Datasets / loaders: one ImageFolder per split, read from data_dir/<split>
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                      data_transforms[x]) for x in ['train', 'val']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size,
                                         shuffle=True, num_workers=4) for x in ['train', 'val']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
class_names = image_datasets['train'].classes

# NOTE(review): `model_conv` is not defined in any cell shown in this notebook; it must be
# bound to the model to fine-tune (presumably one of the models prepared above) before this
# point -- confirm which one is intended.
# Only wrap in DataParallel when several GPUs are actually visible: the previous hard-coded
# device_ids=[0, 1] fails on single-GPU hosts. With no device_ids, DataParallel uses every
# visible device, which is what the log message promises.
if use_parallel and torch.cuda.device_count() > 1:
    print("[Using all the available GPUs]")
    model_conv = nn.DataParallel(model_conv)

print("[Using CrossEntropyLoss...]")
criterion = nn.CrossEntropyLoss()

print("[Using small learning rate with momentum...]")
# Hand the optimizer only the parameters that still require gradients (frozen ones are filtered out).
optimizer_conv = optim.SGD(list(filter(lambda p: p.requires_grad, model_conv.parameters())), lr=0.001, momentum=0.9)

print("[Creating Learning rate scheduler...]")
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_conv, step_size=7, gamma=0.1)

print("[Training the model begun ....]")
# NOTE(review): `train_model` is not defined in this notebook; it has to be copied or imported from
# train_model function is here: https://github.com/Prakashvanapalli/pytorch_classifiers/blob/master/tars/tars_training.py
model_ft = train_model(model_conv, dataloaders, dataset_sizes, criterion, optimizer_conv, exp_lr_scheduler, use_gpu,
                     num_epochs=epochs)



# More in 
# https://github.com/Spandan-Madan/Pytorch_fine_tuning_Tutorial/blob/master/main_fine_tuning.py 
# https://github.com/pytorch/tutorials/blob/master/beginner_source/transfer_learning_tutorial.py


## 특정한 Layer 의 weight/ bias 만 가져다가 쓰고 싶을 때?


In [0]:
# Reuse one layer's weight and bias from a trained model by plain attribute assignment.
# The assignment binds the same objects on both models (no copy is made).
# NOTE(review): `model_enc` and `model_trained` are not defined in the cells shown here.
model_enc.linear_3d.weight = model_trained.linear_3d.weight
model_enc.linear_3d.bias = model_trained.linear_3d.bias
# That is all it takes to reuse them.

####  Parameter 자체가 어떻게 생겼는지 보고 싶다면

In [0]:
# Print only the first parameter of the first child module: both loops break after one iteration.
# NOTE(review): `model` is not defined in the cells shown here; bind it to the model to inspect.
for child in model.children():
    for param in child.parameters():
        print("Parameters: ",param)
        break
    break


### 중요!!!! freeze 를 했으면 optimizer를 바꿔줘야 한다 
원래는 
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.1)


이런식으로 model.parameters() 를 다 해줬겠지만,  
우리가 freeze 를 시켰기 때문에 error 가 날 것이다 


In [0]:
# Hand the optimizer only the parameters that still require gradients, so frozen ones are skipped.
optimizer = torch.optim.RMSprop(filter(lambda p: p.requires_grad, model.parameters()), lr=0.1)



### 모델 저장하기 
pytorch 에서 추천하는 방법으로는 
"state dictionaries" 방법이다 (빠르고 차지하는 용량도 적음 )
 - 모델이 어떻게 생겼는지에대한 정보는 하나도 없고 
 - 그냥 Parameter/weight 의 값만 저장하는 것임 
 - 나중에 로딩 할 때 똑같은 structure 의 모델을 만들고 loading을 해야한다 
 

In [0]:
# Assuming the model is saved to MODEL_PATH
# (NOTE(review): MODEL_PATH and `model` are not defined in the cells shown here).

# Saving a model: only the state dict (parameter values) is written.
torch.save(model.state_dict(), MODEL_PATH)

# Loading the model.

# First create a model and define its architecture as done above in this notebook.
# Custom architectures are covered further below.
checkpoint = torch.load(MODEL_PATH)
model.load_state_dict(checkpoint)


#### 마지막 layer, 혹은 뒤에서 몇번째 layer만 지우려고 할 때는 위에서 했던것 처럼 걍 지우면 됨
왜 이런일을 할까? 
- classifier가 아니라 feature 를 필요로 할 때 쓴다 

In [0]:
# Rebuild the model from its top-level children, leaving out the last one ...
new_model = nn.Sequential(*list(model.children())[:-1])
# ... or the last two.
new_model_2_removed = nn.Sequential(*list(model.children())[:-2])


## Layer 더하기 
원래 모델과 똑같은 structure 가 아니라 뒤에 그냥 우리가 원하는 Layer 를 붙이려고 할 때는? 
- 리스트로 만든 다음에 append 하는 방법 --> 보통은 쓸 수 없다. 위에서 얘기한 것처럼 loading 할 때 저장했던 것과 '똑같은' structure/architecture 의 모델로 불러와야 해서 list 를 쓸 수가 없음 
- Adding layers on top - 
- 단순하게 custom 모델을 만들면 됨 

### Custom Models! 
- 모델의 앞에 반은 pretrained 
- 모델의 뒤에 반은 새로운거! 
- 몇개는 freeze 하고 싶고, 몇개는 update 될 수 있게 하고싶다 

 Segnet에서 써야하는 방법임 - 예시로 resnet 18을 써볼 것

In [0]:
import torch.nn as nn
import math
import torch.utils.model_zoo as model_zoo
import torch
from torch.autograd.variable import Variable
from torchvision import datasets, models, transforms

#새로운 모델은 하나의 class로 정의하기!!! 
#모델을 만든다는건 이 class 의 하나의 instance 를 만드는 것임 


#모델 structure: 반은 resnet (반만 freeze 시킬 것임)
                                    #항상 nn.Module 을 inherit 시키게 하기 
class Resnet_Added_Layers_Half_Frozen(nn.Module):
    """ResNet-18 backbone (partly frozen) followed by a new two-layer projection head.

    The backbone is rebuilt with a 300-way ``fc`` layer so that its architecture
    matches the checkpoint stored at ``MODEL_PATH``, the checkpoint is loaded, the
    early layers are frozen, and the backbone *without* its ``fc`` layer is reused
    as a 512-feature extractor feeding ``Linear(512, 400) -> ReLU -> Linear(400, 300)``.

    Args:
        LOAD_VIS_URL: accepted for interface compatibility; not used by this class.

    Note:
        ``MODEL_PATH`` is read as a module-level name and must be defined before
        the class is instantiated.
    """

    def __init__(self,LOAD_VIS_URL=None):
        super(Resnet_Added_Layers_Half_Frozen, self).__init__()

        # First half is a ResNet: built without pretrained weights and with the fc
        # output size the checkpoint was saved with, so the state dict keys/shapes match.
        model = models.resnet18(pretrained = False)
        num_final_in = model.fc.in_features
        model.fc = nn.Linear(num_final_in, 300)

        # The architecture is identical now, so the saved weights can be loaded.
        checkpoint = torch.load(MODEL_PATH)
        model.load_state_dict(checkpoint)

        # Freeze only some layers: top-level children 0-5 completely, and inside
        # child 6 only its first sub-child. Everything after that is left trainable.
        for child_counter, child in enumerate(model.children()):
            if child_counter < 6:
                for param in child.parameters():
                    param.requires_grad = False
            elif child_counter == 6:
                for sub_counter, sub_child in enumerate(child.children()):
                    if sub_counter < 1:
                        for param in sub_child.parameters():
                            param.requires_grad = False
            else:
                print("child ",child_counter," was not frozen")

        # New layers are stacked on top of the half-frozen backbone; the actual
        # wiring is defined in forward(). Everything must be stored on self so the
        # sub-modules are registered.
        # The trailing fc layer is dropped: the head below expects the 512 pooled
        # features, and a Sequential would feed fc an un-flattened 4-D tensor.
        self.vismodel = nn.Sequential(*list(model.children())[:-1])
        self.projective = nn.Linear(512,400)
        self.nonlinearity = nn.ReLU(inplace=True)
        self.projective2 = nn.Linear(400,300)

    def forward(self,x):
        """Run the backbone, flatten its output per sample, then apply the projection head."""
        x = self.vismodel(x)
        # Flatten everything except the batch axis. torch.squeeze() would also
        # drop the batch axis when the batch holds a single sample.
        x = x.view(x.size(0), -1)
        x = self.projective(x)
        x = self.nonlinearity(x)
        x = self.projective2(x)
        return x


## Training Custom Model 
- Loss function: 우리가 원하는 결과로부터 얼마나 떨어져 있는지 (얼마나 틀린지를) 수치화한 값
- optimizer: parameter들을 어떻게 업데이트 할것인가? (minimize loss 하기 위해서 )

Custom loss function 도 class를 써서 만들어야 함 - torch.nn.Module에서  inherit 된다 
- 보통 input의 dimension 을 바꿔줘야 할 것임 - view() 함수를 통해서 함 
- dimension 을 늘리고 싶으면 unsqueeze() 를 쓴다 
- Loss function 을 통과해서 나오는 최종값은 항상 SCALAR 여야 한다 (vector / tensor XXXX)
- parameter는 계속 업데이트 되기 때문에 variable 로 만들어줘야 한다 (x, y 둘다 variable  로 정의하기 ) - pytorch tensor 로 정의하기 






---

custom loss function - 
- X: (5, 10)의 형태 
- Y: (5, 5, 10 )의 형태 
- x 에 dimension 을 더하고, 그 더해진 y 의 dimension 에 맞춰서 반복해야함 
- (x - y )의 값은 (5, 5, 10) 모양이 될 것 
- scalar로 return 하기 위해서 torch.sum() 을 3번 사용하기 


In [0]:
class Regress_Loss(nn.Module):
    """Summed squared difference between one vector per sample and a set of target vectors.

    As described in the notes above, ``x`` has the form (5, 10) and ``y`` the form
    (5, 5, 10): ``x`` gets an extra axis at position 1 and is expanded along it to
    match ``y``. The result is a single scalar, as a loss must be.
    """

    def __init__(self):
        super(Regress_Loss,self).__init__()

    def forward(self,x,y):
        """Return sum((y - x[:, None, :]) ** 2) over all elements.

        The repeat count along axis 1 is taken from ``y`` itself rather than from
        an external ``NUM_WORDS`` global, so the loss works for any number of targets.
        """
        # (B, D) -> (B, 1, D) -> expanded to y's shape without copying the data.
        x_expanded = x.unsqueeze(1).expand_as(y)
        # One reduction over every axis already yields the scalar total.
        return torch.sum((y - x_expanded) ** 2)


## Save model parameters and loading!!!


In [0]:
  model = MyModel()
  # ... after training, save your model 
  model.save_state_dict('mytraining.pt')

  # .. to load your previously training model:
  model.load_state_dict(torch.load('mytraining.pt'))
pretrained_dict = ...
model_dict = model.state_dict()

# 1. filter out unnecessary keys
pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
# 2. overwrite entries in the existing state dict
model_dict.update(pretrained_dict) 
# 3. load the new state dict
model.load_state_dict(model_dict)


In [0]:
# Copy weights from a saved state dict into `mymodel` by POSITION (i-th entry -> i-th entry),
# for the case where the key names differ between the two models.
# NOTE(review): `mymodel` is not defined in the cells shown here; the path string is a placeholder.
pre_trained_model = torch.load("Path to the .pth file")
new = list(pre_trained_model.items())

my_model_kvpair = mymodel.state_dict()
# Pair each key of the target model with the pretrained entry at the same position.
# zip() stops at the shorter of the two, so surplus entries on either side are left untouched.
for key, (layer_name, weights) in zip(list(my_model_kvpair.keys()), new):
    my_model_kvpair[key] = weights

# Reassigning dict entries does not modify the model by itself; load the edited dict back.
mymodel.load_state_dict(my_model_kvpair)
