<a href="https://colab.research.google.com/github/PANDASANG1231/Deeplearning_byHand/blob/main/037_Finetune.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import sys
from google.colab import drive
# Mount Google Drive into the Colab filesystem (remounting if it is already mounted).
drive.mount('/content/drive', force_remount=True)
# Add the notes folder on Drive to the import path so `tool` can be imported.
sys.path.append('/content/drive/MyDrive/Colab Notebooks/deeplearning_note')
# NOTE(review): this star import presumably provides torch, torchvision, nn and
# train_p2, which later cells use without importing — confirm against tool.py.
from tool import *

Mounted at /content/drive


In [None]:
! pip install d2l

In [2]:
# Print the GPU, driver and CUDA version available to this runtime.
! nvidia-smi

Sun Jan 30 06:11:11 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Resnet Finetune vs Resnet

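 - Fine-tuning uses a pretrained ResNet model, so the inputs must be preprocessed the same way as during pretraining:
   - The pretrained ResNet was trained on normalized images, so we apply the same per-channel mean/std normalization.
   - The pretrained ResNet expects an input size of 224x224, so we resize/crop the images to that size.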


In [27]:
from d2l import torch as d2l

# Register the hotdog archive with d2l's data hub as a (URL, checksum) pair.
d2l.DATA_HUB['hotdog'] = (
    d2l.DATA_URL + 'hotdog.zip',
    'fba480ffa8aa7e0febbb511d181409f899b9baa5',
)

# Fetch and unpack the archive; data_dir is used below as the dataset root
# containing the 'train' and 'test' folders.
data_dir = d2l.download_extract('hotdog')

In [30]:
# Per-channel mean and std used for normalization; these must match the
# statistics the pretrained ResNet weights were trained with.
normalize = torchvision.transforms.Normalize(
    [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training pipeline: random crop resized to 224x224 (data augmentation),
# then tensor conversion and normalization.
train_augs = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(224),
    torchvision.transforms.ToTensor(),
    normalize])

# Evaluation pipeline: deterministic resize to 256 followed by a central
# 224x224 crop, so test results do not depend on random augmentation.
test_augs = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    normalize])

# ImageFolder derives the class labels from the sub-directory names.
train_dataset = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'train'), transform=train_augs)
test_dataset = torchvision.datasets.ImageFolder(os.path.join(data_dir, 'test'), transform=test_augs)

# Shuffle only the training data; keep the test order fixed.
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

# Prefer the GPU, but fall back to the CPU so the notebook still runs on a
# runtime without CUDA instead of failing when tensors are moved to the device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### No Finetune

 - No pretrained weights; Linear and Conv2d weights are initialized with Xavier uniform
 - learning_rate = 0.05; final test accuracy is 0.834
 - Training speed: 608 examples/sec on cuda

In [53]:
# Baseline: ResNet-18 with randomly initialized weights and a new 2-class head.
resnet = torchvision.models.resnet18(pretrained=False)
resnet.fc = nn.Linear(resnet.fc.in_features, 2)

def init_xavier(m):
    """Re-initialize the weight of a Linear or Conv2d module with Xavier uniform.

    Intended to be passed to ``Module.apply``, which calls it on every
    sub-module; modules of any other type are left untouched.
    """
    # isinstance is the idiomatic type check and also covers subclasses.
    if isinstance(m, (nn.Linear, nn.Conv2d)):
        nn.init.xavier_uniform_(m.weight)
resnet.apply(init_xavier)


loss = nn.CrossEntropyLoss()
learning_rate = 0.05
optimizer = torch.optim.Adam(params=resnet.parameters(), lr=learning_rate)

train_p2(num_epochs=10, 
         net=resnet,  
         loss=loss, 
         train_iter=train_dataloader,
         test_iter=test_dataloader,
         device=device,
         optimizer=optimizer)

training on cuda
loss 2.008, train acc 0.534, test acc 0.623
loss 0.593, train acc 0.757, test acc 0.799
loss 0.419, train acc 0.806, test acc 0.823
loss 0.406, train acc 0.826, test acc 0.831
loss 0.393, train acc 0.829, test acc 0.836
loss 0.396, train acc 0.825, test acc 0.810
loss 0.376, train acc 0.831, test acc 0.850
loss 0.373, train acc 0.828, test acc 0.869
loss 0.371, train acc 0.830, test acc 0.846
loss 0.373, train acc 0.837, test acc 0.834
608.9 examples/sec on cuda


### Finetune(different learning rate)

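  - Only the new last fully connected layer is Xavier-initialized; the other layers keep their pretrained weights
  - Different learning rates for different layers: the last layer uses 10 times the base learning rate
  - The base learning rate is only 0.0001, yet the best test accuracy is 0.963 (0.954 after the final epoch), far better than training from scratch
  - Training speed is the same: 608 examples/sec on cuda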


In [35]:
# Start from pretrained ResNet-18 weights and swap in a fresh 2-class head.
resnet = torchvision.models.resnet18(pretrained=True)
resnet.fc = nn.Linear(resnet.fc.in_features, 2)
nn.init.xavier_uniform_(resnet.fc.weight)

loss = nn.CrossEntropyLoss()
learning_rate = 0.0001

# Every parameter except those of the new head.
param_norm = [
    param
    for name, param in resnet.named_parameters()
    if name not in ('fc.weight', 'fc.bias')
]

# Two parameter groups: the first group uses the base learning rate, while
# the newly initialized head is trained with a 10x larger one.
optimizer = torch.optim.Adam(
    params=[
        {"params": param_norm},
        {"params": resnet.fc.parameters(), "lr": learning_rate * 10},
    ],
    lr=learning_rate,
)

train_p2(
    num_epochs=10,
    net=resnet,
    loss=loss,
    train_iter=train_dataloader,
    test_iter=test_dataloader,
    device=device,
    optimizer=optimizer,
)

training on cuda
loss 0.308, train acc 0.881, test acc 0.938
loss 0.136, train acc 0.946, test acc 0.948
loss 0.135, train acc 0.951, test acc 0.938
loss 0.125, train acc 0.950, test acc 0.944
loss 0.091, train acc 0.966, test acc 0.944
loss 0.086, train acc 0.968, test acc 0.960
loss 0.065, train acc 0.975, test acc 0.963
loss 0.071, train acc 0.974, test acc 0.938
loss 0.067, train acc 0.972, test acc 0.949
loss 0.061, train acc 0.980, test acc 0.954
608.2 examples/sec on cuda


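### Finetune(requires_grad=False on the backbone)
  - Only the new last fully connected layer is Xavier-initialized
  - Only the last fully connected layer is updated; all other parameters are frozen
  - learning rate is 0.0035; final test accuracy is 0.926 (better than training from scratch, but lower than fine-tuning all layers)
  - Training is about 2.7 times faster: 1650 examples/sec on cuda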

In [41]:
# Start from pretrained ResNet-18 weights and swap in a fresh 2-class head.
resnet = torchvision.models.resnet18(pretrained=True)
resnet.fc = nn.Linear(resnet.fc.in_features, 2)
nn.init.xavier_uniform_(resnet.fc.weight)

loss = nn.CrossEntropyLoss()
learning_rate = 0.0035

# Freeze everything except the new head so no gradients are computed for
# the pretrained layers.
for name, param in resnet.named_parameters():
    if name not in ['fc.weight', 'fc.bias']:
        param.requires_grad = False

# NOTE(review): if train_p2 puts the network in train() mode, the BatchNorm
# layers of the frozen backbone still update their running statistics —
# confirm whether that is intended.

# Hand the optimizer only the parameters that are actually trainable instead
# of every parameter, so it does not track the frozen ones.
optimizer = torch.optim.Adam(
    params=[p for p in resnet.parameters() if p.requires_grad],
    lr=learning_rate)


train_p2(num_epochs=10, 
         net=resnet,  
         loss=loss, 
         train_iter=train_dataloader,
         test_iter=test_dataloader,
         device=device,
         optimizer=optimizer)

training on cuda
loss 0.579, train acc 0.715, test acc 0.890
loss 0.300, train acc 0.873, test acc 0.905
loss 0.277, train acc 0.881, test acc 0.910
loss 0.241, train acc 0.904, test acc 0.920
loss 0.235, train acc 0.905, test acc 0.926
loss 0.249, train acc 0.900, test acc 0.925
loss 0.243, train acc 0.902, test acc 0.921
loss 0.227, train acc 0.901, test acc 0.929
loss 0.218, train acc 0.916, test acc 0.915
loss 0.232, train acc 0.904, test acc 0.926
1650.2 examples/sec on cuda
