# **Transfer Learning**

**```Transfer learning yaitu suatu teknik di mana kita memakai arsitektur buatan orang lain untuk menyelesaikan masalah kita, contohnya yaitu kita menggunakan arsitektur CNN sang juara seperti GoogLeNet dan VGG yang memenangkan penghargaan ILSVRC```**

Ada dua langkah dalam transfer learning yaitu:
### **Langkah 1 Adaptasi**
    - Load model Juara
    - Bekukan weight model (ekstraktor) yang akan dipakai agar arsitektur yang kita pakai tidak rusak
    - Ubah kepala (classifier) dari arsitektur tersebut karena output yang kita harapkan adalah output yang sesuai dengan masalah kita
    - Train kepala(clasifier) tersebut
### **Langkah 2 Fine Tuning**
    - Lepaskan pembekuan pada semua layer atau sebagian
    - Pada saat training gunakan learning rate yang lebih kecil, yakni 10% dari learning rate pada masa adaptasi, agar weight dari arsitektur yang kita gunakan tidak rusak
    - Ulangi kembali tahap fine tuning dengan learning rate yang lebih kecil jika diperlukan
    
### **Aturan umum Transfer Learning**
- Jika dataset yang kita punya sedikit dan mirip dengan data yang dipakai untuk men-train arsitektur yang kita load, cukup dengan mengganti classifier (kepala) dari arsitektur tersebut
- Jika dataset yang kita punya sedikit dan tidak mirip dengan data yang dipakai untuk men-train arsitektur yang kita load, wassalam ya; jadi kesimpulannya arsitektur sang juara tidak bisa digunakan dan kita harus membuat model sendiri

# **Klasifikasi Panorama**

In [1]:
import numpy as np
import matplotlib.pyplot as plt

import torch

from torch import nn, optim
from jcopdl.callback import Callback, set_config

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cpu')

# **Data Set Dan Data Loader**

aturan arsitektur/model mobilenetV2
https://pytorch.org/hub/pytorch_vision_mobilenet_v2/

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from jcopdl.utils.dataloader import MultilabelDataset

In [14]:
# The pretrained architecture takes 224x224 inputs, so every crop must match it.
crop_size = 224
# batch size
bs = 64


# Training pipeline: light augmentation, then conversion to a normalized tensor.
train_transform = transforms.Compose([
    transforms.RandomRotation(10),
    transforms.RandomResizedCrop(crop_size, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Evaluation pipeline: deterministic resize + center crop, same normalization.
test_transform = transforms.Compose([
    # MobileNetV2 preprocessing rules: resize slightly larger, then center-crop.
    transforms.Resize(230),
    # Reuse crop_size so train and test crops cannot drift apart.
    transforms.CenterCrop(crop_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_set = MultilabelDataset("data/train_label.csv", "data/train/", transform=train_transform)
trainloader = DataLoader(train_set, batch_size = bs, shuffle = True)

test_set = MultilabelDataset("data/test_label.csv", "data/test/", transform=test_transform)
# NOTE(review): shuffling the test loader means the batch drawn for the sanity
# check differs between runs — confirm this is intended.
testloader = DataLoader(test_set, batch_size = bs, shuffle = True)

In [15]:
# Pull a single batch from the training loader to inspect the image tensor shape.
images, labels = next(iter(trainloader))
images.shape

torch.Size([64, 3, 224, 224])

In [16]:
# Class names exposed by the training dataset, kept for decoding predictions.
labelcat= train_set.classes
labelcat

['desert', 'mountains', 'sea', 'sunset', 'trees']

# **Arsitektur Dan Config**

### **Contoh Pretrained model**

In [17]:
from torchvision.models import mobilenet_v2

In [18]:
mnet = mobilenet_v2(pretrained = True) # True => download the architecture and its weights, False => architecture only

Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /home/rizki/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth


  0%|          | 0.00/13.6M [00:00<?, ?B/s]

In [19]:
# Freeze the freshly loaded weights: with gradient tracking switched off no
# gradient is computed for them, so they are never updated during training.
for weight in mnet.parameters():
    weight.requires_grad = False

In [20]:
mnet

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

In [21]:
# Replace the classifier head: 1280 input features mapped to 5 outputs, each
# squashed independently into [0, 1] by a sigmoid.
mnet.classifier = nn.Sequential(
    nn.Linear(1280, 5),
    nn.Sigmoid()
)

In [22]:
mnet

MobileNetV2(
  (features): Sequential(
    (0): ConvBNActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momen

# **Arsitektur Config**

In [23]:
class CostomMobileNetV2(nn.Module):
    """Pretrained MobileNetV2 with its classifier swapped for a sigmoid head.

    On construction the pretrained network is loaded and frozen, then its
    classifier is replaced by ``Linear(1280, output_Size)`` followed by a
    sigmoid. The new head is created after freezing, so it is not affected by
    that freeze.

    Args:
        output_Size: Number of output units of the new classifier head.
    """

    def __init__(self, output_Size):
        super().__init__()
        # Load the pretrained model and freeze every weight it came with.
        self.mnet = mobilenet_v2(pretrained = True)
        self.freeze()

        # Build the replacement head only after freezing.
        head = nn.Sequential(
            nn.Linear(1280, output_Size),
            nn.Sigmoid()
        )
        self.mnet.classifier = head

    def forward(self, x):
        """Return the wrapped network's output for ``x``."""
        return self.mnet(x)

    def freeze(self):
        """Turn off gradient tracking for every parameter of the wrapped network."""
        self._set_trainable(False)

    def unfreeze(self):
        """Turn on gradient tracking for every parameter of the wrapped network."""
        self._set_trainable(True)

    def _set_trainable(self, flag):
        """Assign ``flag`` to ``requires_grad`` on all wrapped parameters."""
        for weight in self.mnet.parameters():
            weight.requires_grad = flag

In [25]:
# Collect the run settings (output size, batch size, crop size) into a jcopdl
# config object.
config = set_config({
    "output_size":len(train_set.classes),
    "batch_size" :bs,
    "crop_size" : crop_size
})

# **Fase 1 Adaptasi (lr standar + early stopping/patience kecil)**

In [None]:
# MCOC setup (presumably Model, Criterion, Optimizer, Callback — the four objects built here)
model = CostomMobileNetV2(config.output_size).to(device)
# Binary cross-entropy on probabilities; the model's head already ends in a sigmoid.
criterion = nn.BCELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001)
callback = Callback(model, config, outdir="model", early_stop_patience = 2)

### Ingat: pada data multi-label, akurasinya tidak bisa dihitung ###

In [None]:
from tqdm.auto import tqdm

def loop_fn(mode, dataset, dataloader, model, criterion, optimizer, device):
    """Run one full pass over ``dataloader`` and return the mean per-sample cost.

    Args:
        mode: ``"train"`` to update the weights, ``"test"`` to only evaluate.
        dataset: Dataset behind ``dataloader``; only its length is used, to
            average the accumulated cost.
        dataloader: Iterable yielding ``(feature, target)`` batches.
        model: Network to run; switched to train or eval mode based on ``mode``.
        criterion: Loss function called as ``criterion(output, target)``.
        optimizer: Optimizer stepped after every batch in train mode; unused
            in test mode.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of ``loss * batch_size`` over all batches divided by ``len(dataset)``.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"test"``.
    """
    if mode == "train":
        model.train()
    elif mode == "test":
        model.eval()
    else:
        # Fail loudly instead of silently running in whatever mode the model was left in.
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    is_training = mode == "train"
    cost = 0
    # Track gradients only when weights are going to be updated, so evaluation
    # stays cheap even if the caller does not wrap the call in torch.no_grad().
    with torch.set_grad_enabled(is_training):
        for feature, target in tqdm(dataloader, desc=mode.title()):
            feature, target = feature.to(device), target.to(device)
            output = model(feature)  # feedforward
            loss = criterion(output, target)

            if is_training:
                loss.backward()  # backprop
                optimizer.step()  # update weights
                optimizer.zero_grad()

            # Weight the batch loss by its size (assumes the criterion returns
            # a per-batch mean) so the final division yields a per-sample mean.
            cost += loss.item() * feature.shape[0]

    cost = cost / len(dataset)
    return cost

In [None]:
# Phase 1 (adaptation): keep training epochs until early stopping fires.
while True:
    train_cost= loop_fn("train", train_set, trainloader, model, criterion, optimizer, device)
    with torch.no_grad():
        test_cost = loop_fn("test", test_set, testloader, model, criterion, optimizer, device)
    
    # Logging
    callback.log(train_cost, test_cost)
    
    # Checkpoint
    callback.save_checkpoint()
    
    # Runtime plotting
    callback.cost_runtime_plotting()
    
    # Early stopping: only costs are logged above (no score), so monitor the
    # test cost, the same metric the fine-tuning loop monitors.
    if callback.early_stopping(model, monitor="test_cost"):
        callback.plot_cost()
        break

# **Fase 2 Fine Tuning (lr kecil + early stopping/patience ditambah)**

In [None]:
# Fine-tuning: make every weight trainable again and rebuild the optimizer with
# a much smaller learning rate. The result must be bound to `optimizer`, the
# name the training loop passes to loop_fn; otherwise the phase-1 optimizer
# (lr=0.001) keeps being used.
model.unfreeze()
optimizer = optim.AdamW(model.parameters(), lr=1e-5)

# Restart the early-stopping counter and allow more patience for this phase.
callback.reset_early_stop()
callback.early_stop_patience = 5

In [None]:
# Phase 2 (fine-tuning): repeat epochs until the callback signals a stop.
should_stop = False
while not should_stop:
    train_cost = loop_fn("train", train_set, trainloader, model, criterion, optimizer, device)
    with torch.no_grad():
        test_cost = loop_fn("test", test_set, testloader, model, criterion, optimizer, device)

    # Record this epoch's costs, save a checkpoint, refresh the live plot.
    callback.log(train_cost, test_cost)
    callback.save_checkpoint()
    callback.cost_runtime_plotting()

    # Ask the callback whether to stop; draw the final cost plot when it says yes.
    should_stop = bool(callback.early_stopping(model, monitor="test_cost"))
    if should_stop:
        callback.plot_cost()

# **Predict**

In [None]:
# Take one batch from the test loader and move it to the active device.
feature, target = next(iter(testloader))
feature, target = feature.to(device), target.to(device)

In [None]:
# Switch to inference mode, then run the batch without building an autograd graph.
model.eval()
with torch.no_grad():
    output = model(feature)
    # Threshold each sigmoid output at 0.5 to get a 0/1 prediction per class.
    preds = (output > 0.5).to(torch.float32)
preds

# **Sanity Check**

In [None]:
def convet_to_label(x):
    """Return the names from ``labelcat`` whose matching entry in ``x`` equals 1.

    ``x`` is paired positionally with the module-level ``labelcat`` list.
    """
    names = []
    for flag, name in zip(x, labelcat):
        if flag == 1:
            names.append(name)
    return names

def invers_norm(img):
    """Undo the per-channel normalization on a channel-first 3-channel image.

    Each channel is multiplied by its std (0.229, 0.224, 0.225) and shifted by
    its mean (0.485, 0.456, 0.406).

    Args:
        img: Tensor indexed as ``[channel, row, col]`` with 3 channels.

    Returns:
        A new tensor holding the de-normalized image. ``img`` itself is not
        modified, so slices of a batch can be passed without corrupting it.
    """
    # Build the constants with img's dtype/device and shape them to broadcast
    # over the two spatial dimensions.
    mean = img.new_tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = img.new_tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    return img * std + mean

In [None]:
# Show up to 36 test images in a 6x6 grid, each titled with its true and
# predicted labels: red when any class disagrees, green otherwise.
fig, axes = plt.subplots(6, 6, figsize=(24,24))
for img, label, pred, ax in zip(feature, target, preds, axes.flatten()):
    restored = invers_norm(img)
    # imshow expects channel-last data, so reorder the dimensions first.
    ax.imshow(restored.permute(1,2,0).cpu())
    if (pred != label).any():
        font = {"color":"r"}
    else:
        font = {"color":"g"}
    label, pred = convet_to_label(label), convet_to_label(pred)
    ax.set_title(f"Label: {label} | Pred {pred}", fontdict = font)
    ax.axis("off")