In [1]:
import torch
from torch import optim, nn
from jcopdl.callback import Callback,set_config

https://towardsdatascience.com/review-mobilenetv2-light-weight-model-image-classification-8febb490e61c

In [2]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cpu')

# Dataset & Dataloader

In [3]:
from torch.utils.data import DataLoader
from torchvision import datasets,transforms
from jcopdl.utils.dataloader import MultilabelDataset

https://pytorch.org/hub/pytorch_vision_mobilenet_v2/
    
""""""
All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images 
of shape (3 x H x W), where H and W are expected to be at least 224. The images have to be loaded in to a range of [0, 1] and 
then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].
""""""

In [4]:
bs = 64
crop_size = 224

# Channel statistics quoted in the PyTorch Hub note for pretrained models;
# defined once so the train and test pipelines cannot drift apart.
imagenet_mean = [0.485, 0.456, 0.406]
imagenet_std = [0.229, 0.224, 0.225]

# Training pipeline: random augmentation, then tensor conversion + normalization.
train_transform = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.RandomResizedCrop(crop_size, scale=(0.7, 1.0)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

# Evaluation pipeline: deterministic resize + center crop. The crop uses
# `crop_size` (previously a hard-coded 224) so both pipelines always emit the
# same spatial size.
test_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(crop_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
])

train_set = MultilabelDataset("data/train_label.csv", "data/train/", transform=train_transform, fname_col="fname")
trainloader = DataLoader(train_set, batch_size=bs, shuffle=True, num_workers=2)

test_set = MultilabelDataset("data/test_label.csv", "data/test/", transform=test_transform, fname_col="fname")
# shuffle=True is kept on purpose: the "Predict" section takes
# next(iter(testloader)), so shuffling makes that a random batch.
testloader = DataLoader(test_set, batch_size=bs, shuffle=True, num_workers=2)

In [5]:
# Class names exposed by the training dataset; `convert` later zips these
# against a 0/1 vector to turn it into label names.
label2cat= train_set.classes

# Arsitektur dan Config
*Cara pakai pretrained model:*

- Load a pretrained-model
- freeze the feature extractor
- modify the classifier to fit our data and leave it unfrozen
- train only on the classifier

In [6]:
from torchvision.models import mobilenet_v2

In [7]:
# Load MobileNetV2 with pretrained weights (exploration copy; the training
# below uses the CustomMobilenetV2 wrapper instead).
mnet = mobilenet_v2(pretrained=True)

In [8]:
# Freeze every weight of the pretrained network so the optimizer leaves it untouched.
for weight in mnet.parameters():
    weight.requires_grad = False

In [9]:
# Display the module tree of the loaded network.
mnet

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [10]:
# Inspect the original classification head (the part that gets replaced below).
mnet.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=False)
  (1): Linear(in_features=1280, out_features=1000, bias=True)
)

In [11]:
# parameters() returns a generator, so the cell output only shows its repr.
mnet.parameters()

<generator object Module.parameters at 0x0000025E18A904C8>

In [12]:
# Swap the 1000-way head for a 5-output linear layer plus sigmoid.
# 1280 matches the in_features of the original head shown above.
# NOTE(review): 5 is presumably the number of labels in the dataset — confirm
# against train_set.classes.
mnet.classifier= nn.Sequential(
    nn.Linear(1280, 5),
    nn.Sigmoid()
)

In [13]:
# Display the network again after the classifier head was replaced.
mnet

MobileNetV2(
  (features): Sequential(
    (0): ConvBNReLU(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvBNReLU(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=Tr

In [14]:
class CustomMobilenetV2(nn.Module):
    """Pretrained MobileNetV2 with a sigmoid multilabel head.

    The backbone is loaded with pretrained weights and frozen. The classifier
    is then replaced by a new ``Linear -> Sigmoid`` head, which is created
    after the freeze and therefore stays trainable.

    Args:
        output_size: Number of sigmoid outputs of the new head. Defaults to 5,
            the value this notebook originally hard-coded.
    """

    def __init__(self, output_size=5):
        super().__init__()
        self.mnet = mobilenet_v2(pretrained=True)
        # Freeze first: the replacement head below is created afterwards and
        # so keeps requires_grad=True.
        self.freeze()
        self.mnet.classifier = nn.Sequential(
            nn.Linear(1280, output_size),  # 1280 = in_features of the original head
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return the sigmoid outputs of the wrapped network for batch ``x``."""
        return self.mnet(x)

    def freeze(self):
        """Set ``requires_grad = False`` on every parameter currently in ``self.mnet``."""
        for param in self.mnet.parameters():
            param.requires_grad = False

    def unfreeze(self):
        """Set ``requires_grad = True`` on every parameter of ``self.mnet`` (head included)."""
        for param in self.mnet.parameters():
            param.requires_grad = True

In [15]:
# Hyperparameters handed to jcopdl's set_config; the resulting object is
# passed to Callback below.
# NOTE(review): presumably stored with checkpoints — confirm in jcopdl docs.
config = set_config({
    "batch_size" : bs,
    "crop_size" : crop_size 
})

# Training Preparation ~> MCOC

###### Phase 1: adaptation (learning rate standard + patience kecil, e.g. 2)

In [16]:
# Phase 1 setup: frozen backbone, only the new head has requires_grad=True.
model = CustomMobilenetV2().to(device)
# The model already ends in a sigmoid, so plain BCELoss (not the logits variant) is used.
criterion = nn.BCELoss()
optimizer = optim.AdamW(model.parameters(),lr=0.001)
# Small early-stop patience for this phase; outputs go under "model".
callback = Callback(model,config,early_stop_patience=2,outdir="model")

In [20]:
from tqdm.auto import tqdm

In [24]:
def loop_fn(mode, dataset, dataloader, model, criterion, optimizer, device):
    """Run one pass over ``dataloader`` and return the mean per-sample loss.

    Args:
        mode: ``"train"`` to update weights, ``"test"`` to evaluate only.
        dataset: Dataset behind ``dataloader``; only ``len(dataset)`` is used,
            to average the accumulated loss.
        dataloader: Iterable yielding ``(feature, target)`` batches.
        model: Module being trained or evaluated.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch in train mode.
        device: Device the batches are moved to.

    Returns:
        Sum of ``loss * batch_size`` over all batches divided by ``len(dataset)``.

    Raises:
        ValueError: If ``mode`` is neither ``"train"`` nor ``"test"``.
    """
    if mode == "train":
        model.train()
    elif mode == "test":
        model.eval()
    else:
        # Previously an unknown mode silently ran in whatever state the model was in.
        raise ValueError(f"mode must be 'train' or 'test', got {mode!r}")

    is_training = mode == "train"
    cost = 0
    # Autograd is switched here rather than left to the caller, so evaluation
    # never builds a graph even without an outer torch.no_grad().
    with torch.set_grad_enabled(is_training):
        for feature, target in tqdm(dataloader, desc=mode.title()):
            feature, target = feature.to(device), target.to(device)
            output = model(feature)
            loss = criterion(output, target)

            if is_training:
                # Clear gradients before backward so leftovers from an
                # interrupted earlier run cannot leak into this step.
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # Weight by batch size so a smaller final batch is averaged correctly.
            cost += loss.item() * feature.shape[0]
    return cost / len(dataset)

In [None]:
# Phase 1 training loop: one train pass and one test pass per iteration,
# repeated until the callback signals early stopping.
while True :
    train_cost= loop_fn("train",train_set,trainloader,model,criterion,optimizer,device)
    # Evaluation pass runs with autograd disabled.
    with torch.no_grad():
        test_cost= loop_fn("test",test_set,testloader,model,criterion,optimizer,device)

    # Record both costs for this epoch.
    callback.log(train_cost, test_cost)


    # NOTE(review): checkpoint frequency/location is decided inside jcopdl's Callback — confirm.
    callback.save_checkpoint()


    callback.cost_runtime_plotting()



    # Stop once test_cost triggers early stopping, then plot the cost history.
    if callback.early_stopping(model, monitor='test_cost'):
        callback.plot_cost()
        break

HBox(children=(FloatProgress(value=0.0, description='Train', max=26.0, style=ProgressStyle(description_width='…

##### Phase 2: fine-tuning

In [None]:
# Phase 2 setup: make every parameter trainable and rebuild the optimizer
# with a much smaller learning rate (1e-5 vs 1e-3 in phase 1).
model.unfreeze()
optimizer=optim.AdamW(model.parameters(),lr=1e-5)

# Reset the early-stop state and allow more patience than in phase 1 (5 vs 2).
callback.reset_early_stop()
callback.early_stop_patience = 5

In [None]:
# Phase 2 (fine-tuning) loop: same structure as phase 1, now with the
# unfrozen model and the low-learning-rate optimizer.
while True :
    train_cost= loop_fn("train",train_set,trainloader,model,criterion,optimizer,device)
    # Evaluation pass runs with autograd disabled.
    with torch.no_grad():
        test_cost= loop_fn("test",test_set,testloader,model,criterion,optimizer,device)

    # Record both costs for this epoch.
    callback.log(train_cost, test_cost)


    # NOTE(review): checkpoint frequency/location is decided inside jcopdl's Callback — confirm.
    callback.save_checkpoint()


    callback.cost_runtime_plotting()



    # Stop once test_cost triggers early stopping, then plot the cost history.
    if callback.early_stopping(model, monitor='test_cost'):
        callback.plot_cost()
        break

# Predict

In [None]:
# Take the first batch from the test loader; the loader is built with
# shuffle=True, so this is a random sample of the test set.
feature,target= next(iter(testloader))
feature, target = feature.to(device), target.to(device)

In [None]:
# Run inference on the sampled batch: autograd off, model in eval mode.
with torch.no_grad():
    model.eval()
    output = model(feature)
    # Threshold each sigmoid output at 0.5 to get a 0/1 prediction per label.
    pred = (output>0.5).to(torch.float32)
pred

# Sanity Check

* Untuk visualisasi data multilabel perlu adanya step tambahan: datanya harus kita inverse kembali karena data kita tadi sudah kena normalisasi, makanya kita perlu inverse balik dengan angka normalisasi (mean dan std) tadi.

In [None]:
def convert(x, classes=None):
    """Translate a 0/1 indicator vector into the list of active class names.

    Args:
        x: Iterable of per-class indicators (e.g. one row of targets or
            thresholded predictions); entries equal to 1 are treated as active.
        classes: Class names aligned with ``x``. Defaults to the notebook-level
            ``label2cat`` when omitted.

    Returns:
        List of names from ``classes`` whose matching entry in ``x`` equals 1.
    """
    if classes is None:
        # Looked up lazily so the function can be defined before label2cat exists.
        classes = label2cat
    return [name for flag, name in zip(x, classes) if flag == 1]

def inverse(img):
    """Undo the mean/std normalization on a single 3-channel image tensor.

    Applies ``img * std + mean`` per channel, using the same statistics as the
    ``transforms.Normalize`` calls in this notebook. The result is a new
    tensor; ``img`` is left untouched. The earlier version wrote into ``img``
    in place, which corrupted the batch it was sliced from and de-normalized
    twice when the plotting cell was re-run.

    Args:
        img: Tensor with the 3 colour channels on the first axis.

    Returns:
        New de-normalized tensor with the same shape, dtype and device as ``img``.
    """
    # new_tensor keeps dtype/device in line with img; (3, 1, 1) broadcasts over H and W.
    mean = img.new_tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
    std = img.new_tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
    return img * std + mean

In [None]:
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Sanity-check grid: up to 36 images of the sampled batch (zip stops at the
# 6x6 axes), titled with true and predicted label names; green = all labels
# match, red = at least one mismatch.
fig, axes = plt.subplots(6, 6, figsize=(24, 24))
# Loop names differ from the batch-level `pred`, so re-running this cell
# still sees the full prediction tensor.
for img, label_vec, pred_vec, ax in zip(feature, target, pred, axes.flatten()):
    # Clone before de-normalizing so `feature` is never modified, whichever
    # version of `inverse` is defined.
    ax.imshow(inverse(img.clone()).permute(1, 2, 0).cpu())
    font = {"color": "r"} if (pred_vec != label_vec).any() else {"color": "g"}
    # Was `label.pred = ...`, which only set an attribute on the tensor and
    # left raw tensors in the title.
    label_names, pred_names = convert(label_vec), convert(pred_vec)
    ax.set_title(f"label: {label_names}\nPred: {pred_names}", fontdict=font);
    ax.axis("off");