## Data

In [None]:
from data import get_data_paths, celeb2mask, CustomDataset
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import time
import torch

# Silence every Python warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and other library
# warnings — consider narrowing the filter to specific categories/modules.
from warnings import filterwarnings
filterwarnings("ignore")

# Load (or reload) the autoreload extension and use mode 2, so edited modules
# are re-imported automatically before each cell executes.
%reload_ext autoreload
%autoreload 2

In [None]:
%%time
# get data paths
data_paths = get_data_paths(celeb_img_path="../data/dataset_celebs/imgs_256/*.jpg",
                            celeb_mask_path="../data/dataset_celebs/masks_256/")
data_paths = [i for i in data_paths if i[0] == "community_dataset"]
# data_paths = [i for i in data_paths if i[0] != "community_dataset"]
# np.random.shuffle(data_paths)
# data_paths = data_paths[:100]

# split data
train_paths, test_paths = train_test_split(data_paths, test_size=0.2, random_state=42)
test_paths, val_paths = train_test_split(test_paths, test_size=0.5, random_state=42)

print(f"Train: {len(train_paths)}, Val: {len(val_paths)}, Test: {len(test_paths)}")

# Datasets
train_dataset = CustomDataset(train_paths)
val_dataset = CustomDataset(val_paths)
test_dataset = CustomDataset(test_paths)

# Dataloaders
torch.manual_seed(42)
train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=8, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

del data_paths, train_paths, test_paths, val_paths

## Training

In [None]:
from pretrained_models import PretrainedModel, train_loop

In [None]:
# Settings shared by the experiment sections below
test_metrics = pd.DataFrame()  # starts empty; each train_loop call's return value is stored back here
epochs = 15                    # forwarded unchanged to every train_loop call

## Baseline Model

In [None]:
# Weights tests: train the same unet/xception model once per weight vector.
# Start from an empty table, like the other experiment cells, so re-running this
# cell does not build on results left over from a previous execution
# (train_loop receives the table and its return value replaces it).
test_metrics = pd.DataFrame()
arch = "unet"
loss_name = "BCEWeighted"
optimizer_name = "Adam"
encoder_name = "xception"
# One 11-element weight vector per run; handed to train_loop via params["weight"].
weights = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
           [0.5, 3, 3, 1, 0.5, 3, 2, 0.5, 1.5, 1.5, 1.5],
           [0.02335332, 2.29093098, 4.60850608, 0.95243978, 0.16392607, 3.69650905, 10.48914825, 0.05858025, 2.10530202, 4.27744651, 1.27002274]]
# Run label for each weight vector above (same order).
weight_name = ["equals", "weighted_empirical", "weighted_proportional"]
assert len(weights) == len(weight_name), "every weight vector needs exactly one name"

# Output locations are identical for every run, so set them once.
logger_save_path = "../data/logs/pretrained/"
callback_save_path = "../data/models/pretrained/"

for run_name, weight in zip(weight_name, weights):
    params = {"weight":weight, "optimizer_name":optimizer_name,
              "lr": 1e-3, "weight_decay": 0,
              "loss_name": loss_name, "arch":arch,
              "encoder_name":encoder_name, "automatic_optimization":True}
    # logger and checkpoint callback share the same run-specific name
    logger_name = callback_name = f"baseline_{run_name}"
    # train loop
    test_metrics = train_loop(params, test_metrics, logger_name, logger_save_path, callback_name,
                              callback_save_path, epochs, train_dataloader, val_dataloader, test_dataloader)

In [None]:
# Display the metrics table produced by the baseline weight runs.
test_metrics

## Backbones

In [None]:
# Backbone comparison: architecture, loss, optimizer and weights stay fixed;
# only the encoder changes between runs.
test_metrics = pd.DataFrame()  # fresh results table for this section

backbones = ["timm-efficientnet-b3", "timm-mobilenetv3_large_100"]
arch, loss_name, optimizer_name = "unet", "BCEWeighted", "Adam"
weight = [0.02335332, 2.29093098, 4.60850608, 0.95243978, 0.16392607, 3.69650905, 10.48914825, 0.05858025, 2.10530202, 4.27744651, 1.27002274]

# Log / checkpoint destinations do not depend on the encoder.
logger_save_path = "../data/logs/pretrained/"
callback_save_path = "../data/models/pretrained/"

for encoder_name in backbones:
    print(f"Encoder: {encoder_name}")
    # Hyper-parameters handed to train_loop for this run.
    params = {
        "weight": weight,
        "optimizer_name": optimizer_name,
        "lr": 1e-3,
        "weight_decay": 0,
        "loss_name": loss_name,
        "arch": arch,
        "encoder_name": encoder_name,
        "automatic_optimization": True,
    }
    # The encoder name doubles as both the logger name and the callback name.
    logger_name = callback_name = encoder_name
    test_metrics = train_loop(
        params, test_metrics,
        logger_name, logger_save_path,
        callback_name, callback_save_path,
        epochs, train_dataloader, val_dataloader, test_dataloader,
    )

In [None]:
# Display the metrics table produced by the backbone runs.
test_metrics

## Optimizers

In [None]:
# Optimizer comparison: model, loss and weights stay fixed; only the optimizer
# name changes between runs.
test_metrics = pd.DataFrame()  # fresh results table for this section

optimizers = ["AdamP", "AdaBelief"]
arch = "unet"
loss_name = "BCEWeighted"
encoder_name = "xception"
weight = [0.02335332, 2.29093098, 4.60850608, 0.95243978, 0.16392607, 3.69650905, 10.48914825, 0.05858025, 2.10530202, 4.27744651, 1.27002274]

# Everything in params except the weight and the optimizer is the same for each run.
# NOTE(review): automatic_optimization is False only in this section — presumably
# required by how train_loop drives these optimizers; confirm.
shared_params = {"lr": 1e-4, "weight_decay": 0,
                 "loss_name": loss_name, "arch": arch,
                 "encoder_name": encoder_name, "automatic_optimization": False}
logger_save_path = "../data/logs/pretrained/"
callback_save_path = "../data/models/pretrained/"

for optimizer_name in optimizers:
    print(f"Optimizer: {optimizer_name}")
    # A new dict per run, with the same key order as the other experiment cells.
    params = {"weight": weight, "optimizer_name": optimizer_name, **shared_params}
    # callbacks utils: logger and callback are both named after the optimizer
    logger_name = callback_name = optimizer_name
    # train loop
    test_metrics = train_loop(params, test_metrics, logger_name, logger_save_path, callback_name,
                              callback_save_path, epochs, train_dataloader, val_dataloader, test_dataloader)

In [None]:
# Display the metrics table produced by the optimizer runs.
test_metrics

## Losses

In [None]:
# Loss comparison: model, optimizer and weights stay fixed; only the loss name
# changes between runs.
test_metrics = pd.DataFrame()  # fresh results table for this section

losses = ["DiceLoss", "FocalLoss", "TverskyLoss", "BiasLoss"]
arch, encoder_name, optimizer_name = "unet", "xception", "Adam"
weight = [0.02335332, 2.29093098, 4.60850608, 0.95243978, 0.16392607, 3.69650905, 10.48914825, 0.05858025, 2.10530202, 4.27744651, 1.27002274]

# Where logs and checkpoints go; identical for every loss.
logger_save_path = "../data/logs/pretrained/"
callback_save_path = "../data/models/pretrained/"

for loss_name in losses:
    print(f"Loss: {loss_name}")
    # Keyword form keeps the keys in the same order as a dict literal would.
    params = dict(
        weight=weight, optimizer_name=optimizer_name,
        lr=1e-4, weight_decay=0,
        loss_name=loss_name, arch=arch,
        encoder_name=encoder_name, automatic_optimization=True,
    )
    # The loss name labels both the logger and the callback.
    logger_name = callback_name = loss_name
    test_metrics = train_loop(params, test_metrics, logger_name, logger_save_path, callback_name,
                              callback_save_path, epochs, train_dataloader, val_dataloader, test_dataloader)

In [None]:
# Display the metrics table produced by the loss runs.
test_metrics