## Import Libs

In [1]:
import os
import random
import gc
import time
import copy
import sys
import numpy as np
import pandas as pd
from io import BytesIO
import shutil

from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn.functional as F
from torch import nn
import timm
from sklearn.metrics import roc_curve, auc, roc_auc_score

from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from tqdm import tqdm
from collections import defaultdict

import warnings # silence warnings that can safely be ignored
# Installs a blanket 'ignore' filter: every warning raised after this line is suppressed.
warnings.filterwarnings('ignore')
# For colored terminal text
from colorama import Fore, Back, Style
# Short aliases for the colour codes (not referenced by any code visible in this notebook).
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

  from .autonotebook import tqdm as notebook_tqdm


## CONFIG

In [2]:
# Debug switch; not referenced by any code visible in this notebook.
is_debug = False

class CONFIG:
    """Static settings for the out-of-fold TTA evaluation run.

    Used purely as a namespace of class attributes; it is never instantiated.
    """
    seed = 308

    # 256 tiny_vit ---------- 3h+
    # 512 efficientnet convnext -------------- 17mins+
    #     efficientvit_b0 --------------- 7mins+
    # (presumably batch size / backbone / observed runtime notes -- TODO confirm)
    test_batch_size = 512
    img_size = [160, 160]  # passed to A.Resize as (img_size[0], img_size[1])
    n_classes = 1          # output width of the linear head
    n_folds = 5            # number of folds / checkpoints that are evaluated
    formatted_time = None  # only referenced by commented-out export cells
    ckpt_save_path = None  # only referenced by commented-out export cells

    n_accumulate = 1.0     # not used by any code visible in this notebook
    # os.cpu_count() may return None when the CPU count cannot be determined;
    # DataLoader needs an integer, so fall back to 0 (load in the main process).
    n_workers = os.cpu_count() or 0

    DataParallel = False
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Candidate timm backbone names (ISIC2024Model has a branch for each family):
    #   tf_efficientnet_b0_ns
    #   tiny_vit_21m_512.dist_in22k_ft_in1k
    #   eva02_base_patch14_224.mim_in22k
    #   convnext_tiny.fb_in22k_ft_in1k_384
    #   tf_efficientnetv2_s.in21k_ft_in1k
    #   efficientvit_b0.r224_in1k
    #
    #   edgenext_base.in21k_ft_in1k
    model_name = "tf_efficientnetv2_s.in21k_ft_in1k"
    # Directory holding one or more "<fold+1>_*.bin" checkpoints per fold.
    ckpt_path = "/data1/hjs/pythonProject/pythonProject/ISIC2024/output/2024-08-27_17:56:31_tf_efficientnetv2_s.in21k_ft_in1k_output"
    TTA_n = 8  # number of randomly augmented inference passes averaged per fold

    # Metadata CSV (must contain `isic_id`, `target` and `kfold` columns) and the
    # directory with the matching "<isic_id>.jpg" files.
    test_csv = "/data1/hjs/pythonProject/pythonProject/ISIC2024/my_train_with_sgkfold.csv"
    train_img_dir = "/data1/hjs/pythonProject/pythonProject/ISIC2024/image"

    # debug
#     test_csv = "/kaggle/input/isic-2024-challenge/train-metadata.csv"
#     test_img_hdf5 = "/kaggle/input/isic-2024-challenge/train-image.hdf5"

# Choose CUDA-related environment variables according to the DataParallel switch.
if not CONFIG.DataParallel:
    # Single-device run: ask CUDA for synchronous kernel launches.
    os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
    print("NO IN DataParallel!")
else:
    # Multi-GPU run: expose GPUs 0 and 1 to this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    print("IN DataParallel!")

NO IN DataParallel!


## Set Random Seed

In [3]:
def set_seed(seed=308):
    """Seed every random number generator used by this notebook.

    Seeds Python's `random`, NumPy's global RNG and PyTorch (CPU and all CUDA
    devices), and configures cuDNN for deterministic algorithm selection.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Exported for any child interpreter; string hashing of the already-running
    # interpreter is NOT affected, because PYTHONHASHSEED is only read at start-up.
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Cover every visible GPU explicitly (relevant for the DataParallel path).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run; keep it off.
    torch.backends.cudnn.benchmark = False
# Seed all RNGs once, before any data loading or model construction.
set_seed(CONFIG.seed)

## Data Processing

In [4]:
# NOTE: despite its name, `test` holds the labelled metadata together with the fold
# assignment (file "my_train_with_sgkfold.csv"; the preview below shows the
# `target` and `kfold` columns). It is later split per fold by prepare_loaders.
test = pd.read_csv(CONFIG.test_csv)
test

Unnamed: 0,isic_id,target,patient_id,age_approx,sex,anatom_site_general,clin_size_long_diam_mm,image_type,tbp_tile_type,tbp_lv_A,...,iddx_full,iddx_1,iddx_2,iddx_3,iddx_4,iddx_5,mel_mitotic_index,mel_thick_mm,tbp_lv_dnn_lesion_confidence,kfold
0,ISIC_0015670,0,IP_1235828,60.0,male,lower extremity,3.04,TBP tile: close-up,3D: white,20.244422,...,Benign,Benign,,,,,,,97.517282,4
1,ISIC_0015845,0,IP_8170065,60.0,male,head/neck,1.10,TBP tile: close-up,3D: white,31.712570,...,Benign,Benign,,,,,,,3.141455,0
2,ISIC_0015864,0,IP_6724798,60.0,male,posterior torso,3.40,TBP tile: close-up,3D: XP,22.575830,...,Benign,Benign,,,,,,,99.804040,0
3,ISIC_0015902,0,IP_4111386,65.0,male,anterior torso,3.22,TBP tile: close-up,3D: XP,14.242329,...,Benign,Benign,,,,,,,99.989998,2
4,ISIC_0024200,0,IP_8313778,55.0,male,anterior torso,2.73,TBP tile: close-up,3D: white,24.725520,...,Benign,Benign,,,,,,,70.442510,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401054,ISIC_9999937,0,IP_1140263,70.0,male,anterior torso,6.80,TBP tile: close-up,3D: XP,22.574335,...,Benign,Benign,,,,,,,99.999988,4
401055,ISIC_9999951,0,IP_5678181,60.0,male,posterior torso,3.11,TBP tile: close-up,3D: white,19.977640,...,Benign,Benign,,,,,,,99.999820,1
401056,ISIC_9999960,0,IP_0076153,65.0,female,anterior torso,2.05,TBP tile: close-up,3D: XP,17.332567,...,Benign,Benign,,,,,,,99.999416,2
401057,ISIC_9999964,0,IP_5231513,30.0,female,anterior torso,2.80,TBP tile: close-up,3D: XP,22.288570,...,Benign,Benign,,,,,,,100.000000,0


## Dataset and DataLoader

In [5]:
def transform(img):
    """Run one image through the randomized augmentation pipeline.

    The pipeline is stochastic, so calling it repeatedly on the same image gives
    different results; the inference loop relies on this for test-time
    augmentation. A fresh `A.Compose` is built on every call.

    Args:
        img: image array in the format albumentations expects (MyDataset passes
            `np.array` of a PIL image).

    Returns:
        The augmented, resized and normalized image as emitted by `ToTensorV2`.
    """
    target_h, target_w = CONFIG.img_size[0], CONFIG.img_size[1]

    # Random flips / transpose.
    flips = [
        A.Transpose(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
    ]
    # Disabled in the original pipeline:
    #   A.RandomBrightness(limit=0.2, p=0.75)
    #   A.RandomContrast(limit=0.2, p=0.75)

    # Exactly one blur/noise variant, applied with probability 0.7.
    blur_or_noise = A.OneOf(
        [
            A.MotionBlur(blur_limit=5),
            A.MedianBlur(blur_limit=5),
            A.GaussianBlur(blur_limit=5),
            A.GaussNoise(var_limit=(5.0, 30.0)),
        ],
        p=0.7,
    )

    # Exactly one geometric distortion, applied with probability 0.7.
    distortion = A.OneOf(
        [
            A.OpticalDistortion(distort_limit=1.0),
            A.GridDistortion(num_steps=5, distort_limit=1.),
            A.ElasticTransform(alpha=3),
        ],
        p=0.7,
    )

    # Colour / contrast changes followed by a small affine jitter.
    colour_and_affine = [
        A.CLAHE(clip_limit=4.0, p=0.7),
        A.HueSaturationValue(hue_shift_limit=10, sat_shift_limit=20, val_shift_limit=10, p=0.5),
        A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=15, border_mode=0, p=0.85),
    ]

    # Deterministic tail: resize to the configured size, normalize, convert to tensor.
    # Disabled in the original pipeline (would sit between Resize and Normalize):
    #   A.Cutout(max_h_size=int(CONFIG.img_size[0] * 0.375), max_w_size=int(CONFIG.img_size[1] * 0.375), num_holes=1, p=0.7)
    finalize = [
        A.Resize(target_h, target_w),
        A.Normalize(),
        ToTensorV2(),
    ]

    pipeline = A.Compose(flips + [blur_or_noise, distortion] + colour_and_affine + finalize)
    augmented = pipeline(image=img)
    return augmented["image"]

In [6]:
class MyDataset(Dataset):
    """Dataset yielding `(image, label)` pairs for the rows of a metadata frame.

    Args:
        df: DataFrame with an `isic_id` column (image file stem) and a `target`
            column (label).
        transform: optional callable applied to the decoded image array; its
            return value is what `__getitem__` yields as the image.
        img_dir: directory containing "<isic_id>.jpg" files. Defaults to
            `CONFIG.train_img_dir` (resolved lazily on item access).
    """

    def __init__(self, df, transform=None, img_dir=None):
        super().__init__()
        self.df = df
        self.transform = transform
        self.img_dir = img_dir

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        # Positional lookup, so a filtered frame with a non-contiguous index works.
        row = self.df.iloc[idx, :]
        img_id = row.isic_id + ".jpg"
        label = torch.tensor(row.target, dtype=torch.float32)

        img_dir = CONFIG.train_img_dir if self.img_dir is None else self.img_dir
        img_path = os.path.join(img_dir, img_id)
        # Close the file deterministically and force 3-channel RGB so that
        # grayscale / RGBA files do not break the 3-channel pipeline downstream.
        with Image.open(img_path) as pil_img:
            img = np.array(pil_img.convert("RGB"))

        if self.transform is not None:
            img = self.transform(img)

        return img, label

In [7]:
def prepare_loaders(df, fold):
    """Build the DataLoader for the rows of `df` assigned to one fold.

    Args:
        df: metadata frame containing a `kfold` column.
        fold: fold id; only rows with `kfold == fold` are kept.

    Returns:
        A single non-shuffling DataLoader over `MyDataset` with the module-level
        `transform` attached.
    """
    fold_rows = df[df["kfold"] == fold]
    fold_dataset = MyDataset(df=fold_rows, transform=transform)
    return DataLoader(
        fold_dataset,
        batch_size=CONFIG.test_batch_size,
        num_workers=CONFIG.n_workers,
        shuffle=False,  # keep row order stable so repeated passes line up
        pin_memory=True,
    )

In [8]:
# test_loader = prepare_loaders(test, fold=0)
# x, y = next(iter(test_loader))
# x.shape
# y.shape

## Evaluation

In [9]:
def compute_pAUC(y_true, y_scores, min_tpr=0.8):
    """Partial area under the ROC curve above a minimum true-positive rate.

    Labels and scores are flipped so that the region TPR >= `min_tpr` of the
    original problem becomes the region FPR <= 1 - `min_tpr` of the flipped one,
    which `roc_auc_score(..., max_fpr=...)` can evaluate. The standardized value
    sklearn returns is then mapped back to an un-normalized area.

    Args:
        y_true: binary labels (0/1); any array-like.
        y_scores: predicted scores, higher meaning more likely positive; any
            array-like of the same length.
        min_tpr: lower TPR bound of the region that is integrated.

    Returns:
        The partial AUC as a float, or 0.0 when `y_true` holds fewer than two
        distinct classes (the ROC curve is undefined in that case).
    """
    y_true = np.asarray(y_true)
    if len(np.unique(y_true)) < 2:
        return 0.0  # or any other default value you consider appropriate
    max_fpr = abs(1 - min_tpr)

    # Swap the roles of the two classes.
    v_gt = np.abs(y_true - 1)
    # Flip in float64: in float32, distinct scores close to 0 can all round to
    # 1.0 after the subtraction, creating ties that change the ranking.
    v_pred = 1.0 - np.asarray(y_scores, dtype=np.float64)

    partial_auc_scaled = roc_auc_score(v_gt, v_pred, max_fpr=max_fpr)
    # Undo the standardization sklearn applies when max_fpr is given.
    partial_auc = 0.5 * max_fpr**2 + (max_fpr - 0.5 * max_fpr**2) / (1.0 - 0.5) * (partial_auc_scaled - 0.5)

    return partial_auc

## DL Model

In [10]:
def updata_req_grad(models, requires_grad=True):
    """Set `requires_grad` on every parameter of every model in `models`.

    Args:
        models: iterable of modules exposing `.parameters()`.
        requires_grad: value assigned to each parameter's `requires_grad` flag.
    """
    every_param = (param for net in models for param in net.parameters())
    for param in every_param:
        param.requires_grad = requires_grad

In [11]:
class GeMPool(nn.Module):
    """Generalized-mean pooling over the last two dimensions.

    Computes `mean(clamp(x, min=eps) ** p) ** (1 / p)` where the exponent `p`
    is a learnable parameter initialised to the given value.
    """

    def __init__(self, p=3, eps=1e-6):
        super().__init__()
        self.eps = eps
        # One-element learnable exponent.
        self.p = nn.Parameter(torch.ones(1) * p)

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)

    def gem(self, x, p=3, eps=1e-6):
        # Clamp first so the power is never taken of values below eps.
        powered = x.clamp(min=eps).pow(p)
        pooled = torch.mean(powered, dim=(-2, -1))
        return pooled.pow(1. / p)

    def __repr__(self):
        exponent = self.p.data.tolist()[0]
        return f'{self.__class__.__name__}(p={exponent:.4f}, eps={self.eps})'

In [12]:
class ISIC2024Model(nn.Module):
    """timm backbone followed by a single linear classification head.

    The backbone named by `CONFIG.model_name` is created without pretrained
    weights (weights are expected to come from a checkpoint loaded afterwards).
    Its own classifier layer is replaced by `nn.Identity` so that it emits
    features, which are fed to `self.head`.

    Raises:
        ValueError: if `CONFIG.model_name` belongs to none of the supported
            backbone families.
    """

    def __init__(self):
        super(ISIC2024Model, self).__init__()
        self.backbone = timm.create_model(model_name=CONFIG.model_name,
                                          pretrained=False)

        # Locate the backbone's final classifier, remember its input width and
        # neutralise it. The branch order matters and is kept as-is.
        if "efficientnet" in CONFIG.model_name:
            in_features = self.backbone.classifier.in_features
            self.backbone.classifier = nn.Identity()
        elif "convnext" in CONFIG.model_name or "tiny_vit" in CONFIG.model_name:
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        elif "eva" in CONFIG.model_name:
            # Backbone is left untouched here.
            # NOTE(review): assumes it already outputs 768-d features -- confirm.
            in_features = 768
        elif "efficientvit" in CONFIG.model_name:
            in_features = self.backbone.head.classifier[4].in_features
            self.backbone.head.classifier[4] = nn.Identity()
        elif "edgenext" in CONFIG.model_name:
            in_features = self.backbone.head.fc.in_features
            self.backbone.head.fc = nn.Identity()
        else:
            # Previously this fell through and failed later with an unbound
            # `in_features`; fail early with an actionable message instead.
            raise ValueError(
                f"Unsupported backbone '{CONFIG.model_name}': expected a name containing one of "
                "'efficientnet', 'convnext', 'tiny_vit', 'eva', 'efficientvit', 'edgenext'"
            )

        self.head = nn.Sequential(
            nn.Linear(in_features, CONFIG.n_classes)
        )

    def forward(self, x):
        """Return the raw head outputs (no sigmoid applied) for a batch `x`."""
        _tmp = self.backbone(x)
        output = self.head(_tmp)
        return output

## Load Models

In [13]:
from collections import OrderedDict

def remove_module(path):
    """Load a state dict and strip the leading 'module.' from its keys.

    Checkpoints saved from an `nn.DataParallel`-wrapped model carry a 'module.'
    prefix on every key; removing it lets the weights load into the bare model.
    Only leading prefixes are removed, so keys that merely contain 'module.'
    further inside (e.g. '...submodule.weight') are left intact.

    Args:
        path: path of the checkpoint file.

    Returns:
        OrderedDict mapping the cleaned keys to the loaded tensors, in the
        original order.
    """
    prefix = 'module.'

    # Load the saved state dict onto the CPU so it also works on machines without
    # the GPU it was saved from; load_state_dict copies to the model's device.
    # NOTE(review): torch.load unpickles the file -- only use trusted checkpoints.
    state_dict = torch.load(path, map_location="cpu")

    # Rebuild the mapping with the 'module.' prefix removed.
    new_state_dict = OrderedDict()
    for key, value in state_dict.items():
        while key.startswith(prefix):
            key = key[len(prefix):]
        new_state_dict[key] = value
    return new_state_dict

In [14]:
models = []


def _leading_digits(name):
    """Return the run of decimal digits at the start of `name` ('' if none)."""
    return name[:len(name) - len(name.lstrip("0123456789"))]


# Pick one checkpoint per fold: files are expected to start with "<fold + 1>",
# and among a fold's candidates the last one in sorted order is used.
all_paths = sorted(os.listdir(CONFIG.ckpt_path))
paths = []
for i in range(CONFIG.n_folds):
    # Compare the whole leading number, not just the first character, so that
    # e.g. "10_..." is not mistaken for a fold-1 checkpoint.
    _tmp_paths = [path for path in all_paths if _leading_digits(path) == str(i + 1)]
    if not _tmp_paths:
        raise FileNotFoundError(
            f"No checkpoint starting with '{i + 1}' found in {CONFIG.ckpt_path}"
        )
    paths.append(_tmp_paths[-1])

# Manual override example:
# paths = ["1_CV0.1547_Loss0.6784_epoch2.bin",
#          "2_CV0.1708_Loss0.5088_epoch5.bin",
#          "3_CV0.1791_Loss0.3739_epoch7.bin",
#          "4_CV0.1654_Loss0.5382_epoch3.bin",
#          "5_CV0.1812_Loss0.4236_epoch8.bin"]

if CONFIG.DataParallel:
    device_ids = [0, 1]

for i in range(CONFIG.n_folds):
    ckpt_file = os.path.join(CONFIG.ckpt_path, paths[i])
    model = ISIC2024Model()
    if CONFIG.DataParallel:
        # Wrapped model: checkpoint keys are loaded as stored (with 'module.').
        model = torch.nn.DataParallel(model, device_ids=device_ids)
        model = model.cuda()
        model.load_state_dict(torch.load(ckpt_file))
    else:
        # Same device Infer sends the images to (falls back to CPU without CUDA).
        model = model.to(CONFIG.device)
        # Bare model: strip the 'module.' prefix from the checkpoint keys first.
        model.load_state_dict(remove_module(ckpt_file))
    print(f"fold : {i} --------- path : {paths[i]}")
    model.eval()
    models.append(model)

fold : 0 --------- path : 1_CV_0.1514_Loss0.2055_epoch15.bin
fold : 1 --------- path : 2_CV_0.1445_Loss0.2757_epoch17.bin
fold : 2 --------- path : 3_CV_0.1704_Loss0.1593_epoch25.bin
fold : 3 --------- path : 4_CV_0.1492_Loss0.2547_epoch20.bin
fold : 4 --------- path : 5_CV_0.1627_Loss0.2155_epoch19.bin


## Infer Function

In [15]:
def Infer(model, test_loader):
    """Run `model` over every batch of `test_loader` without tracking gradients.

    Args:
        model: module called as `model(images)`.
        test_loader: iterable with a length, yielding `(images, labels)` batches.

    Returns:
        `(y_preds, y_trues)`: two NumPy arrays, the flattened raw model outputs
        (no sigmoid is applied here) and the labels, concatenated over batches.
    """
    batch_outputs, batch_labels = [], []
    progress = tqdm(test_loader, total=len(test_loader))

    with torch.no_grad():
        for images, labels in progress:
            if not CONFIG.DataParallel:
                images = images.to(CONFIG.device, dtype=torch.float)
            else:
                images = images.cuda().float()

            logits = model(images)
            batch_outputs.append(logits.detach().flatten().cpu().numpy())
            batch_labels.append(labels.numpy())

    return np.concatenate(batch_outputs), np.concatenate(batch_labels)

## Start Infer

In [16]:
# Predictions on the labelled data: the model at index `fold` scores only the
# rows whose `kfold` equals `fold` (presumably its held-out fold -- confirm).
oof = []
true = []

for fold in range(CONFIG.n_folds):
    fold_model = models[fold]
    tta_outputs = []
    # Every pass re-applies the random `transform`, giving a different view.
    for _ in range(CONFIG.TTA_n):
        fold_loader = prepare_loaders(test, fold)
        preds, trues = Infer(fold_model, fold_loader)
        tta_outputs.append(preds)
    print(f"fold {fold} TTA complete")

    # Average the raw outputs across passes, then squash to probabilities.
    mean_outputs = np.mean(tta_outputs, axis=0)
    preds = torch.from_numpy(mean_outputs).sigmoid().numpy()
    oof.append(preds)
    # Labels come from the last pass; the loader does not shuffle, so every
    # pass yields them in the same order.
    true.append(trues)

100%|██████████| 148/148 [00:47<00:00,  3.14it/s]

fold 0 TTA complete



100%|██████████| 174/174 [00:47<00:00,  3.65it/s]

fold 1 TTA complete



100%|██████████| 160/160 [00:43<00:00,  3.67it/s]

fold 2 TTA complete



100%|██████████| 132/132 [00:35<00:00,  3.67it/s]

fold 3 TTA complete



100%|██████████| 172/172 [00:46<00:00,  3.69it/s]

fold 4 TTA complete





## Local CV

In [17]:
# Flatten the per-fold lists into single 1-D arrays (order: fold 0, fold 1, ...).
# Guarded so that re-running this cell after the names already hold arrays is a
# no-op instead of raising inside np.concatenate.
if isinstance(true, list):
    true = np.concatenate(true)
if isinstance(oof, list):
    oof = np.concatenate(oof)

In [18]:
# Score the pooled per-fold predictions with compute_pAUC (default min_tpr=0.8).
local_cv = compute_pAUC(true, oof)
print("Local CV : ", local_cv)
# 0.1549645095369136  (value noted from an earlier run; presumably differs slightly because the TTA augmentations are random)

Local CV :  0.15496404593222513


In [19]:
# # Get the current timestamp
# current_time = time.time()
# print("当前时间戳:", current_time)

# # Convert the timestamp to a local-time struct
# local_time = time.localtime(current_time)

# # Format the local time
# CONFIG.formatted_time = time.strftime('%Y-%m-%d_%H:%M:%S', local_time)
# print("当前时间:", CONFIG.formatted_time)

# CONFIG.ckpt_save_path = f"output/{CONFIG.formatted_time}_kfold_ensemble_output"
# if os.path.exists(CONFIG.ckpt_save_path) is False:
#     os.makedirs(CONFIG.ckpt_save_path)

In [20]:
# np.save(f"result_analysis/{CONFIG.formatted_time}_{CONFIG.model_name}.npy", oof)
# df.to_csv(CONFIG.ckpt_save_path+"/ensemble_kfold.csv", index=False)

In [21]:
# source_files = CONFIG.ckpt_paths
# for source_file in source_files:
#     # Destination file path
#     file_name = source_file.split("/")[-1]
#     destination_file = os.path.join(CONFIG.ckpt_save_path, file_name)
#     # Copy the file
#     shutil.copyfile(source_file, destination_file)
#     print(f'文件已从 {source_file} 复制到 {destination_file}')