# Training with Distance Weight

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import random
import pickle

import matplotlib.pyplot as plt
import seaborn as sns
import os
import cv2

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.metrics import classification_report, accuracy_score

# created library
import dataaugmentation
import mydata
from skindetection import SkinExtraction
from skincolors import IndividualTypologyAngle
from performance import PerformanceMeasure, PerformanceEstimation
from distance import DistanceMeasure

from mymodels import TransDataset, EfficientB3Model
import training

INFO:albumentations.check_version:A new version of Albumentations is available: 1.4.14 (you have 1.4.10). Upgrade using: pip install --upgrade albumentations


In [2]:
db = "UTKFace"
root = f"../../dataset/AAAI 2025/{db}/"
polynomial_save_file = f"{root}model/pickle/polynomial_model_ef3.pkl"

# Load model
with open(polynomial_save_file, "rb") as f:
    polynomial_model = pickle.load(f)

BASELINE_FILE = "44_0_3_20170119195216221.jpg.chip.jpg"
STANDARD = 0.93

In [3]:
polynomial_model(np.array([45, 50]))

array([0.9640472, 0.9678538])

In [4]:
img = cv2.imread(f"{root}mask/{BASELINE_FILE}")
baseline_skin_pixels_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

ita = IndividualTypologyAngle(baseline_skin_pixels_image)
baseline_mean_ita = ita.get_mean_ita()
print(f"Conventional ITA values: {baseline_mean_ita}")
baseline_nuance_ita = ita.get_nuance_ita()

Conventional ITA values: 17.321600446516427


In [5]:
df_train = mydata.load_and_process_csv(f"{root}/dataframe/df_train.csv")
df_valid = mydata.load_and_process_csv(f"{root}/dataframe/df_valid.csv")
df_test = mydata.load_and_process_csv(f"{root}/dataframe/df_test.csv")

In [6]:
def calculate_distance_penalty(distances):
    diff = polynomial_model(np.array(distances)) - STANDARD
    diff = torch.tensor(diff)
    return diff

In [7]:
ycol="labels"
batch_size = 8
train_loader, valid_loader, test_loader = training.create_dataloaders(df_train, df_valid, df_test, ycol, batch_size)

H*W:  (200, 200)


In [8]:
import torch
import torch.nn as nn
from torchvision import transforms
import torch.nn.functional as F
from torch.optim import Adam
from torch.optim import lr_scheduler
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score

class CustomBCEWithLogitsLoss(nn.Module):
    def __init__(self):
        super(CustomBCEWithLogitsLoss, self).__init__()

    def forward(self, outputs, targets, distances, device): # criterion
        bce_loss = F.binary_cross_entropy(outputs, targets)
        penalty = calculate_distance_penalty(distances).to(device)
        return bce_loss + penalty

In [9]:
def train_model(model, train_loader, valid_loader, num_epochs=25, lr=1e-5):

    train_losses = []
    valid_losses = []
    train_f1s = []
    valid_f1s = []
    train_aucs = []
    valid_aucs = []
    train_accuracies = []
    valid_accuracies = []

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    criterion = CustomBCEWithLogitsLoss()
    optimizer = Adam(model.parameters(), lr=lr)
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda = lambda epoch: 0.95 ** epoch)

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        all_preds = []
        all_labels = []
        
        for inputs, labels, masks in train_loader:
            inputs, labels = inputs.to(device), labels.to(device) # data -> GPU
            optimizer.zero_grad()
            outputs = model(inputs)

            distances = []
            #print("masks:", masks.shape)
            for mask in masks:
                mask_np = mask.cpu().numpy()
                mask_np = np.transpose(mask_np, (1, 2, 0)) 
                mask_np = mask_np * 255
                mask_np = mask_np.astype(np.uint8)

                #print("mask_np max:", mask_np.max())
                #print("mask_np shape:", mask_np.shape)
                
                ita = IndividualTypologyAngle(mask_np)
                mask_nuance_ita = ita.get_nuance_ita()
                dm = DistanceMeasure(baseline_nuance_ita, mask_nuance_ita)
                distance = distance = dm.sign_wasserstein_distance()
                distances.append(distance)
                
            print("distances: ", distances[0])
            loss = criterion(outputs.squeeze(), labels, distances, device)
            loss.backward() # Partial Derivative
            optimizer.step()
            
            running_loss += loss.item() * inputs.size(0)
            all_preds.extend(outputs.detach().cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
        
        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_f1 = f1_score(all_labels, [1 if x >= 0 else 0 for x in all_preds])
        epoch_auc = roc_auc_score(all_labels, all_preds)
        epoch_acc = accuracy_score(all_labels, [1 if x >= 0 else 0 for x in all_preds])

        train_losses.append(epoch_loss)
        train_f1s.append(epoch_f1)
        train_aucs.append(epoch_auc)
        train_accuracies.append(epoch_acc)

        #print(f'Epoch {epoch}/{num_epochs - 1} | Loss: {epoch_loss:.4f} | F1: {epoch_f1:.4f} | AUC: {epoch_auc:.4f}')
        
        model.eval() # Validation だから。
        val_loss = 0.0
        val_preds = []
        val_labels = []
        
        with torch.no_grad():
            for inputs, labels in valid_loader:
                inputs, labels = inputs.to(device), labels.to(device) # data -> GPU
                outputs = model(inputs)
                loss = criterion(outputs.squeeze(), labels)
                
                val_loss += loss.item() * inputs.size(0)
                val_preds.extend(outputs.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())
        
        val_loss /= len(valid_loader.dataset)
        val_f1 = f1_score(val_labels, [1 if x >= 0.5 else 0 for x in val_preds])
        val_auc = roc_auc_score(val_labels, val_preds)
        val_acc = accuracy_score(val_labels, [1 if x >= 0.5 else 0 for x in val_preds])

        valid_losses.append(val_loss)
        valid_f1s.append(val_f1)
        valid_aucs.append(val_auc)
        valid_accuracies.append(val_acc)

        print(f'Validation Accuracy: {val_acc:.4f} | Loss: {val_loss:.4f} | F1: {val_f1:.4f} | AUC: {val_auc:.4f}')

        scheduler.step()
        
    epochs = range(num_epochs)
    plt.figure(figsize=(14, 10))
    
    plt.subplot(2, 2, 1)
    plt.plot(epochs, train_losses, label='Train Loss')
    plt.plot(epochs, valid_losses, label='Valid Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Loss')
    plt.legend()
    
    plt.subplot(2, 2, 2)
    plt.plot(epochs, train_accuracies, label='Train Accuracy')
    plt.plot(epochs, valid_accuracies, label='Valid Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.title('Accuracy')
    plt.legend()
    
    plt.subplot(2, 2, 3)
    plt.plot(epochs, train_f1s, label='Train F1 Score')
    plt.plot(epochs, valid_f1s, label='Valid F1 Score')
    plt.xlabel('Epoch')
    plt.ylabel('F1 Score')
    plt.title('F1 Score')
    plt.legend()

    plt.show()

In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = EfficientB3Model().to(device) # model -> GPU

num_epochs = 20
lr = 0.0001
train_model(model, train_loader, valid_loader, num_epochs=num_epochs, lr=lr)

distances:  31.92417859646297


../aten/src/ATen/native/cuda/Loss.cu:94: operator(): block: [0,0,0], thread: [0,0,0] Assertion `input_val >= zero && input_val <= one` failed.
../aten/src/ATen/native/cuda/Loss.cu:94: operator(): block: [0,0,0], thread: [1,0,0] Assertion `input_val >= zero && input_val <= one` failed.
../aten/src/ATen/native/cuda/Loss.cu:94: operator(): block: [0,0,0], thread: [2,0,0] Assertion `input_val >= zero && input_val <= one` failed.
../aten/src/ATen/native/cuda/Loss.cu:94: operator(): block: [0,0,0], thread: [3,0,0] Assertion `input_val >= zero && input_val <= one` failed.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
