In [2]:
'''Use multiple rounds to get a more robust results'''
import torch
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc, balanced_accuracy_score, roc_auc_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
import torchvision
import shutil
import os

In [60]:
def cal_metrics(df):
    """
    calculate average accuracy, accuracy per skin type, PQD, DPM, EOM, EOpp0, EOpp1, EOdd, and NAR.
    Skin type in the input df should be in the range of [0,5].
    input val results csv path, type_indices: a list
    output a dic, 'acc_avg': value, 'acc_per_type': array[x,x,x], 'PQD', 'DPM', 'EOM'
    """
    is_binaryCLF = len(df["label"].unique()) == 2

    type_indices = sorted(list(df["fitzpatrick"].unique()))
    type_indices_binary = sorted(list(df["fitzpatrick_binary"].unique()))

    labels_array = np.zeros((len(type_indices), len(df["label"].unique())))
    correct_array = np.zeros((len(type_indices), len(df["label"].unique())))
    predictions_array = np.zeros((len(type_indices), len(df["label"].unique())))
    prob_array = [[] for i in range(len(type_indices))]
    label_array_per_fitz = [[] for i in range(len(type_indices))]

    labels_array_binary = np.zeros((2, len(df["label"].unique())))
    correct_array_binary = np.zeros((2, len(df["label"].unique())))
    predictions_array_binary = np.zeros((2, len(df["label"].unique())))

    positive_list = []  # get positive probability for binary classification
    labels_ft0 = []
    labels_ft1 = []
    predictions_ft0 = []
    predictions_ft1 = []

    for i in range(df.shape[0]):
        prediction = df.iloc[i]["prediction"]
        label = df.iloc[i]["label"]
        type = df.iloc[i]["fitzpatrick"]
        type_binary = df.iloc[i]["fitzpatrick_binary"]

        labels_array[int(type), int(label)] += 1
        predictions_array[int(type), int(prediction)] += 1
        if prediction == label:
            correct_array[int(type), int(label)] += 1

        labels_array_binary[int(type_binary), int(label)] += 1
        predictions_array_binary[int(type_binary), int(prediction)] += 1
        if prediction == label:
            correct_array_binary[int(type_binary), int(label)] += 1

        if is_binaryCLF:
            prob_array[int(type)].append(df.iloc[i]["prediction_probability"])
            label_array_per_fitz[int(type)].append(label)
            if prediction == 0:
                positive_list.append(1.0 - df.iloc[i]["prediction_probability"])
            else:
                positive_list.append(df.iloc[i]["prediction_probability"])

        if type_binary == 0:
            labels_ft0.append(label)
            predictions_ft0.append(prediction)
        else:
            labels_ft1.append(label)
            predictions_ft1.append(prediction)

    correct_array = correct_array[type_indices]
    labels_array = labels_array[type_indices]
    predictions_array = predictions_array[type_indices]

    # Accuracy, accuracy per type
    Accuracy = accuracy_score(df["label"], df["prediction"]) * 100

    acc_array = []
    for i in range(len(type_indices)):
        acc_array.append(
            accuracy_score(
                df[df["fitzpatrick"] == i]["label"],
                df[df["fitzpatrick"] == i]["prediction"],
            )
            * 100
        )
    acc_array = np.array(acc_array)

    # f1_score, f1-score per type (Weighted average)
    F1_W = f1_score(df["label"], df["prediction"], average="weighted") * 100

    F1_W_array = []
    for i in range(len(type_indices)):
        F1_W_array.append(
            f1_score(
                df[df["fitzpatrick"] == i]["label"],
                df[df["fitzpatrick"] == i]["prediction"],
                average="weighted",
            )
            * 100
        )
    F1_W_array = np.array(F1_W_array)

    # f1_score, f1-score per type (Macro average)
    F1_Mac = f1_score(df["label"], df["prediction"], average="macro") * 100

    F1_Mac_array = []
    for i in range(len(type_indices)):
        F1_Mac_array.append(
            f1_score(
                df[df["fitzpatrick"] == i]["label"],
                df[df["fitzpatrick"] == i]["prediction"],
                average="macro",
            )
            * 100
        )
    F1_Mac_array = np.array(F1_Mac_array)

    # PQD
    PQD = acc_array.min() / acc_array.max()

    # DPM
    demo_array = predictions_array / np.sum(predictions_array, axis=1, keepdims=True)
    DPM = np.mean(demo_array.min(axis=0) / demo_array.max(axis=0))

    # EOM
    eo_array = correct_array / labels_array
    EOM = np.mean(np.nanmin(eo_array, axis=0) / np.nanmax(eo_array, axis=0))

    # NAR
    NAR = (acc_array.max() - acc_array.min()) / acc_array.mean()

    # NFR (Weighted)
    NFR_W = (F1_W_array.max() - F1_W_array.min()) / F1_W_array.mean()

    # NAR (Macro)
    NFR_Mac = (F1_Mac_array.max() - F1_Mac_array.min()) / F1_Mac_array.mean()

    # AUC
    if is_binaryCLF:
        # AUC per skin type
        AUC = roc_auc_score(df["label"], df["prediction_probability"]) * 100
        AUC_per_type = []
        for i in range(len(label_array_per_fitz)):
            try:
                AUC_per_type.append(
                    roc_auc_score(label_array_per_fitz[i], prob_array[i]) * 100
                )
            except:
                AUC_per_type.append(np.nan)
        AUC_Gap = max(AUC_per_type) - min(AUC_per_type)
    else:
        AUC = -1
        AUC_per_type = [-1] * len(type_indices)
        AUC_Gap = -1

    ##############################          Metrics with binary Sensative attribute         ##############################

    correct_array_binary = correct_array_binary[type_indices_binary]
    labels_array_binary = labels_array_binary[type_indices_binary]
    predictions_array_binary = predictions_array_binary[type_indices_binary]

    # avg acc, acc per type
    correct_array_sumc_binary, labels_array_sumc_binary = np.sum(
        correct_array_binary, axis=1
    ), np.sum(
        labels_array_binary, axis=1
    )  # sum skin conditions
    acc_array_binary = correct_array_sumc_binary / labels_array_sumc_binary
    avg_acc_binary = (np.sum(correct_array_binary) / np.sum(labels_array_binary)) * 100

    # PQD
    PQD_binary = acc_array_binary.min() / acc_array_binary.max()

    # DPM
    demo_array_binary = predictions_array_binary / np.sum(
        predictions_array_binary, axis=1, keepdims=True
    )
    DPM_binary = np.mean(demo_array_binary.min(axis=0) / demo_array_binary.max(axis=0))

    # EOM
    eo_array_binary = correct_array_binary / labels_array_binary
    EOM_binary = np.mean(
        np.nanmin(eo_array_binary, axis=0) / np.nanmax(eo_array_binary, axis=0)
    )

    # getting class-wise TPR, FPR, TNR for fitzpatrick 0
    conf_matrix_fitz0 = confusion_matrix(labels_ft0, predictions_ft0)

    # Initialize lists to store TPR, TNR, FPR for each class
    class_tpr_fitz0 = []
    class_tnr_fitz0 = []
    class_fpr_fitz0 = []

    for i in range(len(conf_matrix_fitz0)):
        # Calculate TPR for class i
        tpr = conf_matrix_fitz0[i, i] / sum(conf_matrix_fitz0[i, :])
        class_tpr_fitz0.append(tpr)

        # Calculate TNR for class i
        tn = (
            sum(sum(conf_matrix_fitz0))
            - sum(conf_matrix_fitz0[i, :])
            - sum(conf_matrix_fitz0[:, i])
            + conf_matrix_fitz0[i, i]
        )
        fp = sum(conf_matrix_fitz0[:, i]) - conf_matrix_fitz0[i, i]
        fn = sum(conf_matrix_fitz0[i, :]) - conf_matrix_fitz0[i, i]
        tnr = tn / (tn + fp)
        class_tnr_fitz0.append(tnr)

        # Calculate FPR for class i
        fpr = 1 - tnr
        class_fpr_fitz0.append(fpr)

    # getting class-wise TPR, FPR, TNR for fitzpatrick 1

    conf_matrix_fitz1 = confusion_matrix(labels_ft1, predictions_ft1)
    
    # Check if there is any class that is not in both subgroups to handle it
    try:
        class_idx = (set(df[df['fitzpatrick_binary'] == 0]['label'].unique()) - set(df[df['fitzpatrick_binary'] == 1]['label'].unique())).pop()
        conf_matrix_fitz1 = np.insert(conf_matrix_fitz1, class_idx, 0, axis=1)
        conf_matrix_fitz1 = np.insert(conf_matrix_fitz1, class_idx, 0, axis=0)
        print(f"INFO: class {class_idx} is not in both binary subgroups")
    except:
        class_idx = None

    # Initialize lists to store TPR, TNR, FPR for each class
    class_tpr_fitz1 = []
    class_tnr_fitz1 = []
    class_fpr_fitz1 = []

    for i in range(len(conf_matrix_fitz1)):
        # Calculate TPR for class i
        tpr = conf_matrix_fitz1[i, i] / sum(conf_matrix_fitz1[i, :])
        class_tpr_fitz1.append(tpr)

        # Calculate TNR for class i
        tn = (
            sum(sum(conf_matrix_fitz1))
            - sum(conf_matrix_fitz1[i, :])
            - sum(conf_matrix_fitz1[:, i])
            + conf_matrix_fitz1[i, i]
        )
        fp = sum(conf_matrix_fitz1[:, i]) - conf_matrix_fitz1[i, i]
        fn = sum(conf_matrix_fitz1[i, :]) - conf_matrix_fitz1[i, i]
        tnr = tn / (tn + fp)
        class_tnr_fitz1.append(tnr)

        # Calculate FPR for class i
        fpr = 1 - tnr
        class_fpr_fitz1.append(fpr)

    if class_idx is not None:
        class_tpr_fitz1[class_idx] = np.nan
        class_tnr_fitz1[class_idx] = np.nan
        class_fpr_fitz1[class_idx] = np.nan
    
    # EOpp0
    EOpp0 = 0
    for c in range(len(class_tnr_fitz0)):
        val = abs(class_tnr_fitz1[c] - class_tnr_fitz0[c])
        if not np.isnan(val):
            EOpp0 += val

    # EOpp1
    EOpp1 = 0
    for c in range(len(class_tpr_fitz0)):
        val = abs(class_tpr_fitz1[c] - class_tpr_fitz0[c])
        if not np.isnan(val):
            EOpp1 += val

    # EOdd
    EOdd = 0
    for c in range(len(class_tpr_fitz0)):
        val = abs(
            class_tpr_fitz1[c]
            - class_tpr_fitz0[c]
            + class_fpr_fitz1[c]
            - class_fpr_fitz0[c]
        )
        if not np.isnan(val):
            EOdd += val

    # NAR
    NAR_binary = (
        acc_array_binary.max() - acc_array_binary.min()
    ) / acc_array_binary.mean()

    return {
        "accuracy": Accuracy,
        "acc_per_type": acc_array,
        "acc_gap": acc_array.max() - acc_array.min(),
        "F1_W": F1_W,
        "F1_per_type_W": F1_W_array,
        "F1_W_gap": max(F1_W_array) - min(F1_W_array),
        "F1_Mac": F1_Mac,
        "F1_per_type_Mac": F1_Mac_array,
        "F1_Mac_gap": max(F1_Mac_array) - min(F1_Mac_array),
        "PQD": PQD,
        "DPM": DPM,
        "EOM": EOM,
        "EOpp0": EOpp0,
        "EOpp1": EOpp1,
        "EOdd": EOdd,
        "NAR": NAR,
        "NFR_W": NFR_W,
        "NFR_Mac": NFR_Mac,
        "AUC": AUC,
        "AUC_per_type": AUC_per_type,
        "AUC_Gap": AUC_Gap,
        "AUC_min": min(AUC_per_type),
        "acc_avg_binary": avg_acc_binary,
        "acc_per_type_binary": acc_array_binary,
        "PQD_binary": PQD_binary,
        "DPM_binary": DPM_binary,
        "EOM_binary": EOM_binary,
        "NAR_binary": NAR_binary,
    }

# Baseline - Fitzpatrick17k

In [61]:
df = pd.read_csv('/home/ali/Outputs/Fitzpatrick17k/XTranPrune_baseline_High/validation_results_DiT_S_LRP_level=high_epoch=50_random_holdout.csv')
cal_metrics(df=df)

{'accuracy': 86.16921635966281,
 'acc_per_type': array([83.94648829, 84.39869989, 85.90116279, 89.66725044, 90.93851133,
        83.33333333]),
 'acc_gap': 7.605177993527491,
 'F1_W': 85.57893397463474,
 'F1_per_type_W': array([83.40888099, 83.78928604, 85.37177898, 89.01159925, 90.69159775,
        79.75499454]),
 'F1_W_gap': 10.936603202838214,
 'F1_Mac': 76.062091572557,
 'F1_per_type_Mac': array([75.65462165, 75.15406073, 75.42657695, 79.19319497, 81.97530864,
        58.58452951]),
 'F1_Mac_gap': 23.390779136393164,
 'PQD': 0.916370106761566,
 'DPM': 0.5278897673948356,
 'EOM': 0.6284678839710286,
 'EOpp0': 0.09241948268945266,
 'EOpp1': 0.13334040160654603,
 'EOdd': 0.19503690128792983,
 'NAR': 0.08805933919376732,
 'NFR_W': 0.12815627580814173,
 'NFR_Mac': 0.31468241923335377,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 86.16921635966281,
 'acc_per_type_binary': array([0.84744228, 0.89336016]),
 'PQD_binary': 0.948600

In [34]:
df = pd.read_csv('/home/ali/Outputs/SkinFormer_baseline_3class/validation_results_DiT_S_LRP_level=high_epoch=50_random_holdout.csv')
cal_metrics(df=df)

{'accuracy': 86.16921635966281,
 'acc_per_type': array([83.94648829, 84.39869989, 85.90116279, 89.66725044, 90.93851133,
        83.33333333]),
 'acc_gap': 7.605177993527491,
 'F1_W': 85.57893397463474,
 'F1_per_type_W': array([83.40888099, 83.78928604, 85.37177898, 89.01159925, 90.69159775,
        79.75499454]),
 'F1_W_gap': 10.936603202838214,
 'F1_Mac': 76.062091572557,
 'F1_per_type_Mac': array([75.65462165, 75.15406073, 75.42657695, 79.19319497, 81.97530864,
        58.58452951]),
 'F1_Mac_gap': 23.390779136393164,
 'PQD': 0.916370106761566,
 'DPM': 0.5278897673948356,
 'EOM': 0.6284678839710286,
 'EOpp0': 0.09241948268945266,
 'EOpp1': 0.13334040160654603,
 'EOdd': 0.19503690128792983,
 'NAR': 0.08805933919376732,
 'NFR_W': 0.12815627580814173,
 'NFR_Mac': 0.31468241923335377,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 86.16921635966281,
 'acc_per_type_binary': array([0.84744228, 0.89336016]),
 'PQD_binary': 0.948600

In [19]:
df = pd.read_csv('/home/ali/Outputs/SkinFormer_baseline_2class/validation_results_DiT_S_LRP_level=binary_epoch=50_random_holdout.csv')
cal_metrics(df=df)

{'accuracy': 93.78707461754605,
 'acc_per_type': array([93.81270903, 92.30769231, 92.29651163, 96.84763573, 96.76375405,
        91.22807018]),
 'acc_gap': 5.619565551356516,
 'F1_W': 93.58431062096106,
 'F1_per_type_W': [93.67097036561044,
  92.17603050992126,
  91.90241448181142,
  96.76754633797887,
  96.60765906058379,
  89.89139515455304],
 'F1_W_gap': 6.876151183425833,
 'F1_Mac': 85.80282675201939,
 'F1_per_type_Mac': [86.99350439408636,
  84.45586145880031,
  83.74631702348636,
  90.64070809353828,
  90.18174885612609,
  69.84126984126983],
 'F1_Mac_gap': 20.799438252268445,
 'PQD': 0.9419751911424128,
 'DPM': 0.6443480064767824,
 'EOM': 0.6964731050522976,
 'EOpp0': 0.044660586462973995,
 'EOpp1': 0.044660586462973995,
 'EOdd': 0.00828135841152311,
 'NAR': 0.05986153895383024,
 'AUC': 93.51590984421611,
 'AUC_per_type': [93.76476145488898,
  92.24324621935105,
  92.00706736691954,
  98.11373092926492,
  96.14759805059178,
  86.19281045751633],
 'AUC_Gap': 11.920920471748587,
 

# Baseline - HIBA

In [6]:
df = pd.read_csv('/home/ali/Outputs/HIBA/XTranPrune_baseline_Binary/validation_results_DiT_S_LRP_level=binary_epoch=50_random_holdout.csv')
cal_metrics(df=df)

  eo_array = correct_array / labels_array
  eo_array_binary = correct_array_binary / labels_array_binary
  tnr = tn / (tn + fp)
  tpr = conf_matrix_fitz1[i, i] / sum(conf_matrix_fitz1[i, :])


{'accuracy': 85.66666666666667,
 'acc_per_type': array([87.5       , 85.38812785, 86.79245283, 75.        ]),
 'acc_gap': 12.5,
 'F1_W': 85.6767054655683,
 'F1_per_type_W': array([87.33396584, 85.36796933, 86.91742747, 85.71428571]),
 'F1_W_gap': 1.9659965097861374,
 'F1_Mac': 85.65885871196541,
 'F1_per_type_Mac': array([86.33776091, 85.35117057, 84.04301075, 42.85714286]),
 'F1_Mac_gap': 43.48061805367309,
 'PQD': 0.8571428571428571,
 'DPM': 0.4097222222222222,
 'EOM': 0.8476890756302522,
 'EOpp0': 0.08766233766233766,
 'EOpp1': 0.08766233766233766,
 'EOdd': 0,
 'NAR': 0.14939617918016804,
 'NFR_W': 0.022772139571147803,
 'NFR_Mac': 0.5824810111954692,
 'AUC': 91.83455161347834,
 'AUC_per_type': [94.07407407407409,
  90.95460614152204,
  92.63157894736842,
  nan],
 'AUC_Gap': 3.1194679325520553,
 'AUC_min': 90.95460614152204,
 'acc_avg_binary': 85.66666666666667,
 'acc_per_type_binary': array([0.85810811, 0.75      ]),
 'PQD_binary': 0.8740157480314961,
 'DPM_binary': 0.5754954954954

# Baseline - PAD-UFES-20

In [62]:
df = pd.read_csv('/home/ali/Outputs/PAD-UFES-20/XTranPrune_baseline_low/validation_results_DiT_S_LRP_level=low_epoch=50_random_holdout.csv')
cal_metrics(df=df)

INFO: class 2 is not in both binary subgroups


  eo_array = correct_array / labels_array
  eo_array_binary = correct_array_binary / labels_array_binary
  tpr = conf_matrix_fitz1[i, i] / sum(conf_matrix_fitz1[i, :])


{'accuracy': 66.22073578595318,
 'acc_per_type': array([ 68.96551724,  61.01694915,  72.97297297,  83.33333333,
        100.        ]),
 'acc_gap': 38.983050847457626,
 'F1_W': 67.22827227129834,
 'F1_per_type_W': array([ 69.87983281,  62.72280974,  72.95562572,  84.03880071,
        100.        ]),
 'F1_W_gap': 37.27719026033031,
 'F1_Mac': 62.895744610129825,
 'F1_per_type_Mac': array([ 60.3219697 ,  55.84299982,  74.63985166,  81.58730159,
        100.        ]),
 'F1_Mac_gap': 44.157000176066404,
 'PQD': 0.6101694915254238,
 'DPM': 0.009259259259259259,
 'EOM': 0.5622301445830857,
 'EOpp0': 0.4939087859850173,
 'EOpp1': 1.504948692485932,
 'EOdd': 1.3459118783119046,
 'NAR': 0.5045843110448057,
 'NFR_W': 0.47840696490262474,
 'NFR_Mac': 0.5928831126666089,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 66.22073578595318,
 'acc_per_type_binary': array([0.65      , 0.84210526]),
 'PQD_binary': 0.7718750000000001,
 'DPM_binary': 0

# FairDisCo

In [38]:
df = pd.read_csv('/home/ali/Repos/FairDisCo/results_FairDisCo_20_high_random_holdout.csv')
df["fitzpatrick"] = df["fitzpatrick"] -1
df["fitzpatrick_binary"] = df['fitzpatrick'].apply(lambda x: 0 if x in [0,1,2] else 1)
cal_metrics(df=df)

{'accuracy': 85.32625663440524,
 'acc_per_type': array([84.94983278, 82.6652221 , 85.61046512, 88.7915937 , 88.99676375,
        79.8245614 ]),
 'acc_gap': 9.172202350536523,
 'F1_W': 85.10218028482439,
 'F1_per_type_W': array([84.73788129, 82.2500512 , 85.47753997, 88.70104838, 89.17545859,
        76.82774187]),
 'F1_W_gap': 12.347716721470704,
 'F1_Mac': 75.7750768226713,
 'F1_per_type_Mac': array([78.42927265, 72.70779094, 75.69122663, 79.69134744, 78.7585146 ,
        53.54775828]),
 'F1_Mac_gap': 26.1435891529335,
 'PQD': 0.8969377990430624,
 'DPM': 0.5066866704135623,
 'EOM': 0.5554191219408611,
 'EOpp0': 0.050108404368123804,
 'EOpp1': 0.07123415483175255,
 'EOdd': 0.0677418573614258,
 'NAR': 0.10773115317525356,
 'NFR_W': 0.14607792463934507,
 'NFR_Mac': 0.357457322258498,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 85.32625663440524,
 'acc_per_type_binary': array([0.84200996, 0.87826962]),
 'PQD_binary': 0.95871466

# FairME

In [67]:
df = pd.read_csv("/home/ali/Repos/Fair-Multi-Exit-Framework/outputs/train_me_resnet18_FITZ_test/Epoch135/df_pred_thresh=0.99.csv")
cal_metrics(df=df)


{'accuracy': 84.3896347174524,
 'acc_per_type': array([82.10702341, 81.14842904, 85.90116279, 88.26619965, 88.34951456,
        83.33333333]),
 'acc_gap': 7.201085527353811,
 'F1_W': 83.79698686438083,
 'F1_per_type_W': array([81.53662703, 80.28385514, 85.41788345, 87.81107499, 88.29674461,
        80.78498686]),
 'F1_W_gap': 8.012889463850755,
 'F1_Mac': 73.41447403109328,
 'F1_per_type_Mac': array([73.10886533, 69.71596197, 75.41725642, 77.25383431, 77.53727754,
        61.79167562]),
 'F1_Mac_gap': 15.745601915814682,
 'PQD': 0.918493207767314,
 'DPM': 0.5814818703985271,
 'EOM': 0.6718756840785239,
 'EOpp0': 0.03545556328320365,
 'EOpp1': 0.11998013005156838,
 'EOdd': 0.12317334672794966,
 'NAR': 0.08486747707313225,
 'NFR_W': 0.09536672089516343,
 'NFR_Mac': 0.2172681871619068,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 84.3896347174524,
 'acc_per_type_binary': array([0.82888185, 0.87726358]),
 'PQD_binary': 0.94484926

# Update old experiments metrics and plots

In [36]:
df = pd.read_csv('/home/ali/Outputs/Pruning/Main_0.8/PruningEXP10/validation_results_DeiT_S_LRP_PIter8_epoch=50_random_holdout.csv')
cal_metrics(df=df)

{'accuracy': 83.67155791445519,
 'acc_per_type': array([80.76923077, 80.82340195, 85.02906977, 87.04028021, 88.34951456,
        84.21052632]),
 'acc_gap': 7.580283793876021,
 'F1_W': 83.55995676460883,
 'F1_per_type_W': array([80.69009702, 80.69035505, 85.01351736, 87.06719509, 88.19010871,
        82.73190307]),
 'F1_W_gap': 7.500011697736653,
 'F1_Mac': 73.4787801269564,
 'F1_per_type_Mac': array([72.60159202, 71.44735369, 75.14482288, 75.83441673, 76.12043555,
        67.68328446]),
 'F1_Mac_gap': 8.437151093873723,
 'PQD': 0.9142011834319528,
 'DPM': 0.5560025407099024,
 'EOM': 0.7476783818940681,
 'EOpp0': 0.08877277961363683,
 'EOpp1': 0.051210296994573135,
 'EOdd': 0.11736182212804014,
 'NAR': 0.08984536556110118,
 'NFR_W': 0.08921802371909038,
 'NFR_Mac': 0.11535830906694038,
 'AUC': -1,
 'AUC_per_type': [-1, -1, -1, -1, -1, -1],
 'AUC_Gap': -1,
 'AUC_min': -1,
 'acc_avg_binary': 83.67155791445519,
 'acc_per_type_binary': array([0.82118606, 0.87122736]),
 'PQD_binary': 0.94256

In [39]:
df = pd.read_csv('/home/ali/Datasets/Fitz17k/FairME/new_train.csv')
df

Unnamed: 0.1,Unnamed: 0,md5hash,fitzpatrick_scale,fitzpatrick_centaur,label,nine_partition_label,three_partition_label,qc,url,url_alphanum,Path,binary_label,skin_type,skin_binary,low,high
0,2002,ad60e22c45bedeab890da4971d65c46a,1,1,erythema nodosum,inflammatory,non-neoplastic,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpiceeryth...,Images/ad60e22c45bedeab890da4971d65c46a.jpg,0,0,0,29,2
1,11225,600fa1053d015e9110a3ed7888e3c7d8,4,5,prurigo nodularis,benign epidermal,benign,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicppruri...,Images/600fa1053d015e9110a3ed7888e3c7d8.jpg,0,3,1,85,0
2,6485,c98ac6d896cd630653360d1fb6db7ec4,3,1,pyogenic granuloma,benign dermal,benign,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicppyoge...,Images/c98ac6d896cd630653360d1fb6db7ec4.jpg,0,2,0,88,0
3,14554,5d397d83fe773d3f20229663f2df0db2,2,2,pilomatricoma,benign dermal,benign,,http://atlasdermatologico.com.br/img?imageId=5475,httpwwwatlasdermatologicocombrimgimageId5475.jpg,Images/5d397d83fe773d3f20229663f2df0db2.jpg,0,1,0,77,0
4,11604,129058f2ba2ceb4be5091e5b133a66ca,4,4,mucinosis,inflammatory,non-neoplastic,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicmmucin...,Images/129058f2ba2ceb4be5091e5b133a66ca.jpg,0,3,1,58,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12804,5945,6e61009e6c362a51c8000d4fc14c8bca,1,1,allergic contact dermatitis,inflammatory,non-neoplastic,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicIirrit...,Images/6e61009e6c362a51c8000d4fc14c8bca.jpg,0,0,0,6,2
12805,14470,aae474e9c797f5bd5cd7956b479e6622,5,2,basal cell carcinoma,malignant epidermal,malignant,,http://atlasdermatologico.com.br/img?imageId=675,httpwwwatlasdermatologicocombrimgimageId675.jpg,Images/aae474e9c797f5bd5cd7956b479e6622.jpg,1,4,1,8,1
12806,12181,b06d3a806581e6188a42d140100d376e,3,4,papilomatosis confluentes and reticulate,inflammatory,non-neoplastic,1 Diagnostic,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicppapil...,Images/b06d3a806581e6188a42d140100d376e.jpg,0,2,0,71,2
12807,3727,bd49fd2eede16f45b49f08e72628caa4,1,2,scleroderma,inflammatory,non-neoplastic,,https://www.dermaamin.com/site/images/clinical...,httpwwwdermaamincomsiteimagesclinicalpicpprogr...,Images/bd49fd2eede16f45b49f08e72628caa4.jpg,0,0,0,93,2
