In [None]:
import pandas as pd
import numpy as np
import os
import sys
import matplotlib.pyplot as plt
import utils

In [None]:
def load_data(path, summary_path="./medical/data_summary.csv"):
    """Load every ``.npz`` sample of a folder and join it with the summary table.

    Parameters
    ----------
    path : str
        Folder that is scanned (non-recursively) for ``.npz`` archives. Each
        archive must contain a ``slo_fundus`` entry.
    summary_path : str, optional
        CSV file with one row per sample. It must provide a ``filename``
        column and a ``use`` column. Defaults to the location that was
        previously hard-coded.

    Returns
    -------
    pandas.DataFrame
        One row per archive that also has a row in the summary table (inner
        join on the extension-less ``filename``). Contains ``filename``,
        ``slo_fundus`` and every summary column except ``use``.
    """
    df_summary = pd.read_csv(summary_path)
    # Summary file names carry an extension; keep the part before the first dot
    # so they can be matched against the archive stems built below.
    df_summary["filename"] = df_summary["filename"].apply(lambda x: x.split(".")[0])

    data_list = []
    # Sorted so that the row order is reproducible: os.listdir returns entries
    # in arbitrary order.
    for filename in sorted(os.listdir(path)):
        if not filename.endswith('.npz'):
            continue
        file_stem = os.path.splitext(filename)[0]
        npz_path = os.path.join(path, filename)
        # NOTE(review): allow_pickle=True lets an archive execute arbitrary code
        # while it is unpickled. Only load archives from a trusted source.
        # The context manager closes the underlying file handle once the array
        # has been read.
        with np.load(npz_path, allow_pickle=True) as npz_data:
            data_list.append({
                "filename": file_stem,
                "slo_fundus": npz_data["slo_fundus"],
            })

    print(f"Loaded {len(data_list)} entries successfully.")
    # Explicit columns keep the merge key available even when no archive was found.
    df = pd.DataFrame(data_list, columns=["filename", "slo_fundus"])
    df = pd.merge(df, df_summary, on='filename', how='inner').drop(columns=["use"])
    return df

# Load the three dataset splits; the folders are listed in the same order as
# the names they are unpacked into.
df_train, df_test, df_val = map(
    load_data,
    ("./medical/Training", "./medical/Test", "./medical/Validation"),
)

# Distribution analysis

In [None]:
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def plot_distributions(df_train, df_test, df_val):
    """Show a 7x3 grid comparing attribute distributions across the three splits.

    Each column of the grid is one split (Train, Test, Val). Each row is one
    attribute: race, gender, age, ethnicity, language, marital status and the
    glaucoma label. Age is drawn as a line of counts per rounded age; every
    other attribute is drawn as a pie chart of its value counts.

    The input frames are only read. The rounded ages are computed on the fly
    instead of being stored in a new ``age_rounded`` column of the caller's
    frames.

    Parameters
    ----------
    df_train, df_test, df_val : pandas.DataFrame
        Frames providing the columns ``race``, ``gender``, ``age``,
        ``ethnicity``, ``language``, ``maritalstatus`` and ``glaucoma``.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    splits = [("Train", df_train), ("Test", df_test), ("Val", df_val)]
    # One entry per grid row: (subplot title, column, trace-name prefix, subplot type).
    panels = [
        ("Race Distribution", "race", "Race", "pie"),
        ("Gender Distribution", "gender", "Gender", "pie"),
        ("Age Distribution", "age", "Age", "scatter"),
        ("Ethnicity Distribution", "ethnicity", "Ethnicity", "pie"),
        ("Language Distribution", "language", "Language", "pie"),
        ("Marital Status Distribution", "maritalstatus", "Marital Status", "pie"),
        ("Healthy vs Sick", "glaucoma", "Healthy vs Sick", "pie"),
    ]

    fig = make_subplots(
        rows=len(panels), cols=len(splits),
        # Titles are consumed row by row, so the split varies fastest.
        subplot_titles=[
            f"{panel_title} ({split_name})"
            for panel_title, _, _, _ in panels
            for split_name, _ in splits
        ],
        specs=[
            [{"type": kind} for _ in splits]
            for _, _, _, kind in panels
        ]
    )

    for col_idx, (split_name, df) in enumerate(splits, start=1):
        for row_idx, (_, column, trace_prefix, kind) in enumerate(panels, start=1):
            trace_name = f"{trace_prefix} {split_name}"
            if kind == "scatter":
                # Ages are rounded before counting and ordered by age on the x axis.
                counts = df[column].round().value_counts().sort_index()
                trace = go.Scatter(x=counts.index, y=counts.values, mode='lines+markers', name=trace_name)
            else:
                counts = df[column].value_counts()
                trace = go.Pie(labels=counts.index, values=counts.values, name=trace_name)
            fig.add_trace(trace, row=row_idx, col=col_idx)

    fig.update_layout(
        title="Comparison of Distributions Across Datasets",
        height=2100,  # Adjusted height for 7 rows
        width=1200,
        showlegend=False
    )

    fig.show()

# Draw the distribution grid for the three loaded splits.
plot_distributions(df_train, df_test, df_val)

In [None]:
# Stack the three splits into one frame and tag every row with the split it came from.
split_frames = {"Train": df_train, "Test": df_test, "Val": df_val}
df_combined = pd.concat(list(split_frames.values()), axis=0, ignore_index=True)
df_combined['dataset'] = [
    split_name
    for split_name, frame in split_frames.items()
    for _ in range(len(frame))
]
df_combined.head()

In [None]:
from aif360.sklearn.metrics import *


def get_group_metrics(
    y_true,
    y_pred=None,
    prot_attr=None,
    priv_group=1,
    pos_label=1,
    sample_weight=None,
):
    """Collect group-fairness metrics for one protected attribute in a dict.

    The metric functions come from the star import of
    ``aif360.sklearn.metrics``. ``base_rate``,
    ``statistical_parity_difference`` and ``disparate_impact_ratio`` are always
    computed. The remaining five metrics are only added when ``y_pred`` is
    given.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like, optional
        Predicted labels. ``None`` restricts the result to the three metrics
        listed above.
    prot_attr : array-like, optional
        Protected attribute values, forwarded unchanged to the metric functions.
    priv_group : scalar, optional
        Value of ``prot_attr`` that marks the privileged group.
    pos_label : scalar, optional
        Label counted as the positive outcome.
    sample_weight : array-like, optional
        Per-sample weights forwarded to every metric.

    Returns
    -------
    dict
        Metric name -> value.
    """
    # Keyword bundles shared by the metric calls below; only some metrics
    # accept `priv_group`.
    shared = {
        "y_true": y_true,
        "y_pred": y_pred,
        "prot_attr": prot_attr,
        "pos_label": pos_label,
        "sample_weight": sample_weight,
    }
    shared_with_priv = {**shared, "priv_group": priv_group}

    group_metrics = {
        "base_rate": base_rate(
            y_true=y_true, pos_label=pos_label, sample_weight=sample_weight
        ),
        "statistical_parity_difference": statistical_parity_difference(**shared_with_priv),
        "disparate_impact_ratio": disparate_impact_ratio(**shared_with_priv),
    }

    # Without predictions there is nothing more to compute.
    if y_pred is None:
        return group_metrics

    group_metrics["equal_opportunity_difference"] = equal_opportunity_difference(**shared_with_priv)
    group_metrics["average_odds_difference"] = average_odds_difference(**shared_with_priv)
    group_metrics["conditional_demographic_disparity"] = conditional_demographic_disparity(**shared)
    group_metrics["smoothed_edf"] = smoothed_edf(**shared)
    group_metrics["df_bias_amplification"] = df_bias_amplification(**shared)
    return group_metrics




# Value each fairness metric takes when all groups are treated identically.
# Differences and disparities vanish at 0, and the disparate-impact *ratio*
# equals 1. smoothed_edf is the differential-fairness epsilon (the largest
# absolute log-ratio of smoothed rates between groups), so its fair value is
# 0, not 1.
ideal_values = {
    "statistical_parity_difference": 0.0,
    "disparate_impact_ratio": 1.0,
    "equal_opportunity_difference": 0.0,
    "average_odds_difference": 0.0,
    "conditional_demographic_disparity": 0.0,
    "smoothed_edf": 0.0,
    "df_bias_amplification": 0.0
}



def check_distance_to_ideal(metrics, ideals=None):
    """Euclidean distance between computed metrics and their ideal values.

    Parameters
    ----------
    metrics : mapping
        Metric name -> computed value, e.g. the dict returned by
        ``get_group_metrics``. Keys without an ideal value (such as
        ``base_rate`` or ``attribute``) are ignored.
    ideals : mapping, optional
        Metric name -> ideal value. Defaults to the module-level
        ``ideal_values``.

    Returns
    -------
    numpy.float64
        Square root of the summed squared deviations, taken over the metrics
        that appear in both mappings. Metrics missing from ``metrics`` are
        skipped, so distances are only comparable between results that
        contain the same metrics. A NaN metric value yields NaN.

    Raises
    ------
    KeyError
        If ``metrics`` contains none of the metrics that have an ideal value.
    """
    if ideals is None:
        ideals = ideal_values

    # Results computed without predictions hold only a subset of the metrics;
    # compare what is available instead of failing on the first missing key.
    shared = [name for name in ideals if name in metrics]
    if not shared:
        raise KeyError("metrics contains none of the metrics that have an ideal value")

    dist = sum(((ideals[name] - metrics[name]) ** 2 for name in shared), 0.0)
    return np.sqrt(dist)

In [None]:
from sklearn.preprocessing import LabelEncoder
from aif360.datasets import BinaryLabelDataset

# Remove identifier, image, free-text and age columns before building the
# fairness dataset. 'age_rounded' is listed next to 'age' because the plotting
# step may have attached it to df_test. Columns that are absent are skipped,
# so the cell does not depend on which earlier cells were run.
df_test_numeric = df_test.drop(columns=[
    col
    for col in ['filename', 'slo_fundus', 'age', 'age_rounded', 'note', 'gpt4_summary']
    if col in df_test.columns
])

def convert_to_numerical_with_encodings(df):
    """Label-encode the categorical columns of a copy of ``df``.

    Columns with ``object``, pandas string or ``category`` dtype are replaced
    by integer codes from a fresh ``LabelEncoder``. All other columns are left
    untouched. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode.

    Returns
    -------
    df_numeric : pandas.DataFrame
        Copy of ``df`` with the categorical columns replaced by their codes.
    encodings : dict
        Column name -> ``{original label: integer code}`` for every encoded
        column. The codes are plain Python ints.
    """
    df_numeric = df.copy()
    encodings = {}
    for col in df_numeric.columns:
        dtype = df_numeric[col].dtype
        # `dtype == 'object'` alone misses string-typed and categorical columns,
        # which would then reach the fairness dataset un-encoded.
        is_categorical = (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        )
        if not is_categorical:
            continue
        le = LabelEncoder()
        df_numeric[col] = le.fit_transform(df_numeric[col])
        # The code of a class is its position in `classes_`, so the mapping can
        # be read off directly instead of transforming the classes again.
        encodings[col] = {label: code for code, label in enumerate(le.classes_)}
    return df_numeric, encodings


# Replace the object columns by integer codes; `encodings` keeps the
# label -> code mapping of every encoded column.
df_test_numeric, encodings = convert_to_numerical_with_encodings(df_test_numeric)


def create_binary_label_dataset(df, label_column, protected_attributes):
    """Build a ``BinaryLabelDataset`` from a frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed to ``BinaryLabelDataset`` as ``df``.
    label_column : str
        Name of the label column; passed as a one-element ``label_names`` list.
    protected_attributes : list of str
        Passed as ``protected_attribute_names``.

    Returns
    -------
    BinaryLabelDataset
    """
    dataset_kwargs = {
        "df": df,
        "label_names": [label_column],
        "protected_attribute_names": protected_attributes,
    }
    return BinaryLabelDataset(**dataset_kwargs)


# Column used as the label of the BinaryLabelDataset.
label_column = 'glaucoma'  
# Columns declared as protected attributes of the dataset.
protected_attributes = ['gender', 'race', 'ethnicity', 'language', 'maritalstatus']  


# Wrap the encoded test frame so labels and protected attributes can be read
# back from the dataset object.
dataset = create_binary_label_dataset(df_test_numeric, label_column, protected_attributes)

In [None]:
# Display the label -> integer code mapping of every encoded column.
encodings

In [None]:
# Encoded value of the privileged group of each protected attribute, listed in
# the same order as `attribute_names`. The codes refer to the label encoding
# held in `encodings` and must be re-checked whenever the categories change.
priviliged_groups = [0,2,1,0,2]
attribute_names = ['gender', 'race', 'ethnicity', 'language', 'maritalstatus']

# Bound to its own name so that the `protected_attributes` list of column names
# is not replaced by an array.
prot_attr_matrix = dataset.protected_attributes
# Flattened so the metric functions receive a 1-D label vector whatever shape
# the dataset stores its labels in.
y_true = np.ravel(dataset.labels)

n_prot_attrs = prot_attr_matrix.shape[1]
# The three sequences are indexed in parallel below; fail early if they disagree.
if not (len(attribute_names) == len(priviliged_groups) == n_prot_attrs):
    raise ValueError(
        "attribute_names, priviliged_groups and the dataset's protected "
        "attributes must have the same length"
    )

all_metrics = []

for i, attr_name in enumerate(attribute_names):
    prot_attr = prot_attr_matrix[:, i]
    priv_group = priviliged_groups[i]

    try:
        # y_pred=None: only the metrics that describe the dataset itself.
        metrics = get_group_metrics(
            y_true=y_true,
            y_pred=None,
            prot_attr=prot_attr,
            priv_group=priv_group,
            pos_label=1
        )
    except Exception as e:
        # Best effort: report the attribute that failed and continue with the others.
        print(f"[{attr_name}] Error computing metrics: {e}")
        continue
    metrics['attribute'] = attr_name
    all_metrics.append(metrics)


In [None]:
# One row per protected attribute, one column per metric.
df_metrics = pd.DataFrame(all_metrics)

# Long format (attribute, metric, value); rows whose value is NaN are dropped.
df_melted = (
    df_metrics
    .melt(id_vars='attribute', var_name='metric', value_name='value')
    .dropna(subset=['value'])
)

# One bar series per metric, grouped by protected attribute.
fig = go.Figure()
for metric_name in df_melted['metric'].unique():
    if metric_name != 'attribute':
        df_metric = df_melted.loc[df_melted['metric'] == metric_name]
        fig.add_trace(
            go.Bar(x=df_metric['attribute'], y=df_metric['value'], name=metric_name)
        )

layout_options = {
    "barmode": 'group',
    "title": 'Fairness Metrics by Protected Attribute',
    "xaxis_title": 'Protected Attribute',
    "yaxis_title": 'Metric Value',
    "legend_title": 'Metric',
}
fig.update_layout(**layout_options)

fig.show()


Loaded test set with predictions and prepared for fairness analysis

In [None]:
# Names of the protected-attribute columns handed to the CSV-based fairness
# computation in the cells below.
protected_attributes =  ['gender', 'race', 'ethnicity', 'language', 'maritalstatus']  

def compute_fairness_metrics_from_csv(
    csv_path,
    label_column,
    protected_attributes,
    attribute_names,
    priviliged_groups
):
    """Compute group-fairness metrics of stored predictions, one row per attribute.

    Parameters
    ----------
    csv_path : str
        CSV file holding the label column, the protected attribute columns and
        a ``pred`` column with the predicted labels.
    label_column : str
        Name of the ground-truth label column.
    protected_attributes : list of str
        Names of the protected attribute columns.
    attribute_names : list of str
        Accepted for call compatibility and not used. The ``attribute`` column
        of the result is filled from ``protected_attributes``.
    priviliged_groups : sequence
        Encoded privileged value of each protected attribute, in the order of
        ``protected_attributes``. The CSV is label-encoded independently here,
        so the codes have to match the encoding of this file.

    Returns
    -------
    pandas.DataFrame
        One row per protected attribute whose metrics could be computed, with
        the metrics of ``get_group_metrics`` plus an ``attribute`` column.

    Raises
    ------
    KeyError
        If the CSV has no ``pred`` column.
    """
    df = pd.read_csv(csv_path)
    if 'pred' not in df.columns:
        raise KeyError(f"'pred' column not found in {csv_path}")

    # Drop identifier, image, free-text and age columns if they exist. 'filename'
    # is included so that it is not label-encoded into a meaningless feature.
    drop_cols = [
        col
        for col in ['filename', 'slo_fundus', 'age', 'note', 'gpt4_summary']
        if col in df.columns
    ]
    df = df.drop(columns=drop_cols)
    df_numeric, _ = convert_to_numerical_with_encodings(df)
    dataset = create_binary_label_dataset(df_numeric, label_column, protected_attributes)
    protected_attrs = dataset.protected_attributes
    # Flattened so labels and predictions are both 1-D; mixing a column vector
    # with a flat vector risks unintended broadcasting.
    y_true = np.ravel(dataset.labels)
    y_pred = df_numeric['pred'].values

    all_metrics = []
    for i, attr_name in enumerate(protected_attributes):
        prot_attr = protected_attrs[:, i]
        priv_group = priviliged_groups[i]
        try:
            metrics = get_group_metrics(
                y_true=y_true,
                y_pred=y_pred,
                prot_attr=prot_attr,
                priv_group=priv_group,
                pos_label=1
            )
        except Exception as e:
            # Best effort: report the attribute that failed and continue with the others.
            print(f"[{attr_name}] Error computing metrics: {e}")
            continue
        metrics['attribute'] = attr_name
        all_metrics.append(metrics)
    return pd.DataFrame(all_metrics)

In [None]:
# Fairness metrics of the stored test-set predictions, one row per protected attribute.
df_metrics_preds = compute_fairness_metrics_from_csv(
    "./medical/df_test_with_preds.csv",
    label_column,
    protected_attributes,
    attribute_names,
    priviliged_groups
)

# Long format (attribute, metric, value); rows whose value is NaN are dropped.
df_melted_preds = (
    df_metrics_preds
    .melt(id_vars='attribute', var_name='metric', value_name='value')
    .dropna(subset=['value'])
)

# One bar series per metric, grouped by protected attribute.
fig_preds = go.Figure()
for metric_name in df_melted_preds['metric'].unique():
    if metric_name != 'attribute':
        df_metric = df_melted_preds.loc[df_melted_preds['metric'] == metric_name]
        fig_preds.add_trace(
            go.Bar(x=df_metric['attribute'], y=df_metric['value'], name=metric_name)
        )

preds_layout = {
    "barmode": 'group',
    "title": 'Fairness Metrics by Protected Attribute (with Predictions)',
    "xaxis_title": 'Protected Attribute',
    "yaxis_title": 'Metric Value',
    "legend_title": 'Metric',
    "width": 1200,
    "height": 700,
}
fig_preds.update_layout(**preds_layout)
fig_preds.show()

In [None]:
import plotly.colors

# Metrics available both for the dataset alone and for the model predictions.
common_metrics = set(df_melted['metric']).intersection(set(df_melted_preds['metric']))
df_before = df_melted[df_melted['metric'].isin(common_metrics)]
df_after = df_melted_preds[df_melted_preds['metric'].isin(common_metrics)]

# Sorted because iterating a set of strings has no stable order between kernel
# sessions, which would reshuffle the bar order and the colours on every run.
metric_names = sorted(common_metrics)
palette = plotly.colors.qualitative.Plotly  # or use another palette if you prefer
# One fixed colour per metric, shared by both panels.
color_map = {metric: palette[i % len(palette)] for i, metric in enumerate(metric_names)}

fig_compare = make_subplots(
    rows=1, cols=2,
    subplot_titles=["Fairness Metrics Before Predictions", "Fairness Metrics After Predictions"],
    shared_yaxes=True
)

for metric in metric_names:
    df_b = df_before[df_before['metric'] == metric]
    df_a = df_after[df_after['metric'] == metric]
    color = color_map[metric]
    # Left panel: dataset-only metrics. This trace carries the legend entry.
    fig_compare.add_trace(
        go.Bar(
            x=df_b['attribute'],
            y=df_b['value'],
            name=metric,
            legendgroup=metric,
            showlegend=True,
            marker_color=color
        ),
        row=1, col=1
    )
    # Right panel: metrics of the predictions. Same legend group, no second entry.
    fig_compare.add_trace(
        go.Bar(
            x=df_a['attribute'],
            y=df_a['value'],
            name=metric,
            legendgroup=metric,
            showlegend=False,
            marker_color=color
        ),
        row=1, col=2
    )

fig_compare.update_layout(
    height=500,
    width=1100,
    barmode='group',
    title_text="Comparison of Fairness Metrics Before and After Predictions",
    legend_title="Metric"
)
fig_compare.show()

Loading the fine-tuned version

In [None]:

# Long-format metrics of the original predictions; rows whose value is NaN are dropped.
df_after = (
    df_metrics_preds
    .melt(id_vars='attribute', var_name='metric', value_name='value')
    .dropna(subset=['value'])
)

# Same computation for the predictions of the fine-tuned model.
df_metrics_fine_tuned = compute_fairness_metrics_from_csv(
    "./medical/df_fine_tuned_preds.csv",
    label_column,
    protected_attributes,
    attribute_names,
    priviliged_groups
)
df_fine_tuned = (
    df_metrics_fine_tuned
    .melt(id_vars='attribute', var_name='metric', value_name='value')
    .dropna(subset=['value'])
)

# Metrics present in both results, in alphabetical order.
common_metrics = sorted(set(df_after['metric']) & set(df_fine_tuned['metric']))
metric_names = list(common_metrics)
palette = plotly.colors.qualitative.Plotly
# One fixed colour per metric, shared by both panels.
color_map = {}
for position, metric in enumerate(metric_names):
    color_map[metric] = palette[position % len(palette)]


fig_compare = make_subplots(
    rows=1, cols=2,
    subplot_titles=[
        "Fairness Metrics After Predictions",
        "Fairness Metrics After Fine-Tuning"
    ],
    shared_yaxes=True
)

# (source frame, subplot column, whether the trace carries the legend entry)
comparison_panels = (
    (df_after, 1, True),
    (df_fine_tuned, 2, False),
)
for metric in metric_names:
    for source_frame, panel_col, in_legend in comparison_panels:
        metric_rows = source_frame[source_frame['metric'] == metric]
        fig_compare.add_trace(
            go.Bar(
                x=metric_rows['attribute'],
                y=metric_rows['value'],
                name=metric,
                legendgroup=metric,
                showlegend=in_legend,
                marker_color=color_map[metric]
            ),
            row=1, col=panel_col
        )

compare_layout = {
    "height": 500,
    "width": 1200,
    "barmode": 'group',
    "title_text": "Comparison of Fairness Metrics: After Predictions vs. After Fine-Tuning",
    "legend_title": "Metric",
}
fig_compare.update_layout(**compare_layout)
fig_compare.show()