#### Imports & Paths

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# Resolve the project root.
#   * Run as a script: `__file__` is defined and the root is three levels above it.
#   * Run in a notebook: climb from the working directory to the nearest ancestor
#     that actually contains the predictions file, so the kernel may be started
#     from any folder inside the project.
#   * Nothing found: keep the historical guess, a `fairvoice/` directory located
#     under the grandparent of the working directory.
_PRED_REL = Path("evaluation_results") / "predictions_test.csv"
if '__file__' in globals():
    ROOT = Path(__file__).resolve().parents[3]
else:
    _cwd = Path.cwd()
    ROOT = next((p for p in (_cwd, *_cwd.parents) if (p / _PRED_REL).is_file()), None)
    if ROOT is None:
        ROOT = _cwd.parents[1] / "fairvoice"

# Output locations; the chart directory is created on demand.
EVAL_DIR = ROOT / "evaluation_results"
CHART_DIR = EVAL_DIR / "bias_charts"
CHART_DIR.mkdir(parents=True, exist_ok=True)

# Input files consumed by the cells below.
PRED_FP = EVAL_DIR / "predictions_test.csv"
META_FP = ROOT / "data" / "processed" / "metadata_test.csv"

print("Using:", PRED_FP)
print("Using:", META_FP)


Using: /Users/pc/Desktop/CODING/Others/fairvoice/evaluation_results/predictions_test.csv
Using: /Users/pc/Desktop/CODING/Others/fairvoice/data/processed/metadata_test.csv


#### Load Predictions + Metadata

In [14]:
# Read the prediction table first, then the test-split metadata table.
pred, meta = (pd.read_csv(csv_fp) for csv_fp in (PRED_FP, META_FP))

# Plain-text preview of the first rows of each table.
print(pred.head(), meta.head(), sep="\n")


              file  true_idx  pred_idx true pred  Stimulus_Number  ActorID  \
0  1006_IEO_NEU_XX         4         5  NEU  SAD              410     1006   
1  1006_IEO_HAP_LO         3         4  HAP  NEU              411     1006   
2  1006_IEO_HAP_MD         3         3  HAP  HAP              412     1006   
3  1006_IEO_HAP_HI         3         3  HAP  HAP              413     1006   
4  1006_IEO_SAD_LO         5         5  SAD  SAD              414     1006   

   Age     Sex       Race     Ethnicity  \
0   58  Female  Caucasian  Not Hispanic   
1   58  Female  Caucasian  Not Hispanic   
2   58  Female  Caucasian  Not Hispanic   
3   58  Female  Caucasian  Not Hispanic   
4   58  Female  Caucasian  Not Hispanic   

                                          audio_path  \
0  /Users/pc/Desktop/CODING/Others/fairvoice/data...   
1  /Users/pc/Desktop/CODING/Others/fairvoice/data...   
2  /Users/pc/Desktop/CODING/Others/fairvoice/data...   
3  /Users/pc/Desktop/CODING/Others/fairvoice/dat

#### Merge Predictions & Metadata

In [15]:
# Build a shared join key (the clip identifier as a string) on both tables.
meta['file_stem'] = meta['file'].astype(str)
# The identifier is read from `file_x` when that column exists, otherwise from `file`.
pred['file_stem'] = pred['file_x'].astype(str) if 'file_x' in pred.columns else pred['file'].astype(str)

# Left join keeps every prediction row. `validate='many_to_one'` makes pandas raise a
# MergeError when a key occurs more than once in `meta`; without it such duplicates
# would silently multiply prediction rows and distort every per-group metric below.
# Columns present in both tables come out with `_x` (pred) / `_y` (meta) suffixes.
df = pred.merge(meta, on='file_stem', how='left', validate='many_to_one')

print("Merged columns:", df.columns.tolist())
df.head()


Merged columns: ['file_x', 'true_idx', 'pred_idx', 'true', 'pred', 'Stimulus_Number_x', 'ActorID_x', 'Age_x', 'Sex_x', 'Race_x', 'Ethnicity_x', 'audio_path_x', 'clean_path_x', 'emotion_x', 'demo_x', 'feature_exists_x', 'split_x', 'file_stem', 'Stimulus_Number_y', 'file_y', 'ActorID_y', 'Age_y', 'Sex_y', 'Race_y', 'Ethnicity_y', 'audio_path_y', 'clean_path_y', 'emotion_y', 'demo_y', 'feature_exists_y', 'split_y']


Unnamed: 0,file_x,true_idx,pred_idx,true,pred,Stimulus_Number_x,ActorID_x,Age_x,Sex_x,Race_x,...,Age_y,Sex_y,Race_y,Ethnicity_y,audio_path_y,clean_path_y,emotion_y,demo_y,feature_exists_y,split_y
0,1006_IEO_NEU_XX,4,5,NEU,SAD,410,1006,58,Female,Caucasian,...,58,Female,Caucasian,Not Hispanic,/Users/pc/Desktop/CODING/Others/fairvoice/data...,/Users/pc/Desktop/CODING/Others/fairvoice/data...,NEU,Female__Caucasian,True,test
1,1006_IEO_HAP_LO,3,4,HAP,NEU,411,1006,58,Female,Caucasian,...,58,Female,Caucasian,Not Hispanic,/Users/pc/Desktop/CODING/Others/fairvoice/data...,/Users/pc/Desktop/CODING/Others/fairvoice/data...,HAP,Female__Caucasian,True,test
2,1006_IEO_HAP_MD,3,3,HAP,HAP,412,1006,58,Female,Caucasian,...,58,Female,Caucasian,Not Hispanic,/Users/pc/Desktop/CODING/Others/fairvoice/data...,/Users/pc/Desktop/CODING/Others/fairvoice/data...,HAP,Female__Caucasian,True,test
3,1006_IEO_HAP_HI,3,3,HAP,HAP,413,1006,58,Female,Caucasian,...,58,Female,Caucasian,Not Hispanic,/Users/pc/Desktop/CODING/Others/fairvoice/data...,/Users/pc/Desktop/CODING/Others/fairvoice/data...,HAP,Female__Caucasian,True,test
4,1006_IEO_SAD_LO,5,5,SAD,SAD,414,1006,58,Female,Caucasian,...,58,Female,Caucasian,Not Hispanic,/Users/pc/Desktop/CODING/Others/fairvoice/data...,/Users/pc/Desktop/CODING/Others/fairvoice/data...,SAD,Female__Caucasian,True,test


#### Helper Functions (Metrics + Column Detection + Plots)

In [22]:
def detect_column(df, candidates):
    """
    Return the first column of ``df`` that corresponds to one of ``candidates``.

    A column corresponds to a candidate when, ignoring case, its name equals the
    candidate or the candidate followed by a pandas merge suffix (``_x`` / ``_y``).
    Plain prefix matching is deliberately avoided: it would let ``Age`` pick up
    ``Age_Group`` or ``Sex`` pick up an unrelated column that merely starts with
    the same letters.

    Candidates are tried in the order given; for each candidate the columns are
    scanned in frame order, so ``Sex_x`` is preferred over ``Sex_y`` when both exist.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose column labels are searched.
    candidates : iterable of str
        Base column names to look for, in priority order.

    Returns
    -------
    The matching column label exactly as stored in ``df.columns``, or ``None``
    when no column matches.
    """
    for base in candidates:
        wanted = str(base).lower()
        accepted = {wanted, f"{wanted}_x", f"{wanted}_y"}
        for col in df.columns:
            # str() keeps non-string labels (e.g. integers) from raising on .lower().
            if str(col).lower() in accepted:
                return col
    return None


def acc_f1_by_group(df, group_col):
    """
    Compute accuracy and weighted F1 separately for every value of ``group_col``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``group_col`` plus the columns ``'true'`` and ``'pred'``.
    group_col : str
        Column whose distinct values define the groups.

    Returns
    -------
    pandas.DataFrame
        One row per group with columns ``[group_col, 'accuracy', 'f1']``, in
        order of first appearance of each group. Empty (but with those columns)
        when there is no non-missing group value.

    Notes
    -----
    Missing group values are skipped. ``df[group_col] == NaN`` never matches a
    row, so a NaN "group" would hand an empty subset to the metric functions.
    """
    groups = []
    for g in df[group_col].dropna().unique():
        subset = df[df[group_col] == g]
        acc = accuracy_score(subset['true'], subset['pred'])
        # 'weighted' averages the per-class F1 scores by class support.
        f1 = f1_score(subset['true'], subset['pred'], average='weighted')
        groups.append([g, acc, f1])
    return pd.DataFrame(groups, columns=[group_col, 'accuracy', 'f1'])


def per_class_tpr_fpr(df, group_col, classes):
    """
    One-vs-rest true-positive and false-positive rates per group and per class.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``group_col`` plus the columns ``'true'`` and ``'pred'``.
    group_col : str
        Column whose distinct values define the groups.
    classes : iterable
        Class labels to evaluate; compared with ``==`` against ``'true'``/``'pred'``.

    Returns
    -------
    pandas.DataFrame
        Columns ``[group_col, 'class', 'TPR', 'FPR']``, one row per
        (group, class) pair.

    Notes
    -----
    * Rates are exact ratios. When a group has no positives for a class the TPR
      is NaN, and when it has no negatives the FPR is NaN. An undefined rate is
      therefore not reported as 0.0, which would read as the model failing on
      that group.
    * Missing group values are skipped, since ``== NaN`` never selects a row.
    """
    rows = []
    for g in df[group_col].dropna().unique():
        df_g = df[df[group_col] == g]
        y_true = df_g['true']
        y_pred = df_g['pred']
        for cls in classes:
            is_pos = y_true == cls
            is_hit = y_pred == cls

            tp = int((is_pos & is_hit).sum())
            fn = int((is_pos & ~is_hit).sum())
            fp = int((~is_pos & is_hit).sum())
            tn = int((~is_pos & ~is_hit).sum())

            positives = tp + fn
            negatives = fp + tn
            tpr = tp / positives if positives else np.nan
            fpr = fp / negatives if negatives else np.nan

            rows.append([g, cls, tpr, fpr])

    return pd.DataFrame(rows, columns=[group_col, 'class', 'TPR', 'FPR'])


def plot_group_metrics(df_metrics, group_col, save_path):
    """
    Draw a bar chart of accuracy per group and write it to ``save_path``.

    ``df_metrics`` must provide the columns ``group_col`` and ``'accuracy'``.
    Group values are converted to strings for the x axis. The figure is closed
    after saving, so nothing is displayed inline.
    """
    fig, ax = plt.subplots(figsize=(8, 5))

    group_labels = df_metrics[group_col].astype(str)
    ax.bar(group_labels, df_metrics['accuracy'])

    ax.set_xlabel(group_col)
    ax.set_ylabel('Accuracy')
    ax.set_title(f"Accuracy by {group_col}")
    # `ax` is the current axes, so the pyplot helper rotates its tick labels.
    plt.xticks(rotation=45)

    fig.tight_layout()
    fig.savefig(save_path)
    plt.close(fig)


def plot_tpr_fpr(df_tprfpr, group_col, save_path):
    """
    Save two line charts, one for TPR and one for FPR, with one line per group.

    Parameters
    ----------
    df_tprfpr : pandas.DataFrame
        Must contain ``group_col`` and the columns ``'class'``, ``'TPR'``, ``'FPR'``.
    group_col : str
        Column whose distinct values become the plotted lines / legend entries.
    save_path : str or pathlib.Path
        Base output path. ``_TPR`` / ``_FPR`` is inserted before the file
        extension, e.g. ``charts/x.png`` -> ``charts/x_TPR.png`` and
        ``charts/x_FPR.png``.

    Notes
    -----
    The output names are derived from the path's stem and suffix instead of a
    textual ``.png`` replacement. A base path without a ``.png`` extension thus
    still yields two distinct files instead of the FPR chart overwriting the
    TPR chart. Figures are closed after saving.
    """
    base = Path(save_path)

    for metric in ("TPR", "FPR"):
        plt.figure(figsize=(10, 5))
        for g in df_tprfpr[group_col].unique():
            subset = df_tprfpr[df_tprfpr[group_col] == g]
            plt.plot(subset['class'], subset[metric], marker='o', label=f"{g}")
        plt.title(f"{metric} by Class — grouped by {group_col}")
        plt.xlabel("Class")
        plt.ylabel(metric)
        plt.legend()
        plt.tight_layout()
        plt.savefig(base.with_name(f"{base.stem}_{metric}{base.suffix}"))
        plt.close()


#### Prepare true and pred Columns

In [23]:
# Evaluate on the integer class indices whenever they are available;
# otherwise the existing 'true' / 'pred' columns are left as they are.
if 'true_idx' in df.columns:
    df['true'] = df['true_idx']
if 'pred_idx' in df.columns:
    df['pred'] = df['pred_idx']

df[['true','pred']].head()


Unnamed: 0,true,pred
0,4,5
1,3,4
2,3,3
3,3,3
4,5,5


#### SECTION A: GENDER BIAS ANALYSIS

In [24]:
# Find the column that holds speaker sex/gender in the merged frame.
gcol = detect_column(df, candidates=['Sex','sex','Gender','gender'])
print(f"Detected gender column: {gcol}")


Detected gender column: Sex_x


In [25]:
# Accuracy and weighted F1 for each value of the detected gender column.
gender_metrics = acc_f1_by_group(df, group_col=gcol)
gender_metrics


Unnamed: 0,Sex_x,accuracy,f1
0,Female,0.450617,0.430476
1,Male,0.383275,0.340962


In [26]:
# Persist the per-gender table and its accuracy bar chart side by side.
gender_acc_csv = CHART_DIR / f"accuracy_by_{gcol}.csv"
gender_acc_png = CHART_DIR / f"accuracy_by_{gcol}.png"
gender_metrics.to_csv(gender_acc_csv, index=False)
plot_group_metrics(gender_metrics, gcol, gender_acc_png)


In [27]:
# Per-class TPR/FPR for each gender, over every class present in the 'true' column.
gender_classes = sorted(df['true'].unique())
gender_tprfpr = per_class_tpr_fpr(df, gcol, gender_classes)

gender_tprfpr_csv = CHART_DIR / f"tprfpr_by_{gcol}.csv"
gender_tprfpr_png = CHART_DIR / f"tprfpr_by_{gcol}.png"
gender_tprfpr.to_csv(gender_tprfpr_csv, index=False)
plot_tpr_fpr(gender_tprfpr, gcol, gender_tprfpr_png)

gender_tprfpr.head()


Unnamed: 0,Sex_x,class,TPR,FPR
0,Female,0,0.60241,0.032258
1,Female,1,0.301205,0.076923
2,Female,2,0.13253,0.052109
3,Female,3,0.638554,0.188586
4,Female,4,0.28169,0.074699


#### SECTION B: RACE BIAS ANALYSIS

In [28]:
# Find the column that holds speaker race in the merged frame.
rcol = detect_column(df, candidates=['Race','race'])
print(f"Detected race column: {rcol}")


Detected race column: Race_x


In [29]:
# Accuracy and weighted F1 for each value of the detected race column,
# saved as a CSV table and as a bar chart.
race_metrics = acc_f1_by_group(df, group_col=rcol)

race_acc_csv = CHART_DIR / f"accuracy_by_{rcol}.csv"
race_acc_png = CHART_DIR / f"accuracy_by_{rcol}.png"
race_metrics.to_csv(race_acc_csv, index=False)
plot_group_metrics(race_metrics, rcol, race_acc_png)

race_metrics


Unnamed: 0,Race_x,accuracy,f1
0,Caucasian,0.419399,0.398357
1,African American,0.426829,0.388672
2,Asian,0.329268,0.253682


In [30]:
# Per-class TPR/FPR for each race group, over every class present in the 'true' column.
race_classes = sorted(df['true'].unique())
race_tprfpr = per_class_tpr_fpr(df, rcol, race_classes)

race_tprfpr_csv = CHART_DIR / f"tprfpr_by_{rcol}.csv"
race_tprfpr_png = CHART_DIR / f"tprfpr_by_{rcol}.png"
race_tprfpr.to_csv(race_tprfpr_csv, index=False)
plot_tpr_fpr(race_tprfpr, rcol, race_tprfpr_png)

race_tprfpr.head()


Unnamed: 0,Race_x,class,TPR,FPR
0,Caucasian,0,0.664,0.024712
1,Caucasian,1,0.2,0.06425
2,Caucasian,2,0.112,0.037891
3,Caucasian,3,0.488,0.16804
4,Caucasian,4,0.28972,0.1216


#### SECTION C: ETHNICITY BIAS ANALYSIS

In [31]:
# Find the column that holds speaker ethnicity in the merged frame.
ecol = detect_column(df, candidates=['Ethnicity','ethnicity'])
print(f"Detected ethnicity column: {ecol}")


Detected ethnicity column: Ethnicity_x


In [32]:
# Accuracy and weighted F1 for each value of the detected ethnicity column,
# saved as a CSV table and as a bar chart.
eth_metrics = acc_f1_by_group(df, group_col=ecol)

eth_acc_csv = CHART_DIR / f"accuracy_by_{ecol}.csv"
eth_acc_png = CHART_DIR / f"accuracy_by_{ecol}.png"
eth_metrics.to_csv(eth_acc_csv, index=False)
plot_group_metrics(eth_metrics, ecol, eth_acc_png)

eth_metrics


Unnamed: 0,Ethnicity_x,accuracy,f1
0,Not Hispanic,0.402902,0.374764
1,Hispanic,0.47561,0.43372


In [33]:
# Per-class TPR/FPR for each ethnicity group, over every class present in the 'true' column.
eth_classes = sorted(df['true'].unique())
eth_tprfpr = per_class_tpr_fpr(df, ecol, eth_classes)

eth_tprfpr_csv = CHART_DIR / f"tprfpr_by_{ecol}.csv"
eth_tprfpr_png = CHART_DIR / f"tprfpr_by_{ecol}.png"
eth_tprfpr.to_csv(eth_tprfpr_csv, index=False)
plot_tpr_fpr(eth_tprfpr, ecol, eth_tprfpr_png)

eth_tprfpr.head()


Unnamed: 0,Ethnicity_x,class,TPR,FPR
0,Not Hispanic,0,0.679739,0.065949
1,Not Hispanic,1,0.169935,0.060565
2,Not Hispanic,2,0.098039,0.034993
3,Not Hispanic,3,0.392157,0.145357
4,Not Hispanic,4,0.358779,0.142484


#### SECTION D: AGE BIAS ANALYSIS

In [35]:
# Find the column that holds speaker age in the merged frame.
acol = detect_column(df, candidates=['Age','age'])
print(f"Detected age column: {acol}")


Detected age column: Age_x


In [37]:
# Bucket speaker age into four bands. pd.cut uses right-closed intervals by default,
# i.e. (0, 25], (25, 40], (40, 60], (60, 100]; ages outside (0, 100] become NaN.
df['Age_Group'] = pd.cut(
    df[acol],
    bins=[0, 25, 40, 60, 100],
    labels=['Youth','Adult','Middle','Senior']
)

# Preview through `acol` rather than a hard-coded 'Age_x', so the cell keeps working
# when the detected age column carries a different name.
df[[acol, 'Age_Group']].head()


Unnamed: 0,Age_x,Age_Group
0,58,Middle
1,58,Middle
2,58,Middle
3,58,Middle
4,58,Middle


In [38]:
# Accuracy and weighted F1 for each age band, saved as a CSV table and as a bar chart.
age_metrics = acc_f1_by_group(df, group_col='Age_Group')

age_acc_csv = CHART_DIR / "accuracy_by_age_group.csv"
age_acc_png = CHART_DIR / "accuracy_by_age_group.png"
age_metrics.to_csv(age_acc_csv, index=False)
plot_group_metrics(age_metrics, "Age_Group", age_acc_png)

age_metrics


Unnamed: 0,Age_Group,accuracy,f1
0,Middle,0.386831,0.358796
1,Youth,0.432927,0.401886
2,Adult,0.463415,0.45211


In [39]:
# Per-class TPR/FPR for each age band, over every class present in the 'true' column.
age_classes = sorted(df['true'].unique())
age_tprfpr = per_class_tpr_fpr(df, 'Age_Group', age_classes)

age_tprfpr_csv = CHART_DIR / "tprfpr_by_age_group.csv"
age_tprfpr_png = CHART_DIR / "tprfpr_by_age_group.png"
age_tprfpr.to_csv(age_tprfpr_csv, index=False)
plot_tpr_fpr(age_tprfpr, 'Age_Group', age_tprfpr_png)

age_tprfpr.head()


Unnamed: 0,Age_Group,class,TPR,FPR
0,Middle,0,0.638554,0.111663
1,Middle,1,0.180723,0.059553
2,Middle,2,0.108434,0.039702
3,Middle,3,0.373494,0.129032
4,Middle,4,0.309859,0.081928
