In [27]:
import pandas as pd
import numpy as np
import polars as pl
import random
import joblib
import pathlib

from sklearn.metrics import roc_auc_score, classification_report

# Display options (optional)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

random.seed(42)
np.random.seed(42)

## Step 1 — Load artifacts

In [28]:
# Load path and get all the required files
def get_paths(base="fairness_artifacts"):
    base = pathlib.Path(base)
    return {
        "X_train": base / "X_train.parquet",
        "X_test": base / "X_test.parquet",
        "y_train": base / "y_train.parquet",
        "y_test": base / "y_test.parquet",
        "df_meta": base / "df_meta.parquet",
        "ids_test": base / "ids_test.parquet",
        "model": base / "final_lightgbm_model.pkl",
    }


def load_artifacts(base="fairness_artifacts"):
    paths = get_paths(base)

    X_train = pd.read_parquet(paths["X_train"])
    X_test = pd.read_parquet(paths["X_test"])
    y_train = pd.read_parquet(paths["y_train"])["hospitalized"]
    y_test = pd.read_parquet(paths["y_test"])["hospitalized"]
    df_meta = pd.read_parquet(paths["df_meta"])
    ids_test = pd.read_parquet(paths["ids_test"])["person_id"]
    model = joblib.load(paths["model"])

    return X_train, X_test, y_train, y_test, df_meta, ids_test, model

In [29]:
X_train, X_test, y_train, y_test, df_meta, ids_test, final_lgb = load_artifacts()

In [30]:
print("Shape:")
print("X Train:", X_train.shape)
print("X Test:", X_test.shape)
print("y Train:", y_train.shape)
print("y Test:", y_test.shape)
print("ids_test:", ids_test.shape)
print("df_meta:", df_meta.shape)

Shape:
X Train: (15135, 65)
X Test: (3784, 65)
y Train: (15135,)
y Test: (3784,)
ids_test: (3784,)
df_meta: (18919, 10)


## Step 2 — Build base evaluation table

For the fairness playground, I need a clean table that captures what the model actually did on the test set. Thus, I will now build a base evaluation table on the test set that includes person_id, the true label, the predicted probability, and the final 0/1 prediction. This is the core model-output layer I’ll use when joining with demographics and computing group-wise fairness metrics.

In [31]:
def make_base_eval_table(X_test, y_test, ids_test, model, threshold: float = 0.5):
    """
    Build a base evaluation table on the test set.
    """
    proba = model.predict_proba(X_test)[:, 1]
    y_pred = (proba >= threshold).astype(int)

    df_eval = pd.DataFrame(
        {
            "person_id": ids_test.values,
            "y_true": y_test.values,
            "y_pred_proba": proba,
            "y_pred": y_pred,
        }
    )
    return df_eval

In [32]:
df_eval = make_base_eval_table(X_test, y_test, ids_test, final_lgb)
df_eval.head()

Unnamed: 0,person_id,y_true,y_pred_proba,y_pred
0,2795536102,0,0.000125,0
1,2796668104,0,0.000609,0
2,2815945102,0,0.000196,0
3,2813232102,0,0.002306,0
4,2810968102,0,0.000353,0


### Step 3 — Merge with demographics

In [33]:
def merge_eval_and_meta(df_eval, df_meta):
    df_merged = df_eval.merge(df_meta, on="person_id", how="left")
    return df_merged

In [None]:
# merge model outputs with demographics
df_fair = merge_eval_and_meta(df_eval, df_meta)
print(df_fair.head())

    person_id  y_true  y_pred_proba  y_pred  age  sex  race_ethnicity  \
0  2795536102       0      0.000125       0   44    1               2   
1  2796668104       0      0.000609       0    4    1               1   
2  2815945102       0      0.000196       0   60    2               1   
3  2813232102       0      0.002306       0   59    1               2   
4  2810968102       0      0.000353       0   30    1               2   

   hispanic  poverty_category  insurance_coverage  family_income  \
0         2                 4                   1         142202   
1         1                 1                   2              0   
2         1                 4                   3          64010   
3         2                 5                   1         335489   
4         2                 4                   3          47840   

   self_rated_health  self_rated_mental_health  
0                2.0                       2.0  
1                4.0                       3.0  
2    

### Step 4: Recoding demographic columns with human-readable labels

The demographic variables in MEPS are all encoded as numbers, so before I can compute fairness metrics or show any results in the playground, I need them in human-readable form. This step converts all the MEPS-coded fields — sex, race/ethnicity, Hispanic status, poverty category, insurance coverage, and self-rated health — into clear labels. This makes the fairness results interpretable and avoids exposing raw codes in the playground.

In [35]:
# Recode sex
def recode_sex(df):
    sex_map = {
        1: "Male",
        2: "Female",
    }
    df["sex"] = df["sex"].map(sex_map)
    return df

In [36]:
# Recode Race/Ethnicity (RACETHX)
def recode_race_ethnicity(df):
    race_map = {
        1: "Hispanic",
        2: "White",
        3: "Black",
        4: "Asian",
        5: "Other OR Multiple",
    }
    df["race_ethnicity"] = df["race_ethnicity"].map(race_map)
    return df

In [37]:
# Recode Hispanic Flag (HISPANX)
def recode_hispanic(df):
    hisp_map = {
        1: "Hispanic",
        2: "Not Hispanic",
    }
    df["hispanic"] = df["hispanic"].map(hisp_map)
    return df

In [38]:
# Poverty Category (POVCAT23)
def recode_poverty(df):
    pov_map = {
        1: "Poor OR negative",
        2: "low income",
        3: "Middle income",
        4: "High income",
        5: "Unclassifiable",
    }
    df["poverty_category"] = df["poverty_category"].map(pov_map)
    return df

In [39]:
# Insurance Coverage (INSCOV23)
def recode_insurance(df):
    ins_map = {
        1: "Any private",
        2: "Public only",
        3: "Uninsured",
    }
    df["insurance_coverage"] = df["insurance_coverage"].map(ins_map)
    return df

In [40]:
# Recode Self-Rated Health (RTHLTH53)
def recode_self_rated_health(df):
    health_map = {
        1: "Excellent",
        2: "Very good",
        3: "Good",
        4: "Fair",
        5: "Poor",
    }

    df["self_rated_health"] = (
        df["self_rated_health"].round().astype("Int64").map(health_map)
    )
    return df

In [41]:
# Recode Self-Rated Mental Health (MNHLTH53)
def recode_self_rated_mental(df):
    mental_map = {
        1: "Excellent",
        2: "Very good",
        3: "Good",
        4: "Fair",
        5: "Poor",
    }

    df["self_rated_mental_health"] = (
        df["self_rated_mental_health"].round().astype("Int64").map(mental_map)
    )
    return df

In [42]:
# Apply all recodings
def apply_all_recodings(df):
    return (
        df.pipe(recode_sex)
        .pipe(recode_race_ethnicity)
        .pipe(recode_hispanic)
        .pipe(recode_poverty)
        .pipe(recode_insurance)
        .pipe(recode_self_rated_health)
        .pipe(recode_self_rated_mental)
    )

In [43]:
df = apply_all_recodings(df_fair)
print(df_fair.head())

    person_id  y_true  y_pred_proba  y_pred  age     sex race_ethnicity  \
0  2795536102       0      0.000125       0   44    Male          White   
1  2796668104       0      0.000609       0    4    Male       Hispanic   
2  2815945102       0      0.000196       0   60  Female       Hispanic   
3  2813232102       0      0.002306       0   59    Male          White   
4  2810968102       0      0.000353       0   30    Male          White   

       hispanic  poverty_category insurance_coverage  family_income  \
0  Not Hispanic       High income        Any private         142202   
1      Hispanic  Poor OR negative        Public only              0   
2      Hispanic       High income          Uninsured          64010   
3  Not Hispanic    Unclassifiable        Any private         335489   
4  Not Hispanic       High income          Uninsured          47840   

  self_rated_health self_rated_mental_health  
0         Very good                Very good  
1              Fair         

### Step 5: Group level fairness Metrics

Now that the fairness dataset is fully assembled, the next step is to compute standard group-level performance metrics. This helps quantify how the model behaves for different demographic groups. For example, I can compare recall, false negative rate, or positive prediction rate across race, sex, or income levels. These metrics form the backbone of the fairness playground and make it easy to see where the model treats groups differently.

In [44]:
from sklearn.metrics import accuracy_score, precision_score, recall_score


def compute_group_metrics(df, group_col):
    """
    Compute standard model performance metrics for each demographic group.
    These metrics feed the fairness playground (tables + plots).
    """
    results = []

    for group, g in df.groupby(group_col):
        y_true = g["y_true"]
        y_pred = g["y_pred"]

        # Core metrics
        acc = accuracy_score(y_true, y_pred)
        prec = precision_score(y_true, y_pred, zero_division=0)
        rec = recall_score(y_true, y_pred, zero_division=0)

        # Derived metrics
        fnr = 1 - rec
        fpr = ((y_pred.eq(1) & y_true.eq(0)).sum()) / max((y_true == 0).sum(), 1)
        pos_rate = y_pred.mean()

        avg_proba = g["y_pred_proba"].mean()

        results.append(
            {
                group_col: group,
                "accuracy": acc,
                "precision": prec,
                "recall": rec,
                "false_negative_rate": fnr,
                "false_positive_rate": fpr,
                "positive_prediction_rate": pos_rate,
                "avg_pred_probability": avg_proba,
                "count": len(g),
            }
        )

    return pd.DataFrame(results)

In [45]:
race_metrics = compute_group_metrics(df_fair, "race_ethnicity")
sex_metrics = compute_group_metrics(df_fair, "sex")
poverty_metrics = compute_group_metrics(df_fair, "poverty_category")
insurance_metrics = compute_group_metrics(df_fair, "insurance_coverage")

### Step 6 - Disparity Gap Metrics


Group metrics tell me how the model performs within each demographic group, but they don’t directly show how far apart the groups are. In this step, I compute simple disparity metrics by picking a reference group and measuring gaps in key quantities like positive prediction rate, recall, and false negative rate. These gaps are what the fairness playground will display when highlighting where the model is more or less sensitive for different groups.

In [46]:
def compute_disparities(group_df, group_col, reference_group=None):
    """
    Take a group metrics table (from compute_group_metrics)
    and compute gaps vs a reference group.

    If reference_group is None, uses the group with the largest count.
    """
    df = group_df.copy()

    # Choose reference
    if reference_group is None:
        reference_group = df.sort_values("count", ascending=False)[group_col].iloc[0]

    ref_row = df[df[group_col] == reference_group].iloc[0]

    # Metrics we want gaps for
    gap_metrics = [
        "positive_prediction_rate",
        "recall",
        "precision",
        "false_negative_rate",
        "false_positive_rate",
    ]

    for m in gap_metrics:
        gap_col = f"{m}_gap_vs_ref"
        df[gap_col] = df[m] - ref_row[m]

    df["reference_group"] = reference_group
    return df

### Step 7 -The Explainaibility Layer

##

#### 7A. Global Explanations (Model-Level)

To make the model’s behavior more interpretable, I compute global SHAP values to understand which features drive predictions overall. This gives me a ranked list of factors that the model relies on the most, which will be displayed in the playground to give users a simple sense of what the model cares about at a high level.

##### Global SHAP values

In [47]:
import shap
import numpy as np


def compute_global_shap(model, X_test):
    """
    Compute SHAP values for the LightGBM model on the test set.
    """
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(X_test)[1]  # class 1 (hospitalized)
    return explainer, shap_values

In [48]:
explainer, shap_values_test = compute_global_shap(final_lgb, X_test)



##### Rank Global Importance

In [49]:
def global_shap_importance(shap_values, feature_names, top_n=10):
    mean_abs = np.abs(shap_values).mean(axis=0)

    df_imp = (
        pd.DataFrame({"feature": feature_names, "mean_abs_shap": mean_abs})
        .sort_values("mean_abs_shap", ascending=False)
        .head(top_n)
        .reset_index(drop=True)
    )
    return df_imp

In [50]:
global_importance = global_shap_importance(shap_values_test, X_test.columns)
global_importance

Unnamed: 0,feature,mean_abs_shap
0,age,0.094713
1,insurance_category_3,0.094713
2,insurance_category_5,0.094713
3,insurance_category_6,0.094713
4,insurance_category_7,0.094713
5,insurance_category_8,0.094713
6,region_1,0.094713
7,region_2,0.094713
8,region_3,0.094713
9,region_4,0.094713


##### 7B. Group Explanations (Fairness + Explainability)

To understand whether the model reasons differently for different groups, I compute SHAP feature importance separately within each demographic group. These group-specific explanations highlight whether certain features are more influential for one group than another, which makes it easier to see how model behavior varies across demographics.

In [51]:
def attach_shap_to_df(df_fair, shap_values, feature_names):
    shap_df = pd.DataFrame(shap_values, columns=feature_names)
    shap_df["person_id"] = df_fair["person_id"].values
    return df_fair.merge(shap_df, on="person_id")

In [52]:
df_fair_shap = attach_shap_to_df(df_fair, shap_values_test, X_test.columns)

ValueError: Shape of passed values is (65, 1), indices imply (65, 65)

In [None]:
def group_shap_importance(df_with_shap, group_col, top_n=10):
    exclude = {
        "person_id",
        "y_true",
        "y_pred",
        "y_pred_proba",
        "sex",
        "race_ethnicity",
        "hispanic",
        "poverty_category",
        "insurance_coverage",
        "self_rated_health",
        "self_rated_mental_health",
        "age",
        "age_group",
    }

    shap_cols = [c for c in df_with_shap.columns if c not in exclude]

    results = {}
    for group, g in df_with_shap.groupby(group_col):
        avg_abs = g[shap_cols].abs().mean().sort_values(ascending=False)
        results[group] = avg_abs.head(top_n)
    return results

In [None]:
race_shap = group_shap_importance(df_fair_shap, "race_ethnicity")
sex_shap = group_shap_importance(df_fair_shap, "sex")

#### 7C. Local Explanations (Per-Person)

For user-selected individuals, I compute a local SHAP explanation that breaks down which features most increased or decreased that person’s predicted risk. This forms the basis of the personalized “explanation card” in the playground, showing exactly why the model made its prediction.

In [None]:
def local_explanation(explainer, model, X_row, top_k=5):
    x = X_row.values.reshape(1, -1)

    # SHAP
    shap_vals = explainer.shap_values(x)[1][0]
    base = explainer.expected_value[1]

    # Prediction
    proba = model.predict_proba(x)[0, 1]

    contrib = pd.Series(shap_vals, index=X_row.index)
    pos = contrib[contrib > 0].sort_values(ascending=False).head(top_k)
    neg = contrib[contrib < 0].sort_values(ascending=True).head(top_k)

    return {
        "prediction_proba": float(proba),
        "base_value": float(base),
        "top_positive": list(pos.items()),
        "top_negative": list(neg.items()),
    }

In [None]:
idx = X_test.index[0]
row = X_test.loc[idx]
local_exp = local_explanation(explainer, final_lgb, row)
local_exp




### Step 8 (local explanations)

### Step 9 (SHAP integration)

### Step 10 (counterfactuals — later)