In [33]:
import json
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, f1_score, brier_score_loss, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [2]:
# Load the raw partners table; the features, target and partner ids used below
# are all read from this frame.
# NOTE(review): absolute, machine-specific path — consider a configurable data directory.
partners_csv_path = '/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/raw/partners.csv'
df = pd.read_csv(partners_csv_path)

In [3]:
# Attribute whose groups are compared in the fairness analysis.
sensitive_attr = "city_tier"

# Target is the repayment flag; the target and the identifier column are
# removed from the feature frame (every other column, including the
# sensitive attribute, stays in X).
y = df["good_repayment"]
X = df.drop(columns=["good_repayment", "partner_id"])

# 75/25 split, stratified on the target, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [4]:
# Numeric feature columns (standardized by the preprocessing step).
num_cols = [
    "tenure_months",
    "days_active",
    "trips_per_week",
    "earnings_avg",
    "earnings_var",
    "on_time_rate",
    "cancel_rate",
    "customer_rating",
    "complaints",
    "accidents",
    "night_shift_pct",
    "cashless_ratio",
    "wallet_txn_volume",
    "vehicle_age",
]

# Categorical feature columns (one-hot encoded by the preprocessing step).
cat_cols = [
    "role",
    "gender",
    "age_group",
    "city_tier",
]

In [5]:
# Preprocessing: standardize the numeric columns and one-hot encode the
# categorical ones. handle_unknown="ignore" keeps transform from failing on
# categories that were not seen during fit.
numeric_step = ("num", StandardScaler(), num_cols)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols)
pre = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [6]:
# Baseline pipeline: shared preprocessing followed by a logistic regression
# (lbfgs solver, at most 200 iterations).
base_clf = LogisticRegression(solver="lbfgs", max_iter=200)
base_model = Pipeline(steps=[("pre", pre), ("clf", base_clf)])

In [7]:
# Fit the baseline on the training split, then keep the second predict_proba
# column (the positive class when labels are 0/1) for every test row.
probs_base = base_model.fit(X_train, y_train).predict_proba(X_test)[:, 1]

In [8]:
# Policy: approve roughly the top `approve_rate` share of test rows by score.
# The cut-off is the (1 - approve_rate) quantile of the baseline probabilities;
# rows at or above it are approved (1), the rest rejected (0).
approve_rate = 0.40
thr = np.quantile(probs_base, 1 - approve_rate)
is_approved_base = probs_base >= thr
yhat_base = is_approved_base.astype(int)

In [9]:
def group_metrics(y_true, y_pred, y_prob, groups):
    """Compute per-group confusion-matrix rates for binary decisions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (0/1).
    y_pred : array-like
        Predicted decisions (0/1).
    y_prob : array-like
        Predicted probabilities. Kept for interface compatibility; none of
        the returned metrics use it.
    groups : array-like
        Group label of each row, aligned with ``y_true``.

    Returns
    -------
    dict
        One entry per distinct group label, each a dict with "TPR", "FPR",
        "PPV", "PositiveRate" and "Count". A rate whose denominator is zero
        is reported as NaN. Labels accepted by ``int()`` are stored under int
        keys; any other label (e.g. a string category) is stored as a native
        Python value instead of raising.
    """
    # Coerce to arrays so boolean masking works for lists and pandas objects alike.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    groups = np.asarray(groups)

    out = {}
    for g in sorted(np.unique(groups)):
        mask = (groups == g)
        yt, yp = y_true[mask], y_pred[mask]
        # labels=[0, 1] forces a 2x2 matrix even if a group lacks one class.
        tn, fp, fn, tp = confusion_matrix(yt, yp, labels=[0, 1]).ravel()
        tpr = tp / (tp + fn) if (tp + fn) else np.nan  # recall for positive class
        fpr = fp / (fp + tn) if (fp + tn) else np.nan
        ppv = tp / (tp + fp) if (tp + fp) else np.nan  # precision
        rate_pos = yp.mean()

        # Integer-like labels keep their int key; non-numeric labels fall back
        # to a plain Python scalar so the result stays JSON-serializable.
        try:
            key = int(g)
        except (TypeError, ValueError, OverflowError):
            key = g.item() if hasattr(g, "item") else g

        out[key] = {
            "TPR": float(tpr),
            "FPR": float(fpr),
            "PPV": float(ppv),
            "PositiveRate": float(rate_pos),
            "Count": int(mask.sum()),
        }
    return out

def disparity(metric_name, table):
    """Return the max-min gap of one metric across groups.

    ``table`` maps group -> metrics dict. NaN entries are skipped; when fewer
    than two usable values remain the gap is defined as 0.0.
    """
    usable = []
    for group_stats in table.values():
        value = group_stats[metric_name]
        if math.isnan(value):
            continue
        usable.append(value)

    if len(usable) < 2:
        return 0.0
    return float(max(usable) - min(usable))

In [10]:
# Baseline performance on the test split. AUC and Brier are computed from the
# predicted probabilities; F1 is computed from the thresholded decisions.
auc_base = roc_auc_score(y_test, probs_base)
brier_base = brier_score_loss(y_test, probs_base)
f1_base = f1_score(y_test, yhat_base)

In [11]:
# Per-group rates for the baseline decisions, grouped by the sensitive attribute.
groups_test = X_test[sensitive_attr].values
gm_base = group_metrics(y_test.values, yhat_base, probs_base, groups_test)

# Max-min gap across groups for each rate:
#   dpd = Demographic Parity Difference (PositiveRate)
#   eod = Equal Opportunity Difference  (TPR)
#   ppd = Predictive Parity Difference  (PPV)
dpd_base, eod_base, ppd_base = (
    disparity(metric, gm_base) for metric in ("PositiveRate", "TPR", "PPV")
)

In [12]:
# Training features with the target attached as column "y"; used to derive
# the per-row sample weights.
train_df = X_train.assign(y=y_train.values)
A = sensitive_attr
# Every training row starts at weight 1.
weights = np.ones(train_df.shape[0])

In [13]:
# Reweighing: each (group, label) cell gets the weight P(y) / P(y | A = a), so
# that the weighted positive rate inside every group equals the overall one.
global_pos = train_df["y"].mean()
is_pos = train_df["y"] == 1
is_neg = train_df["y"] == 0

for a_val in train_df[A].unique():
    in_group = train_df[A] == a_val
    pos_a = train_df.loc[in_group, "y"].mean()

    # A group without positives (or without negatives) keeps weight 1.0 for
    # that label, which avoids dividing by zero.
    if pos_a > 0:
        w_pos = global_pos / pos_a
    else:
        w_pos = 1.0

    if pos_a < 1:
        w_neg = (1 - global_pos) / (1 - pos_a)
    else:
        w_neg = 1.0

    # Write the cell weights into the positional weight vector.
    weights[(in_group & is_pos).values] = w_pos
    weights[(in_group & is_neg).values] = w_neg

# Debiased model: preprocessing + logistic regression trained with the
# reweighing sample weights. The preprocessor is cloned because Pipeline keeps
# its steps by reference: reusing the very same `pre` instance would make this
# fit silently refit the preprocessor embedded in base_model as well.
deb_model = Pipeline([
    ("pre", clone(pre)),
    ("clf", LogisticRegression(max_iter=300, solver="lbfgs"))
])
# The `clf__` prefix routes the weights to the classifier step only.
deb_model.fit(X_train, y_train, clf__sample_weight=weights)

0,1,2
,steps,"[('pre', ...), ('clf', ...)]"
,transform_input,
,memory,
,verbose,False

0,1,2
,transformers,"[('num', ...), ('cat', ...)]"
,remainder,'drop'
,sparse_threshold,0.3
,n_jobs,
,transformer_weights,
,verbose,False
,verbose_feature_names_out,True
,force_int_remainder_cols,'deprecated'

0,1,2
,copy,True
,with_mean,True
,with_std,True

0,1,2
,categories,'auto'
,drop,
,sparse_output,True
,dtype,<class 'numpy.float64'>
,handle_unknown,'ignore'
,min_frequency,
,max_categories,
,feature_name_combiner,'concat'

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,300


In [14]:
# Score the test split with the debiased model and apply the same
# approve-top-share policy, with the cut-off recomputed on the debiased scores.
probs_deb = deb_model.predict_proba(X_test)[:, 1]
thr_deb = np.quantile(probs_deb, 1 - approve_rate)
is_approved_deb = probs_deb >= thr_deb
yhat_deb = is_approved_deb.astype(int)

# Performance: AUC and Brier from probabilities, F1 from thresholded decisions.
auc_deb = roc_auc_score(y_test, probs_deb)
f1_deb = f1_score(y_test, yhat_deb)
brier_deb = brier_score_loss(y_test, probs_deb)

# Fairness: per-group rates and their max-min gaps
# (PositiveRate -> dpd, TPR -> eod, PPV -> ppd).
gm_deb = group_metrics(
    y_test.values,
    yhat_deb,
    probs_deb,
    X_test[sensitive_attr].values,
)
dpd_deb, eod_deb, ppd_deb = (
    disparity(metric, gm_deb) for metric in ("PositiveRate", "TPR", "PPV")
)

In [15]:
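# --- Score mapping (300-900) from the debiased model's predicted probability; no separate calibration step is applied (reweighted training can shift calibration, so verify before treating scores as probabilities) ---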
def proba_to_score(p, min_score=300, max_score=900):
    """Map a probability linearly onto a score scale and clip to its bounds.

    Parameters
    ----------
    p : float or array-like
        Probability (or probabilities); 0 maps to ``min_score`` and 1 maps to
        ``max_score``. Values outside [0, 1] are clipped to the score bounds.
    min_score, max_score : number, optional
        Lower and upper end of the score scale. The defaults reproduce the
        300-900 range.

    Returns
    -------
    Same shape as ``p`` (a NumPy scalar for scalar input), as returned by
    ``np.clip``.

    Raises
    ------
    ValueError
        If ``max_score`` is smaller than ``min_score``.
    """
    if max_score < min_score:
        raise ValueError("max_score must be greater than or equal to min_score")
    span = max_score - min_score
    return np.clip(min_score + (p * span), min_score, max_score)

# Per-partner score table for the test split: identifier, 300-900 score,
# predicted probability, policy decision and the sensitive attribute.
test_partner_ids = df.loc[X_test.index, "partner_id"].values
scores_test = proba_to_score(probs_deb)

score_columns = {
    "partner_id": test_partner_ids,
    "score_300_900": scores_test,
    "prob_good": probs_deb,
    "approved_at_policy": yhat_deb,
    sensitive_attr: X_test[sensitive_attr].values,
}
scores_df = pd.DataFrame(score_columns)

# Persist the table without the positional index.
scores_csv_path = "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/interim/credit_scores_test.csv"
scores_df.to_csv(scores_csv_path, index=False)

In [29]:
# Display the debiased model's ROC AUC on the test split.
auc_deb

0.647888771435283

In [30]:
# Display the debiased model's F1 on the test split (decisions at the policy threshold).
f1_deb

0.5794947994056464

In [16]:
# Display the per-partner score table built for the test split.
scores_df

Unnamed: 0,partner_id,score_300_900,prob_good,approved_at_policy,city_tier
0,3279,872.246328,0.953744,0,3
1,4045,867.957197,0.946595,0,2
2,2928,884.867861,0.974780,1,1
3,418,843.942542,0.906571,0,2
4,2481,864.685138,0.941142,0,3
...,...,...,...,...,...
1995,5461,881.986674,0.969978,1,2
1996,6714,881.314825,0.968858,1,1
1997,558,872.674021,0.954457,0,1
1998,5438,845.604835,0.909341,0,1


In [17]:
# --- Feature importance from model coefficients ---
# This is NOT permutation importance: features are ranked by the absolute value
# of the debiased logistic-regression coefficients on the preprocessed
# (standardized / one-hot encoded) features.
# Feature names follow the ColumnTransformer output order: numeric columns
# first, then the one-hot columns. They are read from the debiased pipeline's
# own fitted encoder so they describe exactly the columns its coefficients use.
ohe = deb_model.named_steps["pre"].named_transformers_["cat"]
num_features = num_cols
cat_features = list(ohe.get_feature_names_out(cat_cols))
feature_names = num_features + cat_features

coef_deb = deb_model.named_steps["clf"].coef_.ravel()
# Fail loudly on a name/coefficient mismatch instead of silently truncating.
assert len(coef_deb) == len(feature_names), (
    f"expected {len(feature_names)} coefficients, got {len(coef_deb)}"
)

feat_importance = pd.DataFrame({
    "feature": feature_names,
    "coef_abs": np.abs(coef_deb)
}).sort_values("coef_abs", ascending=False)
feat_importance.to_csv("/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/interim/feature_importance.csv", index=False)

In [18]:
# Display the features ranked by absolute coefficient (largest first).
feat_importance

Unnamed: 0,feature,coef_abs
12,wallet_txn_volume,0.312963
3,earnings_avg,0.297747
11,cashless_ratio,0.210908
14,role_driver,0.168147
15,role_merchant,0.164703
6,cancel_rate,0.144761
8,complaints,0.131738
0,tenure_months,0.127223
9,accidents,0.117995
19,age_group_36-50,0.110863


### Plots

In [27]:
# Histograms of the predicted P(good repayment) on the test split, one per
# model. Each figure is written to disk and closed rather than shown inline.
hist_specs = [
    (
        probs_base,
        "Baseline: Score Probabilities (Test)",
        "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/plots/baseline_prob_hist.png",
    ),
    (
        probs_deb,
        "Debiased: Score Probabilities (Test)",
        "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/plots/debiased_prob_hist.png",
    ),
]

for model_probs, hist_title, hist_path in hist_specs:
    plt.figure()
    plt.hist(model_probs, bins=30, alpha=0.7)
    plt.title(hist_title)
    plt.xlabel("P(good repayment)")
    plt.ylabel("Count")
    plt.savefig(hist_path, bbox_inches="tight")
    plt.close()

In [28]:
# Approval (positive-prediction) rate per group for each model, taken from the
# per-group metric tables.
base_rates = pd.Series({grp: stats["PositiveRate"] for grp, stats in gm_base.items()})
deb_rates = pd.Series({grp: stats["PositiveRate"] for grp, stats in gm_deb.items()})

# One bar chart per model, groups in sorted order; saved to disk and closed.
rate_plot_specs = [
    (
        base_rates,
        "Baseline Approval Rate by City Tier",
        "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/plots/baseline_group_rates.png",
    ),
    (
        deb_rates,
        "Debiased Approval Rate by City Tier",
        "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/plots/debiased_group_rates.png",
    ),
]

for group_rates, rate_title, rate_path in rate_plot_specs:
    plt.figure()
    group_rates.sort_index().plot(kind="bar")
    plt.title(rate_title)
    plt.xlabel("City Tier")
    plt.ylabel("Approval Rate")
    plt.tight_layout()
    plt.savefig(rate_path, bbox_inches="tight")
    plt.close()

In [34]:
# Collect the policy settings, model performance and fairness results into one
# report and write it as indented JSON.
policy_summary = {
    "approve_rate": approve_rate,
    "threshold_base": float(thr),
    "threshold_deb": float(thr_deb),
}

performance_summary = {
    "baseline": {"AUC": float(auc_base), "F1": float(f1_base), "Brier": float(brier_base)},
    "debiased": {"AUC": float(auc_deb), "F1": float(f1_deb), "Brier": float(brier_deb)},
}

# "by_group" carries the full per-group tables next to the aggregate gaps.
fairness_summary = {
    "baseline": {
        "DPD": float(dpd_base),
        "EOD": float(eod_base),
        "PPD": float(ppd_base),
        "by_group": gm_base,
    },
    "debiased": {
        "DPD": float(dpd_deb),
        "EOD": float(eod_deb),
        "PPD": float(ppd_deb),
        "by_group": gm_deb,
    },
}

metrics = {
    "policy": policy_summary,
    "performance": performance_summary,
    "fairness_city_tier": fairness_summary,
}

metrics_path = "/Users/hrishityelchuri/Desktop/github/grabhack/driver-credit-scoring-engine/data/outputs/metrics.json"
with open(metrics_path, "w") as f:
    json.dump(metrics, f, indent=2)