In [1]:
import sys
import torch
sys.path.append('../src')

from generator import *
from evaluation import *
from fair_model import FairModel
from baselines import LR, CvxFairModel, EOFairModel
from utils import gen_plot_data, plot_axes, combine_tuples

In [6]:
from simulator import *

In [2]:
# Build Bank model
bank = Bank()

# The train and test agent populations use identical settings and differ
# only in sample count and random seed.
agent_train = Agent(
    n_samples=4000,
    protect_ratio=0.5,
    eps=0.5,
    base=[0.2, 1.0],
    seed=2021,
)
agent_test = Agent(
    n_samples=1000,
    protect_ratio=0.5,
    eps=0.5,
    base=[0.2, 1.0],
    seed=2020,
)


In [None]:
# Generate datasets
# Five-step profiles for the train and test agents under the bank model.
# NOTE(review): the run_simulation cell that follows assigns the same nine
# names again, so when both cells run in order these values are replaced.
s_train, Xs_train, Ys_train = gen_multi_step_profiles(
    bank,
    agent_train,
    steps=5,
)
s_test, Xs_test, Ys_test = gen_multi_step_profiles(
    bank,
    agent_test,
    steps=5,
)

# Merge the per-step training lists into single combined arrays.
s_comb, X_comb, Y_comb = combine_tuples(
    s_train,
    Xs_train,
    Ys_train,
)

In [None]:
steps = 5

# Keyword arguments common to the train and test simulations below; only
# the agent differs between the two calls.
bank_sim_shared = dict(
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

# The bank acts as both decision model and repayment model here.
# Unpacking order: s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As.
(
    s_train, adj, edges, Xs_train, Ys_train, Ds_train, Ps, Os, Us, As_train,
) = run_simulation(
    decision_model=bank,
    repayment_model=bank,
    agent=agent_train,
    **bank_sim_shared,
)

# adj, edges, Ps, Os and Us are rebound by this second call, so afterwards
# they hold the values from the test run.
(
    s_test, adj, edges, Xs_test, Ys_test, Ds_test, Ps, Os, Us, As_test,
) = run_simulation(
    decision_model=bank,
    repayment_model=bank,
    agent=agent_test,
    **bank_sim_shared,
)

# Combined training arrays used to fit the baselines.
s_comb, X_comb, Y_comb = combine_tuples(s_train, Xs_train, Ys_train)

In [None]:

import numpy as np

# Optional CSV export of the agent panel (currently disabled).
# out_path = "../src/simulation_results_neighbors.csv"
# save_agent_panel_csv(
#     out_path,
#     s=s,
#     Xs=Xs,
#     adj=adj,
#     Ds=Ds,
#     Ys=Ys,
#     Ps=Ps,
#     Us=Us,
#     As=As,
#     Os=Os,
#     t0=0,
#     neighbor_k=10,
#  )

# print("Wrote:", out_path)

# Preview the first 10 rows of the combined training arrays (s, x0..x{d-1}, y),
# annotated with the step and within-step index each combined row maps to.
# The preview is best effort: it degrades to an un-annotated table, and then
# to raw array slices, instead of stopping the notebook.
try:
    import pandas as pd

    data = np.hstack([s_comb.reshape(-1, 1), X_comb, Y_comb.reshape(-1, 1)])
    base_columns = ["s"] + [f"x{i}" for i in range(X_comb.shape[1])] + ["y"]
    df = pd.DataFrame(data, columns=base_columns)
except Exception as e2:
    # Report the actual cause: only an ImportError means pandas is missing.
    if isinstance(e2, ImportError):
        print("pandas not available (", e2, ") — showing raw arrays instead:\n")
    else:
        print("Could not build the preview table (", e2, ") — showing raw arrays instead:\n")
    print("s:\n", s_comb[:10])
    print("\nX:\n", X_comb[:10])
    print("\ny:\n", Y_comb[:10])
else:
    try:
        # Per-step row counts from Xs_train; cum holds the row boundaries of
        # each step inside the combined arrays.
        step_sizes = [len(x_step) for x_step in Xs_train]
        cum = np.cumsum([0] + step_sizes)

        # The mapping below is only meaningful when the combined arrays are
        # exactly the per-step arrays stacked in order; otherwise the labels
        # would be silently wrong.
        if cum[-1] != len(df):
            raise ValueError(
                f"per-step sizes sum to {cum[-1]} but combined data has {len(df)} rows"
            )

        idx = np.arange(len(df))
        # side="right" assigns a row sitting exactly on a boundary to the
        # step that starts there.
        step = np.searchsorted(cum[1:], idx, side="right")
        i_in_step = idx - cum[step]
    except Exception as e:
        print("Failed to annotate with step/index due to:", e)
    else:
        # Insert only after every column was computed, so a failure never
        # leaves df half-annotated.
        df.insert(0, "step", step)
        df.insert(1, "i", i_in_step)
        df.insert(2, "comb_idx", idx)

    # Show first 10 rows (with mapping columns when annotation succeeded)
    print(df.head(10).to_string(index=False))

 step  i  comb_idx   s        x0        x1   y
    0  0         0 0.0 -2.444625 -0.748918 0.0
    0  1         1 0.0  1.068578 -2.294532 1.0
    0  2         2 1.0  0.510187  2.447242 0.0
    0  3         3 1.0 -1.314112  2.678183 0.0
    0  4         4 1.0  0.289965 -0.172380 0.0
    0  5         5 0.0 -7.492339 -4.960221 0.0
    0  6         6 0.0 -0.852900 -4.204035 1.0
    0  7         7 0.0 -4.804054 -3.397319 0.0
    0  8         8 1.0  1.531151  1.575370 1.0
    0  9         9 1.0  1.287972  0.066610 0.0


In [5]:
# Print s, x0, x1, y for steps 0..5 at i=1 and i=2
import numpy as np

# Row counts per step and the matching cumulative offsets into the
# combined arrays, both derived from Xs_train.
step_sizes = [len(x_step) for x_step in Xs_train]
cum = np.cumsum([0] + step_sizes)

steps_to_show = [0, 1, 2, 3, 4, 5]
indices_to_show = [1, 2]

rows = []
for step_no in steps_to_show:
    # Requested steps that do not exist get a single placeholder row.
    if not (0 <= step_no < len(step_sizes)):
        rows.append((step_no, None, None, None, None, None, "step out of range"))
        continue
    for local_i in indices_to_show:
        # Likewise for within-step indices beyond that step's row count.
        if not (0 <= local_i < step_sizes[step_no]):
            rows.append((step_no, local_i, None, None, None, None, "i out of range for this step"))
            continue
        flat_i = cum[step_no] + local_i  # position in the combined arrays
        rows.append(
            (
                step_no,
                local_i,
                float(s_comb[flat_i]),
                float(X_comb[flat_i, 0]),
                float(X_comb[flat_i, 1]),
                float(Y_comb[flat_i]),
                "",
            )
        )

# Render as a table when pandas can be used; otherwise print the raw tuples.
try:
    import pandas as pd
    out_df = pd.DataFrame(rows, columns=["step", "i", "s", "x0", "x1", "y", "note"])
    print(out_df.to_string(index=False))
except Exception:
    for row in rows:
        print(row)

 step   i   s        x0        x1   y              note
    0 1.0 1.0 -2.560764  1.697506 0.0                  
    0 2.0 0.0 -2.026388 -4.358015 1.0                  
    1 1.0 1.0 -1.560764  2.697506 0.0                  
    1 2.0 0.0 -2.052291 -4.045064 1.0                  
    2 1.0 1.0 -0.560764  3.697506 0.0                  
    2 2.0 0.0 -1.852291 -3.845064 1.0                  
    3 1.0 1.0  0.439236  4.697506 0.0                  
    3 2.0 0.0 -1.435847 -3.753286 0.0                  
    4 1.0 1.0  1.439236  5.697506 1.0                  
    4 2.0 0.0 -1.497099 -3.422660 0.0                  
    5 NaN NaN       NaN       NaN NaN step out of range


### Baseline: LR

In [12]:
# Fit the logistic-regression baseline on the combined training arrays.
lr = LR(l2_reg=1e-5)
lr.train(s_comb, X_comb, Y_comb)

steps = 5

# Simulate the test agents with LR making the decisions and the bank
# acting as the repayment model.
lr_sim_settings = dict(
    decision_model=lr,
    repayment_model=bank,
    agent=agent_test,
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

# run_simulation returns: s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As
s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As = run_simulation(**lr_sim_settings)

# Per-step statistics for this run, using the s returned by the same run.
compute_statistics(s, Xs, Ds, lr, OYs=Ys, As=As)

------------------------------ Step 1 - Logistic Regression ------------------------------
Acc: 71.0%
Retention: 100.0%
Short Fairness: 0.088
Long fairness: 0.090
------------------------------ Step 2 - Logistic Regression ------------------------------
Acc: 72.4%
Retention: 68.9%
Short Fairness: 0.092
Long fairness: 0.132
------------------------------ Step 3 - Logistic Regression ------------------------------
Acc: 72.6%
Retention: 40.9%
Short Fairness: 0.086
Long fairness: 0.229
------------------------------ Step 4 - Logistic Regression ------------------------------
Acc: 70.7%
Retention: 31.7%
Short Fairness: 0.088
Long fairness: 0.427
------------------------------ Step 5 - Logistic Regression ------------------------------
Acc: 73.6%
Retention: 25.2%
Short Fairness: 0.084
Long fairness: 0.796




### Baseline: FMDP

In [14]:
d = X_comb.shape[1]            # number of X columns
# NOTE(review): this cell passes n_features=d + 1, whereas the commented-out
# variant below and the EOFairModel cell use len(Xs_train[0][0]) + 2.
# Confirm which width CvxFairModel expects.
cfm = CvxFairModel(n_features=d + 1, l2_reg=1e-5, tao=1.565)  # s + X

# cfm = CvxFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.565)

cfm.train(s_comb, X_comb, Y_comb)

# Simulate the test agents with the demographic-parity model deciding and
# the bank acting as the repayment model.
# run_simulation returns: s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As
s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As = run_simulation(
    decision_model=cfm,
    repayment_model=bank,
    agent=agent_test,
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

# Evaluate with the s returned by this very run (as the LR cell does), so it
# is paired with the Xs/Ds it came with rather than with s_test left over
# from an earlier cell.
compute_statistics(s, Xs, Ds, cfm, OYs=Ys, As=As)

------------------------------ Step 1 - Fair Model with Demographic Parity ------------------------------
Acc: 35.0%
Retention: 100.0%
Short Fairness: 0.044
Long fairness: 0.004
------------------------------ Step 2 - Fair Model with Demographic Parity ------------------------------
Acc: 35.7%
Retention: 81.7%
Short Fairness: 0.044
Long fairness: 0.004
------------------------------ Step 3 - Fair Model with Demographic Parity ------------------------------
Acc: 36.2%
Retention: 58.6%
Short Fairness: 0.044
Long fairness: 0.208
------------------------------ Step 4 - Fair Model with Demographic Parity ------------------------------
Acc: 38.4%
Retention: 44.8%
Short Fairness: 0.044
Long fairness: 1.003
------------------------------ Step 5 - Fair Model with Demographic Parity ------------------------------
Acc: 38.7%
Retention: 35.6%
Short Fairness: 0.042
Long fairness: 4.034




### Baseline: FMEO

In [17]:
# Fit the equal-opportunity baseline on the combined training arrays.
eqm = EOFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.5)
eqm.train(s_comb, X_comb, Y_comb)

# Simulate the test agents with the equal-opportunity model deciding and
# the bank acting as the repayment model.
# run_simulation returns: s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As
s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As = run_simulation(
    decision_model=eqm,
    repayment_model=bank,
    agent=agent_test,
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

# Evaluate with the s returned by this very run (as the LR cell does), so it
# is paired with the Xs/Ds it came with rather than with s_test left over
# from an earlier cell.
compute_statistics(s, Xs, Ds, eqm, OYs=Ys, As=As)



optimal_inaccurate
------------------------------ Step 1 - Fair Model with Equal Oppertunity ------------------------------
Acc: 68.6%
Retention: 100.0%
Short Fairness: 0.106
Long fairness: 0.160
------------------------------ Step 2 - Fair Model with Equal Oppertunity ------------------------------
Acc: 69.1%
Retention: 78.9%
Short Fairness: 0.104
Long fairness: 0.199
------------------------------ Step 3 - Fair Model with Equal Oppertunity ------------------------------
Acc: 68.5%
Retention: 53.3%
Short Fairness: 0.102
Long fairness: 0.226
------------------------------ Step 4 - Fair Model with Equal Oppertunity ------------------------------
Acc: 68.3%
Retention: 40.8%
Short Fairness: 0.102
Long fairness: 0.327
------------------------------ Step 5 - Fair Model with Equal Oppertunity ------------------------------
Acc: 68.0%
Retention: 30.6%
Short Fairness: 0.104
Long fairness: 0.458




### FM

In [19]:
# Initial fit: the training trajectories are passed in both the
# (Xs_train, Ys_train) slot and the following (Xs, Ys) slot.
fm = FairModel(
    n_features=len(Xs_train[0][0])+1,
    lr=5e-3,
    l2_reg=1e-5,
    sf_reg=0.119,
    lf_reg=0.154,
)
fm.train(s_train, Xs_train, Ys_train, Xs_train, Ys_train, epochs=1000, plot=False)

num_iters = 50

# Parameter history: theta_list keeps a copy of the parameters after every
# round; theta_gaps keeps the norm of the change made in each round.
theta_true = fm.params
theta_list = [np.copy(theta_true)]
theta_gaps = []

# initial theta
theta = np.copy(theta_true)

# Simulation settings are the same in every round; fm is the same object
# throughout, so each round simulates with its latest parameters.
retrain_sim_settings = dict(
    decision_model=fm,
    repayment_model=bank,
    agent=agent_train,
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

for t in range(num_iters):
    # adjust distribution to current theta
    s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As = run_simulation(**retrain_sim_settings)

    # learn on induced distribution (simulated Xs, Ys alongside the training data)
    fm.train(s_train, Xs_train, Ys_train, Xs, Ys, epochs=10, plot=False)

    # keep track of statistic
    theta_new = fm.params
    theta_gaps.append(np.linalg.norm(theta_new - theta))
    theta_list.append(np.copy(theta_new))

    # this round's parameters are the reference point for the next round
    theta = np.copy(theta_new)

print("Retraining Done!")

Retraining Done!


In [20]:
# Simulate the test agents with the retrained FairModel deciding and the
# bank acting as the repayment model.
# run_simulation returns: s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As
s, adj, edges, Xs, Ys, Ds, Ps, Os, Us, As = run_simulation(
    decision_model=fm,
    repayment_model=bank,
    agent=agent_test,
    steps=steps,
    enforce_demographic_mixing=True,
    k_same=8,
    k_other=2,
    directed=False,
    graph_seed=2026,
    seed=2026,
    decision_coef=0.8,
    repayment_coef=0.8,
)

# Evaluate with the s returned by this very run (as the LR cell does), so it
# is paired with the Xs/Ds it came with rather than with s_test left over
# from an earlier cell.
compute_statistics(s, Xs, Ds, fm, OYs=Ys, As=As)

------------------------------ Step 1 - Long-term Fair Model ------------------------------
Acc: 68.9%
Retention: 100.0%
Short Fairness: 0.132
Long fairness: 0.074
------------------------------ Step 2 - Long-term Fair Model ------------------------------
Acc: 69.7%
Retention: 79.7%
Short Fairness: 0.130
Long fairness: 0.102
------------------------------ Step 3 - Long-term Fair Model ------------------------------
Acc: 68.9%
Retention: 55.9%
Short Fairness: 0.130
Long fairness: 0.110
------------------------------ Step 4 - Long-term Fair Model ------------------------------
Acc: 67.3%
Retention: 43.1%
Short Fairness: 0.136
Long fairness: 0.218
------------------------------ Step 5 - Long-term Fair Model ------------------------------
Acc: 68.0%
Retention: 32.7%
Short Fairness: 0.134
Long fairness: 0.443




In [None]:
# Save trained parameters to disk (portable: NumPy .npz)
from pathlib import Path
import numpy as np
import torch

# Checkpoint directory, relative to the current working directory;
# created on demand.
ckpt_dir = Path("checkpoints")
ckpt_dir.mkdir(parents=True, exist_ok=True)

params_path = ckpt_dir.joinpath("fairmodel_params.npz")
state_path = ckpt_dir.joinpath("fairmodel_state_dict.pt")

# Parameter vector as float32.
# NOTE(review): expected length is n_features + 1 (weights then bias) -
# confirm against FairModel.params.
theta = np.asarray(fm.params, dtype=np.float32)

# Store the weight width next to the vector so a loader can size a model.
np.savez(
    params_path,
    params=theta,
    n_features=int(fm.linear.weight.shape[1]),
)

# Also store the model's state_dict via torch.save.
# NOTE(review): a module state_dict normally contains parameters and buffers
# only; optimizer state would have to be saved separately - confirm for FairModel.
torch.save(fm.state_dict(), state_path)

print(f"Saved params to: {params_path.resolve()}")
print(f"Saved state_dict to: {state_path.resolve()}")
print("theta shape:", theta.shape)

In [None]:
# Load parameters somewhere else and apply to a fresh FairModel
# (Works even in a different notebook/script, as long as you have the same n_features)
from pathlib import Path
import numpy as np
import torch


def load_params_npz(npz_path):
    """Load the "params" array from an .npz archive as a float32 array.

    Args:
        npz_path: Path (str or Path) of an archive written with np.savez
            that contains an array stored under the key "params".

    Returns:
        np.ndarray of dtype float32 holding the stored parameters.

    Raises:
        KeyError: if the archive has no "params" entry.
    """
    npz_path = Path(npz_path)
    # np.load on an .npz archive returns an NpzFile that keeps the file open;
    # the with-block closes it once the array has been read into memory.
    with np.load(npz_path) as data:
        if "params" not in data.files:
            raise KeyError(
                f"'params' not found in {npz_path}; available arrays: {data.files}"
            )
        return np.asarray(data["params"], dtype=np.float32)


def apply_theta_to_fairmodel(model: FairModel, theta: np.ndarray) -> None:
    """Copy a flat parameter vector into model.linear, in place.

    theta is laid out as [w0, w1, ..., w_{d-1}, b], where d is the second
    dimension of model.linear.weight.

    Raises:
        ValueError: if the flattened theta does not have d + 1 entries.
    """
    theta = np.asarray(theta, dtype=np.float32).ravel()
    d = int(model.linear.weight.shape[1])

    # Guard clause: d weights plus one bias are required.
    if theta.size != d + 1:
        raise ValueError(f"theta has len {theta.shape[0]} but expected {d + 1} (d={d})")

    # Convert once, then slice: the first d entries become the (1, d) weight
    # row and the final entry becomes the one-element bias.
    theta_tensor = torch.from_numpy(theta)
    weight_row = theta_tensor[:d].view(1, d)
    bias_vec = theta_tensor[d:]

    # Write the values without recording the copies in autograd.
    with torch.no_grad():
        model.linear.weight.copy_(weight_row)
        model.linear.bias.copy_(bias_vec)

    # keep old_* in sync with the loaded weights
    model.save_params()


params_path = Path("checkpoints") / "fairmodel_params.npz"
loaded_theta = load_params_npz(params_path)

# theta is [weights..., bias], so the feature width is its length minus one.
# Deriving it from the file (not from the in-memory `fm`) keeps this example
# usable in a fresh kernel or a different notebook.
loaded_n_features = int(loaded_theta.shape[0]) - 1

# Example: create a new model with the same n_features and load the theta
fm_loaded = FairModel(
    n_features=loaded_n_features,
    lr=5e-3,
    l2_reg=1e-5,
    sf_reg=0.119,
    lf_reg=0.154,
)
apply_theta_to_fairmodel(fm_loaded, loaded_theta)

print("Loaded theta OK. First 5 entries:", loaded_theta[:5])
print("Params match:", np.allclose(fm_loaded.params, loaded_theta))