In [None]:
import sys
import torch
sys.path.append('../src')

from generator import *
from evaluation import *
from fair_model import FairModel
from baselines import LR, CvxFairModel, EOFairModel
from utils import gen_plot_data, plot_axes, combine_tuples

In [2]:
# Build Bank model
bank = Bank()
agent_train = Agent(n_samples=4000, protect_ratio=0.5, eps=0.5, base=[0.2, 1.0], seed=2021)
agent_test = Agent(n_samples=1000, protect_ratio=0.5, eps=0.5, base=[0.2, 1.0], seed=2020)


In [3]:
print(agent_train)

<generator.Agent object at 0x3047cf3d0>


In [4]:
# Generate datasets
s_train, Xs_train, Ys_train = gen_multi_step_profiles(bank, agent_train, steps=5)
s_test, Xs_test, Ys_test = gen_multi_step_profiles(bank, agent_test, steps=5)
s_comb, X_comb, Y_comb = combine_tuples(s_train, Xs_train, Ys_train)

In [6]:
# Preview the first 10 rows with time step mapping
# We reconstruct (step, sample index) from the structure of s_train/Xs_train/Ys_train and how combine_tuples flattens them.
# Assumes combine_tuples concatenates steps along rows in order: step 0, step 1, ...
import numpy as np

try:
    import pandas as pd
    # Build the combined data frame as before
    data = np.hstack([s_comb.reshape(-1, 1), X_comb, Y_comb.reshape(-1, 1)])
    base_columns = ["s"] + [f"x{i}" for i in range(X_comb.shape[1])] + ["y"]
    df = pd.DataFrame(data, columns=base_columns)

    # Infer per-step sizes from the original lists (Xs_train/Ys_train)
    # Each Xs_train[t] is (n_t, d); we accumulate to compute global row indices per step.
    step_sizes = [len(x_step) for x_step in Xs_train]
    cum = np.cumsum([0] + step_sizes)  # boundaries

    # Create arrays for (step, i-in-step)
    idx = np.arange(len(df))
    # Find step for each global index using cumulative boundaries
    step = np.searchsorted(cum[1:], idx, side="right")
    i_in_step = idx - cum[step]

    df.insert(0, "step", step)
    df.insert(1, "i", i_in_step)
    df.insert(2, "comb_idx", idx)

    # Show first 10 rows with mapping columns
    print(df.head(10).to_string(index=False))
except Exception as e:
    print("Failed to annotate with step/index due to:", e)
    # Fallback to the simple 10-row preview
    try:
        import pandas as pd
        data = np.hstack([s_comb.reshape(-1, 1), X_comb, Y_comb.reshape(-1, 1)])
        columns = ["s"] + [f"x{i}" for i in range(X_comb.shape[1])] + ["y"]
        df_preview = pd.DataFrame(data, columns=columns)
        print(df_preview.head(10).to_string(index=False))
    except Exception as e2:
        print("pandas not available (", e2, ") â€” showing raw arrays instead:\n")
        print("s:\n", s_comb[:10])
        print("\nX:\n", X_comb[:10])
        print("\ny:\n", Y_comb[:10])

 step  i  comb_idx   s        x0        x1   y
    0  0         0 0.0 -0.430098  0.281743 0.0
    0  1         1 1.0 -2.560764  1.697506 0.0
    0  2         2 0.0 -2.026388 -4.358015 1.0
    0  3         3 1.0  0.031396  2.650109 0.0
    0  4         4 1.0  4.189555  3.041892 1.0
    0  5         5 0.0 -0.607038 -5.042286 0.0
    0  6         6 0.0 -5.328782  0.289171 0.0
    0  7         7 0.0 -5.758294 -3.710050 0.0
    0  8         8 0.0 -4.338420 -0.032810 0.0
    0  9         9 0.0 -1.432677 -3.118676 0.0


In [7]:
# Print s, x0, x1, y for steps 0..5 at i=1 and i=2
import numpy as np

# Derive per-step sizes and cumulative boundaries from Xs_train
step_sizes = [len(x_step) for x_step in Xs_train]
cum = np.cumsum([0] + step_sizes)

steps_to_show = [0, 1, 2, 3, 4, 5]
indices_to_show = [1, 2]

rows = []
for t in steps_to_show:
    if t < 0 or t >= len(step_sizes):
        rows.append((t, None, None, None, None, None, "step out of range"))
        continue
    for i in indices_to_show:
        if i < 0 or i >= step_sizes[t]:
            rows.append((t, i, None, None, None, None, "i out of range for this step"))
            continue
        gi = cum[t] + i  # global index in combined arrays
        s_val = float(s_comb[gi])
        x0 = float(X_comb[gi, 0])
        x1 = float(X_comb[gi, 1])
        y_val = float(Y_comb[gi])
        rows.append((t, i, s_val, x0, x1, y_val, ""))

try:
    import pandas as pd
    out_df = pd.DataFrame(rows, columns=["step", "i", "s", "x0", "x1", "y", "note"])
    print(out_df.to_string(index=False))
except Exception:
    for r in rows:
        print(r)

 step   i   s        x0        x1   y              note
    0 1.0 1.0 -2.560764  1.697506 0.0                  
    0 2.0 0.0 -2.026388 -4.358015 1.0                  
    1 1.0 1.0 -1.560764  2.697506 0.0                  
    1 2.0 0.0 -2.052291 -4.045064 1.0                  
    2 1.0 1.0 -0.560764  3.697506 0.0                  
    2 2.0 0.0 -1.852291 -3.845064 1.0                  
    3 1.0 1.0  0.439236  4.697506 0.0                  
    3 2.0 0.0 -1.435847 -3.753286 0.0                  
    4 1.0 1.0  1.439236  5.697506 1.0                  
    4 2.0 0.0 -1.497099 -3.422660 0.0                  
    5 NaN NaN       NaN       NaN NaN step out of range


### Baseline: LR

In [4]:
lr = LR(l2_reg=1e-5)
lr.train(s_comb, X_comb, Y_comb)

_, Xs_te, Ys_te = gen_multi_step_profiles(lr, agent_test, steps=5)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, lr, OYs=OYs_te)

------------------------------ Step 1 - Logistic Regression ------------------------------
Acc: 91.2%
Short Fairness: 0.152
Long fairness: 0.058
------------------------------ Step 2 - Logistic Regression ------------------------------
Acc: 89.4%
Short Fairness: 0.160
Long fairness: 0.117
------------------------------ Step 3 - Logistic Regression ------------------------------
Acc: 91.7%
Short Fairness: 0.166
Long fairness: 0.173
------------------------------ Step 4 - Logistic Regression ------------------------------
Acc: 92.1%
Short Fairness: 0.164
Long fairness: 0.246
------------------------------ Step 5 - Logistic Regression ------------------------------
Acc: 91.7%
Short Fairness: 0.174
Long fairness: 0.340




### Baseline: FMDP

In [7]:
d = X_comb.shape[1]            # number of X columns
cfm = CvxFairModel(n_features=d + 1, l2_reg=1e-5, tao=1.565)  # s + X

# cfm = CvxFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.565)

cfm.train(s_comb, X_comb, Y_comb)

_, Xs_te, Ys_te = gen_multi_step_profiles(cfm, agent_test, steps=5)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, cfm, OYs=OYs_te) 

ValueError: negative axis 0 index: -2147481067

## Baseline: FMEO

In [3]:
eqm = EOFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.5)
eqm.train(s_comb, X_comb, Y_comb)

_, Xs_te, Ys_te = gen_multi_step_profiles(eqm, agent_test, steps=5)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, eqm, OYs=OYs_te)

NameError: name 'EOFairModel' is not defined

### Ours

In [None]:
fm = FairModel(n_features=len(Xs_train[0][0])+1, lr=5e-3, l2_reg=1e-5, sf_reg=0.119, lf_reg=0.154)
fm.train(s_train, Xs_train, Ys_train, Xs_train, Ys_train, epochs=1000, plot=False)

num_iters = 50

theta_true = fm.params
theta_list     = [np.copy(theta_true)]
theta_gaps     = []


# inital theta
theta = np.copy(theta_true)

for t in range(num_iters):
    # adjust distribution to current theta
    _, NXs_train, NYs_train = gen_multi_step_profiles(fm, agent_train, steps=5)
    # learn on induced distribution
    fm.train(s_train, Xs_train, Ys_train, NXs_train, NYs_train, epochs=10, plot=False)
    
    # keep track of statistic
    theta_new = fm.params
    theta_gaps.append(np.linalg.norm(theta_new - theta))
    theta_list.append(np.copy(theta_new))

    theta = np.copy(theta_new)
print("Retraining Done!")

Retraining Done!


In [None]:
_, Xs_te, Ys_te = gen_multi_step_profiles(fm, agent_test, steps=5)
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
compute_statistics(s_test, Xs_te, Ys_te, fm, OYs=OYs_te)

------------------------------ Step 1 - Long-term Fair Model ------------------------------
Acc: 80.0%
Short Fairness: 0.016
Long fairness: 0.034
------------------------------ Step 2 - Long-term Fair Model ------------------------------
Acc: 74.9%
Short Fairness: 0.018
Long fairness: 0.026
------------------------------ Step 3 - Long-term Fair Model ------------------------------
Acc: 72.8%
Short Fairness: 0.018
Long fairness: 0.012
------------------------------ Step 4 - Long-term Fair Model ------------------------------
Acc: 70.8%
Short Fairness: 0.010
Long fairness: 0.004
------------------------------ Step 5 - Long-term Fair Model ------------------------------
Acc: 69.0%
Short Fairness: 0.010
Long fairness: 0.006




 # Group