In [1]:
import sys
import torch
sys.path.append('../src')

from evaluation import *
from taiwan_generator import *
from fair_model import FairModel
from utils import gen_plot_data, plot_data, combine_tuples
from baselines import LR, CvxFairModel, EOFairModel

In [2]:
# Reload taiwan_generator to pick up latest source changes
# Development convenience: lets edits to the taiwan_generator module take
# effect without restarting the kernel.
import sys, importlib
# Guard so that re-running this cell does not append duplicate path entries.
if '../src' not in sys.path:
    sys.path.append('../src')
import taiwan_generator
importlib.reload(taiwan_generator)
# Repeat the star import so the names in this namespace are rebound to the
# reloaded definitions. Objects that were created before the reload keep
# referring to the old class objects, so re-run the cells that build them.
from taiwan_generator import *
print("taiwan_generator reloaded.")

taiwan_generator reloaded.


In [3]:
# Load dataset
# preprocess_data / split_data are brought in by the star imports in the
# first cell (presumably from taiwan_generator — TODO confirm).
# S, X, Y look like sensitive attribute, features and label, judging by how
# they are used below; PARAMS is later handed to Bank(params=...).
S, X, Y, PARAMS = preprocess_data()
# split_data returns a (train, test) pair, each an (s, X, y) triple.
(s_train, X_train, y_train), (s_test, X_test, y_test) = split_data(S, X, Y)

In [4]:
# Build Bank model
# The bank is constructed from the PARAMS returned by preprocess_data().
bank = Bank(params=PARAMS)
# One Agent per split. Both use the same eps/base settings and differ only in
# the data they wrap and in their seed (2021 for train, 2020 for test).
# NOTE(review): the meaning of eps and base is defined in Agent, not visible
# here — confirm before tuning.
agent_train = Agent(s_train, X_train, y_train, eps=0.1, base=[0.0, 0.1], seed=2021)
agent_test = Agent(s_test, X_test, y_test, eps=0.1, base=[0.0, 0.1], seed=2020)

In [5]:
# Generate datasets
# Roll each agent forward against the bank. Xs_* / Ys_* are per-step
# sequences (the inspection cell below takes len() of each step of Xs_train).
# NOTE: s_train and s_test are rebound here, replacing the values returned by
# split_data; re-running the agent-construction cell after this one would
# therefore pick up these rebound values.
s_train, Xs_train, Ys_train = gen_multi_step_profiles(bank, agent_train, steps=4)
s_test, Xs_test, Ys_test = gen_multi_step_profiles(bank, agent_test, steps=4)
# Pool the per-step training data into single arrays used to fit the
# baselines (presumably concatenated in step order — TODO confirm in utils).
s_comb, X_comb, Y_comb = combine_tuples(s_train, Xs_train, Ys_train)

In [6]:
# Print s, all X columns, y for steps 0..4 at i=1 and i=2
#
# Diagnostic cell: looks up a couple of individuals in the pooled arrays
# (s_comb, X_comb, Y_comb) at every requested step and prints them as a table.
# Requests that fall outside the generated data are kept as rows carrying a
# message instead of being dropped, so a missing step is visible in the output.
import numpy as np

# Compute per-step sizes and cumulative boundaries from Xs_train
step_sizes = [len(x_step) for x_step in Xs_train]
cum = np.cumsum([0] + step_sizes)  # cum[t] = number of rows before step t

steps_to_show = [0, 1, 2, 3, 4]
indices_to_show = [1, 2]

# Each row is (step, i, s, x_vals, y). For out-of-range requests the last slot
# carries an explanatory message instead of a label, so the message is shown
# in the "y" column of the printed table.
rows = []
for t in steps_to_show:
    if t < 0 or t >= len(step_sizes):
        rows.append((t, None, None, None, "step out of range"))
        continue
    for i in indices_to_show:
        if i < 0 or i >= step_sizes[t]:
            rows.append((t, i, None, None, "i out of range for this step"))
            continue
        # Global index in combined arrays; assumes combine_tuples stacks the
        # steps in order — TODO confirm in utils.
        gi = cum[t] + i
        s_val = float(s_comb[gi])
        x_vals = X_comb[gi]
        y_val = float(Y_comb[gi])
        rows.append((t, i, s_val, x_vals, y_val))

try:
    import pandas as pd
    # Expand x_vals into separate columns x0..x{d-1}
    max_d = X_comb.shape[1]

    def to_row(r):
        """Turn one (step, i, s, x_vals, y) tuple into a flat dict for the table."""
        t, i, s_val, x_vals, y_val = r
        x_cols = {
            f"x{j}": None if x_vals is None else float(x_vals[j])
            for j in range(max_d)
        }
        return {"step": t, "i": i, "s": s_val, **x_cols, "y": y_val}

    out_df = pd.DataFrame([to_row(r) for r in rows])
    print(out_df.to_string(index=False))
except Exception as exc:
    # Best-effort fallback (e.g. pandas not installed): still show the data,
    # but say why the table could not be built instead of hiding the error.
    print(f"Tabular display unavailable ({type(exc).__name__}: {exc}); printing raw rows.")
    for r in rows:
        print(r)

 step   i   s       x0       x1                 y
    0 1.0 0.0 0.538964 0.069038               0.0
    0 2.0 0.0 2.467062 1.878806               1.0
    1 1.0 0.0 0.538964 0.069038               0.0
    1 2.0 0.0 2.478389 1.889695               1.0
    2 1.0 0.0 0.538964 0.069038               0.0
    2 2.0 0.0 2.489734 1.900600               1.0
    3 1.0 0.0 0.538964 0.069038               0.0
    3 2.0 0.0 2.501096 1.911522               1.0
    4 NaN NaN      NaN      NaN step out of range


### Baseline: LR (Logistic Regression)

In [None]:
# Fit the logistic-regression baseline on the pooled training data
# (s_comb / X_comb / Y_comb come from combine_tuples in the data-generation cell).
lr = LR(l2_reg=1e-5)
lr.train(s_comb, X_comb, Y_comb)

# Roll the test agent forward for 4 steps with the trained model passed in
# the slot that `bank` occupied during data generation.
_, Xs_te, Ys_te = gen_multi_step_profiles(lr, agent_test, steps=4)
# Labels for the same rolled-out features as produced by `bank`
# (presumably the reference labels accuracy is measured against — TODO confirm).
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
# Prints per-step accuracy and short/long fairness.
# NOTE: Xs_te, Ys_te and OYs_te are rebound by every evaluation cell.
compute_statistics(s_test, Xs_te, Ys_te, lr, OYs=OYs_te)

------------------------------ Step 1 - Logistic Regression ------------------------------
Acc: 82.8%
Short Fairness: 0.015
Long fairness: 0.038
------------------------------ Step 2 - Logistic Regression ------------------------------
Acc: 82.6%
Short Fairness: 0.018
Long fairness: 0.088
------------------------------ Step 3 - Logistic Regression ------------------------------
Acc: 84.1%
Short Fairness: 0.021
Long fairness: 0.243
------------------------------ Step 4 - Logistic Regression ------------------------------
Acc: 81.6%
Short Fairness: 0.012
Long fairness: 0.433




### Baseline: FMDP (Fair Model with Demographic Parity)

In [None]:
# Fit the demographic-parity baseline on the pooled training data.
# n_features is the per-individual feature count plus 2; the long-term model
# further down uses plus 1 instead.
# NOTE(review): the extra slots presumably cover the sensitive attribute and a
# bias term — confirm in baselines.CvxFairModel.
# `tao` is the keyword the class exposes (presumably the fairness threshold,
# "tau" — TODO confirm); the model prints the "optimal" line seen in the output.
cfm = CvxFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.6)
cfm.train(s_comb, X_comb, Y_comb)

# Roll the test agent forward for 4 steps against the trained model.
_, Xs_te, Ys_te = gen_multi_step_profiles(cfm, agent_test, steps=4)
# Labels for the rolled-out features as produced by `bank`.
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
# Prints per-step accuracy and short/long fairness.
# NOTE: Xs_te, Ys_te and OYs_te are rebound by every evaluation cell.
compute_statistics(s_test, Xs_te, Ys_te, cfm, OYs=OYs_te)

optimal
------------------------------ Step 1 - Fair Model with Demographic Parity ------------------------------
Acc: 83.0%
Short Fairness: 0.063
Long fairness: 0.038
------------------------------ Step 2 - Fair Model with Demographic Parity ------------------------------
Acc: 84.3%
Short Fairness: 0.066
Long fairness: 0.076
------------------------------ Step 3 - Fair Model with Demographic Parity ------------------------------
Acc: 84.6%
Short Fairness: 0.075
Long fairness: 0.223
------------------------------ Step 4 - Fair Model with Demographic Parity ------------------------------
Acc: 84.1%
Short Fairness: 0.069
Long fairness: 0.397




### Baseline: FMEO (Fair Model with Equal Opportunity)

In [None]:
# Fit the equal-opportunity baseline on the pooled training data.
# Same construction as the demographic-parity baseline apart from the class
# and the `tao` value (1.67 here).
# NOTE(review): meaning of the +2 in n_features and of `tao` is defined in
# baselines.EOFairModel, not visible here — confirm before tuning.
eqm = EOFairModel(n_features=len(Xs_train[0][0])+2, l2_reg=1e-5, tao=1.67)
eqm.train(s_comb, X_comb, Y_comb)

# Roll the test agent forward for 4 steps against the trained model.
_, Xs_te, Ys_te = gen_multi_step_profiles(eqm, agent_test, steps=4)
# Labels for the rolled-out features as produced by `bank`.
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
# Prints per-step accuracy and short/long fairness.
# NOTE: Xs_te, Ys_te and OYs_te are rebound by every evaluation cell.
compute_statistics(s_test, Xs_te, Ys_te, eqm, OYs=OYs_te)

optimal
------------------------------ Step 1 - Fair Model with Equal Oppertunity ------------------------------
Acc: 82.4%
Short Fairness: 0.072
Long fairness: 0.006
------------------------------ Step 2 - Fair Model with Equal Oppertunity ------------------------------
Acc: 83.0%
Short Fairness: 0.075
Long fairness: 0.045
------------------------------ Step 3 - Fair Model with Equal Oppertunity ------------------------------
Acc: 83.0%
Short Fairness: 0.087
Long fairness: 0.156
------------------------------ Step 4 - Fair Model with Equal Oppertunity ------------------------------
Acc: 81.3%
Short Fairness: 0.078
Long fairness: 0.295




### Ours: Long-term Fair Model

In [7]:
# numpy is used throughout this cell; import it here so the cell does not
# depend on an earlier diagnostic cell having been run first.
import numpy as np

# Initial fit. The generated training profiles are passed twice: once as the
# reference data and once in the slots that the retraining loop below fills
# with freshly regenerated profiles.
# NOTE(review): sf_reg / lf_reg look like the short- and long-term fairness
# weights — confirm in fair_model.FairModel.
fm = FairModel(n_features=len(Xs_train[0][0])+1, lr=5e-3, l2_reg=1e-5, sf_reg=0.0223, lf_reg=0.715)
fm.train(s_train, Xs_train, Ys_train, Xs_train, Ys_train, epochs=1000, plot=False, short_type='neg')

num_iters = 30
# Parameters after the initial fit. Copies are stored so the history is not
# affected if fm.params is updated in place by later training.
theta_true = fm.params
theta_list = [np.copy(theta_true)]  # parameter snapshot per round
theta_gaps = []                     # norm of the parameter change per round

# initial theta
theta = np.copy(theta_true)
for t in range(num_iters):
    # adjust distribution to current theta: regenerate the training agent's
    # profiles against the current model
    _, NXs_train, NYs_train = gen_multi_step_profiles(fm, agent_train, steps=4)
    # learn on induced distribution (short 10-epoch update per round)
    fm.train(s_train, Xs_train, Ys_train, NXs_train, NYs_train, epochs=10, plot=False, short_type='neg')

    # keep track of statistic: how far the parameters moved in this round
    theta_new = fm.params
    theta_gaps.append(np.linalg.norm(theta_new - theta))
    theta_list.append(np.copy(theta_new))

    theta = np.copy(theta_new)
print("Retraining Done!")

Retraining Done!


In [8]:
# Evaluate the retrained long-term fair model the same way as the baselines:
# roll the test agent forward for 4 steps against the model, ...
_, Xs_te, Ys_te = gen_multi_step_profiles(fm, agent_test, steps=4)
# ... obtain the labels `bank` produces for those rolled-out features, ...
OYs_te = generate_y_from_bank(s_test, Xs_te, bank)
# ... and print per-step accuracy and short/long fairness.
# NOTE: Xs_te, Ys_te and OYs_te are rebound by every evaluation cell.
compute_statistics(s_test, Xs_te, Ys_te, fm, OYs=OYs_te)

------------------------------ Step 1 - Long-term Fair Model ------------------------------
Acc: 68.4%
Short Fairness: 0.176
Long fairness: 0.018
------------------------------ Step 2 - Long-term Fair Model ------------------------------
Acc: 65.6%
Short Fairness: 0.176
Long fairness: 0.029
------------------------------ Step 3 - Long-term Fair Model ------------------------------
Acc: 65.4%
Short Fairness: 0.174
Long fairness: 0.034
------------------------------ Step 4 - Long-term Fair Model ------------------------------
Acc: 69.5%
Short Fairness: 0.174
Long fairness: 0.073




In [None]:
# Inspect original dataset columns and map x0/x1
#
# Rebuilds the filtering, balanced sampling and scaling steps directly on the
# raw spreadsheet so the two model features can be traced back to their
# original column names and values.
import pandas as pd
import numpy as np

# Load original Excel file (same path used in read_taiwan_data)
file_path = "../data/default of credit card clients.xls"
df_raw = pd.read_excel(file_path, header=1)

# Apply the same filtering as read_taiwan_data: keep rows whose PAY_AMT1 and
# PAY_AMT2 both lie strictly between 10 and 10000
mask = df_raw['PAY_AMT1'].lt(10000) & df_raw['PAY_AMT1'].gt(10)
mask &= df_raw['PAY_AMT2'].lt(10000) & df_raw['PAY_AMT2'].gt(10)
df_f = df_raw[mask].copy()

# Balanced sampling by label/group, same as generator: 1000 rows for each
# (SEX, default) combination, drawn with a fixed random_state.
# Order is SEX=1 (default=1, default=0), then SEX=2 (default=1, default=0).
label0, label1, label2, label3 = [
    df_f[(df_f['default payment next month'] == default_flag) & (df_f['SEX'] == sex_code)]
    .sample(n=1000, replace=False, random_state=2021)
    for sex_code in (1, 2)
    for default_flag in (1, 0)
]
df_f = pd.concat([label0, label1, label2, label3], axis=0)

# Identify the exact column names for iloc[:, 18:20]
all_cols = df_f.columns.tolist()
selected_cols = all_cols[18:20]
print("Selected feature columns (iloc[:, 18:20]):", selected_cols)


def _scale_to_0_3(col):
    """Min-max scale one column onto the interval [0, 3]."""
    lowest, highest = np.min(col), np.max(col)
    return 3 * (col - lowest) / (highest - lowest)


# Construct S, X, Y exactly like read_taiwan_data
X_raw = df_f.iloc[:, 18:20].copy()
X_scaled = X_raw.apply(_scale_to_0_3)
S_vec = df_f['SEX'] - 1  # SEX codes 1/2 become 0/1
# Flip the label so that 1 means "no default next month"
Y_vec = df_f['default payment next month'].replace({0:1, 1:0})

# Show first 5 rows mapping
raw_head = X_raw.iloc[:5]
scaled_head = X_scaled.iloc[:5]
out = pd.DataFrame({
    's': S_vec.head(5).astype(int),
    'x0_raw': raw_head.iloc[:, 0].values,
    'x1_raw': raw_head.iloc[:, 1].values,
    'x0': scaled_head.iloc[:, 0].values,
    'x1': scaled_head.iloc[:, 1].values,
    'y': Y_vec.head(5).astype(int)
})
print("Sample mapping (first 5 rows):")
print(out.to_string(index=False))

In [None]:
# Small multi-step simulation: show s, x0, x1, y across steps
#
# Follows a handful of test individuals through a rollout against the bank
# and prints one table row per (step, individual).
import numpy as np
import pandas as pd

# Use already built bank and agent_test if available; else rebuild quickly
try:
    bank, agent_test  # bare name lookup: raises NameError if either is missing
except NameError:
    S_sim, X_sim, Y_sim, PARAMS_sim = preprocess_data()
    train_part, test_part = split_data(S_sim, X_sim, Y_sim)
    _, X_train_sim, y_train_sim = train_part
    s_test, X_test, y_test = test_part
    bank = Bank(params=PARAMS_sim)
    agent_test = Agent(s_test, X_test, y_test, eps=0.1, base=[0.0, 0.1], seed=2020)

# Generate a short trajectory (5 steps)
s_test_sim, Xs_test_sim, Ys_test_sim = gen_multi_step_profiles(bank, agent_test, steps=5)

# Pick a few indices to track; indices beyond a step's size are skipped
indices = [0, 1, 2]
rows = [
    {
        "step": step,
        "i": idx,
        "s": float(s_test_sim[idx]),
        "x0": float(Xs_test_sim[step][idx][0]),
        "x1": float(Xs_test_sim[step][idx][1]),
        "y": float(Ys_test_sim[step][idx]),
    }
    for step in range(len(Xs_test_sim))
    for idx in indices
    if idx < len(Xs_test_sim[step])
]

print(pd.DataFrame(rows).to_string(index=False))
