In [13]:
import sys
import warnings
import time
import copy
import json
from datetime import datetime
from itertools import product

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.autograd import Function
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torch.optim as optim
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import cvxpy as cp

# Add custom paths
sys.path.insert(0, '/Users/dennis/Downloads/2024-fall/research/Fairness-Decision-Focused-Loss/FDFL/helper')
sys.path.insert(0, '/Users/dennis/Downloads/2024-fall/research/Fairness-Decision-Focused-Loss/fold-opt-package/fold_opt')

from myutil import *
from features import get_all_features

# Suppress warnings
warnings.filterwarnings("ignore")

from GMRES import *
from fold_opt import *

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(device)

from src.utils.myOptimization import (
    solveGroupProblem, closed_form_group_alpha, AlphaFairnesstorch,
    solveIndProblem, solve_closed_form, solve_coupled_group_alpha, solve_coupled_group_grad,
    compute_coupled_group_obj
)
from src.utils.myPrediction import generate_random_features, customPredictionModel
from src.utils.plots import visLearningCurve
from src.fairness.cal_fair_penalty import atkinson_loss

cpu


## Define Alpha & Q

In [14]:
alpha, Q = 1.5, 100
beta = 1.5

In [15]:
def to_numpy_1d(x):
    """Return a 1-D NumPy array; error if the length is not > 1."""
    if isinstance(x, torch.Tensor):
        x = x.detach().cpu().numpy()
    x = np.asarray(x).reshape(-1)
    assert x.ndim == 1, f"expected 1-D, got shape {x.shape}"
    return x


In [16]:
df = pd.read_csv('/Users/dennis/Downloads/2024-fall/research/Fairness-Decision-Focused-Loss/Organized-FDFL/src/data/data.csv')

df = df.sample(n=50, random_state=42)

columns_to_keep = [
    'risk_score_t', 'program_enrolled_t', 'cost_t', 'cost_avoidable_t', 'race', 'dem_female', 'gagne_sum_tm1', 'gagne_sum_t', 
    'risk_score_percentile', 'screening_eligible', 'avoidable_cost_mapped', 'propensity_score', 'g_binary', 
    'g_continuous', 'utility_binary', 'utility_continuous'
]
# for race 0 is white, 1 is black
df_stat = df[columns_to_keep]
df_feature = df[[col for col in df.columns if col not in columns_to_keep]]

# ---------- basic 1-D helpers ----------
def as_1d(a, dtype=np.float32):
    a = np.asarray(a, dtype=dtype).reshape(-1)   # (N,)
    if a.ndim != 1:
        raise ValueError(f"expect 1-D, got {a.shape}")
    return a

# transform the features
scaler = StandardScaler()

risk   = as_1d(df['risk_score_t']) * 100
risk = np.maximum(risk,0.1)         # or whatever the true column is
gainF  = np.ones_like(risk, dtype=np.float32)
cost   = as_1d(df['cost_t_capped']) * 10.0
cost   = np.maximum(cost, 0.1)              # keep strictly positive
race   = as_1d(df['race'], dtype=np.int64)  # keep as int

feats  = scaler.fit_transform(df[get_all_features(df)]).astype(np.float32)   # (N,p)



In [17]:
cost.shape

(50,)

In [18]:
class optDataset(Dataset):
    def __init__(self, optmodel, feats, risk, gainF, cost, race, alpha=alpha, Q=Q):
        # Store as numpy arrays for now
        self.feats = feats
        self.risk = risk
        self.gainF = gainF
        self.cost = cost
        self.race = race
        self.optmodel = optmodel

        # Call optmodel (expects numpy arrays)
        sol = self.optmodel(self.risk, self.cost, self.race, Q=Q, alpha=alpha, beta=beta)
        obj = compute_coupled_group_obj(sol, self.risk, self.race, alpha=alpha, beta=beta)

        # Convert everything to torch tensors for storage
        self.feats = torch.from_numpy(self.feats).float()
        self.risk = torch.from_numpy(self.risk).float()
        self.gainF = torch.from_numpy(self.gainF).float()
        self.cost = torch.from_numpy(self.cost).float()
        self.race = torch.from_numpy(self.race).float()
        self.sol = torch.from_numpy(sol).float()
        self.obj = torch.tensor(obj).float()

    def __len__(self):
        return len(self.feats)

    # def __getitem__(self, idx):
    #     return self.feats, self.risk, self.gainF, self.cost, self.race, self.sol, self.obj

    def __getitem__(self, idx):
        
        return ( self.feats[idx],
                self.risk[idx],
                self.gainF[idx],
                self.cost[idx],
                self.race[idx],
                self.sol[idx],    # or store per-item solutions; see note ▼
                self.obj )  


## Prediction Model

In [19]:
class FairRiskPredictor(nn.Module):
    def __init__(self, input_dim, dropout_rate=0.1):
        super().__init__()
        self.model = nn.Sequential(
            # First layer with batch normalization
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            
            # Output layer
            nn.Linear(64, 1),
            nn.Softplus()
        )
            
    def forward(self, x):
        return self.model(x).squeeze(-1)

#  Setup training parameters

In [20]:
# Setup training parameters

optmodel = solve_coupled_group_alpha

feats_np  = np.asarray(feats)              # 2-D OK
gainF_np  = to_numpy_1d(gainF)
risk_np   = to_numpy_1d(risk)
cost_np   = to_numpy_1d(cost)
race_np   = to_numpy_1d(df['race'].values)


# Perform train-test split
feats_train, feats_test, gainF_train, gainF_test, risk_train, risk_test, cost_train, cost_test, race_train, race_test = train_test_split(
    feats, gainF, risk, cost, df['race'].values, test_size=0.5, random_state=2
)

print(f"Train size: {feats_train.shape[0]}")
print(f"Test size: {feats_test.shape[0]}")

dataset_train = optDataset(optmodel, feats_train, risk_train, gainF_train, cost_train, race_train, alpha=alpha, Q=Q)
dataset_test = optDataset(optmodel, feats_test, risk_test, gainF_test, cost_test, race_test, alpha=alpha, Q=Q)

# Create dataloaders
dataloader_train = DataLoader(dataset_train, batch_size=len(dataset_train), shuffle=False)
dataloader_test = DataLoader(dataset_test, batch_size=len(dataset_train), shuffle=False)

predmodel = FairRiskPredictor(feats_train.shape[1])
predmodel.to(device)
# save the initial model
# torch.save(predmodel.state_dict(), 'initial_model.pth')
# load the initial model

# self.sol is (N,) – __getitem__ returns a scalar component;
# DataLoader stacks to (B,), which is exactly what the training loop expects.


Train size: 25
Test size: 25


FairRiskPredictor(
  (model): Sequential(
    (0): Linear(in_features=152, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=64, out_features=1, bias=True)
    (4): Softplus(beta=1, threshold=20)
  )
)

In [21]:
def proj_budget(x, cost, Q, max_iter=100):
    """
    x : (B,n)   or (n,)   –– internally promoted to (B,n)
    cost : (n,) positive
    Q : scalar or length‑B tensor
    """
    batched = x.dim() == 2
    if not batched:                       # (n,)  →  (1,n)
        x = x.unsqueeze(0)

    B, n = x.shape
    cost = cost.to(x)
    Q    = torch.as_tensor(Q, dtype=x.dtype, device=x.device).reshape(-1, 1)  # (B,1)

    d    = x.clamp(min=0.)                # enforce non‑neg
    viol = (d @ cost) > Q.squeeze(1)      # which rows violate the budget?

    if viol.any():
        dv, Qv = d[viol], Q[viol]
        lam_lo = torch.zeros_like(Qv.squeeze(1))
        lam_hi = (dv / cost).max(1).values   # upper bound for λ⋆

        for _ in range(max_iter):
            lam_mid = 0.5 * (lam_lo + lam_hi)
            trial   = (dv - lam_mid[:, None] * cost).clamp(min=0.)
            too_big = (trial @ cost) > Qv.squeeze(1)
            lam_lo[too_big] = lam_mid[too_big]
            lam_hi[~too_big]= lam_mid[~too_big]

        d[viol] = (dv - lam_hi[:, None] * cost).clamp(min=0.)

    return d if batched else d.squeeze(0)   # restore original rank


def alpha_fair(u, alpha):
    if alpha == 1:
        return torch.log(u).sum(-1)
    elif alpha == 0:
        return u.sum(-1)
    elif alpha == 'inf':
        return u.min(-1).values
    return (u.pow(1-alpha)/(1-alpha)).sum(-1)

def pgd_step(r, d, g, cost, Q, alpha, lr):
    d = d.clone().requires_grad_(True)
    obj     = alpha_fair(d * r * g, alpha).sum()
    grad_d, = torch.autograd.grad(obj, d, create_graph=True)
    return proj_budget(d + lr * grad_d, cost, Q)

def closed_form_solver_torch(r, cost, group_idx, alpha, Q, beta):

    # assert cost.dim() == 1,  f"cost shape expected (n,), got {cost.shape}"
    # assert race.shape == cost.shape, f"race {group_idx.shape} vs cost {cost.shape}"

    if r.dim() == 1:
        r = r.unsqueeze(0)
    out = []
    for r_i in r:
        d_np = solve_coupled_group_alpha(
                    r_i.detach().cpu().numpy(),
                    cost.detach().cpu().numpy(),
                    group_idx.detach().cpu().numpy(),
                    Q=Q, alpha=alpha, beta=beta)
        out.append(torch.as_tensor(d_np, dtype=r.dtype, device=r.device))
    return torch.stack(out)
              # (B,n) even if B=1

def make_foldopt_layer(g, cost, group, alpha, Q,
                       lr=5e-3, n_fixedpt=400, rule='GMRES'):
    g    = g.detach() # gainF
    cost = cost.detach() # cost

    # -------- solver: no gradients flow ----------------------------
    def solver_fn(r):
        return closed_form_solver_torch(r, cost, group, alpha, Q, beta)

    # -------- one differentiable PGD step --------------------------
    def update_fn(r, x_star, *_):
        # promote to (B,n) if needed
        if r.dim() == 1:       r = r.unsqueeze(0)
        if x_star.dim() == 1:  x_star = x_star.unsqueeze(0)

        g_b = g.expand_as(r) if g.dim() == 1 else g
        return pgd_step(r, x_star, g_b, cost, Q, alpha, lr)  # (B,n)

    return FoldOptLayer(solver_fn, update_fn,
                        n_iter=n_fixedpt, backprop_rule=rule)


In [22]:
import time
import torch
import torch.nn as nn

# assume make_foldopt_layer and alpha_fair are already in scope from your fold-opt code
# from fold_opt import make_foldopt_layer, alpha_fair

def trainFairModelFoldOpt(
    predmodel,
    loader_train,
    loader_test,
    alpha,
    Q,
    lambda_fairness=0.1,
    num_epochs=10,
    lr_pred=1e-3,
    weight_decay=1e-4,
    pgd_lr=1e-2,
    n_fixedpt=200,
    backprop_rule="GMRES"
):
    device = next(predmodel.parameters()).device
    optimizer = torch.optim.Adam(predmodel.parameters(), lr=lr_pred, weight_decay=weight_decay)

    logs = {
        "train_loss": [],
        "train_mse": [],
        "train_fair": [],
        "train_regret": []
    }

    predmodel.train()
    for epoch in range(1, num_epochs + 1):
        t0 = time.time()

                # --- pull the one-and-only batch (the entire dataset) ---
        feats, risk, gainF, cost, race, opt_d, opt_obj = next(iter(loader_train))

        # move to device
        feats   = feats.to(device)           # (N,p)
        risk    = risk.to(device)            # (N,)
        gainF   = gainF.to(device)           # (N,)
        cost    = cost.to(device)            # (N,)
        race    = race.to(device)            # (N,)
        opt_d   = opt_d.to(device)           # (N,)
        opt_obj = opt_obj[0].to(device)      # scalar (all identical, keep one)

        # --- forward pass: predict risk ---
        pred_risk = predmodel(feats).clamp(min=1e-3)  # (n,)

        # --- build a Fold-Opt layer for this batch ---
        #    it will map r_batch (1,n) → d_pred (1,n)
        # fold-opt layer needs 1-D input; it returns (1,N)
        d_pred = make_foldopt_layer(
                    gainF, cost, race, alpha, Q,
                    lr=pgd_lr, n_fixedpt=n_fixedpt, rule=backprop_rule
                )(pred_risk.unsqueeze(0)).squeeze(0) 
        # d_pred = proj_budget(d_pred, cost, Q, max_iter=500).clamp(min=0.)

        # used = cost.to(d_pred.device) @ d_pred
        # if used > Q + 1e-6:
        #     print(f"[WARN] budget overshoot: {used - Q:.2e}")
        # assert used <= Q + 1e-6

        # --- compute regret loss via alpha‐fairness ---
        u_pred    = d_pred * risk * gainF               # (n,)
        pred_obj  = alpha_fair(u_pred.unsqueeze(0), alpha)       # scalar

        regret_l1 = (opt_obj - pred_obj) / (opt_obj.abs() + 1e-7)  # (1,)

        # --- fairness penalty: difference in MSE across race groups ---
        m0 = (pred_risk[race == 0] - risk[race == 0]).pow(2).mean() if (race==0).any() else torch.tensor(0., device=device)
        m1 = (pred_risk[race == 1] - risk[race == 1]).pow(2).mean() if (race==1).any() else torch.tensor(0., device=device)
        fair_reg = torch.abs(m0 - m1)

        # --- total loss & backward ---
        loss = regret_l1 + lambda_fairness * fair_reg
        loss.to(device)  # move to device
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # --- compute simple MSE for logging ---
        mse_train = (pred_risk - risk).pow(2).mean()

        # --- log everything ---
        logs["train_loss"].append(loss.item())
        logs["train_mse"].append(mse_train.item())
        logs["train_fair"].append(fair_reg.item())
        logs["train_regret"].append(regret_l1.item())

        # (optional) print progress
        if epoch % 1 == 0:
            print(f"Epoch {epoch:2d} | Loss={loss.item():.4f} | MSE={mse_train.item():.4f} | Fair={fair_reg.item():.4f} | Regret={regret_l1.item():.4f} | {time.time()-t0:.1f}s")

    predmodel.eval()
    return logs


In [23]:
import time
import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

def run_multiple_trials_foldopt(
    feats,
    gainF,
    risk,
    cost,
    df,
    optmodel,
    n_trials=10,
    test_size=0.5,
    lambda_fairness=0.0,
    num_epochs=50,
    lr_pred=5e-3,
    weight_decay=1e-4,
    pgd_lr=1e-2,
    n_fixedpt=200,
    backprop_rule="GMRES",
    alpha=2,
    Q=1000,
    device="cpu"
):
    """
    Runs multiple random-split trials of fold-opt DFL training, collects 
    TRAINING curves, and plots mean ± 1 std over trials for Loss, Regret, 
    MSE, and Fairness.
    """
    all_loss   = []
    all_regret = []
    all_mse    = []
    all_fair   = []
    times      = []

    start_all = time.time()
    for t in range(n_trials):
        # print(f"[Trial {t+1}/{n_trials}]")
        t0 = time.time()

        # 1) random train/test split
        split = train_test_split(
            feats, gainF, risk, cost, df['race'].values,
            test_size=test_size, random_state=t
        )
        feats_tr, feats_te, g_tr, g_te, r_tr, r_te, c_tr, c_te, race_tr, race_te = split

        # 2) build datasets / loaders
        ds_tr = optDataset(optmodel, feats_tr, r_tr, g_tr, c_tr, race_tr, alpha=alpha, Q=Q)
        loader_tr = DataLoader(ds_tr, batch_size=len(r_tr), shuffle=False)

        # 3) init predictor
        pred = FairRiskPredictor(feats_tr.shape[1]).to(device)

        # 4) train fold-opt
        logs = trainFairModelFoldOpt(
            predmodel=pred,
            loader_train=loader_tr,
            loader_test=None,            # ignored by this function
            alpha=alpha,
            Q=Q,
            lambda_fairness=lambda_fairness,
            num_epochs=num_epochs,
            lr_pred=lr_pred,
            weight_decay=weight_decay,
            pgd_lr=pgd_lr,
            n_fixedpt=n_fixedpt,
            backprop_rule=backprop_rule
        )

        all_loss.append(logs["train_loss"])
        all_regret.append(logs["train_regret"])
        all_mse.append(logs["train_mse"])
        all_fair.append(logs["train_fair"])

        times.append(time.time() - t0)

    total_time = time.time() - start_all
    mean_time, std_time = np.mean(times), np.std(times)
    print(f"\nAll {n_trials} trials done in {total_time:.1f}s  (avg {mean_time:.1f}±{std_time:.1f}s each)")

    # to arrays: shape (n_trials, num_epochs)
    all_loss   = np.array(all_loss)
    all_regret = np.array(all_regret)
    all_mse    = np.array(all_mse)
    all_fair   = np.array(all_fair)
    epochs     = np.arange(all_loss.shape[1])

    # compute mean ± std
    m_loss,   s_loss   = all_loss.mean(0),   all_loss.std(0)
    m_regret, s_regret = all_regret.mean(0), all_regret.std(0)
    m_mse,    s_mse    = all_mse.mean(0),    all_mse.std(0)
    m_fair,   s_fair   = all_fair.mean(0),   all_fair.std(0)

    # --- plot ---
    fig, axes = plt.subplots(1,4, figsize=(24,4))
    titles = ["Training Loss","Training Regret","Training MSE","Training Fairness"]
    means  = [m_loss, m_regret, m_mse, m_fair]
    stds   = [s_loss, s_regret, s_mse, s_fair]
    colors = ["C0","C1","C2","C3"]

    for ax, title, mean, std, col in zip(axes, titles, means, stds, colors):
        ax.plot(epochs, mean,   color=col, lw=2)
        ax.fill_between(epochs,
                        mean - std,
                        mean + std,
                        color=col, alpha=0.2)
        ax.set_title(f"{title}\n(avg of {n_trials} trials)", fontsize=14)
        ax.set_xlabel("Epoch", fontsize=12)
        ax.grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    return {
        "mean_train_loss":     m_loss,
        "std_train_loss":      s_loss,
        "mean_train_regret":   m_regret,
        "std_train_regret":    s_regret,
        "mean_train_mse":      m_mse,
        "std_train_mse":       s_mse,
        "mean_train_fair":     m_fair,
        "std_train_fair":      s_fair,
        "times":               times,
        "total_time":          total_time,
        "mean_time":           mean_time,
        "std_time":            std_time,
    }


In [None]:
results_2 = run_multiple_trials_foldopt(
    feats, gainF, risk, cost, df,
    optmodel=solve_coupled_group_alpha,
    n_trials=1,
    test_size=0.5,
    lambda_fairness=0,
    num_epochs=50,
    lr_pred=1e-3,
    weight_decay=1e-4,
    pgd_lr=5e-3,
    n_fixedpt=400,
    backprop_rule="GMRES",
    alpha=2,
    Q=100,
    device="cpu"
)

Epoch  1 | Loss=-0.9853 | MSE=384103.7188 | Fair=384103.7188 | Regret=-0.9853 | 0.5s
Epoch  2 | Loss=-0.9853 | MSE=384112.4688 | Fair=384112.4688 | Regret=-0.9853 | 0.4s
Epoch  3 | Loss=-0.9853 | MSE=384135.2812 | Fair=384135.2812 | Regret=-0.9853 | 0.4s
Epoch  4 | Loss=-0.9854 | MSE=384135.6875 | Fair=384135.6875 | Regret=-0.9854 | 0.4s
Epoch  5 | Loss=-0.9852 | MSE=384158.0000 | Fair=384158.0000 | Regret=-0.9852 | 0.4s
Epoch  6 | Loss=-0.9853 | MSE=384171.2500 | Fair=384171.2500 | Regret=-0.9853 | 0.4s
Epoch  7 | Loss=-0.9852 | MSE=384191.1250 | Fair=384191.1250 | Regret=-0.9852 | 0.4s
Epoch  8 | Loss=-0.9852 | MSE=384208.4375 | Fair=384208.4375 | Regret=-0.9852 | 0.4s
Epoch  9 | Loss=-0.9851 | MSE=384219.1875 | Fair=384219.1875 | Regret=-0.9851 | 0.4s
Epoch 10 | Loss=-0.9851 | MSE=384236.5625 | Fair=384236.5625 | Regret=-0.9851 | 0.4s


KeyboardInterrupt: 

In [None]:
def _inspect_for_split(feats, gainF, risk, cost, race):
    for name, arr in zip(
            ["feats", "gainF", "risk", "cost", "race"],
            [feats,  gainF,   risk,   cost,   race]):
        if isinstance(arr, torch.Tensor):
            print(f"{name:5s}: torch.Tensor, shape={arr.shape}, ndim={arr.ndim}")
        else:
            arr = np.asarray(arr)
            print(f"{name:5s}: np.ndarray, shape={arr.shape}, ndim={arr.ndim}")
_inspect_for_split(feats, gainF, risk, cost, df['race'].values)


feats: np.ndarray, shape=(500, 152), ndim=2
gainF: np.ndarray, shape=(500,), ndim=1
risk : np.ndarray, shape=(500,), ndim=1
cost : np.ndarray, shape=(500,), ndim=1
race : np.ndarray, shape=(500,), ndim=1
