In [1]:
from src.utils.myOptimization import solveGroupProblem
from src.utils.myPrediction import generate_random_features, customPredictionModel
import numpy as np
import cvxpy as cp
import torch
import torch.nn as nn
import pandas as pd
from src.utils.features import get_all_features

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings("ignore")

In [2]:
df = pd.read_csv('E:\\User\\Stevens\\MyRepo\\Organized-FDFL\\src\\data\\data.csv')
df = df.sample(n=200,random_state=1)

# Normalized cost 
cost = np.array(df['cost_t_capped'].values).reshape(-1, 1) * 10
cost = np.maximum(cost, 0.1)


# All features, standardized
features = df[get_all_features(df)].values
scaler = StandardScaler()
features = scaler.fit_transform(features)

# True benefit, predictor label 
true_benefit = np.array(df['benefit'].values).reshape(-1, 1) * 100
true_benefit = np.maximum(true_benefit, 0.1) 


# Group labels, 0 is White (Majority), 1 is Black
race = np.array(df['race'].values).reshape(-1, 1)

In [4]:
def compute_individual_fairness(pred: torch.Tensor,
                                 true: torch.Tensor,
                                 race: torch.Tensor,
                                 d_func=None) -> torch.Tensor:
    """
    Computes individual fairness: similar individuals across groups should receive similar predictions.
    """
    # Flatten everything to 1D
    pred = pred.view(-1)
    true = true.view(-1)
    race = race.view(-1)

    if d_func is None:
        d_func = lambda y1, y2: torch.exp(-(y1 - y2).pow(2))

    mask0 = (race == 0)
    mask1 = (race == 1)

    pred0, pred1 = pred[mask0], pred[mask1]
    true0, true1 = true[mask0], true[mask1]

    n0, n1 = pred0.shape[0], pred1.shape[0]
    if n0 == 0 or n1 == 0:
        return torch.tensor(0.0, device=pred.device)

    pred_diff = pred0.unsqueeze(1) - pred1.unsqueeze(0)       # (n0, n1)
    true_sim = d_func(true0.unsqueeze(1), true1.unsqueeze(0)) # (n0, n1)

    fairness_penalty = (true_sim * pred_diff.pow(2)).mean()
    return fairness_penalty


In [5]:
def alpha_fairness_group_utilities(benefit, allocation, group, alpha):
    """
    Compute group-wise alpha-fairness utilities.
    """
    groups = np.unique(group)
    utils = []
    for k in groups:
        mask = (group == k)
        Gk = float(mask.sum())
        # Compute average utility in group k
        util_k = (benefit[mask] * allocation[mask]).sum(axis=0).mean()  # mean total utility per individual in group
        if alpha == 1:
            val = np.log(util_k) if util_k > 0 else -np.inf
        elif alpha == 0:
            val = util_k
        elif alpha == float('inf'):
            # Min utility as min total utility)
            val = (benefit[mask] * allocation[mask]).sum(axis=0).min()
        else:
            val = util_k**(1 - alpha) / (1 - alpha)
        utils.append(val)
    return np.array(utils).sum()

In [6]:
def run_prediction_and_optimization(n=4, T=1, n_features=3, n_groups=2, alpha=1.0, Q=5.0):
    # Step 1: Generate random data
    features, costs, groups, budget = generate_random_features(n, n_features, T, n_groups, Q)
    # Flatten features per individual and time for model input, shape (n*T, n_features)
    # Or aggregate features across time depending on your model design
    X = features.reshape(n * T, n_features)
    
    # Step 2: Predict benefit using a simple linear model
    # Convert to torch tensor
    X_torch = torch.tensor(X, dtype=torch.float32)
    model = nn.Linear(n_features, 1)  # simple linear regression
    with torch.no_grad():
        preds = model(X_torch).numpy().reshape(n, T)
    
    # Optional: ensure positivity of predicted benefits (important for alpha-fairness)
    preds = np.maximum(preds, 1e-3)
    
    # Step 3: Solve group problem
    allocation, obj_value = solveGroupProblem(preds, costs, groups, alpha, budget)
    
    # Step 4: Compute group-wise utility values
    group_utils = alpha_fairness_group_utilities(preds, allocation, groups, alpha)
    
    return {
        'predicted_benefit': preds,
        'allocation': allocation,
        'objective_value': obj_value,
        'group_utilities': group_utils,
        'groups': groups
    }

# Run example
result = run_prediction_and_optimization()
print("Predicted Benefit:\n", np.round(result['predicted_benefit'], 3))
print("Allocation:\n", np.round(result['allocation'], 3))
print("Objective value:", result['objective_value'])
print("Group utilities:", result['group_utilities'])
print("Groups:", result['groups'])

Predicted Benefit:
 [[0.001]
 [0.001]
 [0.001]
 [0.001]]
Allocation:
 [[ 0.   ]
 [ 0.   ]
 [14.471]
 [14.423]]
Objective value: -8.474528929210692
Group utilities: -8.474528929210692
Groups: [1 1 1 0]


In [7]:
class FairRiskPredictor(nn.Module):
    def __init__(self, input_dim, dropout_rate=0.1):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(64, 32),      # added hidden layer
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(32, 1),
            nn.Softplus()
        )
            
    def forward(self, x):
        return self.model(x)

In [8]:
from torch.utils.data import TensorDataset, DataLoader

# Convert the preprocessed features, target, and race labels to torch tensors
X_tensor = torch.tensor(features, dtype=torch.float32)
y_tensor = torch.tensor(true_benefit, dtype=torch.float32)
race_tensor = torch.tensor(race, dtype=torch.int64)  # assumes race is a 1D array of 0/1

# Split the data into training and testing sets (80/20)
X_train, X_test, y_train, y_test, race_train, race_test = train_test_split(
    X_tensor, y_tensor, race_tensor, test_size=0.2, random_state=42
)

# Create a DataLoader for the training data
train_dataset = TensorDataset(X_train, y_train, race_train)
train_loader = DataLoader(train_dataset, batch_size=len(y_train), shuffle=True)

# Define model
input_dim = X_train.shape[1]
model = FairRiskPredictor(input_dim, dropout_rate=0.1)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)

# Fairness regularization weight
lambda_fair = 1.0  # tune this hyperparameter

# Training loop
num_epochs = 30
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch, race_batch in train_loader:
        optimizer.zero_grad()
        pred_batch = model(X_batch).squeeze()  # shape: (batch,)
        
        mse_loss = criterion(pred_batch, y_batch)
        fair_penalty = compute_individual_fairness(
            pred_batch.squeeze(),    # ensure shape (n,)
            y_batch.squeeze(),
            race_batch.squeeze()
        )
        loss = mse_loss + lambda_fair * fair_penalty

        loss = mse_loss + lambda_fair * fair_penalty # type: ignore

        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)

    avg_loss = running_loss / len(train_dataset)
    if (epoch + 1) % 5 == 0 or epoch == 0:
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")

# Inference on full data
model.eval()
with torch.no_grad():
    pred_benefit = model(X_tensor).squeeze().numpy()

# Ensure positivity for decision optimization
pred_benefit = np.maximum(pred_benefit, 1e-1)

# Fairness evaluation
fairness_score = compute_individual_fairness(
    torch.tensor(pred_benefit, dtype=torch.float32),
    y_tensor.view(-1),
    torch.tensor(race, dtype=torch.int64)
)
print(f"Individual fairness score: {fairness_score.item():.4f}")

Epoch 1/30, Loss: 423.8417
Epoch 5/30, Loss: 389.8014
Epoch 10/30, Loss: 315.7221
Epoch 15/30, Loss: 296.7944
Epoch 20/30, Loss: 298.0353
Epoch 25/30, Loss: 291.8675
Epoch 30/30, Loss: 292.7226
Individual fairness score: 0.4415


1. I have PTO first half
2. Need to calculate `regret`
3. Train end-to-end.

In [9]:
alpha = 2
Q = 50

In [10]:
sol, _ = solveGroupProblem(pred_benefit, cost, race, alpha=alpha, Q=Q)

true_sol, _ = solveGroupProblem(true_benefit, cost, race, alpha=alpha, Q=Q)
true_obj = alpha_fairness_group_utilities(true_benefit, true_sol, race, alpha=alpha)

print("True Objective Value:", true_obj)

pred_obj = alpha_fairness_group_utilities(true_benefit, sol, race, alpha=alpha)

print("Predicted Objective Value:", pred_obj)

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
normalized_regret = (true_obj - pred_obj) / (abs(true_obj) + 1e-7)
print("Normalized Regret:", normalized_regret)

Normalized Regret: 0.017005703424885735


In [None]:
race.shape, true_benefit.shape, cost.shape

((200, 1), (200, 1), (200, 1))

In [None]:
def closed_form_group_alpha(b_hat, cost, group, Q, alpha):
    """
    b_hat : (N,)  or (N,1) or (N,T)    strictly positive
    cost  : same shape as b_hat (broadcast OK)
    group : (N,)  or (N,1)  integer labels 0 … K-1
    Q     : scalar > 0
    alpha : 0, 1, np.inf, or positive float
    """
    # ------------ 1. normalise shapes ---------------------------------
    b_hat = np.asarray(b_hat, dtype=float)
    cost  = np.asarray(cost,  dtype=float)

    if b_hat.ndim == 1:                        # promote to 2-D (N,1)
        b_hat = b_hat[:, None]
    if cost.ndim == 1:
        cost  = cost[:, None]
    if cost.shape[1] == 1 and b_hat.shape[1] > 1:
        cost = np.repeat(cost, b_hat.shape[1], axis=1)
    if b_hat.shape[1] == 1 and cost.shape[1] > 1:
        b_hat = np.repeat(b_hat, cost.shape[1], axis=1)
    assert b_hat.shape == cost.shape, "benefit & cost must broadcast"

    # ------------ 2. squeeze group to 1-D int array -------------------
    group = np.asarray(group).astype(int).reshape(-1)
    if group.ndim != 1:
        raise ValueError("`group` must be 1-D after reshape")
    N, T = b_hat.shape
    if group.size != N:
        raise ValueError("length of `group` must equal #rows of b_hat")

    K  = group.max() + 1
    G  = np.bincount(group, minlength=K)       # each G_k > 0 ?

    # ------------ 3. best ratio per group ----------------------------
    rho   = np.empty(K)
    idx_k = np.empty((K, 2), dtype=int)

    for k in range(K):
        rows = np.flatnonzero(group == k)
        ratio_sub = b_hat[rows] / cost[rows]   # shape (|rows|, T)
        flat_idx  = ratio_sub.argmax()         # 0 … |rows|·T−1
        r_loc, t_star = divmod(flat_idx, T)
        i_star = rows[r_loc] # type: ignore
        rho[k]  = ratio_sub.flat[flat_idx]
        idx_k[k] = (i_star, t_star)

    p = rho / G                                # p_k = ρ_k / G_k

    # ------------ 4. allocate budgets x_k ----------------------------
    if alpha == 0:                             # utilitarian
        winners = np.flatnonzero(p == p.max())
        x = np.zeros(K)
        x[winners] = Q / len(winners)
    elif alpha == 1:                           # log utility
        x = np.full(K, Q / K)
    elif alpha == np.inf:                      # max–min
        inv = 1 / p
        x = Q * inv / inv.sum()
    else:                                      # generic α
        beta   = 1.0 / alpha
        w      = p ** (beta - 1)
        x = Q * w / w.sum()

    # ------------ 5. build decision matrix ---------------------------
    d_star = np.zeros_like(b_hat)
    for k, (i, t) in enumerate(idx_k):
        d_star[i, t] = x[k] / cost[i, t]

    return d_star, idx_k, x, rho

In [None]:
sol_c, _, _, _ = closed_form_group_alpha(true_benefit, cost, race, Q, alpha)
obj_c = alpha_fairness_group_utilities(true_benefit, sol_c, race, alpha)
print("Closed Form Objective Value:", obj_c)

Closed Form Objective Value: -0.0005896091691414507


In [None]:
np.allclose(sol_c, np.array(true_sol), rtol=1e-3, atol=1e-3)

True

In [None]:
def AlphaFairness(util, alpha):
    if isinstance(util, torch.Tensor):
        util = util.detach().cpu().numpy() if isinstance(util, torch.Tensor) else util
    if alpha == 1:
        return np.sum(np.log(util))
    elif alpha == 0:
        return np.sum(util)
    elif alpha == 'inf':
        return np.min(util)
    else:
        return np.sum(util**(1-alpha) / (1-alpha))

def solve_optimization(gainF, risk, cost, alpha, Q):
    gainF = gainF.detach().cpu().numpy() if isinstance(gainF, torch.Tensor) else gainF
    risk = risk.detach().cpu().numpy() if isinstance(risk, torch.Tensor) else risk
    cost = cost.detach().cpu().numpy() if isinstance(cost, torch.Tensor) else cost

    risk = risk.clip(0.001)
    gainF, risk, cost = gainF.flatten(), risk.flatten(), cost.flatten()
    d = cp.Variable(risk.shape, nonneg=True)

    if gainF.shape != risk.shape or risk.shape != cost.shape:
        raise ValueError("Dimensions of gainF, risk, and cost do not match")

    utils = cp.multiply(cp.multiply(gainF, risk), d)
    constraints = [d >= 0, cp.sum(cost * d) <= Q]

    if alpha == 'inf':
        t = cp.Variable()
        objective = cp.Maximize(t)
        constraints.append(utils >= t)
    elif alpha == 1:
        objective = cp.Maximize(cp.sum(cp.log(utils)))
    elif alpha == 0:
        objective = cp.Maximize(cp.sum(utils))
    else:
        objective = cp.Maximize(cp.sum(utils**(1-alpha)) / (1-alpha))

    problem = cp.Problem(objective, constraints)
    problem.solve(solver=cp.MOSEK, verbose=False, warm_start=True, mosek_params={'MSK_IPAR_LOG': 1})

    if problem.status != 'optimal':
        print(f"Warning: Problem status is {problem.status}")

    optimal_decision = d.value
    optimal_value = AlphaFairness(optimal_decision * gainF * risk, alpha)

    return optimal_decision, optimal_value
def solve_closed_form(g, r, c, alpha, Q):
    g = g.detach().cpu().numpy() if isinstance(g, torch.Tensor) else g
    r = r.detach().cpu().numpy() if isinstance(r, torch.Tensor) else r
    c = c.detach().cpu().numpy() if isinstance(c, torch.Tensor) else c

    if np.any(c <= 0) or np.any(r <= 0) or np.any(g <= 0):
        raise ValueError("Inputs must be strictly positive.")

    n = len(c)
    utility = np.maximum(r * g, 1e-6)

    if alpha == 0:
        ratios = utility / c
        sorted_indices = np.argsort(-ratios)
        d_star_closed = np.zeros(n)
        i = sorted_indices[0]
        d_star_closed[i] = Q / c[i]

    elif alpha == 1:
        weight = c / utility
        denom = np.sum(weight)
        d_star_closed = (Q / denom) * (1 / utility)

    elif alpha == 'inf':
        denom = np.sum(c * c / utility)
        d_star_closed = (Q * c) / (utility * denom)

    else:
        if alpha <= 0:
            raise ValueError("Alpha must be positive.")

        numerator = np.power(c, -1/alpha) * np.power(utility, 1/alpha - 1)
        d_unscaled = numerator
        cost_total = np.sum(c * d_unscaled)
        if cost_total == 0:
            raise ValueError("Degenerate solution: cost_total is zero")
        d_star_closed = (Q / cost_total) * d_unscaled

    obj = AlphaFairness(d_star_closed * utility, alpha)
    return d_star_closed, obj


In [None]:
true_r = true_benefit
pred_r = model(X_tensor).detach().cpu().numpy().flatten()
pred_r = np.maximum(pred_r, 1e-1)  # Ensure positivity
gainF = np.ones_like(true_r)

print("gainF.shape:", gainF.shape, gainF.dtype)
print("true_r.shape:", true_r.shape, true_r.dtype)
print("cost.shape:", cost.shape, cost.dtype)
gainF = gainF.reshape(-1)
true_r = true_r.reshape(-1)
pred_r = pred_r.reshape(-1)
cost = cost.reshape(-1)


gainF.shape: (200, 1) float64
true_r.shape: (200, 1) float64
cost.shape: (200, 1) float64


In [None]:
sol_c, obj_c = solve_closed_form(gainF, true_r, cost, alpha=alpha, Q=Q)


sol, obj = solve_optimization(gainF, true_r, cost, alpha=alpha, Q=Q)
obj, obj_c

(-489.29710409190614, -489.29710406250916)

In [None]:
# predicted solution in closed form
pred_sol_c, _ = solve_closed_form(gainF, pred_r, cost, alpha=alpha, Q=Q)
pred_obj_c = AlphaFairness(pred_sol_c * gainF * true_r, alpha)
# predicted solution in optimization solver
pred_sol, _ = solve_optimization(gainF, pred_r, cost, alpha=alpha, Q=Q)
pred_obj = AlphaFairness(pred_sol * gainF * true_r, alpha)


pred_obj, pred_obj_c

(-1131.3969020634959, -1131.396136288176)

In [None]:
normalized_regret = (obj - pred_obj) / (abs(obj) + 1e-7)
print("Normalized Regret:", normalized_regret)

Normalized Regret: 1.3122902066466209
