# Decision-Focused Learning

1. Import Data
2. Processing
3. Define `optDataset`, `optModel`, and `regretLoss`.
4. Train
5. Eval and Visualize

In [475]:
import cvxpy as cp
import numpy as np
import warnings
import sys
from IPython.core.interactiveshell import InteractiveShell
from sklearn.preprocessing import StandardScaler
import torch
import torch.optim as optim
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

# NOTE(review): hard-coded absolute Windows path; the notebook only runs on a
# machine where this directory exists. Presumably it holds `myutil` and
# `features` -- confirm and consider a configurable/relative path.
sys.path.insert(0, 'E:\\User\\Stevens\\Code\\The Paper\\algorithm')

# NOTE(review): the star import hides which names come from `myutil`;
# none of them can be identified from this notebook alone.
from myutil import *
from features import get_all_features

# Suppress warnings
# (this silences every warning category for the rest of the session)
warnings.filterwarnings("ignore")
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


cuda


# Import Data

In [476]:
# Load the raw table (path is relative to the notebook's working directory).
df = pd.read_csv('data/data.csv')

# Outcome / statistic columns; every other column lands in `df_feature`.
columns_to_keep = [
    'risk_score_t', 'program_enrolled_t', 'cost_t', 'cost_avoidable_t', 'race', 'dem_female', 'gagne_sum_tm1', 'gagne_sum_t', 
    'risk_score_percentile', 'screening_eligible', 'avoidable_cost_mapped', 'propensity_score', 'g_binary', 
    'g_continuous', 'utility_binary', 'utility_continuous'
]
# for race 0 is white, 1 is black
# NOTE(review): df_stat / df_feature are taken from the full frame *before* the
# clipping and subsampling below, and are not referenced again in the visible cells.
df_stat = df[columns_to_keep]
df_feature = df[[col for col in df.columns if col not in columns_to_keep]]

# Floor `g_continuous` at 0.1 (values below 0.1 are replaced with 0.1).
# The same floor for `risk_score_t` is intentionally left disabled.
#df['risk_score_t'] = df['risk_score_t'].apply(lambda x: 0.1 if x < 0.1 else x)
df['g_continuous'] = df['g_continuous'].apply(lambda x: 0.1 if x < 0.1 else x)

# Subsample 20,000 rows with a fixed seed for reproducibility.
# This rebinds `df`, so re-running the cell without reloading fails once fewer
# than 20,000 rows would remain -- it does not here because the CSV is reloaded above.
df = df.sample(n=20000, random_state=1)

df.shape

(20000, 168)

In [477]:
# Define input variables for DFL
feats = df[get_all_features(df)].values
risk = df['risk_score_t'].values
gainF = df['g_continuous'].values
# NOTE(review): `decision` is not referenced again in the visible cells.
decision = df['propensity_score'].values
# Unit cost for every individual, so the budget Q caps the total allocation.
cost = np.ones(risk.shape)
race = df['race'].values
# Module-level defaults: fairness parameter and budget. Several functions and
# classes below capture these as default argument values at definition time.
alpha = 0.5
Q = 1000

# transform the features
# NOTE(review): the scaler is fit on all rows before the train/test split, so
# test-set statistics leak into the standardisation.
scaler = StandardScaler()
feats = scaler.fit_transform(feats)

# (redundant: train_test_split is already imported in the first cell)
from sklearn.model_selection import train_test_split

# Perform train-test split
# 60/40 split with a fixed seed; all arrays are split with the same row permutation.
feats_train, feats_test, gainF_train, gainF_test, risk_train, risk_test, cost_train, cost_test, race_train, race_test = train_test_split(
    feats, gainF, risk, cost, df['race'].values, test_size=0.4, random_state=42
)

print(f"Train size: {feats_train.shape[0]}")
print(f"Test size: {feats_test.shape[0]}")

Train size: 12000
Test size: 8000


# Processing Data

## Define the optimization and prediction model

In [478]:
def AlphaFairness(util,alpha):
    """Evaluate the alpha-fairness welfare of a vector of utilities.

    Args:
        util (np.ndarray): Per-individual utilities (expected positive for
            alpha >= 1, where a log or negative power is taken).
        alpha: Fairness parameter. ``0`` gives the utilitarian sum, ``1`` the
            sum of logs (Nash welfare), and ``'inf'`` or ``float('inf')`` the
            minimum utility (maximin). Any other number uses
            ``sum(util**(1 - alpha) / (1 - alpha))``.

    Returns:
        Scalar welfare value.
    """
    # Accept both spellings of "infinity": the string used by
    # `solve_optimization` and the float used by `BaseProblem`. Without this,
    # float('inf') would fall into the power branch and produce garbage.
    if isinstance(alpha, str):
        is_maximin = alpha == 'inf'
    else:
        is_maximin = alpha == float('inf')

    if is_maximin:
        return np.min(util)
    if alpha == 1:
        return np.sum(np.log(util))
    if alpha == 0:
        return np.sum(util)
    return np.sum(util**(1-alpha)/(1-alpha))


def solve_optimization(gainF, risk, cost, alpha=alpha, Q=Q):
    """Solve the budgeted alpha-fair allocation problem with CVXPY/MOSEK.

    Maximises the alpha-fairness welfare of ``gainF * risk * d`` over
    non-negative decisions ``d`` subject to ``sum(cost * d) <= Q``.

    Args:
        gainF, risk, cost: Arrays (numpy, torch tensors, or array-likes) with
            the same number of elements. They are flattened; ``risk`` is
            floored at 0.001.
        alpha: ``0`` (utilitarian), ``1`` (Nash/log), ``'inf'`` or
            ``float('inf')`` (maximin), or any other number for the general
            power form.
        Q: Budget on the total cost of the allocation.

    Returns:
        tuple: ``(optimal_decision, optimal_value)`` -- the flat decision
        vector and its welfare as computed by ``AlphaFairness``.

    Raises:
        ValueError: If the flattened inputs differ in shape.
        RuntimeError: If the solver returns no solution.
    """
    # Accept torch tensors and plain array-likes as well as numpy arrays.
    gainF = gainF.detach().cpu().numpy() if isinstance(gainF, torch.Tensor) else np.asarray(gainF)
    risk = risk.detach().cpu().numpy() if isinstance(risk, torch.Tensor) else np.asarray(risk)
    cost = cost.detach().cpu().numpy() if isinstance(cost, torch.Tensor) else np.asarray(cost)

    # Floor the risk so log / negative-power objectives stay finite.
    risk = risk.clip(min=0.001)
    gainF, risk, cost = gainF.flatten(), risk.flatten(), cost.flatten()

    # Validate before any cvxpy object is built.
    if gainF.shape != risk.shape or risk.shape != cost.shape:
        raise ValueError("Dimensions of gainF, risk, and cost do not match")

    # Both the string 'inf' and float('inf') select the maximin formulation.
    if isinstance(alpha, str):
        is_maximin = alpha == 'inf'
    else:
        is_maximin = alpha == float('inf')

    d = cp.Variable(risk.shape, nonneg=True)
    utils = cp.multiply(cp.multiply(gainF, risk), d)

    t = None
    if is_maximin:
        # Maximin formulation: t is a lower bound on every utility.
        t = cp.Variable()
        objective = cp.Maximize(t)
    elif alpha == 1:
        # Nash welfare (alpha = 1)
        objective = cp.Maximize(cp.sum(cp.log(utils)))
    elif alpha == 0:
        # Utilitarian welfare (alpha = 0)
        objective = cp.Maximize(cp.sum(utils))
    else:
        # General alpha-fairness (alpha == 0 is already handled above).
        objective = cp.Maximize(cp.sum(utils**(1-alpha))/(1-alpha))

    # Shared constraints; an upper bound d <= 1 is intentionally not imposed.
    constraints = [
        d >= 0,
        cp.sum(cost * d) <= Q
    ]
    if t is not None:
        constraints.append(utils >= t)  # t is the minimum utility

    # Solve the problem
    problem = cp.Problem(objective, constraints)
    problem.solve(solver=cp.MOSEK, verbose=False, warm_start=True, mosek_params={'MSK_IPAR_LOG': 1})

    if problem.status != 'optimal':
        print(f"Warning: Problem status is {problem.status}")

    optimal_decision = d.value
    if optimal_decision is None:
        # Previously this surfaced as an opaque "NoneType * ndarray" TypeError.
        raise RuntimeError(
            f"Solver returned no solution (status: {problem.status})"
        )

    # Always pass the string form so the evaluation matches the maximin branch.
    optimal_value = AlphaFairness(optimal_decision * gainF * risk, 'inf' if is_maximin else alpha)

    return optimal_decision, optimal_value

In [479]:
# Define the prediction model
class FairRiskDataset(Dataset):
    """Tensor-backed dataset yielding ``(features, race, risk)`` per sample.

    Features are stored as float32, race labels as int64, and risks as a
    float32 column of shape ``(N, 1)``.
    """

    def __init__(self, features, races, risks):
        feature_tensor = torch.FloatTensor(features)
        race_tensor = torch.LongTensor(races)
        # Keep the target as a column so it lines up with an (N, 1) model output.
        risk_column = torch.FloatTensor(risks).reshape(-1, 1)

        self.features = feature_tensor
        self.races = race_tensor
        self.risks = risk_column

    def __len__(self):
        """Number of samples (rows of the feature tensor)."""
        n_samples = len(self.features)
        return n_samples

    def __getitem__(self, idx):
        """Return the ``(features, race, risk)`` triple at ``idx``."""
        sample = (self.features[idx], self.races[idx], self.risks[idx])
        return sample

class FairRiskPredictor(nn.Module):
    """Single linear layer followed by Softplus, giving strictly positive risk.

    The layers live in ``self.model`` (an ``nn.Sequential``), so saved
    checkpoints use the keys ``model.0.weight`` and ``model.0.bias``.
    """

    def __init__(self, input_dim):
        super().__init__()
        linear = nn.Linear(input_dim, 1)
        positive = nn.Softplus()
        self.model = nn.Sequential(linear, positive)

    def forward(self, x):
        """Map a ``(batch, input_dim)`` tensor to ``(batch, 1)`` predictions."""
        predicted_risk = self.model(x)
        return predicted_risk
        
def train_fair_model(features, races, risks, epochs=20, batch_size=32, lambda_fairness=0):
    """
    Fit a ``FairRiskPredictor`` with MSE plus an optional group-gap penalty.

    The penalty is the absolute difference between the mean prediction of
    group 0 and group 1 within each mini-batch; it is zero for batches that
    contain only one group.

    Args:
        features (np.ndarray): Feature array of shape (n_samples, n_features).
        races (np.ndarray): Group indicator (0: white, 1: black).
        risks (np.ndarray): True risk values.
        epochs (int): Number of passes over the data.
        batch_size (int): Mini-batch size.
        lambda_fairness (float): Weight of the group-gap penalty.

    Returns:
        nn.Module: The trained model (left in training mode, on the CPU).
    """
    loader = DataLoader(
        FairRiskDataset(features, races, risks),
        batch_size=batch_size,
        shuffle=True,
    )

    model = FairRiskPredictor(features.shape[1])
    model.train()
    mse = nn.MSELoss()
    opt = torch.optim.Adam(model.parameters(), lr=0.0001)

    for epoch_idx in range(epochs):
        running_total = 0.0
        for x, group, y in loader:
            opt.zero_grad()
            y_hat = model(x)
            fit_term = mse(y_hat, y)

            # Mean-prediction gap between the two groups in this batch.
            preds_group0 = y_hat[group == 0]
            preds_group1 = y_hat[group == 1]
            if len(preds_group0) == 0 or len(preds_group1) == 0:
                gap = torch.tensor(0.0)
            else:
                gap = torch.abs(preds_group0.mean() - preds_group1.mean())

            objective = fit_term + lambda_fairness * gap
            objective.backward()
            opt.step()

            running_total += objective.item()

        # Report on the first epoch and then every fifth one.
        should_report = epoch_idx == 0 or (epoch_idx + 1) % 5 == 0
        if should_report:
            avg_loss = running_total / len(loader)
            print(f'Epoch [{epoch_idx+1}/{epochs}], Loss: {avg_loss:.4f}')

    return model


# Train the fair model
# model = train_fair_model(feats_train, race, risk_train, epochs=20, lambda_fairness=0).to(device)
# model.eval()

# # save
# torch.save(model.state_dict(), 'risk_predictor_model.pth')


# load model from disk
# map_location remaps the checkpoint's tensors onto the current device, so a
# checkpoint saved from a CUDA model still loads on a CPU-only machine.
model = FairRiskPredictor(feats.shape[1])
model.load_state_dict(torch.load('risk_predictor_model.pth', map_location=device))
model.to(device)
model.eval()

FairRiskPredictor(
  (model): Sequential(
    (0): Linear(in_features=149, out_features=1, bias=True)
    (1): Softplus(beta=1, threshold=20)
  )
)

In [480]:
# Dataset
class FairDFLDataset(Dataset):
    """Dataset that precomputes the optimal allocation for the true risks.

    On construction the whole data set is passed once to
    ``solve_optimization``; the resulting decision vector (``sols``) and its
    objective value (``vals``) are stored alongside the inputs, and everything
    is converted to tensors.
    """

    def __init__(self, features, risk, gainF, cost, race, alpha=alpha, Q=Q):
        self.features, self.risk, self.gainF = features, risk, gainF
        self.cost, self.race = cost, race
        self.alpha, self.Q = alpha, Q

        # Solve on the raw arrays first, then convert everything to tensors.
        self.sols, self.vals = self._get_solutions()
        self._to_tensor()

    def __len__(self):
        """Number of samples."""
        return len(self.features)

    def _get_solutions(self):
        """Solve the allocation problem for the true risks over all samples."""
        decisions, objective_value = solve_optimization(
            self.gainF, self.risk, self.cost, self.alpha, self.Q
        )
        return decisions, objective_value

    def _to_tensor(self):
        """Convert stored arrays to torch tensors (float32; race as int64)."""
        for field in ("features", "risk", "gainF", "cost"):
            setattr(self, field, torch.FloatTensor(getattr(self, field)))
        self.race = torch.LongTensor(self.race)
        self.sols = torch.FloatTensor(self.sols)
        # The objective is a single scalar shared by every sample.
        self.vals = torch.FloatTensor([self.vals])

    def __getitem__(self, idx):
        """Return (features, risk, gainF, cost, race, sol, vals) for ``idx``.

        ``vals`` is the same one-element tensor for every index.
        """
        per_sample = (
            self.features[idx],
            self.risk[idx],
            self.gainF[idx],
            self.cost[idx],
            self.race[idx],
            self.sols[idx],
        )
        return (*per_sample, self.vals)



# Build the train / test datasets.
# Each constructor call solves the allocation problem once on its full split
# (FairDFLDataset.__init__ calls _get_solutions), using the default alpha and Q.
dataset_train = FairDFLDataset(feats_train, risk_train, gainF_train, cost_train, race_train)
dataset_test = FairDFLDataset(feats_test, risk_test, gainF_test, cost_test, race_test)
print('The current alpha and Q values are:', alpha, Q)

The current alpha and Q values are: 0.5 1000


In [481]:
# Load the dataset into a DataLoader
# Full-batch loaders: each iteration yields the entire split in one batch.
# The training loader shuffles row order; the test loader keeps it fixed.
loader_train = DataLoader(dataset_train, batch_size=len(dataset_train), shuffle=True)
loader_test = DataLoader(dataset_test, batch_size=len(dataset_test), shuffle=False)

In [482]:
def regret(predModel, optModel, dataloader, alphas=[alpha], Q=Q):
    """
    Evaluate a prediction model by normalized true regret.

    For each alpha, the allocation obtained from the predicted risks is scored
    under the true risks and compared with the allocation that is optimal for
    the true risks: ``(true_obj - pred_obj) / (|true_obj| + 1e-7)``.

    Only the FIRST batch of ``dataloader`` is evaluated, so pass a full-batch
    loader to score a whole split.

    Args:
        predModel (nn.Module): Prediction model mapping features to risk.
        optModel (callable): Solver called as
            ``optModel(gainF, risk, cost, alpha, Q)`` and returning
            ``(decision, objective_value)``.
        dataloader (DataLoader): Yields
            ``(features, risk, gainF, cost, race, sols, vals)`` batches.
        alphas (list): Alpha values to evaluate (not modified).
        Q (int): Budget constraint.

    Returns:
        float: Mean normalized regret across ``alphas``.
    """
    # Remember the caller's mode so evaluation does not silently flip an
    # eval-mode model into training mode.
    was_training = predModel.training
    predModel.eval()
    try:
        features, risk, gainF, cost, _race, _true_sols, _true_vals = next(iter(dataloader))

        # Run inference on the device the model actually lives on; fall back to
        # the CUDA-if-available heuristic only for parameter-free models.
        first_param = next(predModel.parameters(), None)
        if first_param is not None:
            device = first_param.device
        else:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Predict risk
        with torch.no_grad():
            pred_risk = predModel(features.to(device)).clamp(min=0.001)  # Ensure no zero values

        # The solver works on numpy arrays; risks are floored like the predictions.
        pred_risk = pred_risk.cpu().numpy()
        risk = risk.clamp(min=0.001).cpu().numpy()
        gainF = gainF.cpu().numpy()
        cost = cost.cpu().numpy()

        regrets = []
        for a in alphas:
            # Best achievable objective under the true risks.
            _true_sol, true_obj = optModel(gainF, risk, cost, a, Q)

            # Decision induced by the predicted risks ...
            pred_sol, _ = optModel(gainF, pred_risk, cost, a, Q)

            # ... scored under the true risks.
            pred_obj = AlphaFairness(gainF * risk * pred_sol, a)

            normalized_regret = (true_obj - pred_obj) / (abs(true_obj) + 1e-7)
            regrets.append(normalized_regret)
    finally:
        predModel.train(was_training)

    return np.mean(regrets)

In [483]:
# test the regret calculation
# regret(model, solve_optimization, loader_test, alphas=[2], Q=Q)

# DFL

In [484]:
import torch
from torch.autograd import Function
import numpy as np

class MultiDirCentralDiffFunction(Function):
    """
    A custom PyTorch autograd.Function that:
      1) In the forward pass, solves once with pred_risk to get regret.
      2) In the backward pass, uses k random directions + central difference
         to approximate the gradient w.r.t. pred_risk.

    All array inputs are flattened before being handed to the solver, so
    ``pred_risk`` may be ``(n,)`` or ``(n, 1)``; the returned gradient always
    has the shape and dtype of ``pred_risk``.

    NOTE(review): the regret here is the plain sum of ``gainF * true_risk * d``
    for every alpha, not the alpha-fairness objective the solver optimises --
    confirm this is intended.
    """
    @staticmethod
    def forward(ctx,
                pred_risk: torch.Tensor,
                true_risk: torch.Tensor,
                gainF:     torch.Tensor,
                cost:      torch.Tensor,
                alpha:     float,
                Q:         float,
                solver_func,
                k_directions: int = 4,   # number of random directions
                eps: float = 1e-4,
                normalize_dir: bool = True):
        """
        Forward pass: solve once with pred_risk and once with true_risk, and
        return the regret (best utility minus achieved utility) as a scalar
        float32 tensor on pred_risk's device.

        Arguments:
          - solver_func: callable ``(gainF, risk, cost, alpha, Q) -> (d, value)``
          - k_directions: how many random directions to use in backward
          - eps: step size for central difference
          - normalize_dir: whether to L2-normalize each direction
        """
        device = pred_risk.device

        # Flatten to 1-D on the CPU so element-wise products cannot broadcast
        # an (n, 1) column against an (n,) vector into an (n, n) matrix.
        pred_risk_cpu = pred_risk.detach().cpu().numpy().reshape(-1)
        true_risk_cpu = true_risk.detach().cpu().numpy().reshape(-1)
        gainF_cpu     = gainF.detach().cpu().numpy().reshape(-1)
        cost_cpu      = cost.detach().cpu().numpy().reshape(-1)

        # Decision induced by the predictions, and the best decision in hindsight.
        d_pred, _ = solver_func(gainF_cpu, pred_risk_cpu, cost_cpu, alpha, Q)
        d_opt_true, _ = solver_func(gainF_cpu, true_risk_cpu, cost_cpu, alpha, Q)

        # Both decisions are scored under the true risk.
        weights = gainF_cpu * true_risk_cpu
        pred_util = (weights * np.asarray(d_pred).reshape(-1)).sum()
        best_util = (weights * np.asarray(d_opt_true).reshape(-1)).sum()

        regret = best_util - pred_util
        loss_tensor = torch.tensor(regret, dtype=torch.float32, device=device)

        # Non-tensor state needed by backward.
        ctx.solver_func   = solver_func
        ctx.alpha         = alpha
        ctx.Q             = Q
        ctx.k_directions  = k_directions
        ctx.eps           = eps
        ctx.normalize_dir = normalize_dir
        ctx.best_util     = best_util

        ctx.save_for_backward(pred_risk, true_risk, gainF, cost)

        return loss_tensor

    @staticmethod
    def backward(ctx, grad_output):
        """
        Uses k random directions + central difference approximation:
          grad ~ (1/k) sum_j [ (f(r + eps u_j) - f(r - eps u_j)) / (2 eps) ] * u_j

        Costs 2 * k_directions solver calls. Directions are drawn from
        ``np.random``, so seed numpy for reproducible gradients.
        """
        n_inputs = 10  # one gradient slot per forward() argument

        # Skip all solver calls when no gradient is requested for pred_risk.
        if not ctx.needs_input_grad[0]:
            return (None,) * n_inputs

        solver_func   = ctx.solver_func
        alpha         = ctx.alpha
        Q             = ctx.Q
        k_directions  = ctx.k_directions
        eps           = ctx.eps
        normalize_dir = ctx.normalize_dir
        best_util     = ctx.best_util

        pred_risk, true_risk, gainF, cost = ctx.saved_tensors

        device = pred_risk.device
        n = pred_risk.numel()

        # Same 1-D CPU views as in forward.
        pred_risk_cpu = pred_risk.detach().cpu().numpy().reshape(-1)
        true_risk_cpu = true_risk.detach().cpu().numpy().reshape(-1)
        gainF_cpu     = gainF.detach().cpu().numpy().reshape(-1)
        cost_cpu      = cost.detach().cpu().numpy().reshape(-1)
        weights = gainF_cpu * true_risk_cpu

        def regret_at(risk_vec):
            # Regret of the decision induced by `risk_vec`, scored under true risk.
            d_vec, _ = solver_func(gainF_cpu, risk_vec, cost_cpu, alpha, Q)
            return best_util - (weights * np.asarray(d_vec).reshape(-1)).sum()

        grad_approx_np = np.zeros(n, dtype=np.float64)

        for _ in range(k_directions):
            # Sample a random direction u
            u = np.random.normal(size=(n,))
            if normalize_dir:
                norm_u = np.sqrt((u**2).sum())
                if norm_u > 1e-12:
                    u /= norm_u  # L2-normalize

            loss_plus = regret_at(pred_risk_cpu + eps * u)
            loss_minus = regret_at(pred_risk_cpu - eps * u)

            # Central difference along u, projected back onto u.
            fd_val = (loss_plus - loss_minus) / (2 * eps)
            grad_approx_np += fd_val * u

        # Average over k directions
        grad_approx_np /= float(k_directions)

        # Match pred_risk's device, dtype and shape, as autograd requires.
        grad_approx = (
            torch.from_numpy(grad_approx_np)
            .to(device=device, dtype=pred_risk.dtype)
            .reshape(pred_risk.shape)
        )

        # Chain rule with the upstream (scalar) gradient.
        grad_approx = grad_approx * grad_output

        # Gradient wrt pred_risk, None for the other nine inputs.
        return (grad_approx,) + (None,) * (n_inputs - 1)


###############################################################################
# Wrap the function in an nn.Module
###############################################################################
import torch.nn as nn

class MultiDirCentralDiffLoss(nn.Module):
    """
    nn.Module wrapper around ``MultiDirCentralDiffFunction``.

    Holds the solver configuration (alpha, Q, solver_func) and the
    finite-difference settings (number of random directions, step size,
    optional direction normalization) so the loss can be called with tensors
    only.
    """
    def __init__(self, alpha=alpha, Q=Q, solver_func=None,
                 k_directions=4, eps=1e-4, normalize_dir=True):
        super().__init__()
        # Optimization problem settings
        self.alpha, self.Q = alpha, Q
        self.solver_func = solver_func
        # Finite-difference settings
        self.k_directions = k_directions
        self.eps = eps
        self.normalize_dir = normalize_dir

    def forward(self, pred_risk, true_risk, gainF, cost):
        """Return the regret of ``pred_risk`` as a differentiable scalar."""
        tensor_args = (pred_risk, true_risk, gainF, cost)
        problem_args = (self.alpha, self.Q, self.solver_func)
        fd_args = (self.k_directions, self.eps, self.normalize_dir)
        return MultiDirCentralDiffFunction.apply(*tensor_args, *problem_args, *fd_args)


In [485]:
def train_decision_focused_mdcd(
    predModel,
    train_dataset,
    test_dataset,
    alpha=alpha,
    Q=Q,
    epochs=5,
    lr=1e-3,
    k_directions=4,
    eps=1e-4,
    normalize_dir=True
):
    """
    Train ``predModel`` end-to-end on regret using ``MultiDirCentralDiffLoss``
    (random-direction central differences through the solver).

    Both splits are processed as a single full batch. Test regret and test MSE
    are recorded once before training and once after every epoch.

    Returns:
        dict with keys ``train_loss``, ``train_mse``, ``test_regret``,
        ``test_mse`` and ``epoch_times`` (lists of floats).
    """
    from torch.utils.data import DataLoader
    import time

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    predModel.to(device)
    predModel.train()

    def full_batch_loader(ds):
        # One batch containing the whole split, in fixed order.
        return DataLoader(ds, batch_size=len(ds), shuffle=False)

    train_loader = full_batch_loader(train_dataset)
    test_loader = full_batch_loader(test_dataset)

    # Finite-difference regret loss, plus MSE for monitoring only.
    dfl_loss = MultiDirCentralDiffLoss(
        alpha=alpha, Q=Q, solver_func=solve_optimization,
        k_directions=k_directions, eps=eps, normalize_dir=normalize_dir
    )
    mse_criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(predModel.parameters(), lr=lr)

    history = {
        "train_loss": [],
        "train_mse": [],
        "test_regret": [],
        "test_mse": [],
        "epoch_times": [],
    }

    def held_out_regret():
        return regret(predModel, solve_optimization, test_loader, alphas=[alpha], Q=Q)

    def held_out_mse():
        # MSE between predicted and true risk on the test split (0-dim tensor).
        with torch.no_grad():
            x_te, y_te, _gain, _cost, _race, _, _ = next(iter(test_loader))
            x_te, y_te = x_te.to(device), y_te.to(device)
            y_hat_te = predModel(x_te).view(-1)
            return mse_criterion(y_hat_te, y_te.view(-1))

    # Baseline metrics before any update.
    initial_regret = held_out_regret()
    print(f"Initial Test Regret: {initial_regret:.4f}")
    history["test_regret"].append(initial_regret)
    history["test_mse"].append(held_out_mse().item())

    for epoch in range(epochs):
        started = time.time()
        for x, y_true, gain, unit_cost, _race, _, _ in train_loader:
            x = x.to(device)
            y_true = y_true.to(device)
            gain = gain.to(device)
            unit_cost = unit_cost.to(device)

            optimizer.zero_grad()

            y_hat = predModel(x).view(-1)
            loss = dfl_loss(y_hat, y_true, gain, unit_cost)

            # MSE is logged but not optimised.
            mse_batch = mse_criterion(y_hat, y_true)
            loss.backward()
            optimizer.step()

            history["train_loss"].append(loss.item())
            history["train_mse"].append(mse_batch.item())

        epoch_time = time.time() - started
        history["epoch_times"].append(epoch_time)

        test_reg = held_out_regret()
        test_mse = held_out_mse()

        history["test_regret"].append(test_reg)
        history["test_mse"].append(test_mse.item())

        print(f"Epoch [{epoch+1}/{epochs}]"
              f" | TrainRegretLoss={loss.item():.4f}"
              f" | TrainMSE={mse_batch.item():.4f}"
              f" | TestRegret={test_reg:.4f}"
              f" | TestMSE={test_mse:.4f}"
              f" | Time={epoch_time:.2f}s")

    return history


In [486]:


# results = train_decision_focused_mdcd(model, dataset_train, dataset_test, alpha=alpha, Q=Q, epochs=20, lr=1e-3)


In [492]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader
import cvxpy as cp

class FairDFLDataset(Dataset):
    """Dataset for Fair DFL whose optimal solutions come from a problem object.

    ``bb_problem`` must expose ``solve_optimization(gainF, risk, cost)``
    returning ``(decisions, objective_value)``. It is called once, on the raw
    inputs, at construction time.

    NOTE: this definition replaces the earlier ``FairDFLDataset`` (which took
    ``alpha`` and ``Q``) for every cell executed after it.
    """

    def __init__(self, features, risk, gainF, cost, race, bb_problem):
        self.features, self.risk, self.gainF = features, risk, gainF
        self.cost, self.race = cost, race
        self.bb_problem = bb_problem

        # Solve first (on the raw inputs), then convert everything to tensors.
        self.sols, self.vals = self._get_solutions()
        self._to_tensor()

    def __len__(self):
        """Number of samples."""
        return len(self.features)

    def _get_solutions(self):
        """Ask the problem object for the optimal decisions under true risk."""
        decisions, objective_value = self.bb_problem.solve_optimization(
            self.gainF, self.risk, self.cost
        )
        return decisions, objective_value

    def _to_tensor(self):
        """Convert stored arrays to torch tensors (float32; race as int64)."""
        for field in ("features", "risk", "gainF", "cost"):
            setattr(self, field, torch.FloatTensor(getattr(self, field)))
        self.race = torch.LongTensor(self.race)
        self.sols = torch.FloatTensor(self.sols)
        # Single scalar objective, shared by every sample.
        self.vals = torch.FloatTensor([self.vals])

    def __getitem__(self, idx):
        """Return (features, risk, gainF, cost, race, sol, vals) for ``idx``."""
        per_sample = (
            self.features[idx],
            self.risk[idx],
            self.gainF[idx],
            self.cost[idx],
            self.race[idx],
            self.sols[idx],
        )
        return (*per_sample, self.vals)

class BaseProblem:
    """Alpha-fair budgeted allocation problem with its solver and evaluator.

    ``alpha`` and ``Q`` are stored as floats; ``float('inf')`` selects the
    maximin formulation.
    """

    def __init__(self, alpha=alpha, Q=Q):
        self.alpha = float(alpha)  # Ensure alpha is float
        self.Q = float(Q)  # Ensure Q is float
        self.num_feats = None
        self.lancer_out_activation = "relu"

    def solve_optimization(self, gainF, risk, cost):
        """Solve for the optimal non-negative allocation within budget ``Q``.

        Inputs may be numpy arrays or torch tensors; they are flattened and
        ``risk`` is floored at 0.001. Returns ``(decision, objective_value)``.
        If anything raises during solving or evaluation, the error is printed
        and an all-zero decision with value 0.0 is returned instead.
        """
        def _as_numpy(x):
            return x.detach().cpu().numpy() if isinstance(x, torch.Tensor) else x

        gainF = _as_numpy(gainF)
        risk = _as_numpy(risk)
        cost = _as_numpy(cost)

        risk = risk.clip(min=0.001)
        gainF = gainF.flatten()
        risk = risk.flatten()
        cost = cost.flatten()
        d = cp.Variable(risk.shape, nonneg=True)

        utils = cp.multiply(cp.multiply(gainF, risk), d)

        # Pick the objective for this alpha; maximin needs an auxiliary bound t.
        t = None
        if self.alpha == float('inf'):
            t = cp.Variable()
            objective = cp.Maximize(t)
        elif self.alpha == 1:
            objective = cp.Maximize(cp.sum(cp.log(utils)))
        elif self.alpha == 0:
            objective = cp.Maximize(cp.sum(utils))
        else:
            exponent = 1 - self.alpha
            objective = cp.Maximize(cp.sum(cp.power(utils, exponent)) / (1 - self.alpha))

        # Non-negativity and budget are shared by every formulation.
        constraints = [
            d >= 0,
            cp.sum(cost * d) <= self.Q
        ]
        if t is not None:
            constraints.append(utils >= t)

        problem = cp.Problem(objective, constraints)
        try:
            problem.solve(solver=cp.MOSEK, verbose=False)

            if problem.status != 'optimal':
                print(f"Warning: Problem status is {problem.status}")

            optimal_decision = d.value
            optimal_value = self.eval_utility(optimal_decision, gainF, risk)

        except Exception as e:
            # Best-effort fallback: report and return a zero allocation.
            print(f"Optimization error: {e}")
            optimal_decision = np.zeros(risk.shape)
            optimal_value = 0.0

        return optimal_decision, optimal_value

    def eval_utility(self, decision, gainF, risk):
        """Alpha-fairness welfare of ``decision * gainF * risk``."""
        utils = decision * gainF * risk
        if self.alpha == float('inf'):
            return np.min(utils)
        if self.alpha == 0:
            return np.sum(utils)
        if self.alpha == 1:
            # Small offset keeps the log finite at zero utility.
            return np.sum(np.log(utils + 1e-10))
        return np.sum(np.power(utils, 1 - self.alpha)) / (1 - self.alpha)

    def get_c_shapes(self):
        """Returns (num_feats, 1): input and output sizes for the C model."""
        return self.num_feats, 1

    def get_activations(self):
        """Returns the activation names ("tanh", "relu") for the models."""
        return "tanh", "relu"

In [498]:
class MLPCModel(nn.Module):
    """MLP risk predictor: ReLU hidden layers and a Softplus output layer.

    ``n_layers`` hidden layers of width ``hidden_dim`` sit between the input
    and the ``output_dim``-wide output. An ``nn.MSELoss`` is attached as
    ``loss_fn``.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=64, n_layers=2):
        super().__init__()
        widths = [input_dim, *([hidden_dim] * n_layers), output_dim]
        final_idx = len(widths) - 2

        modules = []
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            modules.append(nn.Linear(fan_in, fan_out))
            # Softplus only after the last linear layer keeps outputs positive.
            modules.append(nn.Softplus() if idx == final_idx else nn.ReLU())

        self.model = nn.Sequential(*modules)
        self.loss_fn = nn.MSELoss()

    def forward(self, x):
        """Return predictions of shape ``(batch, output_dim)``."""
        prediction = self.model(x)
        return prediction

class MLPLancer(nn.Module):
    """MLP surrogate (LANCER) mapping squared prediction error to a scalar.

    The network has ``n_layers`` ReLU hidden layers of width ``hidden_dim``
    and a linear (Identity-activated) single-unit output. An ``nn.MSELoss``
    is attached as ``loss_fn``.
    """

    def __init__(self, input_dim=1, hidden_dim=64, n_layers=2):
        super().__init__()
        widths = [input_dim, *([hidden_dim] * n_layers), 1]
        final_idx = len(widths) - 2

        modules = []
        for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            modules.append(nn.Linear(fan_in, fan_out))
            # No non-linearity on the output so the surrogate is unbounded.
            modules.append(nn.Identity() if idx == final_idx else nn.ReLU())

        self.model = nn.Sequential(*modules)
        self.loss_fn = nn.MSELoss()

    def forward(self, z_pred, z_true):
        """Return the surrogate value for each (prediction, target) pair.

        Both inputs are reshaped to columns; the output has shape ``(N, 1)``.
        """
        gap = z_pred.view(-1, 1) - z_true.view(-1, 1)
        return self.model(torch.square(gap))

class LancerLearner:
    """Alternating trainer for a prediction model (C) and a LANCER surrogate.

    Each training step first fits the surrogate, then updates the prediction
    model by minimising the surrogate's output. Both models are optimised with
    AdamW and have a ReduceLROnPlateau scheduler stepped once per epoch.
    """
    def __init__(self, bb_problem, c_model, lancer_model, device='cuda', 
                 c_lr=1e-4, lancer_lr=1e-4, weight_decay=1e-5):
        self.bb_problem = bb_problem
        self.c_model = c_model.to(device)
        self.lancer_model = lancer_model.to(device)
        self.device = device
        
        # Use lower learning rates and add weight decay
        self.c_optimizer = optim.AdamW(
            self.c_model.parameters(),
            lr=c_lr,
            weight_decay=weight_decay
        )
        self.lancer_optimizer = optim.AdamW(
            self.lancer_model.parameters(),
            lr=lancer_lr,
            weight_decay=weight_decay
        )
        
        # Halve the learning rate after 2 epochs without improvement
        self.c_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.c_optimizer, mode='min', factor=0.5, patience=2
        )
        self.lancer_scheduler = optim.lr_scheduler.ReduceLROnPlateau(
            self.lancer_optimizer, mode='min', factor=0.5, patience=2
        )
        
    def train_step(self, feats, risk, gainF, cost, race, sols, vals):
        """Run one surrogate update followed by one prediction-model update.

        Args:
            feats, risk, gainF, cost, sols: Batch arrays or tensors; converted
                to float tensors and moved to ``self.device``.
            race, vals: Accepted for signature compatibility; not used.

        Returns:
            tuple: ``(lancer_loss, c_loss)`` as Python floats.
        """
        # Convert to tensors if they aren't already
        if not isinstance(feats, torch.Tensor):
            feats = torch.FloatTensor(feats)
        if not isinstance(risk, torch.Tensor):
            risk = torch.FloatTensor(risk)
        if not isinstance(gainF, torch.Tensor):
            gainF = torch.FloatTensor(gainF)
        if not isinstance(cost, torch.Tensor):
            cost = torch.FloatTensor(cost)
        if not isinstance(sols, torch.Tensor):
            sols = torch.FloatTensor(sols)
            
        feats = feats.to(self.device)
        risk = risk.to(self.device)
        gainF = gainF.to(self.device)
        cost = cost.to(self.device)
        sols = sols.to(self.device)
        
        # --- Train LANCER model ---
        # NOTE(review): the surrogate is regressed onto the optimal decisions
        # `sols`, not onto a decision loss -- confirm this target is intended.
        self.lancer_optimizer.zero_grad()
        # The prediction model is held fixed here, so skip building its graph.
        with torch.no_grad():
            z_pred = self.c_model(feats)
        lancer_pred = self.lancer_model(z_pred, risk)
        lancer_loss = self.lancer_model.loss_fn(lancer_pred, sols.view(-1, 1))
        
        lancer_loss.backward()
        # Clip AFTER backward: clipping before it acts on empty/stale gradients
        # and leaves the gradients used by the step unbounded.
        torch.nn.utils.clip_grad_norm_(self.lancer_model.parameters(), max_norm=1.0)
        self.lancer_optimizer.step()
        
        # --- Train C model using LANCER loss ---
        self.c_optimizer.zero_grad()
        z_pred = self.c_model(feats)
        c_loss = torch.mean(self.lancer_model(z_pred, risk))
        
        c_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.c_model.parameters(), max_norm=1.0)
        self.c_optimizer.step()
        
        return lancer_loss.item(), c_loss.item()
    
    def train(self, train_loader, test_loader, n_epochs=10, early_stop_patience=5):
        """Train for up to ``n_epochs`` with early stopping on the summed loss.

        Args:
            train_loader: Yields
                ``(feats, risk, gainF, cost, race, sols, vals)`` batches.
            test_loader: Currently unused; kept for interface compatibility.
            n_epochs (int): Maximum number of epochs.
            early_stop_patience (int): Stop after this many consecutive epochs
                without improvement of ``avg_lancer_loss + avg_c_loss``.
        """
        best_loss = float('inf')
        patience_counter = 0
        
        for epoch in range(n_epochs):
            self.c_model.train()
            self.lancer_model.train()
            
            epoch_lancer_loss = 0
            epoch_c_loss = 0
            num_batches = 0
            
            for batch in train_loader:
                feats, risk, gainF, cost, race, sols, vals = batch
                lancer_loss, c_loss = self.train_step(
                    feats, risk, gainF, cost, race, sols, vals
                )
                epoch_lancer_loss += lancer_loss
                epoch_c_loss += c_loss
                num_batches += 1
            
            # Calculate average losses
            avg_lancer_loss = epoch_lancer_loss / num_batches
            avg_c_loss = epoch_c_loss / num_batches
            
            # Regret on the TRAINING loader. `regret` only looks at the first
            # batch it draws, so with a mini-batch loader this is a noisy
            # single-batch estimate. It uses the module-level solver, alpha
            # and Q rather than self.bb_problem.
            self.c_model.eval()
            train_regret = regret(self.c_model, solve_optimization, train_loader, alphas=[alpha], Q=Q)
            
            # Update learning rate schedulers
            self.lancer_scheduler.step(avg_lancer_loss)
            self.c_scheduler.step(avg_c_loss)
            
            # Print progress before the early-stopping check so the final
            # epoch's metrics are reported too.
            print(f"Epoch {epoch+1}/{n_epochs}")
            print(f"LANCER Loss: {avg_lancer_loss:.4f}")
            print(f"C Model Loss: {avg_c_loss:.4f}")
            print(f"Train Regret: {train_regret:.4f}")
            print(f"Learning rates - LANCER: {self.lancer_optimizer.param_groups[0]['lr']:.6f}, "
                  f"C: {self.c_optimizer.param_groups[0]['lr']:.6f}")
            
            # Early stopping check
            current_loss = avg_lancer_loss + avg_c_loss
            if current_loss < best_loss:
                best_loss = current_loss
                patience_counter = 0
            else:
                patience_counter += 1
                
            if patience_counter >= early_stop_patience:
                print(f"Early stopping triggered after {epoch+1} epochs")
                break
    
    def predict(self, feats):
        """Return the prediction model's output for ``feats`` as a numpy array."""
        self.c_model.eval()
        with torch.no_grad():
            feats = torch.FloatTensor(feats).to(self.device)
            return self.c_model(feats).cpu().numpy()

def setup_training(feats_train, risk_train, gainF_train, cost_train, race_train, 
                  alpha=alpha, Q=Q, batch_size=32):
    """Assemble the LANCER learner and its training DataLoader.

    Args:
        feats_train: Training feature matrix; ``feats_train.shape[1]`` is used
            as the feature count.
        risk_train, gainF_train, cost_train, race_train: Per-sample training
            arrays forwarded unchanged to ``FairDFLDataset``.
        alpha: Passed to ``BaseProblem``. The default is whatever the global
            ``alpha`` held when this ``def`` was executed.
        Q: Passed to ``BaseProblem``. The default is likewise captured from
            the global ``Q`` at definition time.
        batch_size: Mini-batch size of the returned DataLoader.

    Returns:
        tuple: ``(learner, dataloader)`` where ``learner`` is a
        ``LancerLearner`` and ``dataloader`` iterates the training dataset in
        shuffled mini-batches.
    """
    # Optimization problem shared by the dataset and the learner.
    problem = BaseProblem(alpha=alpha, Q=Q)
    n_features = feats_train.shape[1]
    problem.num_feats = n_features

    # Prediction network first, then the LANCER surrogate (scalar input:
    # "squared difference between pred and true" per the original comment).
    predictor = MLPCModel(input_dim=n_features, output_dim=1)
    surrogate = MLPLancer(input_dim=1)

    # Training data wrapped in a shuffled loader.
    train_set = FairDFLDataset(
        feats_train, risk_train, gainF_train, cost_train, race_train, problem
    )
    loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)

    # Use the GPU when one is available, otherwise the CPU.
    run_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    return LancerLearner(problem, predictor, surrogate, run_device), loader

In [500]:
# Build the LANCER learner together with the training DataLoader
# (setup_training creates a shuffled loader; mini-batches of 32 here).
learner, train_dataloader = setup_training(
    feats_train, risk_train, gainF_train, cost_train, race_train,
    alpha=alpha, Q=Q, batch_size=32)

# Wrap the held-out split in the same dataset type, passing the learner's
# problem object (presumably the BaseProblem built in setup_training, so both
# splits share alpha and Q — confirm against LancerLearner.__init__).
test_dataset = FairDFLDataset(
    feats_test, risk_test, gainF_test, cost_test, race_test,
    learner.bb_problem
)
# batch_size=len(test_dataset) yields the whole test split as one unshuffled batch.
test_dataloader = DataLoader(
    test_dataset,
    batch_size=len(test_dataset),
    shuffle=False
)

# Train for at most 10 epochs with an early-stopping patience of 5 epochs.
# NOTE(review): the per-epoch regret printed by the visible part of train()
# is computed on the training loader ("Train Regret"); how test_dataloader is
# used inside train() is not visible here — confirm.
learner.train(train_dataloader, test_dataloader, n_epochs=10, early_stop_patience=5)

Epoch 1/10
LANCER Loss: 5.3529
C Model Loss: 0.1646
Train Regret: 0.0509
Learning rates - LANCER: 0.000100, C: 0.000100
Epoch 2/10
LANCER Loss: 0.0251
C Model Loss: 0.0806
Train Regret: 0.0896
Learning rates - LANCER: 0.000100, C: 0.000100
Epoch 3/10
LANCER Loss: 0.9652
C Model Loss: 0.0897
Train Regret: 0.1863
Learning rates - LANCER: 0.000100, C: 0.000100
Epoch 4/10
LANCER Loss: 0.3630
C Model Loss: 0.0753
Train Regret: 0.1358
Learning rates - LANCER: 0.000100, C: 0.000100
Epoch 5/10
LANCER Loss: 0.1500
C Model Loss: 0.0806
Train Regret: 0.0870
Learning rates - LANCER: 0.000050, C: 0.000100
Epoch 6/10
LANCER Loss: 0.0058
C Model Loss: 0.0859
Train Regret: 0.0646
Learning rates - LANCER: 0.000050, C: 0.000100
Epoch 7/10
LANCER Loss: 0.0110
C Model Loss: 0.0837
Train Regret: 0.0345
Learning rates - LANCER: 0.000050, C: 0.000050
Epoch 8/10
LANCER Loss: 0.0503
C Model Loss: 0.0802
Train Regret: 0.1056
Learning rates - LANCER: 0.000050, C: 0.000050
Epoch 9/10
LANCER Loss: 0.3788
C Model L

In [501]:
# Predict on the test features with the trained LANCER prediction model
# (learner.predict returns a NumPy array).
lancer_pred_risk = learner.predict(feats_test)
# Trailing expression: display the shape of the prediction array.
lancer_pred_risk.shape

(8000, 1)

In [502]:
# Compare regret on the test loader for the LANCER-trained model and for
# `model`, using the same solver, alpha and Q for both.
# NOTE(review): `model` is not defined in this cell — presumably the two-stage
# predictor trained in an earlier cell; confirm it is in scope on a fresh run.
print("LANCER regret on Test is:", regret(learner.c_model, solve_optimization, test_dataloader, alphas=[alpha], Q=Q))
print("2-Stage Regret on Test is:",regret(model, solve_optimization, test_dataloader, alphas=[alpha], Q=Q))

LANCER regret on Test is: 0.14386002851309154
2-Stage Regret on Test is: 0.39090804755143727


In [503]:
# Same comparison as for the test split, evaluated on the training loader.
# NOTE(review): train_dataloader yields shuffled mini-batches of 32, whereas
# test_dataloader yields a single full batch. If `regret` solves the
# optimization per batch, train and test figures may not be directly
# comparable — confirm against the implementation of `regret`.
print("LANCER regret on Train is:", regret(learner.c_model, solve_optimization, train_dataloader, alphas=[alpha], Q=Q))
print("2-Stage Regret on Train is:",regret(model, solve_optimization, train_dataloader, alphas=[alpha], Q=Q))

LANCER regret on Train is: 0.0731937018544552
2-Stage Regret on Train is: 0.40421903602272125
