In [12]:
import sys
import warnings
import time
import copy
import json
from datetime import datetime
from itertools import product

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch.autograd import Function
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import torch.optim as optim
import torch.nn.functional as F

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import cvxpy as cp

# Add custom paths
# Both directories are prepended (index 0), so modules found there take precedence over
# same-named modules elsewhere on sys.path.
# NOTE(review): these are absolute paths on one Windows machine (E: drive); the notebook
# will not import its local helpers on any other machine — presumably the `myutil`,
# `features`, `GMRES` and `fold_opt` modules imported below live here; confirm and
# consider a configurable/relative location.
sys.path.insert(0, 'E:\\User\\Stevens\\MyRepo\\FDFL\\helper')
sys.path.insert(0, 'E:\\User\\Stevens\\MyRepo\\fold-opt-package\\fold_opt')

from myutil import *
from features import get_all_features

# Suppress warnings
# NOTE(review): with no category/module filter this silences every warning raised after
# this point (including deprecation and numerical warnings), not just noisy ones.
warnings.filterwarnings("ignore")

from GMRES import *
from fold_opt import *

# Set device
# The notebook is pinned to the CPU. Previously the CUDA-detecting assignment was
# immediately overwritten by a hard-coded CPU device, leaving a dead store that suggested
# the GPU was being auto-selected. The switch below makes that choice explicit:
# set FORCE_CPU to False to use CUDA whenever it is available.
FORCE_CPU = True
device = torch.device(
    "cuda" if (not FORCE_CPU and torch.cuda.is_available()) else "cpu"
)
print(device)

from src.utils.myOptimization import (
    solveGroupProblem, closed_form_group_alpha, AlphaFairnesstorch,
    solveIndProblem, solve_closed_form, solve_coupled_group_alpha, solve_coupled_group_grad,
    compute_coupled_group_obj
)
from src.utils.myPrediction import generate_random_features, customPredictionModel
from src.utils.plots import visLearningCurve
from src.fairness.cal_fair_penalty import atkinson_loss

cpu


## Define Alpha & Q

In [13]:
# Global problem parameters, used as the default `alpha` / `Q` arguments of optDataset.
# alpha is forwarded to the solver as both `alpha` and `beta`; Q is forwarded as `Q`.
# NOTE(review): presumably alpha is the alpha-fairness exponent and Q the total budget —
# confirm against the solver in src.utils.myOptimization.
alpha = 0.5
Q = 1000

In [14]:
# Read the raw table and keep a reproducible random subsample of 500 rows.
df = pd.read_csv(
    'E:\\User\\Stevens\\MyRepo\\Organized-FDFL\\src\\data\\data.csv'
).sample(n=500, random_state=42)

# Columns that are split off into `df_stat`; every other column goes to `df_feature`.
# For race, 0 is white and 1 is black.
columns_to_keep = [
    'risk_score_t',
    'program_enrolled_t',
    'cost_t',
    'cost_avoidable_t',
    'race',
    'dem_female',
    'gagne_sum_tm1',
    'gagne_sum_t',
    'risk_score_percentile',
    'screening_eligible',
    'avoidable_cost_mapped',
    'propensity_score',
    'g_binary',
    'g_continuous',
    'utility_binary',
    'utility_continuous',
]
df_stat = df[columns_to_keep]
df_feature = df.loc[:, ~df.columns.isin(columns_to_keep)]

# Targets: `benefit` scaled by 100 and `cost_t_capped` scaled by 10, each floored at 0.1.
# NOTE(review): the 0.1 floor presumably keeps both strictly positive for the solver — confirm.
risk = np.maximum(np.array(df['benefit'].values) * 100, 0.1)
cost = np.maximum(np.array(df['cost_t_capped'].values) * 10, 0.1)

# Unit gain factor for every individual (same shape and dtype as `risk`).
gainF = np.ones_like(risk)
decision = df['propensity_score'].values
race = np.array(df['race'].values)

# Model inputs: the columns selected by get_all_features, standardized column-wise.
scaler = StandardScaler()
feats = scaler.fit_transform(df[get_all_features(df)].values)

In [15]:
class optDataset(Dataset):
    """Dataset that bundles the inputs of one allocation problem with its solved optimum.

    On construction the solver `optmodel` is called once on the true `risk`, `cost` and
    `race` arrays, the objective of that solution is evaluated with
    `compute_coupled_group_obj`, and everything is stored as float32 torch tensors.

    Args:
        optmodel: Callable invoked as ``optmodel(risk, cost, race, Q=Q, alpha=alpha, beta=alpha)``;
            its result must be a numpy array (it is passed to ``torch.from_numpy``).
        feats, risk, gainF, cost, race: numpy arrays (required by ``torch.from_numpy``).
        alpha: Passed to the solver and the objective as both ``alpha`` and ``beta``.
        Q: Passed to the solver as ``Q``.

    NOTE(review): ``__getitem__`` ignores its index and always returns the complete
    tensors, while ``__len__`` reports the number of rows in ``feats``. Iterating a
    DataLoader over this dataset therefore yields the full problem once per row.
    Confirm this is intended before changing either method.
    """

    def __init__(self, optmodel, feats, risk, gainF, cost, race, alpha=alpha, Q=Q):
        self.optmodel = optmodel

        # Solve on the raw numpy inputs first; the solver expects numpy arrays.
        solution = optmodel(risk, cost, race, Q=Q, alpha=alpha, beta=alpha)
        objective = compute_coupled_group_obj(solution, risk, race, alpha=alpha, beta=alpha)

        def _float_tensor(array):
            # numpy array -> float32 torch tensor
            return torch.from_numpy(array).float()

        # Keep every array as a float32 tensor.
        self.feats = _float_tensor(feats)
        self.risk = _float_tensor(risk)
        self.gainF = _float_tensor(gainF)
        self.cost = _float_tensor(cost)
        self.race = _float_tensor(race)
        self.sol = _float_tensor(solution)
        self.obj = torch.tensor(objective).float()

    def __len__(self):
        """Return the size of the first dimension of ``feats``."""
        return len(self.feats)

    def __getitem__(self, idx):
        """Return the full ``(feats, risk, gainF, cost, race, sol, obj)`` tuple; ``idx`` is unused."""
        whole_problem = (
            self.feats,
            self.risk,
            self.gainF,
            self.cost,
            self.race,
            self.sol,
            self.obj,
        )
        return whole_problem


## Prediction Model

In [16]:
class FairRiskPredictor(nn.Module):
    """One-hidden-layer MLP mapping a feature vector to a single Softplus-activated score.

    Layer stack (stored in ``self.model``):
    ``Linear(input_dim, 64) -> ReLU -> Dropout(dropout_rate) -> Linear(64, 1) -> Softplus``.
    There is no normalization layer.

    Args:
        input_dim: Number of input features per sample.
        dropout_rate: Drop probability of the dropout layer that follows the hidden ReLU.
    """

    def __init__(self, input_dim, dropout_rate=0.1):
        super().__init__()
        # Hidden block: affine map to 64 units, non-linearity, then dropout.
        hidden_block = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
        ]
        # Output block: one unit passed through Softplus.
        output_block = [
            nn.Linear(64, 1),
            nn.Softplus(),
        ]
        self.model = nn.Sequential(*hidden_block, *output_block)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the resulting scores."""
        scores = self.model(x)
        return scores

## Set Up Training Parameters

In [17]:
# Set up training parameters

# Solver that optDataset calls on the true risk / cost / race arrays.
optmodel = solve_coupled_group_alpha

# 50/50 train-test split applied jointly to all arrays (fixed seed for reproducibility).
(
    feats_train, feats_test,
    gainF_train, gainF_test,
    risk_train, risk_test,
    cost_train, cost_test,
    race_train, race_test,
) = train_test_split(
    feats, gainF, risk, cost, df['race'].values,
    test_size=0.5,
    random_state=2,
)

print(f"Train size: {feats_train.shape[0]}")
print(f"Test size: {feats_test.shape[0]}")

# Each dataset solves its own allocation problem on construction.
dataset_train = optDataset(
    optmodel, feats_train, risk_train, gainF_train, cost_train, race_train,
    alpha=alpha, Q=Q,
)
dataset_test = optDataset(
    optmodel, feats_test, risk_test, gainF_test, cost_test, race_test,
    alpha=alpha, Q=Q,
)

# Create dataloaders (no shuffling, one dataset item per batch).
dataloader_train = DataLoader(dataset_train, batch_size=1, shuffle=False)
dataloader_test = DataLoader(dataset_test, batch_size=1, shuffle=False)

# Prediction model sized to the number of feature columns.
predmodel = FairRiskPredictor(input_dim=feats_train.shape[1])
# To snapshot the untrained weights (currently disabled):
# torch.save(predmodel.state_dict(), 'initial_model.pth')
# Loading a saved initial model is not implemented in this cell.
predmodel.to(device)

Train size: 250
Test size: 250


FairRiskPredictor(
  (model): Sequential(
    (0): Linear(in_features=152, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.1, inplace=False)
    (3): Linear(in_features=64, out_features=1, bias=True)
    (4): Softplus(beta=1, threshold=20)
  )
)

array([[-1.32756579, -0.19324699, -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351],
       [ 0.75325833, -0.19324699, -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351],
       [ 0.75325833,  5.1747249 , -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351],
       ...,
       [-1.32756579, -0.19324699, -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351],
       [ 0.75325833, -0.19324699, -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351],
       [ 0.75325833, -0.19324699, -0.38313051, ..., -0.0776931 ,
        -0.07543471, -0.07474351]])