# Next Step


Discuss the general algorithm: back propagation requires an approximation of the gradient. Then present the gradient approximation methods:
- Closed-Form Decisions
- Linear Decision Objective
- Quadratic Decision Objective
- Generic Decision Objective

Gradient Free Methods

Experiments

Methods to compare:
- Two-stage: prediction then decision, prediction then fair decision, fair prediction then decision, fair prediction then fair decision
- DFL: DFL version of each of the above two-stage settings


Performance measures to report:
- Prediction accuracy: mean squared error between $r$ and $\hat{r}$
- Decision accuracy: mean squared error between $d(r)$ and $d(\hat{r})$
- Prediction fairness: prediction fairness measure of $\hat{r}$
- Decision fairness: decision fairness measure of $d(\hat{r})$
- Runtime of algorithm

In [57]:
import cvxpy as cp
import numpy as np
import warnings
import sys
from IPython.core.interactiveshell import InteractiveShell
from sklearn.preprocessing import StandardScaler
import torch
import torch.optim as optim
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset, random_split
import matplotlib.pyplot as plt
import pandas as pd
import sys
sys.path.insert(0, 'E:\\User\\Stevens\\Code\\The Paper\\algorithm')
from torch.utils.data import Dataset, DataLoader


import warnings
warnings.filterwarnings("ignore")

from myutil import *
from features import get_all_features

In [58]:
# Train the Prediction Model
class RiskDataset(Dataset):
    """Map-style dataset pairing each feature row with its risk target.

    Features are held as a float tensor; risks are held as a float column
    vector of shape (N, 1) so they line up with a single-output predictor.
    """

    def __init__(self, features, risks):
        feature_tensor = torch.FloatTensor(features)
        risk_column = torch.FloatTensor(risks).reshape(-1, 1)
        self.features = feature_tensor
        self.risks = risk_column

    def __len__(self):
        # One sample per feature row.
        return len(self.features)

    def __getitem__(self, idx):
        sample = (self.features[idx], self.risks[idx])
        return sample
    
class RiskPredictor(nn.Module):
    """Linear layer followed by Softplus, mapping features to one risk value."""

    def __init__(self, input_dim):
        super().__init__()
        # The attribute must stay named `model`: saved state_dict keys
        # ('model.0.weight', 'model.0.bias') are derived from it.
        layers = [nn.Linear(input_dim, 1), nn.Softplus()]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        predicted_risk = self.model(x)
        return predicted_risk

# Training function
def train_model(features, risks, epochs=10, batch_size=32):
    """
    Train a RiskPredictor on (features, risks) with Adam and MSE loss.

    Args:
        features (np.ndarray): 2-D feature array; its second dimension sets the
            model's input size.
        risks (np.ndarray): True risk values, one per feature row.
        epochs (int): Number of passes over the data.
        batch_size (int): Mini-batch size used by the shuffled DataLoader.

    Returns:
        nn.Module: The trained RiskPredictor (left in training mode; callers
        switch to eval mode themselves).
    """
    dataset = RiskDataset(features, risks)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model = RiskPredictor(features.shape[1])
    model.train()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(epochs):
        # Accumulate the loss over the whole epoch so the progress line is
        # not just the (shuffle-dependent) loss of the final mini-batch.
        epoch_loss = 0.0
        for batch_features, batch_risks in dataloader:
            optimizer.zero_grad()
            predictions = model(batch_features)
            loss = criterion(predictions, batch_risks)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        if (epoch + 1) % 5 == 0:
            # Mean of the per-batch losses, the same quantity train_fair_model reports.
            avg_loss = epoch_loss / len(dataloader)
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

    return model

In [59]:
# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv('data/data.csv')

columns_to_keep = [
    'risk_score_t', 'program_enrolled_t', 'cost_t', 'cost_avoidable_t', 'race', 'dem_female', 'gagne_sum_tm1', 'gagne_sum_t', 
    'risk_score_percentile', 'screening_eligible', 'avoidable_cost_mapped', 'propensity_score', 'g_binary', 
    'g_continuous', 'utility_binary', 'utility_continuous'
]
# for race 0 is white, 1 is black
# df_stat / df_feature are selected before the floor below is applied, so
# df_stat keeps the unfloored g_continuous values.
df_stat = df[columns_to_keep]
df_feature = df[[col for col in df.columns if col not in columns_to_keep]]

# Replace all values less than 0.1 with 0.1
#df['risk_score_t'] = df['risk_score_t'].apply(lambda x: 0.1 if x < 0.1 else x)
# Vectorised floor: same result as the per-element lambda (NaN stays NaN).
df['g_continuous'] = df['g_continuous'].clip(lower=0.1)

# subset a sample of 5000 rows of df
# df = df.sample(n=10000, random_state=1)

df.shape

(48784, 168)

In [60]:
# Inputs for the decision-focused learning (DFL) experiments, taken from df.
feats = df[get_all_features(df)].values
risk = df['risk_score_t'].values
gainF = df['g_continuous'].values
decision = df['propensity_score'].values
race = df['race'].values
cost = np.ones(risk.shape)  # unit cost for every individual

# Fairness parameter and budget used by the optimisation cells.
alpha = 0.5
Q = 1000

from sklearn.model_selection import train_test_split

# 60/40 train-test split; all arrays are split with the same row permutation.
(
    feats_train, feats_test,
    gainF_train, gainF_test,
    risk_train, risk_test,
    cost_train, cost_test,
    race_train, race_test,
) = train_test_split(
    feats, gainF, risk, cost, race,
    test_size=0.4,
    random_state=42,
)


# Prediction Stage

In [61]:
# Standardise the features; note that `feats` is rebound to the scaled array.
scaler = StandardScaler()
feats = scaler.fit(feats).transform(feats)

# model = train_model(feats, risk)
# torch.save(model.state_dict(), 'risk_predictor_model.pth')

# Rebuild the architecture and restore the weights saved by the lines above.
model = RiskPredictor(feats.shape[1])
model.load_state_dict(torch.load('risk_predictor_model.pth'))
model.eval()

# Inference only: no autograd graph is needed for the predictions.
with torch.no_grad():
    pred_risk = model(torch.FloatTensor(feats)).numpy().flatten()

# Displayed as (mean predicted risk, mean true risk).
pred_risk.mean(), risk.mean()

(2.7172887, 4.393691680358348)

In [62]:
# Per-race summary statistics of the true risk score.
distribution_stats = df_stat.groupby('race')['risk_score_t'].describe()

# The same summary for the model's predicted risk, keyed by df's race column.
pred_risk_distribution = df[['race']].assign(pred_risk=pred_risk)
distribution_stats_pred_risk = pred_risk_distribution.groupby('race')['pred_risk'].describe()

# True-risk table first, predicted-risk table second.
print(distribution_stats, distribution_stats_pred_risk, sep='\n')

        count      mean       std  min       25%       50%       75%  \
race                                                                   
0     43202.0  4.266933  5.102404  0.0  1.426873  2.870732  5.282827   
1      5582.0  5.374740  7.980310  0.0  1.494819  3.023611  6.030236   

             max  
race              
0     100.000000  
1      96.381858  
        count      mean       std  min       25%       50%       75%  \
race                                                                   
0     43202.0  2.554104  4.139664  0.0  0.053036  0.775554  3.488975   
1      5582.0  3.980262  6.118409  0.0  0.111057  1.603591  5.302370   

            max  
race             
0     51.777321  
1     60.639240  


# Train a Fair Regression Model

### Specifically, we'll minimize the difference in mean predictions between the two racial groups (statistical parity). The total loss will be a combination of the Mean Squared Error and the fairness regularizer.

In [63]:
# Add 'race' to the dataset
class FairRiskDataset(Dataset):
    """Dataset yielding (features, race, risk) triples for fairness-aware training.

    Features and risks are float tensors (risks as an (N, 1) column); races
    are stored as a long tensor so they can be compared against group codes.
    """

    def __init__(self, features, races, risks):
        feature_tensor = torch.FloatTensor(features)
        race_tensor = torch.LongTensor(races)
        risk_column = torch.FloatTensor(risks).reshape(-1, 1)
        self.features = feature_tensor
        self.races = race_tensor
        self.risks = risk_column

    def __len__(self):
        # One sample per feature row.
        return len(self.features)

    def __getitem__(self, idx):
        sample = (self.features[idx], self.races[idx], self.risks[idx])
        return sample

class FairRiskPredictor(nn.Module):
    """Linear + Softplus risk model; same architecture as RiskPredictor."""

    def __init__(self, input_dim):
        super().__init__()
        # The attribute must stay named `model`: saved state_dict keys
        # ('model.0.weight', 'model.0.bias') are derived from it.
        layers = [nn.Linear(input_dim, 1), nn.Softplus()]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        predicted_risk = self.model(x)
        return predicted_risk


In [64]:
def train_fair_model(features, races, risks, epochs=10, batch_size=32, lambda_fairness=1.0):
    """
    Fit a FairRiskPredictor with an MSE loss plus a statistical-parity penalty.

    The penalty is the absolute gap between the mean predictions of the two
    race groups inside each mini-batch; it is zero for a batch that contains
    only one group.

    Args:
        features (np.ndarray): Feature array.
        races (np.ndarray): Race codes (0: white, 1: black).
        risks (np.ndarray): True risk values.
        epochs (int): Number of training epochs.
        batch_size (int): Mini-batch size.
        lambda_fairness (float): Weight of the parity penalty in the total loss.

    Returns:
        nn.Module: The trained model.
    """
    dataloader = DataLoader(
        FairRiskDataset(features, races, risks),
        batch_size=batch_size,
        shuffle=True,
    )

    model = FairRiskPredictor(features.shape[1])
    model.train()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(epochs):
        running_loss = 0.0
        for x_batch, race_batch, y_batch in dataloader:
            optimizer.zero_grad()
            preds = model(x_batch)
            mse_loss = criterion(preds, y_batch)

            # Statistical-parity gap between the two groups in this batch.
            preds_group0 = preds[race_batch == 0]
            preds_group1 = preds[race_batch == 1]
            if len(preds_group0) == 0 or len(preds_group1) == 0:
                fairness_loss = torch.tensor(0.0)
            else:
                fairness_loss = torch.abs(preds_group0.mean() - preds_group1.mean())

            total_loss = mse_loss + lambda_fairness * fairness_loss
            total_loss.backward()
            optimizer.step()

            running_loss += total_loss.item()

        # Report the first epoch and then every fifth one.
        should_report = epoch == 0 or (epoch + 1) % 5 == 0
        if should_report:
            avg_loss = running_loss / len(dataloader)
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.4f}')

    return model


In [65]:
# Inputs for the fair model: features from df, race and risk from df_stat.
features = df[get_all_features(df)].values
races = df_stat['race'].values  # 0: white, 1: black
risks = df_stat['risk_score_t'].values

# Keep only rows where every feature, the race code and the risk are finite
# (i.e. drop rows containing any NaN or +/-Inf).
mask = np.isfinite(features).all(axis=1) & np.isfinite(races) & np.isfinite(risks)

features, races, risks = features[mask], races[mask], risks[mask]


# Standardise with a scaler dedicated to the fair model.
scaler_fair = StandardScaler()
features_scaled = scaler_fair.fit(features).transform(features)

# Train the fair regression model
lambda_fairness = 0.5  # Adjust this value as needed
# fair_model = train_fair_model(features_scaled, races, risks, epochs=20, batch_size=64, lambda_fairness=lambda_fairness)

# # Save the fair model
# torch.save(fair_model.state_dict(), 'fair_risk_predictor_model.pth')

# Rebuild the architecture and restore the previously saved weights.
fair_model = FairRiskPredictor(features_scaled.shape[1])
fair_model.load_state_dict(torch.load('fair_risk_predictor_model.pth'))


Epoch [1/20], Loss: 37.5431
Epoch [5/20], Loss: 23.7993
Epoch [10/20], Loss: 20.6913
Epoch [15/20], Loss: 19.1335
Epoch [20/20], Loss: 17.7668


In [66]:
# Display both loaded predictors so their architectures can be compared.
fair_model, model

(FairRiskPredictor(
   (model): Sequential(
     (0): Linear(in_features=149, out_features=1, bias=True)
     (1): Softplus(beta=1, threshold=20)
   )
 ),
 RiskPredictor(
   (model): Sequential(
     (0): Linear(in_features=149, out_features=1, bias=True)
     (1): Softplus(beta=1, threshold=20)
   )
 ))

# Solve Optimization Problem

In [67]:
def AlphaFairness(util,alpha):
    """
    Alpha-fairness welfare of a utility vector.

    Args:
        util (array-like): Individual utilities.
        alpha: Fairness parameter. 0 gives the sum of utilities, 1 the sum of
            log-utilities, and 'inf' (or a float infinity such as ``np.inf``)
            the minimum utility. Any other number gives
            ``sum(util**(1-alpha) / (1-alpha))``.

    Returns:
        float: The welfare value.
    """
    # Float conversion lets lists through and avoids NumPy's
    # "integers to negative integer powers" error when alpha > 1.
    util = np.asarray(util, dtype=float)

    # Float infinity would otherwise fall into the power branch and yield -0.0.
    if alpha == 'inf' or alpha == float('inf'):
        return np.min(util)
    if alpha == 1:
        return np.sum(np.log(util))
    if alpha == 0:
        return np.sum(util)
    return np.sum(util**(1-alpha)/(1-alpha))
    
def solve_optimization(gainF, risk, cost, alpha, Q):
    """
    Solve the alpha-fair resource allocation problem with cvxpy (MOSEK backend).

    Each individual's utility is ``gainF * (risk + 0.001) * d`` where ``d`` is
    the non-negative decision variable. The total cost ``sum(cost * d)`` is
    bounded by ``Q``.

    Args:
        gainF (np.ndarray): Gain factors.
        risk (np.ndarray): Risk values (0.001 is added before solving).
        cost (np.ndarray): Per-individual cost coefficients.
        alpha: Fairness parameter: 0 (utilitarian), 1 (Nash welfare), 'inf' or
            a float infinity (max-min), or any other number (general alpha-fairness).
        Q (float): Budget.

    Returns:
        tuple: ``(optimal_decision, optimal_value)``, where ``optimal_value`` is
        the alpha-fairness of the utilities at the optimal decision.

    Raises:
        RuntimeError: If the solver does not return a solution.
    """
    # Accept a float infinity as a synonym of the 'inf' sentinel.
    if alpha == float('inf'):
        alpha = 'inf'

    # Flatten input arrays
    gainF, risk, cost = gainF.flatten(), risk.flatten() + 0.001, cost.flatten()
    d = cp.Variable(risk.shape, nonneg=True)

    utils = cp.multiply(cp.multiply(gainF, risk), d)

    # Constraints shared by every formulation. The elementwise product is
    # spelled out explicitly instead of relying on the ambiguous `*` operator.
    constraints = [
        d >= 0,
        # d <= 1,
        cp.sum(cp.multiply(cost, d)) <= Q,
    ]

    if alpha == 'inf':
        # Maximin formulation: t is a lower bound on every utility.
        t = cp.Variable()
        objective = cp.Maximize(t)
        constraints.append(utils >= t)
    elif alpha == 1:
        # Nash welfare (alpha = 1)
        objective = cp.Maximize(cp.sum(cp.log(utils)))
    elif alpha == 0:
        # Utilitarian welfare (alpha = 0)
        objective = cp.Maximize(cp.sum(utils))
    else:
        # General alpha-fairness (alpha == 0 is already handled above).
        objective = cp.Maximize(cp.sum(utils**(1-alpha))/(1-alpha))

    # Solve the problem
    problem = cp.Problem(objective, constraints)
    problem.solve(solver=cp.MOSEK, verbose=False, warm_start=True, mosek_params={'MSK_IPAR_LOG': 1})

    if problem.status != 'optimal':
        print(f"Warning: Problem status is {problem.status}")

    optimal_decision = d.value
    if optimal_decision is None:
        # Without this check the multiplication below fails with an opaque TypeError.
        raise RuntimeError(
            f"solve_optimization: solver returned no solution (status: {problem.status})"
        )

    optimal_value = AlphaFairness(optimal_decision * gainF * risk, alpha)

    return optimal_decision, optimal_value

In [68]:
# pred_sol,_ = solve_optimization(gainF, predicted_risk, cost, alpha='inf', Q=Q)
# pred_obj = np.sum((risk * gainF * pred_sol)**(1-alpha)/(1-alpha))
# true_obj = np.sum((optimal_decision * gainF * risk)**(1-alpha)/(1-alpha))

In [69]:
def twoStagePTO(model, feats, gainF, risk, cost, Q, alphas=(0.5,), scaler=None):
    """
    Perform a two-stage (predict-then-optimize) analysis and compute normalized regrets.

    For every alpha the allocation problem is solved once with the true risks
    and once with the model's predicted risks. Regret is the true-risk
    objective of the true-risk solution minus the true-risk objective of the
    predicted-risk solution.

    Args:
        model (nn.Module): A regression neural network for risk prediction.
        feats (np.ndarray): Feature array for predictions.
        gainF (np.ndarray): Gain factors.
        risk (np.ndarray): True risk values.
        cost (np.ndarray): Cost coefficients.
        Q (float): Budget constraint.
        alphas (iterable): Alpha values for fairness.
        scaler (optional): An already-fitted scaler exposing ``transform``.
            When given, it is applied to ``feats`` unchanged, so the model sees
            the scaling it was trained with. When None (default), a new
            StandardScaler is fitted on ``feats``, as before.

    Returns:
        pd.DataFrame: One row per alpha with risk means, objectives, regret and
        normalized regret. The two regret columns hold strings formatted to
        two decimals.
    """

    # Feature scaling
    if scaler is None:
        feats_scaled = StandardScaler().fit_transform(feats)
    else:
        feats_scaled = scaler.transform(feats)

    # Predict risks
    model.eval()
    pred_risk = model(torch.FloatTensor(feats_scaled)).detach().numpy().flatten()

    # Initialize result storage
    results = []

    # Iterate over alphas
    for alpha in alphas:
        # Solve optimization problems
        true_sol, _ = solve_optimization(gainF, risk, cost, alpha, Q)
        pred_sol, _ = solve_optimization(gainF, pred_risk, cost, alpha, Q)

        # Utilities use the same +0.001 offset that solve_optimization applies.
        true_utility = (risk + 0.001) * gainF * true_sol
        pred_utility = (pred_risk + 0.001) * gainF * pred_sol
        pred_utility_truerisk = (risk + 0.001) * gainF * pred_sol

        # Calculate objectives
        true_obj = AlphaFairness(true_utility, alpha)
        pred_obj = AlphaFairness(pred_utility, alpha)
        pred_obj_truerisk = AlphaFairness(pred_utility_truerisk, alpha)

        # Regret is measured under the true risk; the epsilon guards against
        # division by zero when the true objective is 0.
        regret = true_obj - pred_obj_truerisk
        normalized_regret = regret / (abs(true_obj) + 1e-7)

        # Collect results for this alpha
        results.append({
            'Alpha': alpha,
            'Predicted Risk Mean': pred_risk.mean(),
            'True Risk Mean': risk.mean(),
            'True Objective': true_obj,
            'Predicted Objective': pred_obj,
            'Regret': f"{regret:.2f}",
            'Normalized Regret': f"{normalized_regret:.2f}"
        })

    # Create a DataFrame for results
    results_df = pd.DataFrame(results)
    print(results_df)
    return results_df


In [70]:
# Fixed-seed 40,000-row sample of df used for the optimisation experiments.
data_sample = df.sample(n=40000, random_state=42)
feats_sample = data_sample[get_all_features(data_sample)].values

# Pull the per-row input columns out of the sample as NumPy arrays.
risk_sample, gainF_sample, decision_sample, race_sample = (
    data_sample[column].values
    for column in ('risk_score_t', 'g_continuous', 'propensity_score', 'race')
)

# Unit cost for every sampled individual.
cost_sample = np.ones(risk_sample.shape)



In [71]:
# The two commented lines below regenerate and save the table; the cell
# currently loads the cached CSV instead of re-running the solver.
# results.to_csv('data/results.csv', index=False)
# results = twoStagePTO(model, feats_sample, gainF_sample, risk_sample, cost_sample, Q, alphas=[0,.5,.9,1,2,'inf'])
# NOTE(review): the recorded output shows an 'Unnamed: 0' column, which
# suggests the cached CSV was written with its index — confirm and re-save.
results = pd.read_csv('data/results.csv')
results

Unnamed: 0,Alpha,Predicted Risk Mean,True Risk Mean,True Objective,Predicted Objective,Regret,Normalized Regret
0,0.0,2.703654,4.387568,284164.896752,7960235.0,104721.5,0.37
1,0.5,2.703654,4.387568,73629.284021,38114.15,23887.93,0.32
2,0.9,2.703654,4.387568,361873.257701,306005.1,658.1,0.0
3,1.0,2.703654,4.387568,-44900.932086,-112700.5,-0.0,-0.0
4,2.0,2.703654,4.387568,-834952.038551,-1546416.0,2847299.59,3.41
5,inf,2.703654,4.387568,0.000627,0.003833028,0.0,1.0


In [74]:
def _race_group_stats(alpha, race_label, in_group, true_sol, true_utility, sol, utility):
    """Mean/std of decisions and utilities for the rows selected by ``in_group``."""
    return {
        'Alpha': alpha,
        'Race': race_label,
        'True Solution Mean': true_sol[in_group].mean(),
        'True Solution Std': true_sol[in_group].std(),
        'Predicted Solution Mean': sol[in_group].mean(),
        'Predicted Solution Std': sol[in_group].std(),
        'True Utility Mean': true_utility[in_group].mean(),
        'True Utility Std': true_utility[in_group].std(),
        'Predicted Utility Mean': utility[in_group].mean(),
        'Predicted Utility Std': utility[in_group].std(),
    }


def twoStagePTO_with_bias_analysis(model, fair_model, feats, gainF, risk, cost, race, Q=1000, alphas=(0.5,), scaler=None):
    """
    Two-stage predict-then-optimize analysis for a standard and a fair predictor,
    with per-race summaries of the resulting decisions and utilities.

    Args:
        model (nn.Module): Standard risk predictor.
        fair_model (nn.Module): Fairness-regularised risk predictor.
        feats (np.ndarray): Feature array for predictions.
        gainF (np.ndarray): Gain factors.
        risk (np.ndarray): True risk values.
        cost (np.ndarray): Cost coefficients.
        race (np.ndarray): Race codes (0 = white, 1 = black).
        Q (float): Budget constraint.
        alphas (iterable): Alpha values for fairness.
        scaler (optional): An already-fitted scaler exposing ``transform``,
            applied to ``feats`` for both models. When None (default), a new
            StandardScaler is fitted on ``feats``, as before.

    Returns:
        tuple of pd.DataFrame: ``(results_df, bias_analysis_df,
        fair_pto_results_df, fair_pto_analysis_df)``.

    NOTE(review): the two models' tables do not use the same definition of
    "Predicted". For ``model``, 'Predicted Objective' and 'Predicted Utility'
    are evaluated under the *predicted* risk; for ``fair_model`` they are
    evaluated under the *true* risk. This is kept as-is; confirm which
    definition is intended before comparing the tables.
    """
    # Feature scaling
    if scaler is None:
        feats_scaled = StandardScaler().fit_transform(feats)
    else:
        feats_scaled = scaler.transform(feats)

    # Predict risks; both models are switched to eval mode for inference.
    model.eval()
    fair_model.eval()
    feats_tensor = torch.FloatTensor(feats_scaled)
    pred_risk = model(feats_tensor).detach().numpy().flatten()
    fair_pred_risk = fair_model(feats_tensor).detach().numpy().flatten()

    race_labels = {0: 'White', 1: 'Black'}

    # Initialize result storage
    results = []
    bias_analysis = []
    fair_pto_results = []
    fair_pto_analysis = []

    # Iterate over alphas
    for alpha in alphas:
        # Solve optimization problems
        true_sol, _ = solve_optimization(gainF, risk, cost, alpha, Q)
        pred_sol, _ = solve_optimization(gainF, pred_risk, cost, alpha, Q)
        fair_pred_sol, _ = solve_optimization(gainF, fair_pred_risk, cost, alpha, Q)

        # Utilities use the same +0.001 offset that solve_optimization applies.
        true_utility = (risk + 0.001) * gainF * true_sol
        pred_utility = (pred_risk + 0.001) * gainF * pred_sol
        pred_utility_truerisk = (risk + 0.001) * gainF * pred_sol
        fair_pred_utility_truerisk = (risk + 0.001) * gainF * fair_pred_sol

        # Calculate objectives
        true_obj = AlphaFairness(true_utility, alpha)
        pred_obj = AlphaFairness(pred_utility, alpha)
        pred_obj_truerisk = AlphaFairness(pred_utility_truerisk, alpha)
        fair_pred_obj_truerisk = AlphaFairness(fair_pred_utility_truerisk, alpha)

        # Regret is measured under the true risk; the epsilon guards against
        # division by zero when the true objective is 0.
        regret = true_obj - pred_obj_truerisk
        normalized_regret = regret / (abs(true_obj) + 1e-7)

        fair_regret = true_obj - fair_pred_obj_truerisk
        fair_normalized_regret = fair_regret / (abs(true_obj) + 1e-7)

        # Collect results for this alpha
        results.append({
            'Alpha': alpha,
            'Predicted Risk Mean': pred_risk.mean(),
            'True Risk Mean': risk.mean(),
            'True Objective': true_obj,
            'Predicted Objective': pred_obj,
            'Regret': f"{regret:.2f}",
            'Normalized Regret': f"{normalized_regret:.2f}"
        })

        fair_pto_results.append({
            'Alpha': alpha,
            'Predicted Risk Mean': fair_pred_risk.mean(),
            'True Risk Mean': risk.mean(),
            'True Objective': true_obj,
            'Predicted Objective': fair_pred_obj_truerisk,
            'Regret': f"{fair_regret:.2f}",
            'Normalized Regret': f"{fair_normalized_regret:.2f}"
        })

        # Per-race summaries for both predictors. Labels are written directly,
        # so an empty `alphas` yields empty tables instead of a KeyError on 'Race'.
        for r in [0, 1]:  # 0 = white, 1 = black
            mask = race == r
            bias_analysis.append(_race_group_stats(
                alpha, race_labels[r], mask, true_sol, true_utility,
                pred_sol, pred_utility,
            ))
            fair_pto_analysis.append(_race_group_stats(
                alpha, race_labels[r], mask, true_sol, true_utility,
                fair_pred_sol, fair_pred_utility_truerisk,
            ))

    # Create DataFrames for results and bias analysis
    results_df = pd.DataFrame(results)
    bias_analysis_df = pd.DataFrame(bias_analysis)

    fair_pto_results_df = pd.DataFrame(fair_pto_results)
    fair_pto_analysis_df = pd.DataFrame(fair_pto_analysis)

    return results_df, bias_analysis_df, fair_pto_results_df, fair_pto_analysis_df


In [77]:
# racial_results, racial_bias_analysis, fair_pto_results, fair_pto_analysis = twoStagePTO_with_bias_analysis(model, fair_model, feats_sample, gainF_sample, risk_sample, cost_sample, race_sample, alphas=[0,.5,.9,1,2,'inf'])

# racial_bias_analysis = racial_bias_analysis.round(8)
# fair_pto_analysis = fair_pto_analysis.round(8)

# racial_bias_analysis.to_csv('data/racial_bias_analysis.csv', index=False)
# fair_pto_analysis.to_csv('data/fair_pto_analysis.csv', index=False)
# fair_pto_results.to_csv('data/fair_pto_results.csv', index=False)

In [86]:
# Re-display the two-stage results table loaded from 'data/results.csv'.
results

Unnamed: 0,Alpha,Predicted Risk Mean,True Risk Mean,True Objective,Predicted Objective,Regret,Normalized Regret
0,0.0,2.703654,4.387568,284164.896752,7960235.0,104721.5,0.37
1,0.5,2.703654,4.387568,73629.284021,38114.15,23887.93,0.32
2,0.9,2.703654,4.387568,361873.257701,306005.1,658.1,0.0
3,1.0,2.703654,4.387568,-44900.932086,-112700.5,-0.0,-0.0
4,2.0,2.703654,4.387568,-834952.038551,-1546416.0,2847299.59,3.41
5,inf,2.703654,4.387568,0.000627,0.003833028,0.0,1.0


In [84]:
# NOTE(review): `racial_results` is only assigned by the commented-out
# twoStagePTO_with_bias_analysis call in an earlier cell, so this cell relies
# on leftover kernel state and fails on a fresh kernel.
racial_results

Unnamed: 0,Alpha,Predicted Risk Mean,True Risk Mean,True Objective,Predicted Objective,Regret,Normalized Regret
0,0.0,2.703654,4.387568,284164.896752,7960235.0,104721.5,0.37
1,0.5,2.703654,4.387568,73629.284021,38114.15,23887.93,0.32
2,0.9,2.703654,4.387568,361873.257701,306005.1,658.1,0.0
3,1.0,2.703654,4.387568,-44900.932086,-112700.5,-0.0,-0.0
4,2.0,2.703654,4.387568,-834952.038551,-1546416.0,2847299.59,3.41
5,inf,2.703654,4.387568,0.000627,0.003833028,0.0,1.0


In [79]:
# Load the cached fair-model results and show the per-alpha mean of each column.
fair_pto_results = pd.read_csv('data/fair_pto_results.csv')
fair_pto_results.groupby('Alpha').mean()

Unnamed: 0_level_0,Predicted Risk Mean,True Risk Mean,True Objective,Predicted Objective,Regret,Normalized Regret
Alpha,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0.0,2.733022,4.387568,284164.896752,176641.9,107522.97,0.38
0.5,2.733022,4.387568,73629.284021,54484.07,19145.22,0.26
0.9,2.733022,4.387568,361873.257701,361376.6,496.65,0.0
1.0,2.733022,4.387568,-44900.932086,-44900.93,0.0,0.0
2.0,2.733022,4.387568,-834952.038551,-3321640.0,2486687.55,2.98
inf,2.733022,4.387568,0.000627,2.412388e-06,0.0,1.0


In [89]:
# Load the cached per-race analysis for the fair model and show the mean of
# each statistic per (alpha, race) pair.
fair_pto_analysis = pd.read_csv('data/fair_pto_analysis.csv')
fair_pto_analysis.groupby(['Alpha', 'Race']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,True Solution Mean,True Solution Std,Predicted Solution Mean,Predicted Solution Std,True Utility Mean,True Utility Std,Predicted Utility Mean,Predicted Utility Std
Alpha,Race,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Black,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,White,0.028225,5.312689,0.028225,5.312689,8.020686,1509.679826,4.985801,938.443701
0.5,Black,0.023181,0.03595,0.027342,0.097788,2.479947,6.371667,1.665787,10.759633
0.5,White,0.025235,0.036278,0.024698,0.144549,2.646779,6.408446,1.82581,25.133334
0.9,Black,0.024675,0.003974,0.02538,0.003714,0.945569,1.6021,0.846239,1.367325
0.9,White,0.025042,0.004007,0.024951,0.003765,1.029735,1.615768,0.912369,1.382302
1.0,Black,0.025,1e-05,0.025,3e-06,0.785466,1.218131,0.785441,1.218093
1.0,White,0.025,1e-05,0.025,3e-06,0.855046,1.229236,0.855019,1.229198
2.0,Black,0.026925,0.22779,0.023978,0.079558,0.152662,0.119106,0.483615,0.801976
2.0,White,0.024752,0.215799,0.025132,0.070277,0.162163,0.120497,0.555863,0.853823


In [None]:
# Load the cached per-race analysis for the standard model and show the mean
# of each statistic per (alpha, race) pair.
racial_bias_analysis = pd.read_csv('data/racial_bias_analysis.csv')
racial_bias_analysis.groupby(['Alpha', 'Race']).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,True Solution Mean,True Solution Std,Predicted Solution Mean,Predicted Solution Std,True Utility Mean,True Utility Std,Predicted Utility Mean,Predicted Utility Std
Alpha,Race,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
0.0,Black,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0.0,White,0.028225,5.312689,0.028225,5.312689,8.020686,1509.679826,224.681324,42290.253843
0.5,Black,0.023181,0.03595,0.028787,0.1233266,2.479947,6.371667,5.824611,126.158507
0.5,White,0.025235,0.036278,0.024511,0.1855345,2.646779,6.408446,12.720251,1152.159325
0.9,Black,0.024675,0.003974,0.025626,0.00427723,0.945569,1.6021,0.365721,2.078181
0.9,White,0.025042,0.004007,0.024919,0.00439381,1.029735,1.615768,0.312457,3.82748
1.0,Black,0.025,1e-05,0.025,2.34e-06,0.785466,1.218131,0.26136,1.119721
1.0,White,0.025,1e-05,0.025,9.7e-07,0.855046,1.229236,0.222547,1.684594
2.0,Black,0.026925,0.22779,0.022331,0.06234881,0.152662,0.119106,0.058741,0.057505
2.0,White,0.024752,0.215799,0.025344,0.05960121,0.162163,0.120497,0.052995,0.054254
