In [6]:
import pandas as pd

def _gap_reduction_pct(baseline, transformed, perfect):
    """Return the percentage of the baseline's distance to `perfect` that was removed.

    Positive when `transformed` is closer to `perfect` than `baseline` is,
    negative when it is farther away. The caller must ensure
    `baseline != perfect`, otherwise the denominator is zero.
    """
    baseline_gap = abs(baseline - perfect)
    transformed_gap = abs(transformed - perfect)
    return ((baseline_gap - transformed_gap) / baseline_gap) * 100


def calculate_percentage_changes(baseline_metrics, transformed_metrics):
    """
    Calculate the percentage change for all given metrics, considering ideal values.

    Supported metric keys and the ideal value each is measured against:

    * ``'balanced_accuracy'`` (ideal 1): plain relative change,
      ``(transformed - baseline) / baseline * 100``.
    * ``'statistical_parity_difference'`` (ideal 0) and
      ``'disparate_impact'`` (ideal 1): percentage by which the absolute
      distance to the ideal value shrank. 100 means the transformed value
      hits the ideal exactly; negative values mean it moved farther away.

    For every metric a positive result therefore means "better than baseline".

    When the relative change is undefined because its denominator is zero
    (baseline accuracy of 0, or a fairness metric already at its ideal value),
    the change is reported as 0 instead of raising ``ZeroDivisionError``.

    Parameters
    ----------
    baseline_metrics : dict
        Metric key -> baseline value. Only the keys present here are processed.
    transformed_metrics : dict
        Metric key -> value after mitigation. Must contain every key of
        ``baseline_metrics``.

    Returns
    -------
    dict
        Metric key -> percentage change, one entry per key of
        ``baseline_metrics``.

    Raises
    ------
    KeyError
        If a key of ``baseline_metrics`` is missing from
        ``transformed_metrics`` or is not one of the supported metrics.
    """
    perfect_values = {
        'balanced_accuracy': 1,
        'statistical_parity_difference': 0,
        'disparate_impact': 1,
    }

    percentage_changes = {}
    for metric in baseline_metrics:
        baseline = baseline_metrics[metric]
        transformed = transformed_metrics[metric]
        # Also acts as validation: an unsupported metric name fails here.
        perfect = perfect_values[metric]

        if metric == 'balanced_accuracy':
            if baseline != 0:
                percentage_changes[metric] = ((transformed - baseline) / baseline) * 100
            else:
                # Relative change from a zero baseline is undefined; follow the
                # same "report 0" convention as the fairness metrics below.
                percentage_changes[metric] = 0
        elif metric == 'statistical_parity_difference':
            if abs(baseline) > 0:
                percentage_changes[metric] = _gap_reduction_pct(baseline, transformed, perfect)
            else:
                percentage_changes[metric] = 0
        elif metric == 'disparate_impact':
            if baseline != perfect:
                percentage_changes[metric] = _gap_reduction_pct(baseline, transformed, perfect)
            else:
                percentage_changes[metric] = 0

    return percentage_changes

def compute_bias_changes(data, baseline_label="Baseline (Original Data)"):
    """
    Build a table of percentage changes of each mitigation method versus the baseline.

    Parameters
    ----------
    data : iterable of dict
        One record per (classifier, method) with the keys ``"Classifier"``,
        ``"Bias Mitigation Method"``, ``"Balanced Accuracy"``,
        ``"Statistical Parity Difference"`` and ``"Disparate Impact"``.
    baseline_label : str, optional
        Value of ``"Bias Mitigation Method"`` that marks the baseline record.
        The first record carrying this label is used as the baseline for every
        other record; all records carrying it are left out of the result.

    Returns
    -------
    pandas.DataFrame
        One row per non-baseline record with the columns ``"Classifier"``,
        ``"Bias Mitigation Method"`` and one ``"<metric> Change (%)"`` column
        per metric. The columns are present even when there are no rows.

    Raises
    ------
    ValueError
        If no record in ``data`` carries ``baseline_label``.
    """
    # Internal metric key -> column name used in the input records.
    metric_columns = {
        "balanced_accuracy": "Balanced Accuracy",
        "statistical_parity_difference": "Statistical Parity Difference",
        "disparate_impact": "Disparate Impact",
    }
    output_columns = ["Classifier", "Bias Mitigation Method"] + [
        f"{column} Change (%)" for column in metric_columns.values()
    ]

    # `data` is scanned twice (baseline lookup, then the main loop), so
    # materialise it once to also support one-shot iterators/generators.
    records = list(data)

    baseline_entry = next(
        (item for item in records if item["Bias Mitigation Method"] == baseline_label),
        None,
    )
    if baseline_entry is None:
        raise ValueError(
            f"no entry with 'Bias Mitigation Method' == {baseline_label!r} found in data"
        )
    baseline_metrics = {key: baseline_entry[column] for key, column in metric_columns.items()}

    results = []
    for item in records:
        if item["Bias Mitigation Method"] == baseline_label:
            continue
        transformed_metrics = {key: item[column] for key, column in metric_columns.items()}
        percentage_changes = calculate_percentage_changes(baseline_metrics, transformed_metrics)

        row = {
            "Classifier": item["Classifier"],
            "Bias Mitigation Method": item["Bias Mitigation Method"],
        }
        for key, column in metric_columns.items():
            row[f"{column} Change (%)"] = percentage_changes[key]
        results.append(row)

    # Passing `columns` keeps the schema stable when `results` is empty.
    return pd.DataFrame(results, columns=output_columns)

In [7]:
# Example usage: Logistic Regression results, one row per mitigation method.
# Row layout: (method, balanced accuracy, statistical parity difference, disparate impact)
data = [
    {
        "Classifier": "Logistic Regression",
        "Bias Mitigation Method": method,
        "Balanced Accuracy": balanced_accuracy,
        "Statistical Parity Difference": parity_difference,
        "Disparate Impact": impact,
    }
    for method, balanced_accuracy, parity_difference, impact in [
        ("Baseline (Original Data)", 0.74, -0.35, 0.27),
        ("Reweighing", 0.71, -0.07, 0.77),
        ("DIR", 0.72, -0.01, 0.95),
        ("LFR", 0.69, -0.05, 0.82),
        ("OptimPreproc", 0.71, -0.09, 0.73),
        ("Synthetic Data (Women_50K)", 0.77, -0.18, 0.63),
        ("Synthetic Data (Women_neverMarried_50K)", 0.76, -0.10, 0.76),
        ("Synthetic Data (Women_black_50K)", 0.80, -0.17, 0.68),
    ]
]

# Percentage change of every method relative to the baseline row.
df_results = compute_bias_changes(data)
df_results

Unnamed: 0,Classifier,Bias Mitigation Method,Balanced Accuracy Change (%),Statistical Parity Difference Change (%),Disparate Impact Change (%)
0,Logistic Regression,Reweighing,-4.054054,80.0,68.493151
1,Logistic Regression,DIR,-2.702703,97.142857,93.150685
2,Logistic Regression,LFR,-6.756757,85.714286,75.342466
3,Logistic Regression,OptimPreproc,-4.054054,74.285714,63.013699
4,Logistic Regression,Synthetic Data (Women_50K),4.054054,48.571429,49.315068
5,Logistic Regression,Synthetic Data (Women_neverMarried_50K),2.702703,71.428571,67.123288
6,Logistic Regression,Synthetic Data (Women_black_50K),8.108108,51.428571,56.164384


In [8]:
# Random Forest results, one row per mitigation method.
# Row layout: (method, balanced accuracy, statistical parity difference, disparate impact)
data = [
    {
        "Classifier": "Random Forest",
        "Bias Mitigation Method": method,
        "Balanced Accuracy": balanced_accuracy,
        "Statistical Parity Difference": parity_difference,
        "Disparate Impact": impact,
    }
    for method, balanced_accuracy, parity_difference, impact in [
        ("Baseline (Original Data)", 0.74, -0.36, 0.27),
        ("Reweighing", 0.71, -0.08, 0.76),
        ("DIR", 0.74, -0.11, 0.67),
        ("LFR", 0.68, -0.01, 0.93),
        ("OptimPreproc", 0.70, -0.05, 0.89),
        ("Synthetic Data (Women_50K)", 0.82, -0.09, 0.80),
        ("Synthetic Data (Women_neverMarried_50K)", 0.82, -0.09, 0.79),
        ("Synthetic Data (Women_black_50K)", 0.82, -0.11, 0.74),
    ]
]

# Percentage change of every method relative to the baseline row.
df_results = compute_bias_changes(data)
df_results

Unnamed: 0,Classifier,Bias Mitigation Method,Balanced Accuracy Change (%),Statistical Parity Difference Change (%),Disparate Impact Change (%)
0,Random Forest,Reweighing,-4.054054,77.777778,67.123288
1,Random Forest,DIR,0.0,69.444444,54.794521
2,Random Forest,LFR,-8.108108,97.222222,90.410959
3,Random Forest,OptimPreproc,-5.405405,86.111111,84.931507
4,Random Forest,Synthetic Data (Women_50K),10.810811,75.0,72.60274
5,Random Forest,Synthetic Data (Women_neverMarried_50K),10.810811,75.0,71.232877
6,Random Forest,Synthetic Data (Women_black_50K),10.810811,69.444444,64.383562


In [9]:
# Gradient Boosting results, one row per mitigation method.
# Row layout: (method, balanced accuracy, statistical parity difference, disparate impact)
data = [
    {
        "Classifier": "Gradient Boosting",
        "Bias Mitigation Method": method,
        "Balanced Accuracy": balanced_accuracy,
        "Statistical Parity Difference": parity_difference,
        "Disparate Impact": impact,
    }
    for method, balanced_accuracy, parity_difference, impact in [
        ("Baseline (Original Data)", 0.74, -0.38, 0.28),
        ("Reweighing", 0.71, -0.09, 0.79),
        ("DIR", 0.72, -0.08, 0.71),
        ("LFR", 0.68, -0.01, 0.97),
        ("OptimPreproc", 0.71, -0.19, 0.59),
        ("Synthetic Data (Women_50K)", 0.83, -0.11, 0.73),
        ("Synthetic Data (Women_neverMarried_50K)", 0.81, -0.16, 0.61),
        ("Synthetic Data (Women_black_50K)", 0.80, -0.17, 0.68),
    ]
]

# Percentage change of every method relative to the baseline row.
df_results = compute_bias_changes(data)
df_results

Unnamed: 0,Classifier,Bias Mitigation Method,Balanced Accuracy Change (%),Statistical Parity Difference Change (%),Disparate Impact Change (%)
0,Gradient Boosting,Reweighing,-4.054054,76.315789,70.833333
1,Gradient Boosting,DIR,-2.702703,78.947368,59.722222
2,Gradient Boosting,LFR,-8.108108,97.368421,95.833333
3,Gradient Boosting,OptimPreproc,-4.054054,50.0,43.055556
4,Gradient Boosting,Synthetic Data (Women_50K),12.162162,71.052632,62.5
5,Gradient Boosting,Synthetic Data (Women_neverMarried_50K),9.459459,57.894737,45.833333
6,Gradient Boosting,Synthetic Data (Women_black_50K),8.108108,55.263158,55.555556


In [None]:
# TODO: compute the same bias changes for medical expenditure