# 05: Gold Layer - Fairness and Bias Mitigation

This notebook demonstrates:
- Bias mitigation using AIF360
- Fairness metrics calculation
- Before/after comparison

In [None]:
import sys
sys.path.insert(0, '../src')

import matplotlib.pyplot as plt
import numpy as np
import yaml
from pyspark.sql import SparkSession

from faircare.gold.biasmitigation import BiasMitigator
from faircare.gold.fairnessmetrics import FairnessMetrics
from faircare.metrics.layermetrics import GoldMetrics

In [None]:
# Obtain the Spark session used by every later cell in this notebook.
spark = (
    SparkSession.builder
    .appName("FAIR-CARE-Gold")
    .getOrCreate()
)

# Parse the pipeline configuration from the shared YAML file.
with open('../configs/default.yaml', 'r') as config_file:
    config = yaml.safe_load(config_file)

# Only the COMPAS dataset entry is used in this notebook.
dataset_config = config['datasets']['compas']

## Load Silver Data

In [None]:
# Read the Silver-layer Delta table from the location named in the dataset config.
silver_path = dataset_config['silver_path']
silver_df = spark.read.format("delta").load(silver_path)

# Report the row count as a quick check that the table loaded.
silver_record_count = silver_df.count()
print(f"Silver records: {silver_record_count}")

## Fairness Metrics (Before Mitigation)

In [None]:
# Baseline: compute the fairness metrics on the Silver data, before any mitigation.
fairness_calculator = FairnessMetrics(dataset_config)
fairness_before = fairness_calculator.calculate(silver_df)

print("\nFairness Metrics (Before Mitigation):")

# A metric missing from the result is reported as 'N/A'.
spd_before = fairness_before.get('statistical_parity_difference', 'N/A')
print(f"  Statistical Parity Difference: {spd_before}")

di_before = fairness_before.get('disparate_impact', 'N/A')
print(f"  Disparate Impact: {di_before}")

## Bias Mitigation

In [None]:
# Run the configured mitigation step on the Silver data to produce the Gold dataset.
mitigator = BiasMitigator(dataset_config)
gold_df = mitigator.mitigate(silver_df, spark)

print(f"\nGold records: {gold_df.count()}")

# The presence of an 'instance_weights' column is the only evidence this notebook
# has that reweighing ran, so report its absence explicitly instead of staying silent.
if 'instance_weights' in gold_df.columns:
    print("Reweighing applied successfully")
else:
    print("WARNING: 'instance_weights' column not found - reweighing may not have been applied")

## Fairness Metrics (After Mitigation)

In [None]:
# Recompute the same metrics on the Gold data so they can be compared with the baseline.
fairness_after = fairness_calculator.calculate(gold_df)

# Look the values up once; a metric missing from the result is shown as 'N/A'.
print("\nFairness Metrics (After Mitigation):")
spd_after = fairness_after.get('statistical_parity_difference', 'N/A')
di_after = fairness_after.get('disparate_impact', 'N/A')
print(f"  Statistical Parity Difference: {spd_after}")
print(f"  Disparate Impact: {di_after}")

## Comparison

In [None]:
def _metric_or_nan(results, key):
    """Return ``results[key]``, or NaN when the metric is absent or ``None``.

    NaN makes matplotlib leave the bar out. Defaulting to 0 would instead draw a
    missing metric as a genuine value of zero, which is misleading on a fairness
    chart, and a ``None`` value would make the bar call fail.
    """
    value = results.get(key)
    return np.nan if value is None else value


# (tick label, key in the fairness result mapping) for every plotted metric.
plotted_metrics = [
    ('SPD', 'statistical_parity_difference'),
    ('DI', 'disparate_impact'),
]
metrics = [label for label, _key in plotted_metrics]
before = [_metric_or_nan(fairness_before, key) for _label, key in plotted_metrics]
after = [_metric_or_nan(fairness_after, key) for _label, key in plotted_metrics]

# One group of two side-by-side bars per metric.
x = np.arange(len(metrics))
width = 0.35

fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x - width/2, before, width, label='Before Mitigation', color='coral')
ax.bar(x + width/2, after, width, label='After Mitigation', color='lightgreen')

ax.set_ylabel('Metric Value')
ax.set_title('Fairness Metrics: Before vs After Bias Mitigation')
ax.set_xticks(x)
ax.set_xticklabels(metrics)
ax.legend()
# Zero reference line. NOTE(review): assuming the standard AIF360 definitions, 0 is
# the ideal value for SPD only; DI is a ratio whose ideal value is 1 - confirm before
# reading the DI bars against this line.
ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Calculate Gold Score

In [None]:
gold_metrics = GoldMetrics()

# NOTE(review): utility retention is a hard-coded value, not something measured in
# this notebook - confirm where it should come from before relying on the score.
utility_retention = 0.85

# Inputs to the Gold score: post-mitigation SPD plus the utility retention figure.
gold_inputs = {
    'statistical_parity_difference': fairness_after.get('statistical_parity_difference'),
    'utility_retention': utility_retention,
}
sg = gold_metrics.calculate(gold_inputs)

print(f"\nGold Score (SG): {sg:.3f}")

## Summary

Gold layer complete:
- ✅ Bias mitigated using Reweighing
- ✅ Fairness metrics recomputed after mitigation (check the comparison above to confirm they improved)
- ✅ Gold Score calculated

**Next**: Proceed to notebook 06 for FAIR-CARE Score.

In [None]:
# Stop the Spark session created at the top of the notebook; run this cell last,
# since the earlier cells all use `spark` or DataFrames derived from it.
spark.stop()