In [1]:
# Step 1: Load and Preprocess Data
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric

# Load COMPAS data (filtered for relevant columns).
# `df` keeps the original string-valued "race" column so that later cells
# can still select rows by race name.
df = pd.read_csv("compas-scores-two-years.csv")
df = df[["race", "decile_score", "two_year_recid"]]

# AIF360 datasets only accept numeric values, so "race" has to be encoded
# before conversion. The analysis compares Black vs. White defendants, so
# rows for other races are dropped and the two remaining groups are coded as
# 0 = African-American (unprivileged) and 1 = Caucasian (privileged) -- the
# same codes the group dictionaries in the later steps refer to.
RACE_CODES = {"African-American": 0, "Caucasian": 1}
df_encoded = (
    df[df["race"].isin(RACE_CODES)]
    .assign(race=lambda frame: frame["race"].map(RACE_CODES))
    .reset_index(drop=True)
)
assert not df_encoded.isna().any().any(), "AIF360 rejects frames containing NaN"

# Convert to AIF360 format.
# The label is the observed outcome: 0 = did not re-offend within two years
# (favorable), 1 = re-offended (unfavorable).
dataset = BinaryLabelDataset(
    favorable_label=0,  # Lower recidivism
    unfavorable_label=1,
    df=df_encoded,
    label_names=["two_year_recid"],
    protected_attribute_names=["race"],
)

ModuleNotFoundError: No module named 'aif360'

In [3]:
# First, install the aif360 package
!pip install aif360

# Step 1: Load and Preprocess Data
import pandas as pd
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import BinaryLabelDatasetMetric

# Load COMPAS data (filtered for relevant columns)
df = pd.read_csv("compas-scores-two-years.csv")
df = df[["race", "decile_score", "two_year_recid"]]

# Convert to AIF360 format
dataset = BinaryLabelDataset(
    favorable_label=0,  # Lower recidivism
    unfavorable_label=1,
    df=df,
    label_names=["two_year_recid"],
    protected_attribute_names=["race"]
)

Defaulting to user installation because normal site-packages is not writeable
Looking in links: /usr/share/pip-wheels
Collecting aif360
  Obtaining dependency information for aif360 from https://files.pythonhosted.org/packages/61/7b/7e4fa9e7b6f62759663db3b5aaa12a6cc9ef866223e5978c25844bceb762/aif360-0.6.1-py3-none-any.whl.metadata
  Downloading aif360-0.6.1-py3-none-any.whl.metadata (5.0 kB)
Downloading aif360-0.6.1-py3-none-any.whl (259 kB)
[2K   [38;5;70m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: aif360
Successfully installed aif360-0.6.1


ModuleNotFoundError: No module named 'aif360'

In [None]:
# Step 2: Compute Bias Metrics
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

# Numeric codes for the two groups being compared (AIF360 needs numbers).
RACE_TO_CODE = {"African-American": 0, "Caucasian": 1}
# Decile scores at or above this value are treated as a "will re-offend"
# prediction. NOTE(review): 5 follows the usual Low (1-4) vs. Medium/High
# (5-10) COMPAS split -- confirm this is the intended cut-off.
HIGH_RISK_MIN_DECILE = 5

# Restrict to Black and White defendants and encode race numerically.
# The index is reset so that index labels double as row positions below.
compas_bw = (
    df[df["race"].isin(RACE_TO_CODE)]
    .assign(race=lambda frame: frame["race"].map(RACE_TO_CODE))
    .reset_index(drop=True)
)

# Ground truth: the observed two-year recidivism outcome.
dataset_true = BinaryLabelDataset(
    favorable_label=0,  # Lower recidivism
    unfavorable_label=1,
    df=compas_bw,
    label_names=["two_year_recid"],
    protected_attribute_names=["race"],
)

# Predictions: identical to the ground-truth dataset except that the labels
# hold the COMPAS prediction. ClassificationMetric requires both datasets to
# describe the same rows and to differ only in their labels/scores.
dataset_pred = dataset_true.copy(deepcopy=True)
dataset_pred.labels = (
    (compas_bw["decile_score"] >= HIGH_RISK_MIN_DECILE)
    .to_numpy(dtype=float)
    .reshape(-1, 1)
)

# Split data by race (Black vs. White); subset() expects row indexes.
black = dataset_true.subset(compas_bw.index[compas_bw["race"] == 0].tolist())
white = dataset_true.subset(compas_bw.index[compas_bw["race"] == 1].tolist())

# Calculate fairness metrics (ground truth vs. COMPAS predictions)
metric = ClassificationMetric(
    dataset_true, dataset_pred,
    unprivileged_groups=[{"race": 0}],  # African-American
    privileged_groups=[{"race": 1}]     # Caucasian
)

print(f"Disparate Impact: {metric.disparate_impact()}")
# AIF360 defines "positive" as the *favorable* label, which here is 0 (did not
# re-offend). A defendant wrongly flagged as high risk -- the "false positive"
# of the recidivism literature -- is therefore a false *negative* in AIF360
# terms, so the false-negative-rate difference is the quantity reported here.
print(f"False Positive Rate Difference: {metric.false_negative_rate_difference()}")
print(f"Statistical Parity Difference: {metric.statistical_parity_difference()}")

# Expected Output
# Disparate Impact < 0.8 (bias against Black defendants)

# Higher false positive rates for Black defendants (positive difference above).

In [None]:
# Step 3: Visualize Disparities
import matplotlib.pyplot as plt
import seaborn as sns

# Plot false positive rates by race.
# Two details matter here:
#   * `privileged` must be passed explicitly for each bar; leaving it out
#     returns the rate over the whole dataset rather than one group.
#   * AIF360 treats the favorable label (0 = did not re-offend) as the
#     "positive" class, so a defendant wrongly predicted to re-offend is
#     counted as a false *negative* by the library. That is the rate the
#     recidivism literature calls the false positive rate, hence the call to
#     false_negative_rate() below.
fp_rates = {
    "Black": metric.false_negative_rate(privileged=False),
    "White": metric.false_negative_rate(privileged=True),
}

fig, ax = plt.subplots()
sns.barplot(x=list(fp_rates.keys()), y=list(fp_rates.values()), ax=ax)
ax.set_title("False Positive Rates by Race")
ax.set_ylabel("False Positive Rate")
plt.show()

# Expected visualization: Black defendants have higher false positives
# (the "Black" bar is taller than the "White" bar).

In [None]:
# Step 4: Mitigate Bias (Optional)
# Candidate techniques: reweighting or rejection option classification.

from aif360.algorithms.preprocessing import Reweighing

# Reweighting is a pre-processing step applied to `dataset` to balance labels
# across the two race groups.
# Group codes: race == 1 is the privileged group, race == 0 the unprivileged.
RW = Reweighing(
    privileged_groups=[{"race": 1}],
    unprivileged_groups=[{"race": 0}],
)

# Fit on the dataset, then transform that same dataset.
dataset_transf = RW.fit(dataset).transform(dataset)
