### Bias & Fairness in Data: Bias Mitigation Techniques
**Question**: Use the Adult Income dataset and apply reweighing technique to balance the
class weights based on sensitive attributes (e.g., gender).

In [5]:
# write your code from here
# Import libraries
import pandas as pd
from aif360.datasets import AdultDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.metrics import BinaryLabelDatasetMetric

# Step 1: Load the Adult dataset
dataset = AdultDataset(protected_attribute_names=['sex'],  # 'sex' = gender
                       privileged_classes=[['Male']],
                       features_to_drop=[])

# Step 2: Split dataset into train and test (optional but common)
dataset_train, dataset_test = dataset.split([0.7], shuffle=True)

# Step 3: Compute fairness metrics before reweighing
metric_orig_train = BinaryLabelDatasetMetric(dataset_train,
                                             unprivileged_groups=[{'sex': 0}],
                                             privileged_groups=[{'sex': 1}])
print("### Before Reweighing ###")
print("Difference in mean outcomes between unprivileged and privileged groups:",
      metric_orig_train.mean_difference())

# Step 4: Apply Reweighing Technique
RW = Reweighing(unprivileged_groups=[{'sex': 0}],
                privileged_groups=[{'sex': 1}])
dataset_transf_train = RW.fit_transform(dataset_train)

# Step 5: Verify new weights
print("\n### Sample Weights After Reweighing ###")
print(dataset_transf_train.instance_weights[:10])

# Step 6: Fairness metrics after reweighing
metric_transf_train = BinaryLabelDatasetMetric(dataset_transf_train,
                                               unprivileged_groups=[{'sex': 0}],
                                               privileged_groups=[{'sex': 1}])
print("\n### After Reweighing ###")
print("Difference in mean outcomes between unprivileged and privileged groups:",
      metric_transf_train.mean_difference())


IOError: [Errno 2] No such file or directory: '/home/vscode/.local/lib/python3.10/site-packages/aif360/datasets/../data/raw/adult/adult.data'
To use this class, please download the following files:

	https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data
	https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.test
	https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names

and place them, as-is, in the folder:

	/home/vscode/.local/lib/python3.10/site-packages/aif360/data/raw/adult



SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from aif360.metrics import BinaryLabelDatasetMetric

# Step 1: Download the dataset from UCI
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
                'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
                'hours-per-week', 'native-country', 'income']

df = pd.read_csv(url, names=column_names, sep=',\s*', engine='python')

# Step 2: Preprocess
df = df.dropna()
df = df[df['income'].isin(['<=50K', '>50K'])]  # remove unknowns

# Encode categorical features
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    df[col] = LabelEncoder().fit_transform(df[col])

# Step 3: Convert to AIF360 BinaryLabelDataset
protected_attribute = 'sex'  # gender
label_name = 'income'
privileged_groups = [{'sex': 1}]   # 1 = Male after encoding
unprivileged_groups = [{'sex': 0}] # 0 = Female

dataset = BinaryLabelDataset(df=df,
                             label_names=[label_name],
                             protected_attribute_names=[protected_attribute])

# Step 4: Apply Reweighing
RW = Reweighing(unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups)
dataset_transf = RW.fit_transform(dataset)

# Step 5: Fairness metrics
metric_orig = BinaryLabelDatasetMetric(dataset, 
                                       unprivileged_groups=unprivileged_groups,
                                       privileged_groups=privileged_groups)

metric_transf = BinaryLabelDatasetMetric(dataset_transf, 
                                         unprivileged_groups=unprivileged_groups,
                                         privileged_groups=privileged_groups)

print("Mean difference before reweighing:", metric_orig.mean_difference())
print("Mean difference after reweighing:", metric_transf.mean_difference())


Mean difference before reweighing: -0.19627598779361352
Mean difference after reweighing: 2.7755575615628914e-17
