### Bias & Fairness in Data: Bias Mitigation Techniques
**Question**: Use the Adult Income dataset and apply reweighing technique to balance the
class weights based on sensitive attributes (e.g., gender).

In [1]:
# write your code from here
!pip install pandas seaborn aif360 scikit-learn numpy


Defaulting to user installation because normal site-packages is not writeable
Collecting aif360
  Downloading aif360-0.6.1-py3-none-any.whl (259 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m259.7/259.7 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Installing collected packages: aif360
Successfully installed aif360-0.6.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
import pandas as pd
import numpy as np
from aif360.datasets import BinaryLabelDataset
from aif360.algorithms.preprocessing import Reweighing
from sklearn.preprocessing import LabelEncoder

# Step 1: Load and clean Adult dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"
columns = [
    "age", "workclass", "fnlwgt", "education", "education-num", "marital-status",
    "occupation", "relationship", "race", "sex", "capital-gain", "capital-loss",
    "hours-per-week", "native-country", "income"
]
df = pd.read_csv(url, names=columns, na_values=" ?", skipinitialspace=True)
df.dropna(inplace=True)

# Step 2: Encode categorical columns
df['sex'] = LabelEncoder().fit_transform(df['sex'])  # 0: Female, 1: Male
df['income'] = LabelEncoder().fit_transform(df['income'])  # 0: <=50K, 1: >50K

# Step 3: Convert to AIF360 BinaryLabelDataset
protected_attribute = 'sex'  # Gender
privileged_groups = [{'sex': 1}]  # Male
unprivileged_groups = [{'sex': 0}]  # Female

dataset = BinaryLabelDataset(
    favorable_label=1,
    unfavorable_label=0,
    df=df,
    label_names=['income'],
    protected_attribute_names=['sex']
)

# Step 4: Apply reweighing
reweigher = Reweighing(
    privileged_groups=privileged_groups,
    unprivileged_groups=unprivileged_groups
)
dataset_transf = reweigher.fit_transform(dataset)

# Step 5: Show effect of weights
df['instance_weight'] = dataset_transf.instance_weights

print("\n📊 Mean instance weights by gender:")
print(df.groupby('sex')['instance_weight'].mean())

print("\n🔍 Sample of reweighed dataset:")
print(df[['sex', 'income', 'instance_weight']].sample(5))


pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[AdversarialDebiasing]'
pip install 'aif360[Reductions]'
pip install 'aif360[Reductions]'
pip install 'aif360[inFairness]'
pip install 'aif360[Reductions]'


ValueError: could not convert string to float: 'State-gov'


ValueError: DataFrame values must be numerical.