# Massaging to Improve Bias

### Imports and Creating the Dataframe

In [5]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import precision_score, accuracy_score, classification_report, recall_score, f1_score
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult

# Import files for Fairness Metrics
from individual_fairness import eval_ind_fairness 
from disparate_impact import eval_disparate_impact
from counterfactual_fairness import evaluate_counterfactual_fairness_sex
from equality_fairness import eval_equality
from group_fairness import eval_group_fairness

privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
dataset_orig = load_preproc_data_adult(['sex'])
np.random.seed(42)

# Convert the Dataset into a Dataframe for easier massaging
df_original = pd.DataFrame(columns=dataset_orig.feature_names, data=dataset_orig.features)
df_original['Income Binary'] = dataset_orig.labels


  df['sex'] = df['sex'].replace({'Female': 0.0, 'Male': 1.0})


### Helper Functions

In [6]:
def eval_performance(y_test, y_pred):
    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Display metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print("\nClassification Report:\n\n", classification_report(y_test, y_pred))

## Masaging to Remove Bias

In [7]:
def compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute the discrimination score (difference in positive outcome rates).
    privileged = df[df[sensitive_attr] == privileged_value]
    unprivileged = df[df[sensitive_attr] != privileged_value]

    pos_rate_privileged = sum(privileged[class_attr] == positive_class) / len(privileged)
    pos_rate_unprivileged = sum(unprivileged[class_attr] == positive_class) / len(unprivileged)

    return pos_rate_privileged - pos_rate_unprivileged

def compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute number of instances M to relabel.
    disc = compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class)
    
    n_privileged = len(df[df[sensitive_attr] == privileged_value])
    n_unprivileged = len(df[df[sensitive_attr] != privileged_value])
    
    return int(disc * (n_privileged * n_unprivileged) / len(df))

def rank_instances(df, features, sensitive_attr, class_attr):
    # Train a classifier to rank instances by likelihood of being positive.
    X = df[features]
    y = df[class_attr]

    model = DecisionTreeClassifier()
    model.fit(X, y)
    
    scores = model.predict_proba(X)[:, 1]  # Probability of positive class
    df['score'] = scores
    return df

def apply_massaging(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Perform massaging technique.
    # Step 1: Compute M
    M = compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class)
    print(f"Number of label changes (M): {M}")

    if M == 0:
        print("No massaging needed.")
        return df

    # Step 2: Rank instances
    features = [col for col in df.columns if col not in [sensitive_attr, class_attr]]
    df = rank_instances(df, features, sensitive_attr, class_attr)

    # Step 3: Modify labels
    unprivileged_neg = df[(df[sensitive_attr] != privileged_value) & (df[class_attr] != positive_class)]
    privileged_pos = df[(df[sensitive_attr] == privileged_value) & (df[class_attr] == positive_class)]

    # Promote top M from unprivileged_neg
    df.loc[unprivileged_neg.nlargest(M, 'score').index, class_attr] = positive_class

    # Demote bottom M from privileged_pos
    df.loc[privileged_pos.nsmallest(M, 'score').index, class_attr] = 1 - positive_class

    # Drop the ranking column
    df.drop(columns=['score'], inplace=True)

    return df


#### Baseline Model

In [8]:
scale_orig = StandardScaler()
lmod = LogisticRegression()

x_orig = df_original.drop(columns=['Income Binary'])
y_orig = df_original['Income Binary']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_orig, y_orig, test_size=0.2, random_state=42, stratify=y_orig)

# Scale numerical features for training
x_train = scale_orig.fit_transform(x_train)

# Train the model
lmod.fit(x_train, y_train)

# Make predictions
x_test_scaled = scale_orig.fit_transform(x_test)
y_pred = lmod.predict(x_test_scaled)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# ---- Fairness Assessment ----

print("Fairness Metrics:\n")

# Evaluate Individual Fairness
ind_fairness_orig = eval_ind_fairness(x_train, y_train, x_test_scaled, y_pred)
print(f'1. Individual Fairness For Baseline: {ind_fairness_orig:.4f}')

# Evaluate Disparate Impact
disparate_impact_orig = eval_disparate_impact(x_test['sex'], y_pred)
print(f'2. Disparate Impact Score for Baseline: {disparate_impact_orig:.4f}')

# Evaluate Counterfactual Fairness
counterfactual_fairness_orig = evaluate_counterfactual_fairness_sex(lmod, x_test)
fairness_metric_sex = counterfactual_fairness_orig['same_decision'].mean()
print(f'3. Counterfactual Fairness Score For Baseline: {fairness_metric_sex:.4f}')

# Evaluate Equality of Opportunity and Equality of Odds
tpr_diff_orig, eod_orig = eval_equality(x_test_scaled, y_pred, sensitive_attribute_index = 1, y_test=y_test) # Column 1 holds the 'sex' attribute in x_test_scaled dataframe
print(f"4. Equality of Opportunity (EO) for Baseline: {tpr_diff_orig:.4f}")
print(f"5. Equality of Odds (EOd) for Baseline: {eod_orig:.4f}")

# Evaluate Group-Level Fairness Metrics
group_fairness_orig = eval_group_fairness(x_test, target='Income Binary', protected_attr='sex', mode='model', y_pred=y_pred)
print("6. Group-Level Fairness Metrics After Massaging:")
for metric, value in group_fairness_orig.items():
    print(f"\t{metric}: {value:.4f}")

Accuracy: 0.8043
Precision: 0.6587
Recall: 0.3781
F1 Score: 0.4804

Classification Report:

               precision    recall  f1-score   support

         0.0       0.83      0.94      0.88      7431
         1.0       0.66      0.38      0.48      2338

    accuracy                           0.80      9769
   macro avg       0.74      0.66      0.68      9769
weighted avg       0.79      0.80      0.78      9769

Fairness Metrics:

1. Individual Fairness For Baseline: 0.7540
2. Disparate Impact Score for Baseline: 0.0000
3. Counterfactual Fairness Score For Baseline: 0.8063
4. Equality of Opportunity (EO) for Baseline: 0.4471
5. Equality of Odds (EOd) for Baseline: 0.5482
6. Group-Level Fairness Metrics After Massaging:
	Statistical Parity Difference: -0.2061
	Disparate Impact: 0.0000
	Demographic Parity: -0.2061


#### Model Performance After Massaging

In [9]:
# Apply massaging technique
df_massaged = apply_massaging(df_original, 'sex', 'Income Binary', privileged_value=1, positive_class=1)

# Repeat training and testing with massaged data
x_massaged = df_massaged.drop(columns=['Income Binary'])
y_massaged = df_massaged['Income Binary']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_massaged, y_massaged, test_size=0.2, random_state=42, stratify=y_massaged)

# Scale numerical features for training
x_train = scale_orig.fit_transform(x_train)

# Train the model
lmod.fit(x_train, y_train)

# Make predictions
x_test_scaled = scale_orig.fit_transform(x_test)
y_pred = lmod.predict(x_test_scaled)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# ---- Fairness Assessment ----

print("Fairness Metrics:\n")

# Evaluate Individual Fairness
ind_fairness_massaged = eval_ind_fairness(x_train, y_train, x_test_scaled, y_pred)
print(f'1. Individual Fairness Ater Massaging: {ind_fairness_massaged:.4f}')

# Evaluate Disparate Impact
disparate_impact_massaged = eval_disparate_impact(x_test['sex'], y_pred) # x_test[:,1] is the column for the 'sex' attribute
print(f'2. Disparate Impact Score After Massaging: {disparate_impact_massaged:.4f}')

# Evaluate Counterfactual Fairness
counterfactual_fairness_massaged = evaluate_counterfactual_fairness_sex(lmod, x_test)
fairness_metric_sex = counterfactual_fairness_massaged['same_decision'].mean()
print(f'3. Counterfactual Fairness Score After Massaging: {fairness_metric_sex:.4f}')

# Evaluate Equality of Opportunity and Equality of Odds
tpr_diff_massaged, eod_orig_massaged = eval_equality(x_test_scaled, y_pred, sensitive_attribute_index = 1, y_test=y_test) # Column 1 holds the 'sex' attribute in x_test_scaled dataframe
print(f"4. Equality of Opportunity (EO) After Massaging: {tpr_diff_massaged:.4f}")
print(f"5. Equality of Odds (EOd) After Massaging: {eod_orig_massaged:.4f}")

# Evaluate Group-Level Fairness Metrics
group_fairness_massaged = eval_group_fairness(x_test, target='Income Binary', protected_attr='sex', mode='model', y_pred=y_pred)
print("6. Group-Level Fairness Metrics After Massaging:")
for metric, value in group_fairness_massaged.items():
    print(f"\t{metric}: {value:.4f}")

Number of label changes (M): 2105


Accuracy: 0.8634
Precision: 0.7290
Recall: 0.6835
F1 Score: 0.7055

Classification Report:

               precision    recall  f1-score   support

         0.0       0.90      0.92      0.91      7431
         1.0       0.73      0.68      0.71      2338

    accuracy                           0.86      9769
   macro avg       0.82      0.80      0.81      9769
weighted avg       0.86      0.86      0.86      9769

Fairness Metrics:

1. Individual Fairness Ater Massaging: 0.8422
2. Disparate Impact Score After Massaging: 0.8236
3. Counterfactual Fairness Score After Massaging: 0.9818
4. Equality of Opportunity (EO) After Massaging: 0.1581
5. Equality of Odds (EOd) After Massaging: 0.2686
6. Group-Level Fairness Metrics After Massaging:
	Statistical Parity Difference: -0.0421
	Disparate Impact: 0.8236
	Demographic Parity: -0.0421


## Comments

This version of the massaging technique uses the same preprocessing as adult_reweighing.ipynb (from AIF360) for the baseline model. The baseline model's performance is nearly identical to that found in adult_reweighing.ipynb. 

## Results

Massaging the data improved the model's predictive performance. This could be because the baseline model relies to heavily on the "sex" attribute. The model's individual fairness increased significantly after massaging. The disparate impact and counterfactual fairness of the model increased with massaging. The Equality of Opportuniy and Equality of Odds are lowered after massaging, indicating the model is fair in predicting both positive and negative outcomes for both sexes. For group-level fairness metrics, the statistical parity difference and demographic parity has decreased. 