# Massaging to Improve Bias

### Imports and Creating the Dataframe

In [69]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, accuracy_score, classification_report, recall_score, f1_score
from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions\
        import load_preproc_data_adult

# Import files for Fairness Metrics
from individual_fairness import eval_ind_fairness 

privileged_groups = [{'sex': 1}]
unprivileged_groups = [{'sex': 0}]
dataset_orig = load_preproc_data_adult(['sex'])
np.random.seed(1)

# Convert the Dataset into a Dataframe for easier massaging
df_original = pd.DataFrame(columns=dataset_orig.feature_names, data=dataset_orig.features)
df_original['Income Binary'] = dataset_orig.labels


  df['sex'] = df['sex'].replace({'Female': 0.0, 'Male': 1.0})


### Helper Functions

In [70]:
def eval_performance(y_test, y_pred):
    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Display metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

## Masaging to Remove Bias

In [71]:
def compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute the discrimination score (difference in positive outcome rates).
    privileged = df[df[sensitive_attr] == privileged_value]
    unprivileged = df[df[sensitive_attr] != privileged_value]

    pos_rate_privileged = sum(privileged[class_attr] == positive_class) / len(privileged)
    pos_rate_unprivileged = sum(unprivileged[class_attr] == positive_class) / len(unprivileged)

    return pos_rate_unprivileged - pos_rate_privileged

def compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute number of instances M to relabel.
    disc = compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class)
    
    n_privileged = len(df[df[sensitive_attr] == privileged_value])
    n_unprivileged = len(df[df[sensitive_attr] != privileged_value])
    
    return int(abs(disc) * (n_privileged * n_unprivileged) / len(df))

def rank_instances(df, features, sensitive_attr, class_attr):
    # Train a classifier to rank instances by likelihood of being positive.
    X = df[features]
    y = df[class_attr]

    model = DecisionTreeClassifier()
    model.fit(X, y)
    
    scores = model.predict_proba(X)[:, 1]  # Probability of positive class
    df['score'] = scores
    return df

def apply_massaging(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Perform massaging technique.
    # Step 1: Compute M
    M = compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class)
    print(f"Number of label changes (M): {M}")

    if M == 0:
        print("No massaging needed.")
        return df

    # Step 2: Rank instances
    features = [col for col in df.columns if col not in [sensitive_attr, class_attr]]
    df = rank_instances(df, features, sensitive_attr, class_attr)

    # Step 3: Modify labels
    unprivileged_neg = df[(df[sensitive_attr] != privileged_value) & (df[class_attr] != positive_class)]
    privileged_pos = df[(df[sensitive_attr] == privileged_value) & (df[class_attr] == positive_class)]

    # Promote top M from unprivileged_neg
    df.loc[unprivileged_neg.nlargest(M, 'score').index, class_attr] = positive_class

    # Demote bottom M from privileged_pos
    df.loc[privileged_pos.nsmallest(M, 'score').index, class_attr] = 1 - positive_class

    # Drop the ranking column
    df.drop(columns=['score'], inplace=True)

    return df


#### Baseline Model

In [None]:
scale_orig = StandardScaler()
lmod = LogisticRegression()

x_orig = df_original.drop(columns=['Income Binary'])
y_orig = df_original['Income Binary']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_orig, y_orig, test_size=0.2, random_state=42, stratify=y_orig)

# Scale numerical features for training
x_train = scale_orig.fit_transform(x_train)

# Train the model
lmod.fit(x_train, y_train)

# Make predictions
x_test = scale_orig.fit_transform(x_test)
y_pred = lmod.predict(x_test)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# Evaluate Individual Fairness
#ind_fairness = eval_ind_fairness(x_train, y_train, x_test, y_pred)
#print(f'Individual Fairness For Baseline: {ind_fairness:.4f}')

NameError: name 'df_orig' is not defined

#### Model Performance After Massaging

In [None]:
# Apply massaging technique
df_massaged = apply_massaging(df_original, 'sex', 'Income Binary', privileged_value=1, positive_class=1)

# Repeat training and testing with massaged data
x_massaged = df_massaged.drop(columns=['Income Binary'])
y_massaged = df_massaged['Income Binary']

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(x_massaged, y_massaged, test_size=0.2, random_state=42, stratify=y_massaged)

# Scale numerical features for training
x_train = scale_orig.fit_transform(x_train)

# Train the model
lmod.fit(x_train, y_train)

# Make predictions
x_test = scale_orig.fit_transform(x_test)
y_pred = lmod.predict(x_test)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# Evaluate Individual Fairness
# ind_fairness_massaged = eval_ind_fairness(X_train, y_train, X_test, y_pred)
# print(f'Individual Fairness Ater Massaging: {ind_fairness_massaged:.4f}')

Number of label changes (M): 2105
Accuracy: 0.8634
Precision: 0.7290
Recall: 0.6835
F1 Score: 0.7055

Classification Report:
               precision    recall  f1-score   support

         0.0       0.90      0.92      0.91      7431
         1.0       0.73      0.68      0.71      2338

    accuracy                           0.86      9769
   macro avg       0.82      0.80      0.81      9769
weighted avg       0.86      0.86      0.86      9769

