# Massaging to Improve Bias

### Imports and Creating the Dataframe

In [None]:
import pandas as pd
import os
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import precision_score, accuracy_score, classification_report, recall_score, f1_score

# Import files for Fairness Metrics
from individual_fairness import eval_ind_fairness 

# Load the CSV file into a DataFrame
file_path = os.path.join("..", "data", "income", "adult.data") # Replace with your actual file path
df = pd.read_csv(file_path)

# Define column names
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 
                'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 
                'hours-per-week', 'native-country', 'income']

# Assign column names to the DataFrame
df.columns = column_names

# Divide the features into numerical and non-numerical lists
# Extract numerical and string features
num_features = df.select_dtypes(include=['number']).columns.tolist()
cat_features = df.select_dtypes(include=['object', 'string']).columns.tolist()

print(f"Numerical Features: {num_features}")
print(f"Categorical Features: {cat_features}")

Numerical Features: ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']
Categorical Features: ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country', 'income']


### Encoding Categorical Features

In [None]:
# Convert income column to binary, flag errors
def convert_income(value):
    value = str(value).strip()
    if value == '>50K':
        return 1
    elif value == '<=50K':
        return 0
    else:
        return np.nan  # Flag invalid values as NaN (or set a custom error flag)

df['income'] = df['income'].apply(convert_income)

# Encode the sex based on privileged/underprivileged
def encode_sex(value):
    value = str(value).strip()
    if value == "Male":
        return 1
    else:
        return 0  

df['sex'] = df['sex'].apply(encode_sex)

# Identify and display rows with errors
error_rows = df[df['income'].isna()]
if not error_rows.empty:
    print("Invalid income values found in ", error_rows.size, "rows: ")
    print(error_rows)

cat_features.remove('sex')
cat_features.remove('income')

# Initialize OneHotEncoder for remaining categorical features
encoder = OneHotEncoder(sparse_output=False, drop='first')  # drop='first' for avoiding multicollinearity

# Fit and transform
encoded_array = encoder.fit_transform(df[cat_features])

# Convert to DataFrame
encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(cat_features))

# Concatenate with original DataFrame (excluding original categorical columns)
df = pd.concat([df.drop(columns=cat_features), encoded_df], axis=1)


### Helper Functions

In [3]:
def eval_performance(y_test, y_pred):
    # Evaluate performance
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    # Display metrics
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1 Score: {f1:.4f}')
    print("\nClassification Report:\n", classification_report(y_test, y_pred))

## Masaging to Remove Bias

In [4]:
def compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute the discrimination score (difference in positive outcome rates).
    privileged = df[df[sensitive_attr] == privileged_value]
    unprivileged = df[df[sensitive_attr] != privileged_value]

    pos_rate_privileged = sum(privileged[class_attr] == positive_class) / len(privileged)
    pos_rate_unprivileged = sum(unprivileged[class_attr] == positive_class) / len(unprivileged)

    return pos_rate_unprivileged - pos_rate_privileged

def compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Compute number of instances M to relabel.
    disc = compute_discrimination(df, sensitive_attr, class_attr, privileged_value, positive_class)
    
    n_privileged = len(df[df[sensitive_attr] == privileged_value])
    n_unprivileged = len(df[df[sensitive_attr] != privileged_value])
    
    return int(abs(disc) * (n_privileged * n_unprivileged) / len(df))

def rank_instances(df, features, sensitive_attr, class_attr):
    # Train a classifier to rank instances by likelihood of being positive.
    X = df[features]
    y = df[class_attr]

    model = DecisionTreeClassifier()
    model.fit(X, y)
    
    scores = model.predict_proba(X)[:, 1]  # Probability of positive class
    df['score'] = scores
    return df

def apply_massaging(df, sensitive_attr, class_attr, privileged_value, positive_class):
    # Perform massaging technique.
    # Step 1: Compute M
    M = compute_m(df, sensitive_attr, class_attr, privileged_value, positive_class)
    print(f"Number of label changes (M): {M}")

    if M == 0:
        print("No massaging needed.")
        return df

    # Step 2: Rank instances
    features = [col for col in df.columns if col not in [sensitive_attr, class_attr]]
    df = rank_instances(df, features, sensitive_attr, class_attr)

    # Step 3: Modify labels
    unprivileged_neg = df[(df[sensitive_attr] != privileged_value) & (df[class_attr] != positive_class)]
    privileged_pos = df[(df[sensitive_attr] == privileged_value) & (df[class_attr] == positive_class)]

    # Promote top M from unprivileged_neg
    df.loc[unprivileged_neg.nlargest(M, 'score').index, class_attr] = positive_class

    # Demote bottom M from privileged_pos
    df.loc[privileged_pos.nsmallest(M, 'score').index, class_attr] = 1 - positive_class

    # Drop the ranking column
    df.drop(columns=['score'], inplace=True)

    return df


#### Baseline Model

In [5]:
X = df.drop(columns=['income'])
y = df['income']

# Create a pipeline with logistic regression
model = Pipeline([
    ('preprocessor', StandardScaler()),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# Evaluate Individual Fairness
ind_fairness = eval_ind_fairness(X_train, y_train, X_test, y_pred)
print(f'Individual Fairness For Baseline: {ind_fairness:.4f}')

Accuracy: 0.8618
Precision: 0.7634
Recall: 0.6173
F1 Score: 0.6827

Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.94      0.91      4944
           1       0.76      0.62      0.68      1568

    accuracy                           0.86      6512
   macro avg       0.82      0.78      0.80      6512
weighted avg       0.86      0.86      0.86      6512

Individual Fairness For Baseline: 0.8263


#### Model Performance After Massaging

In [6]:
# Apply massaging technique
df_massaged = apply_massaging(df, 'sex', 'income', privileged_value=1, positive_class=1)

X_massaged = df_massaged.drop(columns=['income'])
y_massaged = df_massaged['income']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_massaged, y_massaged, test_size=0.2, random_state=42, stratify=y)

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate performance of baseline model
eval_performance(y_test, y_pred)

# Evaluate Individual Fairness
ind_fairness_massaged = eval_ind_fairness(X_train, y_train, X_test, y_pred)
print(f'Individual Fairness Ater Massaging: {ind_fairness_massaged:.4f}')

Number of label changes (M): 1414
Accuracy: 0.8019
Precision: 0.6695
Recall: 0.3501
F1 Score: 0.4598

Classification Report:
               precision    recall  f1-score   support

           0       0.82      0.95      0.88      4944
           1       0.67      0.35      0.46      1568

    accuracy                           0.80      6512
   macro avg       0.75      0.65      0.67      6512
weighted avg       0.78      0.80      0.78      6512

Individual Fairness Ater Massaging: 0.7974
