<a href="https://colab.research.google.com/github/Ben-Najafloo/Fairness-evaluation/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install aif360




In [9]:
import numpy as np
import pandas as pd
from aif360.datasets import StandardDataset
from aif360.metrics import BinaryLabelDatasetMetric
from aif360.metrics import ClassificationMetric
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


def assess_dataset_fairness(data, sensitive_attributes, label_column):
    """
    Evaluate dataset fairness for multiple sensitive attributes.

    No model is involved: for every attribute the labels already present in
    ``data`` are compared between the privileged group (attribute value 1)
    and the unprivileged group (attribute value 0).

    Args:
        data: Table handed to ``StandardDataset`` (via ``preprocess_data``).
        sensitive_attributes: Iterable of column names to evaluate, one at a
            time.
        label_column: Name of the label column; the value 1 is treated as the
            favorable outcome.

    Returns:
        dict mapping each attribute name to a dict with the keys
        ``"Statistical Parity Difference"`` and ``"Disparate Impact"``.
    """
    fairness_results = {}
    for attr in sensitive_attributes:
        print(f"\n=== Dataset Fairness Metrics for Sensitive Attribute: {attr} ===")

        # Reuse the shared helper so the group encoding (1 = privileged,
        # 0 = unprivileged) is defined in exactly one place.
        dataset, privileged_groups, unprivileged_groups = preprocess_data(
            data, attr, label_column
        )

        # Dataset-level metrics only need the labels themselves, so the
        # dataset metric class is used instead of comparing the dataset
        # against itself with ClassificationMetric.
        metric = BinaryLabelDatasetMetric(
            dataset,
            unprivileged_groups=unprivileged_groups,
            privileged_groups=privileged_groups,
        )

        fairness_metrics = {
            "Statistical Parity Difference": metric.statistical_parity_difference(),
            "Disparate Impact": metric.disparate_impact(),
        }

        # Print and store metrics
        for metric_name, value in fairness_metrics.items():
            print(f"{metric_name}: {value:.4f}")
        fairness_results[attr] = fairness_metrics

    return fairness_results


def train_and_evaluate_fairness(data, sensitive_attributes, label_column):
    """
    Train a model and evaluate fairness for multiple sensitive attributes.

    For each attribute the data is split 80/20 (fixed ``random_state=42``),
    a liblinear logistic regression is fitted on the training part, and its
    test-set predictions are compared against the true test labels.

    Args:
        data: Table accepted by ``train_test_split`` and ``StandardDataset``.
        sensitive_attributes: Iterable of column names to evaluate, one at a
            time.
        label_column: Name of the label column; the value 1 is treated as the
            favorable outcome.

    Returns:
        dict mapping each attribute name to a dict with the keys
        ``"Accuracy"``, ``"Equal Opportunity Difference"``,
        ``"Average Odds Difference"`` and ``"Disparate Impact"``.

        NOTE: 0.0 is used as a placeholder both for metrics that evaluate to
        NaN and for every metric of an attribute whose evaluation raised, so
        a 0.0 in the result does not necessarily mean the metric was
        measured as zero. Failures are reported on stdout.
    """
    model_results = {}
    for attr in sensitive_attributes:
        print(f"\n=== Model Fairness Metrics for Sensitive Attribute: {attr} ===")
        try:
            # Split dataset into train and test sets
            train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

            # Preprocess train and test datasets
            dataset_train, privileged_groups, unprivileged_groups = preprocess_data(
                train_data, attr, label_column
            )
            dataset_test, _, _ = preprocess_data(test_data, attr, label_column)

            # Train logistic regression model
            X_train = pd.DataFrame(dataset_train.features, columns=dataset_train.feature_names)
            y_train = dataset_train.labels.ravel()
            X_test = pd.DataFrame(dataset_test.features, columns=dataset_test.feature_names)
            y_test = dataset_test.labels.ravel()
            model = LogisticRegression(solver='liblinear').fit(X_train, y_train)

            # Predict on test set. AIF360 datasets keep labels as an (n, 1)
            # column, so the 1-D prediction vector is reshaped before it is
            # stored on the copy.
            dataset_test_pred = dataset_test.copy()
            dataset_test_pred.labels = model.predict(X_test).reshape(-1, 1)

            # Compute classification fairness metrics
            classification_metric = ClassificationMetric(
                dataset_test, dataset_test_pred,
                unprivileged_groups=unprivileged_groups,
                privileged_groups=privileged_groups
            )

            raw_metrics = {
                "Accuracy": model.score(X_test, y_test),
                "Equal Opportunity Difference": classification_metric.equal_opportunity_difference(),
                "Average Odds Difference": classification_metric.average_odds_difference(),
                "Disparate Impact": classification_metric.disparate_impact(),
            }

            # Replace NaN with 0.0, building a new dict rather than rewriting
            # the one being iterated.
            fairness_metrics = {
                name: 0.0 if isinstance(value, float) and np.isnan(value) else value
                for name, value in raw_metrics.items()
            }

            # Print and store metrics
            for metric_name, value in fairness_metrics.items():
                print(f"{metric_name}: {value:.4f}")
            model_results[attr] = fairness_metrics

        except Exception as e:
            # Best effort: one failing attribute must not stop the others.
            print(f"Error occurred for sensitive attribute '{attr}': {e}")
            model_results[attr] = {
                "Accuracy": 0.0,
                "Equal Opportunity Difference": 0.0,
                "Average Odds Difference": 0.0,
                "Disparate Impact": 0.0,
            }

    return model_results


def preprocess_data(data, sensitive_attribute, label_column):
    """
    Wrap ``data`` in a ``StandardDataset`` and build its group definitions.

    The label value 1 is the favorable class and the sensitive attribute
    value 1 is the privileged class.

    Returns:
        Tuple ``(dataset, privileged_groups, unprivileged_groups)`` where the
        two group entries are single-element lists of ``{attribute: value}``
        dicts (value 1 for privileged, 0 for unprivileged).
    """
    privileged, unprivileged = (
        [{sensitive_attribute: group_value}] for group_value in (1, 0)
    )
    wrapped = StandardDataset(
        data,
        label_name=label_column,
        favorable_classes=[1],
        protected_attribute_names=[sensitive_attribute],
        privileged_classes=[[1]],
    )
    return wrapped, privileged, unprivileged


def main():
    """
    Main function to assess fairness for multiple sensitive attributes.

    Loads the sample data, prints dataset-level fairness metrics for each
    sensitive attribute, then asks on stdin whether the model-level
    evaluation should also be run.
    """
    # Load dataset
    data = load_sample_data()  # Replace with your dataset loading logic
    sensitive_attributes = ["gender", "age"]  # List of sensitive attributes
    label_column = "label"  # Adjust based on your dataset

    # Step 1: Dataset fairness assessment (results are printed by the callee)
    assess_dataset_fairness(data, sensitive_attributes, label_column)

    # Step 2: Model fairness evaluation
    answer = input("\nDo you want to evaluate fairness on the trained model? (yes/no): ")
    # Tolerate surrounding whitespace and the short form "y" so that a
    # near-miss answer is not silently treated as "no".
    if answer.strip().lower() in {"yes", "y"}:
        train_and_evaluate_fairness(data, sensitive_attributes, label_column)


# Replace `load_sample_data()` with actual dataset loading
def load_sample_data():
    """
    Return a tiny hard-coded demo table.

    This is a stand-in for real data loading: five rows with two binary
    sensitive attributes, two features and a binary label. Swap it for your
    own loader.
    """
    sample_columns = {
        "gender": [1, 0, 1, 0, 1],
        "age": [1, 1, 0, 0, 1],
        "feature1": [2.5, 3.6, 1.2, 4.3, 3.1],
        "feature2": [1, 2, 1, 2, 1],
        "label": [1, 0, 1, 0, 1],
    }
    return pd.DataFrame(sample_columns)


# Run the interactive fairness assessment only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()



=== Dataset Fairness Metrics for Sensitive Attribute: gender ===
Statistical Parity Difference: -1.0000
Disparate Impact: 0.0000

=== Dataset Fairness Metrics for Sensitive Attribute: age ===
Statistical Parity Difference: -0.1667
Disparate Impact: 0.7500

Do you want to evaluate fairness on the trained model? (yes/no): yes





=== Model Fairness Metrics for Sensitive Attribute: gender ===
Accuracy: 0.0000
Equal Opportunity Difference: 0.0000
Average Odds Difference: 0.0000
Disparate Impact: 0.0000

=== Model Fairness Metrics for Sensitive Attribute: age ===
Accuracy: 0.0000
Equal Opportunity Difference: 0.0000
Average Odds Difference: 0.0000
Disparate Impact: 0.0000


  TPR=TP / P, TNR=TN / N, FPR=FP / N, FNR=FN / P,
  GTPR=GTP / P, GTNR=GTN / N, GFPR=GFP / N, GFNR=GFN / P,
  return (self.num_pred_positives(privileged=privileged)
  TPR=TP / P, TNR=TN / N, FPR=FP / N, FNR=FN / P,
  GTPR=GTP / P, GTNR=GTN / N, GFPR=GFP / N, GFNR=GFN / P,
  return (self.num_pred_positives(privileged=privileged)


In [None]:
# # Dataset
    # data = pd.DataFrame({
    #     'age': [25, 45, 35, 50, 23, 40, 32, 47, 35, 50, 23, 40, 32, 47, 35, 50, 23, 40, 32, 47],
    #     'income': [50000, 80000, 62000, 72000, 52000, 68000, 59000, 77000, 80000, 62000, 72000, 52000, 80000, 62000, 72000, 52000, 80000, 62000, 72000, 52000],
    #     'gender': [0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1],  # 0: Female, 1: Male
    #     'loan_approved': [1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0]  # 1: Approved, 0: Not Approved
    # })
    # file_path = "dataset.csv"  # Replace with the path to your CSV file
    # data = pd.read_csv(file_path)

    # # Check the dataset structure
    # print(data.head())