In [None]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from fairlearn.metrics import MetricFrame, selection_rate, true_positive_rate, true_negative_rate, false_positive_rate, false_negative_rate
from fairlearn.reductions import ExponentiatedGradient, DemographicParity, EqualizedOdds


In [None]:
# Compatibility shim for fairlearn's ExponentiatedGradient on NumPy >= 2.0.
# NumPy 2.0 removed the `np.PINF` alias for positive infinity. Every module
# that imports numpy shares one module object, so restoring the attribute on
# `np` here makes it visible to any library that still reads `np.PINF`,
# without importing a private fairlearn module path that may move between
# releases. The guard leaves older NumPy versions (which still define the
# alias) untouched.
if not hasattr(np, 'PINF'):
    np.PINF = np.inf

In [None]:
# Function to load and preprocess the data
def load_and_preprocess_data(uploaded_file):
    """Read an uploaded CSV into a DataFrame and echo its column names in the app.

    Parameters
    ----------
    uploaded_file : str, path or file-like
        Passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The parsed data. ``None`` is returned, after an error message has been
        shown with ``st.error``, when the file is empty, cannot be tokenized
        as CSV, or cannot be decoded as text.
    """
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        # EmptyDataError is the "empty" half of the message; ParserError and
        # UnicodeDecodeError are the "malformed" half, which would otherwise
        # surface as an uncaught traceback instead of this message.
        st.error("The uploaded file is empty or malformed.")
        return None

    # Display the columns of the uploaded file for debugging
    st.write("Columns in the uploaded file:", data.columns.tolist())

    return data

In [None]:
# Function to encode categorical columns and retain the mapping for the sensitive feature
def encode_columns(data, sensitive_feature_column):
    """Label-encode non-numeric columns and keep the sensitive feature's code-to-label mapping.

    Parameters
    ----------
    data : pandas.DataFrame
        Input frame; it is copied, never modified.
    sensitive_feature_column : str
        Name of the column whose original category labels must be recoverable.

    Returns
    -------
    tuple
        ``(data_encoded, sensitive_feature_mapping)``. ``data_encoded`` is a
        copy of ``data`` in which every object, string or categorical column is
        replaced by integer codes. The sensitive column is always encoded,
        whatever its dtype. ``sensitive_feature_mapping`` maps each integer
        code of the sensitive column to its original label.
    """
    def _needs_encoding(series):
        # `dtype == object` alone misses pandas' dedicated string dtype and
        # categorical columns, which would then reach the model unencoded.
        dtype = series.dtype
        return (
            pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype)
            or isinstance(dtype, pd.CategoricalDtype)
        )

    data_encoded = data.copy()
    for column in data_encoded.columns:
        if column != sensitive_feature_column and _needs_encoding(data_encoded[column]):
            data_encoded[column] = LabelEncoder().fit_transform(data_encoded[column])

    # Encode the sensitive feature separately and store the mapping
    le = LabelEncoder()
    data_encoded[sensitive_feature_column] = le.fit_transform(data_encoded[sensitive_feature_column])
    # Code i corresponds to le.classes_[i], so the position in classes_ is the code.
    sensitive_feature_mapping = dict(enumerate(le.classes_))

    return data_encoded, sensitive_feature_mapping

In [None]:
# Function to train the model and evaluate bias
def train_and_evaluate_bias(data, target_column, sensitive_feature_column, test_size=0.25, random_state=42):
    """Train a baseline logistic regression and measure per-group fairness on a hold-out split.

    Parameters
    ----------
    data : pandas.DataFrame
        Fully numeric frame containing the target and the sensitive feature.
    target_column : str
        Column to predict. Every other column, including the sensitive
        feature, is used as a model input.
    sensitive_feature_column : str
        Column whose values define the groups compared by the fairness metrics.
    test_size : float, default 0.25
        Share of rows held out for evaluation.
    random_state : int, default 42
        Seed for the train/test split.

    Returns
    -------
    tuple
        Ten items: ``(metric_frame, disparate_impact, bias_percentage, X_train,
        X_test, y_train, y_test, sensitive_train, sensitive_test, model)``.
        ``disparate_impact`` is the lowest group selection rate divided by the
        highest, and ``bias_percentage`` is ``(1 - disparate_impact) * 100``.
        If any step fails, an error is shown in the app and a tuple of ten
        ``None`` values is returned, so callers can always unpack ten items.
    """
    try:
        X = data.drop(target_column, axis=1)
        y = data[target_column]
        sensitive_feature = data[sensitive_feature_column]

        X_train, X_test, y_train, y_test, sensitive_train, sensitive_test = train_test_split(
            X, y, sensitive_feature, test_size=test_size, random_state=random_state
        )

        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Define a dictionary of metrics
        metrics = {
            'selection_rate': selection_rate,
            'true_positive_rate': true_positive_rate,
            'true_negative_rate': true_negative_rate,
            'false_positive_rate': false_positive_rate,
            'false_negative_rate': false_negative_rate
        }

        # Evaluate the metrics
        metric_frame = MetricFrame(metrics=metrics, y_true=y_test, y_pred=y_pred, sensitive_features=sensitive_test)

        sr_by_group = metric_frame.by_group['selection_rate']
        disparate_impact = sr_by_group.min() / sr_by_group.max()
        bias_percentage = (1 - disparate_impact) * 100

        return metric_frame, disparate_impact, bias_percentage, X_train, X_test, y_train, y_test, sensitive_train, sensitive_test, model

    except Exception as e:
        st.error("Please select applicable Target Column and Sensitive Feature Column.")
        # Surface the underlying cause instead of discarding it, so a bad
        # column choice can be told apart from a data problem.
        st.write(f"Details: {type(e).__name__}: {e}")
        # Same arity as the success path.
        return (None,) * 10

In [None]:
# Function to apply fairness constraints and evaluate additional metrics
def apply_fairness_constraints_and_metrics(constraint_option, model, X_train, X_test, y_train, y_test, sensitive_train, sensitive_test, random_state=42):
    """Fit a fairness-constrained model and score it on the hold-out split.

    Parameters
    ----------
    constraint_option : str
        ``"Disparate Impact Mitigation"`` selects ``DemographicParity``; any
        other value selects ``EqualizedOdds``.
    model : estimator
        Base estimator handed to ``ExponentiatedGradient``.
    X_train, y_train, sensitive_train
        Training features, labels and sensitive-feature values.
    X_test, y_test, sensitive_test
        Hold-out features, labels and sensitive-feature values.
    random_state : int, default 42
        Seed passed to the mitigator's ``predict``. Its predictions are
        randomized, so without a seed the reported metrics would change every
        time the app reruns this function.

    Returns
    -------
    tuple
        ``(accuracy_mitigated, metric_frame_mitigated,
        disparate_impact_mitigated, bias_percentage_mitigated, precision,
        recall, f1)``. The disparate impact is the lowest group selection rate
        divided by the highest; the bias percentage is
        ``(1 - disparate_impact_mitigated) * 100``.
    """
    if constraint_option == "Disparate Impact Mitigation":
        constraint = DemographicParity()
    else:
        constraint = EqualizedOdds()

    mitigator = ExponentiatedGradient(model, constraint)
    mitigator.fit(X_train, y_train, sensitive_features=sensitive_train)

    # Seeded so repeated runs on the same data report the same numbers.
    y_pred_mitigated = mitigator.predict(X_test, random_state=random_state)
    accuracy_mitigated = accuracy_score(y_test, y_pred_mitigated)

    # Calculate additional metrics
    precision = precision_score(y_test, y_pred_mitigated)
    recall = recall_score(y_test, y_pred_mitigated)
    f1 = f1_score(y_test, y_pred_mitigated)

    # Define a dictionary of metrics for the mitigated model
    metrics_mitigated = {
        'selection_rate': selection_rate,
        'true_positive_rate': true_positive_rate,
        'true_negative_rate': true_negative_rate,
        'false_positive_rate': false_positive_rate,
        'false_negative_rate': false_negative_rate
    }

    metric_frame_mitigated = MetricFrame(metrics=metrics_mitigated, y_true=y_test, y_pred=y_pred_mitigated, sensitive_features=sensitive_test)

    sr_by_group_mitigated = metric_frame_mitigated.by_group['selection_rate']
    disparate_impact_mitigated = sr_by_group_mitigated.min() / sr_by_group_mitigated.max()
    bias_percentage_mitigated = (1 - disparate_impact_mitigated) * 100

    return accuracy_mitigated, metric_frame_mitigated, disparate_impact_mitigated, bias_percentage_mitigated, precision, recall, f1


In [None]:
# Function to display the results with original category names
def display_results(disparate_impact, bias_percentage, metric_frame, sensitive_feature_mapping, accuracy_mitigated=None, metric_frame_mitigated=None, disparate_impact_mitigated=None, bias_percentage_mitigated=None, precision=None, recall=None, f1=None):
    """Write the baseline fairness report and, when supplied, the mitigated-model report.

    The per-group tables have their index renamed through
    ``sensitive_feature_mapping`` so groups appear under their original labels.
    The mitigated section is written only when both ``accuracy_mitigated`` and
    ``metric_frame_mitigated`` are given. Returns ``None``.
    """
    def _write_frame(frame, overall_heading, group_heading):
        # Overall row first, then the per-group table with readable labels.
        st.write(overall_heading)
        st.write(frame.overall)
        st.write(group_heading)
        labelled = frame.by_group.rename(index=sensitive_feature_mapping)
        st.write(labelled)

    # Baseline model
    st.write(f"Disparate Impact: {disparate_impact:.2f}")
    st.write(f"Bias Percentage: {bias_percentage:.2f}%")
    _write_frame(metric_frame, "Overall metrics:", "Metrics by group:")

    # Nothing more to show unless a mitigated model was evaluated.
    if accuracy_mitigated is None or metric_frame_mitigated is None:
        return

    # Mitigated model
    st.write(f"Mitigated Model accuracy: {accuracy_mitigated:.2f}")
    _write_frame(
        metric_frame_mitigated,
        "Overall metrics for mitigated model:",
        "Metrics by group for mitigated model:",
    )
    st.write(f"Mitigated Disparate Impact: {disparate_impact_mitigated:.2f}")
    st.write(f"Mitigated Bias Percentage: {bias_percentage_mitigated:.2f}%")

    # Classification quality of the mitigated model
    st.write("### Additional Metrics for Mitigated Model")
    st.write(f"Precision: {precision:.2f}")
    st.write(f"Recall: {recall:.2f}")
    st.write(f"F1 Score: {f1:.2f}")


In [None]:
# Main Streamlit app function
def main():
    """Build the Streamlit page: upload a CSV, pick columns, report bias, optionally mitigate.

    Flow:
    1. Read the uploaded CSV and preview its first rows.
    2. Let the user choose the target and the sensitive feature column.
    3. Encode the data, train the baseline model and display its fairness metrics.
    4. On request, fit a fairness-constrained model and display both reports.

    Each step returns early when its prerequisite is missing (no file yet,
    unreadable file, identical column choice, failed baseline training).
    """
    st.title("Flexible Fairness Analysis Tool")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    # Load and preprocess the data
    data = load_and_preprocess_data(uploaded_file)
    if data is None:
        return

    st.write("### Dataset Overview")
    st.dataframe(data.head())

    # Let the user select the target and sensitive feature columns
    st.write("## Select Columns for Analysis")
    target_column = st.selectbox("Select the Target Column (e.g., income, credit_risk)", options=data.columns)
    sensitive_feature_column = st.selectbox("Select the Sensitive Feature Column (e.g., gender, race)", options=data.columns)

    # Both select boxes start on the first column. Using one column as both the
    # label and the grouping variable is not a meaningful analysis, so stop
    # with a clear message before encoding or training anything.
    if target_column == sensitive_feature_column:
        st.warning("Please choose different columns for the Target and the Sensitive Feature.")
        return

    # Encode the data and retain the mapping for the sensitive feature
    data_encoded, sensitive_feature_mapping = encode_columns(data, sensitive_feature_column)

    # Train and evaluate bias
    result = train_and_evaluate_bias(data_encoded, target_column=target_column, sensitive_feature_column=sensitive_feature_column)
    if result[0] is None:
        # The training helper has already shown an error in the app.
        return
    metric_frame, disparate_impact, bias_percentage, X_train, X_test, y_train, y_test, sensitive_train, sensitive_test, model = result

    # Display initial results with original category names
    display_results(disparate_impact, bias_percentage, metric_frame, sensitive_feature_mapping)

    # User selects a fairness constraint method
    st.write("## Select Fairness Constraint for Further Analysis")
    constraint_option = st.selectbox("Select Fairness Constraint", ["Disparate Impact Mitigation", "Equalized Odds"])

    proceed = st.button("Proceed with Fairness Analysis")
    if not proceed:
        return

    st.write("## Fairness-Constrained Model Analysis")

    # Apply the selected fairness constraint and evaluate additional metrics
    accuracy_mitigated, metric_frame_mitigated, disparate_impact_mitigated, bias_percentage_mitigated, precision, recall, f1 = apply_fairness_constraints_and_metrics(constraint_option, model, X_train, X_test, y_train, y_test, sensitive_train, sensitive_test)

    # Display fairness-constrained results with original category names and additional metrics
    display_results(disparate_impact, bias_percentage, metric_frame, sensitive_feature_mapping, accuracy_mitigated, metric_frame_mitigated, disparate_impact_mitigated, bias_percentage_mitigated, precision, recall, f1)

# Script entry point: build the app only when this file is executed as the
# main module, not when it is imported.
if __name__ == "__main__":
    main()