<a href="https://colab.research.google.com/github/Ava-00/Causal-Inference-and-Algorithmic-Fairness/blob/main/Causal_Model_Simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Simulate model data under the following assumption: 1. Men (Gender = 1) receive a 2-point test-score advantage over women when it comes to finding jobs.
# Initialize the gender factor and generate the data.
import numpy as np
import pandas as pd
from scipy.special import expit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
def generate_data(n, seed=None):
    """Simulate ``n`` applicants with a binary gender and a bounded test score.

    Gender is drawn as Bernoulli(0.5). The test score is ``100 * U(0, 1)`` plus a
    2-point bonus when ``Gender == 1``, clipped to the interval [0, 100].

    Parameters
    ----------
    n : int
        Number of rows to simulate.
    seed : int or None, optional
        When given, a private ``np.random.RandomState(seed)`` is used so the
        returned frame is reproducible and NumPy's global random state is left
        untouched. When ``None`` (the default) the global ``np.random`` state is
        used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Gender'`` (0/1 integers) and ``'Test_Score'`` (floats in [0, 100]).
    """
    # Both the np.random module and a RandomState instance expose binomial()/rand(),
    # so the sampling code below is identical for the seeded and unseeded paths.
    rng = np.random if seed is None else np.random.RandomState(seed)

    gender = rng.binomial(1, 0.5, n)
    raw_scores = 2 * gender + 100 * rng.rand(n)
    # The gender bonus can push a score above 100; clip back into the valid range.
    test_scores = np.clip(raw_scores, 0, 100)

    return pd.DataFrame({
        'Gender': gender,
        'Test_Score': test_scores,
    })
# Seed NumPy's global generator so the simulated applicants, the sampled outcomes
# and every metric printed below are reproducible on Restart & Run All
# (train_test_split is already pinned with random_state=42).
np.random.seed(42)
X = generate_data(10000)

# Ground-truth coefficients of the data-generating logistic model.
beta_0 = -1.0
beta_gender = 1.0
beta_test_score = 2.0  # multiplies Test_Score / 100, i.e. 0.02 per raw score point

# True outcome probability for every simulated applicant, then one Bernoulli draw each.
logits = beta_0 + beta_gender * X['Gender'] + beta_test_score * X['Test_Score'] / 100.0
probabilities = expit(logits)  # logistic sigmoid, 1 / (1 + exp(-logits))
admission = np.random.binomial(1, probabilities)
y = admission

# Fit a logistic regression on 80% of the data and evaluate on the held-out 20%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)  # threshold = 0.5
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

# Recovered parameters. The Test_Score coefficient is expressed per raw score point,
# so it should be compared with beta_test_score / 100 (0.02), not with 2.0.
fitted_beta_0 = model.intercept_[0]
fitted_beta_gender, fitted_beta_test_score = model.coef_[0]

print(f"Fitted Intercept (beta_0): {fitted_beta_0}")
print(f"Fitted Coefficient for Gender (beta_gender): {fitted_beta_gender}")
print(f"Fitted Coefficient for Test_Score (beta_test_score): {fitted_beta_test_score}")

Model Accuracy: 0.6685
Fitted Intercept (beta_0): -0.9016708377878011
Fitted Coefficient for Gender (beta_gender): 1.0102693376086902
Fitted Coefficient for Test_Score (beta_test_score): 0.018729908823899157


In [None]:
from scipy.integrate import quad
def gender_probabilities(s):
    """Return the marginal probability assigned to gender value ``s``.

    The same mass, 0.5, is returned for every ``s``; the argument is accepted
    only so the function can be used as a weight inside an integrand or sum.
    """
    uniform_mass = 0.5
    return uniform_mass
#Defining equal opportunity model
def f_eo(testscore_new, model):
    """Equal-opportunity score: the model's acceptance probability with gender marginalised out.

    Computes ``sum_s P(y = 1 | Gender = s, Test_Score = testscore_new) * P(Gender = s)``
    over the two gender values, using ``gender_probabilities`` for the weights.

    The earlier version evaluated the model at ``Gender = 0`` only and integrated
    the constant ``probability * 0.5`` over [0, 1]. That always returned half of
    the Gender-0 probability, so the result could never exceed 0.5 and every
    thresholded prediction was 0.

    Parameters
    ----------
    testscore_new : float
        Test score of the applicant being evaluated.
    model : fitted classifier
        Must expose ``predict_proba`` and accept the columns ``'Gender'`` and
        ``'Test_Score'``.

    Returns
    -------
    float
        Gender-averaged probability of the positive class.
    """
    gender_values = [0, 1]

    # One row per gender value with the same test score, scored in a single call.
    candidates = pd.DataFrame({
        'Gender': gender_values,
        'Test_Score': [testscore_new] * len(gender_values),
    })
    positive_probs = model.predict_proba(candidates)[:, 1]

    # Gender is binary, so the marginalisation is a weighted sum rather than an integral.
    result = sum(
        prob * gender_probabilities(s)
        for s, prob in zip(gender_values, positive_probs)
    )
    return float(result)

# Score every held-out applicant with the equal-opportunity adjusted model.
adjusted_probabilities = np.array(
    [f_eo(score, model) for score in X_test["Test_Score"].to_numpy()]
)

# Apply the 0.5 decision threshold and compare against the held-out labels.
y_pred_new = np.where(adjusted_probabilities > 0.5, 1, 0)
accuracy_new = accuracy_score(y_test, y_pred_new)
print(f"Model Accuracy with EO Adjustment: {accuracy_new}")
y_pred_new

Model Accuracy with EO Adjustment: 0.383


array([0, 0, 0, ..., 0, 0, 0])

In [None]:
#Affirmative Action Abduction Step and Computation

from scipy.integrate import dblquad
from scipy.integrate import romberg
from sklearn.linear_model import LinearRegression

sensitive_attr = 'Gender'
non_sensitive_attr = 'Test_Score'

X_sensitive = X_test[[sensitive_attr]]
X_non_sensitive = X_test[[non_sensitive_attr]]

# Abduction step: regress the test score on gender so that the residual is the
# part of the score that gender does not explain linearly.
# NOTE(review): the regressor is fitted on the held-out split only - confirm
# that this is intended rather than fitting on X_train.
regressor = LinearRegression()
regressor.fit(X_sensitive, X_non_sensitive)

# Residual scores. Build a new frame with `assign` instead of writing a column
# into X_test in place: X_test is a slice produced by train_test_split, and item
# assignment on such a slice can trigger pandas' SettingWithCopyWarning.
# From here on X_test carries an extra 'AA_testscores' column, so it no longer
# matches the two feature columns the classifier was trained on.
X_test = X_test.assign(
    AA_testscores=X_test[non_sensitive_attr] - regressor.predict(X_sensitive).flatten()
)

def p_a_given_s(AA_testscores):
    """Uniform density on the closed interval [0, 100].

    Returns ``1 / 100`` when ``AA_testscores`` lies in [0, 100] (both ends
    included) and ``0`` for any other value.
    """
    within_support = (AA_testscores >= 0) and (AA_testscores <= 100)
    if within_support:
        return 1 / 100
    return 0

def f_aa(eo_testscore, AA_testscores, model):
    """Affirmative-action adjusted score for a single applicant.

    Integrates, over the unit square, the model's positive-class probability at
    ``Test_Score = eo_testscore`` and ``Gender = s``, multiplied by
    ``p_a_given_s(AA_testscores)`` and ``gender_probabilities(s)``.

    Parameters
    ----------
    eo_testscore : scalar test score placed in the ``'Test_Score'`` column.
    AA_testscores : scalar passed unchanged to ``p_a_given_s``.
    model : object exposing ``predict_proba`` on a frame with the columns
        ``'Gender'`` and ``'Test_Score'``.

    Returns
    -------
    The integral estimate returned by ``dblquad``; the error estimate is discarded.

    NOTE(review): with ``p_a_given_s`` and ``gender_probabilities`` as defined in
    this notebook the integrand is at most 1 * (1/100) * 0.5 on a region of
    area 1, so the result cannot exceed 0.005 and a 0.5 threshold on it will
    never fire - confirm the intended normalisation.
    """
    def integrand(a, s):
        # NOTE(review): `a` is never used, so the integrand is constant along that
        # axis; the density factor is evaluated at the fixed AA_testscores instead.
        # `s` is fed to the classifier as a continuous Gender value in [0, 1],
        # although Gender is generated as 0/1 - confirm this is intended.
        temp_df = pd.DataFrame({'Gender': [s], 'Test_Score': [eo_testscore]})
        prob = model.predict_proba(temp_df)[:, 1][0]  # probability of class 1 for the single row
        return (prob * p_a_given_s(AA_testscores) * gender_probabilities(s))

    # Both integration ranges are [0, 1]. Per SciPy's documented func(y, x)
    # convention, `a` is the inner variable and `s` the outer one.
    result_aa, error = dblquad(integrand, 0, 1, lambda x: 0, lambda x: 1)  # Integrate over both a and s
    return result_aa

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# f_aa works on one applicant at a time; this wrapper applies it element by element.
def f_aa_vectorized(test_scores, AA_testscores, model):
    """Apply ``f_aa`` to each (test score, AA test score) pair and collect the results.

    Parameters
    ----------
    test_scores : iterable of float
        Test scores, one per applicant.
    AA_testscores : array-like of float
        Values paired with ``test_scores`` by position. A pandas Series is
        accepted: it is converted to an array first, so lookup is positional
        and not by index label (the held-out frame has a shuffled index).
    model : fitted classifier
        Forwarded unchanged to ``f_aa``.

    Returns
    -------
    numpy.ndarray
        One adjusted probability per entry of ``test_scores``.
    """
    # Convert once so that `[i]` below is always positional indexing.
    aa_by_position = np.asarray(AA_testscores)

    adjusted_probs = [
        f_aa(testscore_new, aa_by_position[i], model)
        for i, testscore_new in enumerate(test_scores)
    ]
    return np.array(adjusted_probs)

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# Pull the raw and residual test scores out of the held-out frame as plain arrays.
eo_testscore = X_test["Test_Score"].to_numpy()
AA_testscores = X_test["AA_testscores"].to_numpy()

# Affirmative-action adjusted probability for every held-out applicant.
adjusted_probabilities_aa = f_aa_vectorized(eo_testscore, AA_testscores, model)

# Threshold at 0.5 to obtain hard predictions, then score them against the labels.
y_pred_aa = np.where(adjusted_probabilities_aa > 0.5, 1, 0)
accuracy_aa = accuracy_score(y_test, y_pred_aa)
print(f"Model Accuracy with AA Adjustment: {accuracy_aa:.4f}")
y_pred_aa

Model Accuracy with AA Adjustment: 0.3830


array([0, 0, 0, ..., 0, 0, 0])