<a href="https://colab.research.google.com/github/Ava-00/Causal-Inference-and-Algorithmic-Fairness/blob/main/Causal_Model_Simulation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#Simulating model data under one assumption: men (Gender = 1) receive a 2-point test-score advantage over women, plus a +1.0 log-odds boost in the job-outcome model
#Defining the data generator (gender flag and test score) and generating the data
import numpy as np
import pandas as pd
from scipy.special import expit
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
def generate_data(n, seed=None):
    """Simulate ``n`` applicants with a binary gender flag and a test score.

    Gender is drawn as a fair coin flip. The test score is a uniform draw on
    [0, 100) plus a 2-point bonus for applicants with ``Gender == 1``, clipped
    to the range [0, 100].

    Parameters
    ----------
    n : int
        Number of applicants to simulate.
    seed : int or None, optional
        When given, draws come from a private ``RandomState(seed)`` so the
        result is reproducible and the global NumPy RNG is left untouched.
        When ``None`` (default) the global ``np.random`` state is used, which
        is the original behaviour.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Gender'`` (0/1) and ``'Test_Score'`` (float in [0, 100]).
    """
    # The np.random module and RandomState expose the same binomial/rand API,
    # so one code path serves both the seeded and the unseeded case.
    rng = np.random if seed is None else np.random.RandomState(seed)
    gender = rng.binomial(1, 0.5, n)
    test_scores = np.clip(2 * gender + 100 * rng.rand(n), 0, 100)
    return pd.DataFrame({
        'Gender': gender,
        'Test_Score': test_scores,
    })
# Seed the global NumPy RNG so the simulated cohort, the sampled outcomes and
# every number printed below are reproducible after a kernel restart.
SEED = 42
np.random.seed(SEED)

X = generate_data(10000)

# Ground-truth coefficients of the outcome model, on the log-odds scale.
beta_0 = -1.0
beta_gender = 1.0
beta_test_score = 2.0

# Test_Score is rescaled from 0-100 to 0-1 before beta_test_score is applied.
logits = beta_0 + beta_gender * X['Gender'] + beta_test_score * X['Test_Score'] / 100.0
# expit is the logistic sigmoid 1 / (1 + exp(-x)).
probabilities = expit(logits)
admission = np.random.binomial(1, probabilities)
y = admission

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)
model = LogisticRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)  # predict() applies a 0.5 probability threshold
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

# The model is fit on the raw 0-100 score, so its Test_Score coefficient is
# per point and should be compared with beta_test_score / 100, not
# beta_test_score itself.
fitted_beta_0 = model.intercept_[0]
fitted_beta_gender, fitted_beta_test_score = model.coef_[0]

print(f"Fitted Intercept (beta_0): {fitted_beta_0}")
print(f"Fitted Coefficient for Gender (beta_gender): {fitted_beta_gender}")
print(f"Fitted Coefficient for Test_Score (beta_test_score): {fitted_beta_test_score}")

Model Accuracy: 0.6805
Fitted Intercept (beta_0): -0.9346331885679137
Fitted Coefficient for Gender (beta_gender): 0.9698559281991558
Fitted Coefficient for Test_Score (beta_test_score): 0.019747892088624472


In [None]:
from scipy.integrate import quad
def gender_probabilities(s):
    """Return the prior probability P(Gender = s).

    Always 0.5, whatever ``s`` is; the data generator draws gender with
    probability 0.5.
    """
    return 0.5
#Defining equal opportunity model
def f_eo(testscore_new, model):
    """Equal-opportunity score: admission probability with gender averaged out.

    Computes ``sum_s model.P(admit | Gender=s, Test_Score=testscore_new) * P(s)``
    over the two gender values, so the result depends on the test score only.

    Parameters
    ----------
    testscore_new : float
        Test score of the applicant being scored.
    model : fitted classifier
        Must expose ``predict_proba`` and accept the feature columns
        ``['Gender', 'Test_Score']`` in that order.

    Returns
    -------
    float
        Gender-marginalised probability of the positive class.
    """
    genders = (0, 1)
    # Score this applicant once per gender value. Named columns keep the
    # frame aligned with the features the model was trained on.
    features = pd.DataFrame({
        'Gender': genders,
        'Test_Score': [testscore_new] * len(genders),
    })
    admit_probs = model.predict_proba(features)[:, 1]
    # Gender is binary, so the expectation is an exact two-term weighted sum;
    # no numerical integration is needed.
    weights = np.array([gender_probabilities(s) for s in genders])
    return float(np.dot(admit_probs, weights))

# Score every held-out applicant with the equal-opportunity predictor.
adjusted_probabilities = np.array(
    [f_eo(score, model) for score in X_test["Test_Score"]]
)

# Turn the adjusted scores into hard 0/1 decisions at a 0.5 threshold and
# compare them with the simulated outcomes.
y_pred_new = (adjusted_probabilities > 0.5).astype(int)
accuracy_new = accuracy_score(y_test, y_pred_new)
print(f"Model Accuracy with EO Adjustment: {accuracy_new}")
y_pred_new

Model Accuracy with EO Adjustment: 0.3805


array([0, 0, 0, ..., 0, 0, 0])

In [None]:
#Affirmative Action Abduction Step and Computation
from scipy.integrate import dblquad
from scipy.integrate import romberg

def p_a_given_s(testscore_new, gender_probabilities):
    """Uniform density of a test score conditional on a gender value.

    Despite its name, ``gender_probabilities`` is the gender value itself:
    0 (female) gives density 1/100 on [0, 100], 1 gives density 1/100 on
    [2, 102]. Any other gender value, or a score outside the interval,
    yields 0.
    """
    # Pick the interval on which the density is non-zero for this gender.
    if gender_probabilities == 0:  # Female
        lower, upper = 0, 100
    elif gender_probabilities == 1:
        lower, upper = 2, 102
    else:
        return 0
    if lower <= testscore_new <= upper:
        return 1 / 100
    return 0

def f_aa(testscore_new, model):
    """Affirmative-action score for a single test score.

    Computes ``f_eo(testscore_new, model) * sum_s p(testscore_new | s) * P(s)``
    with the sum taken over the two gender values 0 and 1.

    Parameters
    ----------
    testscore_new : float
        Test score of the applicant being scored.
    model : fitted classifier
        Passed through to ``f_eo``.

    Returns
    -------
    float
        The equal-opportunity score weighted by the marginal score density.

    Notes
    -----
    NOTE(review): this is a probability multiplied by a density of order
    1/100, so the values sit far below the 0.5 decision threshold. Confirm
    the intended affirmative-action formula before interpreting accuracy.
    """
    # f_eo does not depend on the gender being summed over, so evaluate it once.
    eo_probability = f_eo(testscore_new, model)
    # Gender is binary, so sum over {0, 1} explicitly. A quadrature routine
    # hands its integration variable to the integrand's first argument, which
    # made it easy to feed the test score in where the gender was expected.
    # This also avoids scipy.integrate.romberg, which is deprecated.
    score_density = sum(
        p_a_given_s(testscore_new, s) * gender_probabilities(s) for s in (0, 1)
    )
    return eo_probability * score_density

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score

# f_aa scores one test score at a time; this helper maps it over a collection.
def f_aa_vectorized(test_scores, model):
    """Apply ``f_aa`` to each test score and return the results as an array.

    Parameters
    ----------
    test_scores : iterable of float
        Test scores to evaluate, one ``f_aa`` call per element.
    model : fitted classifier
        Passed through to ``f_aa`` unchanged.

    Returns
    -------
    numpy.ndarray
        Adjusted scores in the same order as ``test_scores``.
    """
    return np.array([f_aa(score, model) for score in test_scores])

In [None]:
# Score every held-out applicant with the affirmative-action predictor.
test_scores = X_test["Test_Score"].to_numpy()
adjusted_probabilities_aa = f_aa_vectorized(test_scores, model)

# Convert the adjusted scores into hard 0/1 decisions at a 0.5 threshold.
y_pred_aa = np.where(adjusted_probabilities_aa > 0.5, 1, 0)

# Report how often those decisions match the simulated outcomes.
accuracy_aa = accuracy_score(y_test, y_pred_aa)
print(f"Model Accuracy with AA Adjustment: {accuracy_aa}")

#Model Accuracy with AA Adjustment: 0.3805

  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  result_aa = romberg(integrand, 0, 1, args=(testscore_new,), tol=1e-6, divmax=10)
  re

Model Accuracy with AA Adjustment: 0.3805
