In [1]:
import sklearn.metrics
import numpy as np
import pandas as pd
from transparentai.datasets import load_adult, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from transparentai.models import classification

import transparentai.fairness as fairness

In [2]:
data = load_adult()
X, Y = data.drop(columns='income'), data['income']
X = X.select_dtypes('number')
Y = Y.replace({'>50K':1, '<=50K':0})
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.33, random_state=42)
clf = RandomForestClassifier()
clf.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [3]:
y_true = Y_train
y_true_valid = Y_valid
y_pred = clf.predict_proba(X_train)
y_pred_valid = clf.predict_proba(X_valid)

In [4]:
privileged_group = {
    'gender':['Male'],                
    'age': lambda x: x > 30 & x < 55, 
    'workclass': ['Private'],
    'marital-status': lambda x: 'Married' in x,
    'race':['White'],
    'test':['d']
}

df_valid = data.loc[X_valid.index,:]
df_train = data.loc[X_train.index,:]

res_train = fairness.compute_fairness_metrics(y_true, 
                                     y_pred, 
                                     df_train,
                                     privileged_group)

res_valid = fairness.compute_fairness_metrics(y_true_valid, 
                                     y_pred_valid, 
                                     df_valid,
                                     privileged_group)



In [5]:
print(res_train)
print()
print(res_valid)

{'gender': {'statistical_parity_difference': -0.19476541000793168, 'disparate_impact': 0.36193507464018176, 'equal_opportunity_difference': 0.0012555131216205329, 'average_odds_difference': 0.0002648639770150698, 'theil_index': 0.0009858691255119644}, 'age': {'statistical_parity_difference': -0.203725061026309, 'disparate_impact': 0.25714521167788223, 'equal_opportunity_difference': -0.007162267576955039, 'average_odds_difference': -0.0037346275592520773, 'theil_index': 0.0009858691255119644}, 'workclass': {'statistical_parity_difference': 0.07348962811212864, 'disparate_impact': 1.3373409073344713, 'equal_opportunity_difference': 0.0018590852857182094, 'average_odds_difference': 0.0007175586065426077, 'theil_index': 0.0009858691255119644}, 'marital-status': {'statistical_parity_difference': -0.3723357356482332, 'disparate_impact': 0.14530776576648555, 'equal_opportunity_difference': 0.001977948760519821, 'average_odds_difference': 0.0007141900360776936, 'theil_index': 0.00098586912551

In [6]:
from transparentai.datasets import load_boston
from sklearn.linear_model import LinearRegression

data = load_boston()
X, Y = data.drop(columns='MEDV'), data['MEDV']
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.33, random_state=42)
regr = LinearRegression()
regr.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

In [7]:
y_true = Y_train
y_true_valid = Y_valid
y_pred = regr.predict(X_train)
y_pred_valid = regr.predict(X_valid)

In [8]:
privileged_group = {
    'AGE': lambda x: (x > 30) & (x < 55)
}

df_valid = data.loc[X_valid.index,:]
df_train = data.loc[X_train.index,:]

res_train = fairness.compute_fairness_metrics(y_true, y_pred, df_train,
                                              privileged_group, regr_split='mean')

res_valid = fairness.compute_fairness_metrics(y_true_valid, y_pred_valid, 
                                              df_valid, privileged_group, regr_split='mean')

In [9]:
print(res_train)
print()
print(res_valid)

{'AGE': {'statistical_parity_difference': -0.24278409090909092, 'disparate_impact': 0.6386469344608879, 'equal_opportunity_difference': 0.030268418046830448, 'average_odds_difference': -0.13176501578278632, 'theil_index': 0.07290000413883886}}

{'AGE': {'statistical_parity_difference': -0.32598939208486327, 'disparate_impact': 0.5575858250276855, 'equal_opportunity_difference': 0.05590062111801242, 'average_odds_difference': -0.19990227625885348, 'theil_index': 0.08033756338030891}}


In [10]:
from transparentai.fairness import metrics
metrics.statistical_parity_difference
metrics.equal_opportunity_difference
metrics.average_odds_difference
metrics.disparate_impact
metrics.theil_index

# from transparentai.fairness import model_bias

# model_bias(y_true, y_pred, social_attr, returns_text=False)

<function transparentai.fairness.metrics.theil_index(y_true, y_pred, prot_attr, pos_label=1)>

In [12]:
fairness.

{'statistical_parity_difference': <function transparentai.fairness.metrics.statistical_parity_difference(y, prot_attr, pos_label=1)>,
 'disparate_impact': <function transparentai.fairness.metrics.disparate_impact(y, prot_attr, pos_label=1)>,
 'equal_opportunity_difference': <function transparentai.fairness.metrics.equal_opportunity_difference(y_true, y_pred, prot_attr, pos_label=1)>,
 'average_odds_difference': <function transparentai.fairness.metrics.average_odds_difference(y_true, y_pred, prot_attr, pos_label=1)>,
 'theil_index': <function transparentai.fairness.metrics.theil_index(y_true, y_pred, prot_attr, pos_label=1)>}