In [12]:
import sklearn.metrics
import numpy as np
import pandas as pd
from transparentai.datasets import load_adult, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from transparentai.models import classification

import transparentai.fairness as fairness

In [6]:
# Load the dataset returned by load_adult() and separate the 'income'
# target column from the remaining feature columns.
data = load_adult()
X, Y = data.drop(columns='income'), data['income']
# Keep only the numeric feature columns.
X = X.select_dtypes('number')
# Encode the target: '>50K' becomes 1 and '<=50K' becomes 0.
Y = Y.replace({'>50K':1, '<=50K':0})
# Hold out 33% of the rows for validation; the split itself is seeded.
X_train, X_valid, Y_train, Y_valid = train_test_split(X, Y, test_size=0.33, random_state=42)
# NOTE(review): no random_state is passed to the forest, so the fitted model
# (and every metric computed from it) presumably changes between runs —
# confirm whether a seed is wanted here.
clf = RandomForestClassifier()
clf.fit(X_train,Y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [7]:
# Ground-truth labels for the training and validation splits.
y_true = Y_train
y_true_valid = Y_valid
# The predict_proba output is kept as-is; preprocess_y (defined later in this
# notebook) reduces inputs with more than one dimension with argmax on axis 1.
y_pred = clf.predict_proba(X_train)
y_pred_valid = clf.predict_proba(X_valid)

In [11]:
# Metrics requested from the transparentai classification helper, evaluated
# on the validation labels and predictions.
# NOTE(review): `metrics` is rebound to the fairness metric names further
# down in this notebook.
metrics = ['TPR','FPR', 'confusion_matrix']
classification.compute_metrics(y_true_valid, y_pred_valid, metrics)

{'TPR': 0.524986849026828,
 'FPR': 0.09775901266645015,
 'confusion_matrix': array([[11112,  1204],
        [ 1806,  1996]])}

In [20]:
# Definition of the privileged group for each protected attribute: either a
# list of values or a predicate applied to the attribute.
# NOTE(review): how fairness.create_privilieged_df consumes lists vs. callables
# is not visible here — confirm against the library.
privileged_group = {
    'gender': ['Male'],                    # privileged group is men for the gender attribute
    # The parentheses are required: `&` binds tighter than `>` / `<`, so the
    # unparenthesised form is evaluated as the chained comparison
    # `x > (30 & x) < 55`, which wrongly accepts e.g. 25 and 60.
    'age': lambda x: (x > 30) & (x < 55),  # privileged group aged strictly between 30 and 55
    'workclass': ['Private'],
    'marital-status': lambda x: 'Married' in x,
    'race': ['White']
}


In [22]:
# Build the frame in this cell so the preview does not depend on a
# `privileged_df` left over from another cell (it is otherwise only assigned
# further down the notebook, which fails on Restart & Run All).
# The training rows are used to mirror the construction done later on.
privileged_df = fairness.create_privilieged_df(data.loc[X_train.index, :], privileged_group)
privileged_df.sample(3)

Unnamed: 0,gender,age,workclass,marital-status,race
21283,0,0,1,1,1
1815,0,1,1,0,1
25436,1,1,0,0,0


In [None]:
# from transparentai.fairness import metrics
# metrics.statistical_parity_difference
# metrics.equal_opportunity_difference
# metrics.average_odds_difference
# metrics.disparate_impact
# metrics.theil_index

# from transparentai.fairness import model_bias

# model_bias(y_true, y_pred, social_attr, returns_text=False)

In [103]:
from transparentai.models import evaluation 

def preprocess_y(y, pos_label):
    """Converts labels or scores into a binary indicator of ``pos_label``.

    Inputs with more than one dimension are reduced with ``argmax``
    along axis 1. One-dimensional numeric inputs are rounded to the
    nearest integer. Non-numeric inputs (e.g. string labels) are
    compared to ``pos_label`` as they are.

    Parameters
    ----------
    y: array like
        Labels, scores, or an array with more than one dimension
        (e.g. one score per class on axis 1)
    pos_label:
        Value considered as the positive outcome

    Returns
    -------
    np.ndarray:
        Integer array holding 1 where the processed value equals
        ``pos_label`` and 0 elsewhere.
    """
    y = np.array(y)

    if len(y.shape) > 1:
        y = np.argmax(y, axis=1)
    elif np.issubdtype(y.dtype, np.number):
        # Rounding only makes sense for numeric data; np.round raises on
        # string labels, which are compared directly below instead.
        y = np.round(y, 0)

    return (y == pos_label).astype(int)


def base_rate(y, prot_attr, pos_label=1, privileged=True):
    """Rate of positive outcomes inside one protected-attribute group.

    Parameters
    ----------
    y: array like
        Labels or scores, binarised with ``preprocess_y``
    prot_attr: array like
        Group membership, compared against ``int(privileged)``
    pos_label: (default 1)
        Value considered as the positive outcome
    privileged: bool (default True)
        Whether to select the privileged (1) or unprivileged (0) rows

    Returns
    -------
    float:
        Number of positives in the group divided by the group size,
        or 1. when the group is empty.
    """
    group_mask = np.array(prot_attr) == int(privileged)
    outcomes = preprocess_y(y, pos_label)

    group_size = np.sum(group_mask)
    if not group_size > 0:
        # Empty group: fall back to a rate of 1.
        return 1.

    positives = np.sum(outcomes[group_mask] == 1)
    return positives / group_size


def model_metrics_priv(metrics_fun, *args, privileged=True):
    """Evaluates a metric on the rows of one protected-attribute group.

    Parameters
    ----------
    metrics_fun: function
        Called as ``metrics_fun(y_true, y_pred)`` on the selected rows
    *args:
        Positional arguments in the order
        ``y_true, y_pred, prot_attr, pos_label``
    privileged: bool (default True)
        Rows where ``prot_attr == int(privileged)`` are kept

    Returns
    -------
    Whatever ``metrics_fun`` returns for the selected rows.
    """
    y_true, y_pred = args[0], args[1]
    prot_attr, pos_label = args[2], args[3]

    y_true = preprocess_y(y_true, pos_label)
    y_pred = preprocess_y(y_pred, pos_label)

    # Convert to an array first (as base_rate does): comparing a plain list
    # with an int yields a single False, which would silently select nothing.
    group_mask = np.asarray(prot_attr) == int(privileged)

    return metrics_fun(y_true[group_mask], y_pred[group_mask])


def tpr_privileged(*args, privileged=True):
    """Evaluates ``evaluation.classification.true_positive_rate``
    on one protected-attribute group.

    Positional arguments and ``privileged`` are forwarded unchanged
    to ``model_metrics_priv``.
    """
    return model_metrics_priv(
        evaluation.classification.true_positive_rate,
        *args,
        privileged=privileged
    )
    
def fpr_privileged(*args, privileged=True):
    """Evaluates ``evaluation.classification.false_positive_rate``
    on one protected-attribute group.

    Positional arguments and ``privileged`` are forwarded unchanged
    to ``model_metrics_priv``.
    """
    return model_metrics_priv(
        evaluation.classification.false_positive_rate,
        *args,
        privileged=privileged
    )
    
    
    
def difference(metric_fun, *args):
    """Subtracts the privileged group's metric from the
    unprivileged group's metric.

    Parameters
    ----------
    metric_fun: function
        metric function that returns a number and accepts
        a ``privileged`` keyword argument
    *args:
        Positional arguments forwarded to ``metric_fun``

    Returns
    -------
    float:
        Metric value for the unprivileged group minus
        metric value for the privileged group.
    """
    unprivileged_value = metric_fun(*args, privileged=False)
    privileged_value = metric_fun(*args, privileged=True)
    return unprivileged_value - privileged_value
    
    
def ratio(metric_fun, *args):
    """Divides the unprivileged group's metric by the
    privileged group's metric.

    Parameters
    ----------
    metric_fun: function
        metric function that returns a number and accepts
        a ``privileged`` keyword argument
    *args:
        Positional arguments forwarded to ``metric_fun``

    Returns
    -------
    float:
        Metric value for the unprivileged group divided by
        metric value for the privileged group.
    """
    numerator = metric_fun(*args, privileged=False)
    denominator = metric_fun(*args, privileged=True)
    return numerator / denominator

    
def statistical_parity_difference(y, prot_attr, pos_label=1):
    """Difference between the rate of favorable outcomes
    received by the unprivileged group and the rate received
    by the privileged group.

    The ideal value of this metric is 0. A value < 0 implies
    higher benefit for the privileged group and a value > 0
    implies a higher benefit for the unprivileged group.

    Fairness for this metric is between -0.1 and 0.1

    Pr(Y^=pos_label|D=unprivileged)−Pr(Y^=pos_label|D=privileged)

    src :

    - https://fairware.cs.umass.edu/papers/Verma.pdf
    - https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.metrics.statistical_parity_difference.html?highlight=Statistical%20Parity%20Difference#aif360.sklearn.metrics.statistical_parity_difference

    """
    # prot_attr is expected to have the same length as y
    rate_args = (y, prot_attr, pos_label)
    return difference(base_rate, *rate_args)

def disparate_impact(y, prot_attr, pos_label=1):
    r"""Computed as the ratio of rate of favorable outcome for
    the unprivileged group to that of the privileged group.

    The ideal value of this metric is 1.0. A value < 1 implies
    higher benefit for the privileged group and a value > 1
    implies a higher benefit for the unprivileged group.

    Fairness for this metric is between 0.8 and 1.2

    .. math::
           \frac{Pr(\hat{Y} = 1 | D = \text{unprivileged})}
           {Pr(\hat{Y} = 1 | D = \text{privileged})}

    src :

    - https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.metrics.disparate_impact_ratio.html?highlight=Disparate%20Impact#aif360.sklearn.metrics.disparate_impact_ratio

    Parameters
    ----------
    y: array like
        Labels or scores, forwarded to ``base_rate``
    prot_attr: array like
        Group membership, expected to have the same length as y
    pos_label: (default 1)
        Value considered as the favorable outcome

    Returns
    -------
    float:
        Base rate of the unprivileged group divided by
        base rate of the privileged group.
    """
    # The docstring is a raw string so that the LaTeX backslashes above are
    # kept literally instead of being read as escape sequences.
    return ratio(base_rate, y, prot_attr, pos_label)

    
def equal_opportunity_difference(y_true, y_pred, prot_attr, pos_label=1):
    """Difference of true positive rates between the
    unprivileged and the privileged groups. The true positive
    rate is the ratio of true positives to the total number
    of actual positives for a given group.

    The ideal value is 0. A value of < 0 implies higher
    benefit for the privileged group and a value > 0 implies
    higher benefit for the unprivileged group.

    Fairness for this metric is between -0.1 and 0.1

    TPRD=unprivileged − TPRD=privileged

    src :

    - https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.metrics.equal_opportunity_difference.html?highlight=Equal%20Opportunity%20Difference

    """
    tpr_args = (y_true, y_pred, prot_attr, pos_label)
    return difference(tpr_privileged, *tpr_args)

def average_odds_difference(y_true, y_pred, prot_attr, pos_label=1):
    """Average of the false positive rate difference
    (false positives / negatives) and the true positive rate
    difference (true positives / positives) between the
    unprivileged and privileged groups.

    The ideal value of this metric is 0. A value of < 0 implies
    higher benefit for the privileged group and a value > 0
    implies higher benefit for the unprivileged group.

    Fairness for this metric is between -0.1 and 0.1

    1/2 [(FPRD=unprivileged−FPRD=privileged) +
    (TPRD=unprivileged−TPRD=privileged))]

    src :

    - https://aif360.readthedocs.io/en/latest/modules/generated/aif360.sklearn.metrics.average_odds_difference.html?highlight=Average%20Odds%20Difference#aif360.sklearn.metrics.average_odds_difference

    """
    shared_args = (y_true, y_pred, prot_attr, pos_label)

    fpr_gap = difference(fpr_privileged, *shared_args)
    tpr_gap = difference(tpr_privileged, *shared_args)

    return 0.5 * (fpr_gap + tpr_gap)

def theil_index(y_true, y_pred, prot_attr, pos_label=1):
    """Generalized entropy of benefit for all individuals
    in the dataset, with alpha = 1. It measures the inequality
    in benefit allocation for individuals.

    A value of 0 implies perfect fairness.

    Fairness is indicated by lower scores, higher scores are problematic

    With bi=y^i−yi+1:
    1/n ∑ bi/μ (ln(bi/μ))

    ``prot_attr`` is part of the signature but is not used
    by the computation.

    src :

    - https://aif360.readthedocs.io/en/latest/modules/generated/aif360.metrics.ClassificationMetric.html?highlight=Theil%20Index#aif360.metrics.ClassificationMetric.generalized_entropy_index

    """
    truth = preprocess_y(y_true, pos_label)
    predicted = preprocess_y(y_pred, pos_label)

    # Individual benefit: 0, 1 or 2.
    benefit = predicted - truth + 1
    relative_benefit = benefit / np.mean(benefit)

    # ln(x**b) stands for b*ln(x); a benefit of 0 gives x**0 == 1, so ln is 0.
    per_individual = np.log(relative_benefit**benefit) / np.mean(benefit)
    return np.mean(per_individual)
    
# Registry mapping each fairness metric's name to the function computing it;
# compute_metrics resolves string metric names through this mapping.
FAIRNESS_METRICS = dict(
    statistical_parity_difference=statistical_parity_difference,
    disparate_impact=disparate_impact,
    equal_opportunity_difference=equal_opportunity_difference,
    average_odds_difference=average_odds_difference,
    theil_index=theil_index,
)

def preprocess_metrics(input_metrics, metrics_dict):
    """Preprocess the inputed metrics so that each one maps
    to the appropriate function.

    input_metrics can hold str or function. If it's a string
    then it has to be a key of metrics_dict, otherwise a warning
    is emitted and the entry is skipped. Any non-string entry is
    kept as is under the name 'custom_<n>' (n starting at 1).

    Returns a dictionnary with metric's name as key and
    metric function as value

    Parameters
    ----------
    input_metrics: list
        List of metrics to compute
    metrics_dict: dict
        Known metrics, with metric's name as key and
        metric function as value

    Returns
    -------
    dict:
        Dictionnary with metric's name as key and
        metric function as value

    Raises
    ------
    TypeError:
        input_metrics must be a list
    ValueError:
        No valid metrics found
    """
    # Imported locally because the function needs it and no module-level
    # import of `warnings` is visible in this notebook.
    import warnings

    if not isinstance(input_metrics, list):
        raise TypeError('input_metrics must be a list')

    fn_dict = {}
    cnt_custom = 1

    for fn in input_metrics:
        if isinstance(fn, str):
            if fn in metrics_dict:
                fn_dict[fn] = metrics_dict[fn]
            else:
                warnings.warn('%s function not found' % fn)
        else:
            fn_dict['custom_' + str(cnt_custom)] = fn
            cnt_custom += 1

    if not fn_dict:
        raise ValueError('No valid metrics found')

    return fn_dict

    
def compute_metrics(y_true, y_pred, metrics, prot_attr, pos_label=1):
    """Computes the inputed metrics.

    metrics can have str or function. If it's a string
    then it has to be a key from FAIRNESS_METRICS global variable dict.

    A metric function declaring a ``y_true`` parameter is called as
    ``fn(y_true, y_pred, prot_attr, pos_label)``; any other one is
    called as ``fn(y_pred, prot_attr, pos_label)``.

    Returns a dictionnary with metric's name as key and
    metric function's result as value

    Parameters
    ----------
    y_true: array like
        True labels
    y_pred: array like
        Predicted labels
    metrics: list
        List of metrics to compute
    prot_attr: array like
        Protected attribute values, forwarded to every metric
    pos_label: (default 1)
        Value considered as the positive outcome, forwarded
        to every metric

    Returns
    -------
    dict:
        Dictionnary with metric's name as key and
        metric function's result as value

    Raises
    ------
    TypeError:
        metrics must be a list
    """
    # Local import: only this function needs it.
    import inspect

    if not isinstance(metrics, list):
        raise TypeError('metrics must be a list')

    if isinstance(y_true, list):
        y_true = np.array(y_true)
    if isinstance(y_pred, list):
        y_pred = np.array(y_pred)

    metric_fns = preprocess_metrics(input_metrics=metrics,
                                    metrics_dict=FAIRNESS_METRICS)
    res = {}
    for name, fn in metric_fns.items():
        # Look at the declared parameters rather than __code__.co_varnames:
        # the latter also lists local variables and is missing on callables
        # such as functools.partial objects.
        try:
            params = inspect.signature(fn).parameters
        except (TypeError, ValueError):
            # No introspectable signature: use the short calling convention.
            params = {}

        if 'y_true' in params:
            res[name] = fn(y_true, y_pred, prot_attr, pos_label)
        else:
            res[name] = fn(y_pred, prot_attr, pos_label)

    return res

# Label treated as the favourable outcome and the fairness metrics to report.
pos_label = 1
metrics = [
    'statistical_parity_difference',
    'disparate_impact',
    'equal_opportunity_difference',
    'average_odds_difference',
    'theil_index',
]

# Protected attribute ('marital-status') for the validation rows.
# `df` and `privileged_df` are reused below, so they end up bound to the
# training rows once this cell has run.
df = data.loc[X_valid.index, :]
privileged_df = fairness.create_privilieged_df(df, privileged_group)
prot_attr_valid = privileged_df['marital-status']

# Protected attribute ('marital-status') for the training rows.
df = data.loc[X_train.index, :]
privileged_df = fairness.create_privilieged_df(df, privileged_group)
prot_attr = privileged_df['marital-status']

# Fairness metrics on the training split, then on the validation split.
res_train = compute_metrics(y_true=y_true,
                            y_pred=y_pred,
                            metrics=metrics,
                            prot_attr=prot_attr,
                            pos_label=pos_label)

res = compute_metrics(y_true=y_true_valid,
                      y_pred=y_pred_valid,
                      metrics=metrics,
                      prot_attr=prot_attr_valid,
                      pos_label=pos_label)

print(res_train)
print(res)

{'statistical_parity_difference': -0.37232384428532844, 'disparate_impact': 0.14508294869705107, 'equal_opportunity_difference': 0.0012024767358392863, 'average_odds_difference': 0.00035237607077305696, 'theil_index': 0.0010234507425749904}
{'statistical_parity_difference': -0.17332207645194392, 'disparate_impact': 0.40534380745537735, 'equal_opportunity_difference': 0.1021395855623608, 'average_odds_difference': 0.0320817599168834, 'theil_index': 0.14563763594900897}
