# Fairness Methods

This notebook contains several methods for estimating whether the embedding is fair.

In [1]:
import snap 
import csv
import pandas as pd
import random
import networkx as nx
import sklearn.metrics as skl_metrics
import numpy as np
#import fairlearn.metrics as metrics

**ConfusionMatrix:** creates a confusion matrix from the predictions and the true labels

In [2]:
def confusionMatrix(y_true, y_pred, labels=None):
    """Return the binary confusion-matrix counts ``(tn, fp, fn, tp)``.

    Parameters
    ----------
    y_true, y_pred : sequence
        True labels and predicted labels. Both are converted to integer
        arrays, so string encodings such as ``'0'`` / ``'1'`` are accepted.
    labels : ignored
        Kept only so existing callers keep working; the matrix is always
        built for the two classes ``[0, 1]``.

    Returns
    -------
    tuple
        ``(tn, fp, fn, tp)``. If either input is empty, ``(0, 0, 0, 0)``
        is returned without calling scikit-learn.
    """
    # Guard first: an empty group has no counts to report.
    if len(y_true) == 0 or len(y_pred) == 0:
        return 0, 0, 0, 0

    y_true = np.array(y_true, dtype=int)  # convert string values to integers
    y_pred = np.array(y_pred, dtype=int)

    # Fixing labels to [0, 1] guarantees a 2x2 matrix even when one class is
    # absent from the data, so the four-way unpacking below always works.
    matrix = skl_metrics.confusion_matrix(y_true, y_pred, labels=[0, 1])
    tn, fp, fn, tp = matrix.ravel()
    return tn, fp, fn, tp

**Predictive Parity:** 
- calculates the Positive Predictive Value (PPV) for each group (female and male)
- returns: PPV for female and male

In [3]:
#calculate PPV for each group
#protected and unprotected group have equal PPV

def predictive_Parity(y_true_f, y_pred_f, y_true_m, y_pred_m):
    """Compute the positive predictive value (PPV) for the female and male group.

    For each group the PPV is ``tp / (tp + fp)`` taken from
    ``confusionMatrix``. A group contributes 0 when its labels or
    predictions are empty, or when it has no true positives.

    Returns
    -------
    tuple
        ``(ppv_f, ppv_m)``.
    """

    def _ppv(truth, predicted):
        # Empty group: nothing to evaluate.
        if len(truth) == 0 or len(predicted) == 0:
            return 0
        _, false_pos, _, true_pos = confusionMatrix(truth, predicted)
        # Without true positives the PPV is reported as 0.
        if true_pos == 0:
            return 0
        return true_pos / (true_pos + false_pos)

    female_ppv = _ppv(y_true_f, y_pred_f)
    male_ppv = _ppv(y_true_m, y_pred_m)
    return female_ppv, male_ppv


**False Positive Error Rate Balance:**
- calculates the False Positive Rate (FPR) for each group (female and male)
- returns: FPR for female and male

In [4]:
#False positive rate for each group
def false_positive_error_rate_balance(y_true_f, y_pred_f, y_true_m, y_pred_m):
    """Compute the false positive rate (FPR) for the female and male group.

    For each group the FPR is ``fp / (tn + fp)`` taken from
    ``confusionMatrix``. A group contributes 0 when its labels or
    predictions are empty, or when it contains no actual negatives
    (``tn + fp == 0``), which would otherwise be a division by zero.

    Returns
    -------
    tuple
        ``(fpr_f, fpr_m)``.
    """

    def _fpr(truth, predicted):
        if len(truth) == 0 or len(predicted) == 0:
            return 0
        tn, fp, _, _ = confusionMatrix(truth, predicted)
        negatives = tn + fp
        # No actual negatives in this group: the rate is undefined, report 0
        # in the same way predictive_Parity reports 0 for an undefined PPV.
        if negatives == 0:
            return 0
        return fp / negatives

    fpr_f = _fpr(y_true_f, y_pred_f)
    fpr_m = _fpr(y_true_m, y_pred_m)
    return fpr_f, fpr_m

**Check Equal Probability:**
- checks if the probability is equal for each group
- returns true or false

In [5]:
#check if the probability is equal
def check_Equal_Probability(PPV_f, PPV_m, tolerance=0.0):
    """Report whether the two group probabilities are equal.

    Parameters
    ----------
    PPV_f, PPV_m : number
        The values to compare (for example the PPV of each group).
    tolerance : float, optional
        Largest absolute difference still treated as equal. The default of
        0.0 keeps the exact ``==`` comparison; pass a small positive value
        to ignore floating-point rounding differences.

    Returns
    -------
    bool
        True when the values are equal (within ``tolerance``).

    Raises
    ------
    ValueError
        If ``tolerance`` is negative.
    """
    if tolerance < 0:
        raise ValueError("tolerance must be non-negative")
    if tolerance == 0:
        # Default path: exactly the plain equality check.
        return PPV_f == PPV_m
    # The explicit equality test keeps identical infinities equal, for which
    # the difference below would be nan.
    return bool(PPV_f == PPV_m) or abs(PPV_f - PPV_m) <= tolerance

**Group Fairness**:
- calculate the probability of being assigned to a positive predicted class for each group (female and male)
- returns the probability for female and male

In [6]:
def _positive_rate(values):
    """Return the fraction of entries equal to 1, or 0 for an empty input."""
    # Integer conversion accepts lists, arrays and string labels ('0'/'1').
    encoded = np.asarray(values, dtype=int)
    if encoded.size == 0:
        return 0
    positives = int(np.count_nonzero(encoded == 1))
    return positives / encoded.size


def group_Fairness(y_true_f, y_true_m):
    """Compute the probability of the positive class within each group.

    For each group this is ``(number of entries equal to 1) / (group size)``.
    Each group is normalised by its own size, so groups of different sizes
    can be compared directly. The previous implementation divided by the
    total number of positives across both groups, which measures each
    group's share of the positives rather than a per-group probability.

    Parameters
    ----------
    y_true_f, y_true_m : sequence
        Class assignments (0/1, numeric or string-encoded) of the female
        and the male group.

    Returns
    -------
    tuple
        ``(probF_PC, probM_PC)``; an empty group yields 0.
    """
    probF_PC = _positive_rate(y_true_f)
    probM_PC = _positive_rate(y_true_m)
    return probF_PC, probM_PC

**Equalized Opportunity**:
- calculates the False Negative Rate (FNR) for each group (female and male)
- returns: FNR for female and male

In [7]:
def equalized_opportunity(y_true_f, y_pred_f, y_true_m, y_pred_m):
    """Compute the false negative rate (FNR) for the female and male group.

    For each group the FNR is ``fn / (tp + fn)`` taken from
    ``confusionMatrix``. A group contributes 0 when its labels or
    predictions are empty, or when it contains no actual positives
    (``tp + fn == 0``), which would otherwise be a division by zero.

    Returns
    -------
    tuple
        ``(fnr_f, fnr_m)``.
    """

    def _fnr(truth, predicted):
        if len(truth) == 0 or len(predicted) == 0:
            return 0
        _, _, fn, tp = confusionMatrix(truth, predicted)
        positives = tp + fn
        # No actual positives in this group: the rate is undefined, report 0.
        if positives == 0:
            return 0
        return fn / positives

    fnr_f = _fnr(y_true_f, y_pred_f)
    fnr_m = _fnr(y_true_m, y_pred_m)
    return fnr_f, fnr_m

**Equalized Odds:**
- calculates FPR and TPR for each group
- returns: TPR for female and male, followed by FPR for female and male

In [8]:
def equalized_odds(y_true_f, y_pred_f, y_true_m, y_pred_m):
    """Compute the true positive rate and false positive rate per group.

    For each group, using the counts from ``confusionMatrix``:

    * TPR = ``tp / (tp + fn)``
    * FPR = ``fp / (tn + fp)``

    A rate is reported as 0 when the group's labels or predictions are
    empty, or when its denominator is 0 (no actual positives for the TPR,
    no actual negatives for the FPR), which would otherwise be a division
    by zero.

    Returns
    -------
    tuple
        ``(tpr_f, tpr_m, fpr_f, fpr_m)`` - the same positions the function
        has always used for its four results.
    """

    def _rates(truth, predicted):
        if len(truth) == 0 or len(predicted) == 0:
            return 0, 0
        tn, fp, fn, tp = confusionMatrix(truth, predicted)
        positives = tp + fn
        negatives = tn + fp
        tpr = tp / positives if positives != 0 else 0
        fpr = fp / negatives if negatives != 0 else 0
        return tpr, fpr

    tpr_f, fpr_f = _rates(y_true_f, y_pred_f)
    tpr_m, fpr_m = _rates(y_true_m, y_pred_m)
    return tpr_f, tpr_m, fpr_f, fpr_m

**Separate Female and Male**:
- this method divides the data into female/male predictions and labels
- returns: female and male predictions and labels as lists

In [9]:
def seperate_female_male(prediction, labels, test_indices, gender):
    """Split predictions and labels into a female and a male group.

    For every position ``i`` in ``prediction`` the gender is looked up as
    ``gender[test_indices[i]]``. Entries whose gender equals 0 go to the
    male lists; every other value goes to the female lists.

    Returns
    -------
    tuple of lists
        ``(predictions_female, predictions_male, labels_female, labels_male)``.
    """
    female_preds, male_preds = [], []
    female_labels, male_labels = [], []

    for position in range(len(prediction)):
        node = test_indices[position]
        # Gender code 0 marks the male group; anything else counts as female.
        if gender[node] == 0:
            pred_bucket, label_bucket = male_preds, male_labels
        else:
            pred_bucket, label_bucket = female_preds, female_labels
        pred_bucket.append(prediction[position])
        label_bucket.append(labels[position])

    return female_preds, male_preds, female_labels, male_labels
      

In [10]:
# def seperate_0_from_1(prediction, labels): 
#     zero_pred = []
#     zero_label = []
#     one_pred = []
#     one_label = []
#     for i in prediction:
#         if(i == 0):
#             zero_pred.append(i)
#         else:
#             one_pred.append(i)
#     for k in labels:
#         if(i == 0):
#             zero_label.append(i)
#         else:
#             one_label.append(i)
            
    
#     return zero_pred, zero_label, one_pred, one_label
        

**Count gender**:
- count the number of female and males in the lists
- return: 2 integers 

In [11]:
def count_gender(list_gender):
    """Count the female and male entries in a collection of gender codes.

    Males are encoded as ``0`` and females as ``1``. Both the numeric and
    the string form (``'0'`` / ``'1'``) are counted. The previous
    ``type(...) == 'String'`` test compared a type object with a string and
    was never true, so string-encoded genders were always counted as zero.

    Parameters
    ----------
    list_gender : iterable
        Gender codes; converted to a list before counting.

    Returns
    -------
    tuple
        ``(count_of_females, count_of_males)``.
    """
    codes = list(list_gender)
    count_of_males = codes.count(0) + codes.count('0')
    count_of_females = codes.count(1) + codes.count('1')
    return count_of_females, count_of_males

**Return frame with evaluation**:
- this method creates a dataframe from the results of the fairness methods
- returns: result dataframe

In [12]:
def return_frame_with_evaluations(predictions, target, test_indices, gender):
    """Run every fairness method and collect the results in one DataFrame.

    The predictions and targets are first split by gender with
    ``seperate_female_male``. Each fairness function is then called on the
    two groups and its female/male result is written into one row of the
    frame. The outcome of ``check_Equal_Probability`` for the two PPV values
    is printed.

    Returns
    -------
    pandas.DataFrame
        Rows: 'Group Fairness', 'Predictive Parity',
        'False Positive Error Rate', 'Equalized Opportunity',
        'Equalized Odds PPV', 'Equalized Odds FPR';
        columns: 'Female' and 'Male'.
    """
    pred_f, pred_m, true_f, true_m = seperate_female_male(
        predictions, target, test_indices, list(gender)
    )

    report = pd.DataFrame(index=[
        'Group Fairness',
        'Predictive Parity',
        'False Positive Error Rate',
        'Equalized Opportunity',
        'Equalized Odds PPV',
        'Equalized Odds FPR',
    ])

    def _record(metric, female_value, male_value):
        # Write one metric row, female column first and male column second.
        report.loc[metric, 'Female'] = female_value
        report.loc[metric, 'Male'] = male_value

    # Group fairness is computed on the predictions of each group.
    share_f, share_m = group_Fairness(pred_f, pred_m)
    _record('Group Fairness', share_f, share_m)

    parity_f, parity_m = predictive_Parity(true_f, pred_f, true_m, pred_m)
    _record('Predictive Parity', parity_f, parity_m)

    print('PPV_f and PPV_m is equal:', check_Equal_Probability(parity_f, parity_m))

    error_rate_f, error_rate_m = false_positive_error_rate_balance(true_f, pred_f, true_m, pred_m)
    _record('False Positive Error Rate', error_rate_f, error_rate_m)

    miss_rate_f, miss_rate_m = equalized_opportunity(true_f, pred_f, true_m, pred_m)
    _record('Equalized Opportunity', miss_rate_f, miss_rate_m)

    # equalized_odds returns two female/male pairs; the first pair fills the
    # 'Equalized Odds PPV' row and the second the 'Equalized Odds FPR' row.
    odds_first_f, odds_first_m, odds_fpr_f, odds_fpr_m = equalized_odds(true_f, pred_f, true_m, pred_m)
    _record('Equalized Odds PPV', odds_first_f, odds_first_m)
    _record('Equalized Odds FPR', odds_fpr_f, odds_fpr_m)

    return report
