# In [0]:
import matplotlib.pyplot as plt
#######################################################################################################################

def get_cases_by_metric(test_data, categories, metric, mappings, predictions, labels):
    """Groups all cases by metric, creating a dictionary with each group value as a key

    @:param test_data:      List of lists, the full data set to pull groups from
    @:param categories:     List of column titles, stored separately from the numerical data
    @:param metric:         String used to determine groups, i.e 'race'. The first column title that contains this
                            string is used as the grouping column
    @:param mappings:       Dictionary mappings from training, used to convert the data between numerical and string
                            format
    @:param predictions:    List of predicted values produced from a machine learning model.
    @:param labels:         List of labels for the test data

    @:note: test_data, predictions, and labels should correspond to one another
    @:note: Groups with 50 or fewer members are not considered sufficiently representative and are excluded.

    @:raises ValueError:    If no column title contains metric

    @:returns total_cases:  Dictionary with each group value as keys. Each key has a list of
                            (float prediction, int label) tuples representing all of the data points within that group
    """
    # Locate the grouping column. Failing loudly here matters: falling back to a default index would silently
    # group on an unrelated column.
    column = next((i for i, title in enumerate(categories) if metric in title), None)
    if column is None:
        raise ValueError("metric %r does not match any column title in categories" % (metric,))

    total_cases = {}
    for value, encoded in mappings[metric].items():
        cases = [
            (float(predictions[i]), int(labels[i]))
            for i, row in enumerate(test_data)
            if row[column] == encoded
        ]

        # Only include groups that have more than 50 members
        if len(cases) > 50:
            total_cases[value] = cases

    return total_cases

#######################################################################################################################

def apply_threshold(prediction_label_pairs, threshold):
    """Applies a threshold to real-valued model predictions to make them either 0 or 1. Values above the threshold
    become 1's, values below or equal to the threshold become 0's.

    @:param prediction_label_pairs: Tuples of the form (prediction, label), such as those returned by
                                    get_cases_by_metric
    @:param threshold:              Float or Int value used to calculate the predicted value

    @:note: The input is not modified; a new list is built.

    @:returns threshed:             New list of (0 or 1, label) tuples, in the same order as the input
    """
    return [
        (0 if pair[0] <= threshold else 1, pair[1])
        for pair in prediction_label_pairs
    ]

#######################################################################################################################

def get_total_accuracy(classifications):
    """Gets the total accuracy of a set of classifications

    @:param classifications:    a dictionary of all the classifications, separated into groups. Each group contains
                                a list of (prediction, label) tuples

    @:note:                     assumes that the predictions have been already thresholded

    @:returns total_accuracy:   the total accuracy of the classifications as a float. Returns 0.0 when there are no
                                cases at all, to avoid dividing by 0
    """
    total_correct = 0
    total_num_cases = 0
    for pairs in classifications.values():
        for prediction, label in pairs:
            total_num_cases += 1
            if prediction == label:
                total_correct += 1

    if total_num_cases == 0:
        return 0.0

    return float(total_correct) / total_num_cases

#######################################################################################################################

def get_num_correct(prediction_label_pairs):
    """Determines the number of correct predictions in a group

    @:param prediction_label_pairs:  List of (prediction, label) tuples

    @:note:                         Assumes predictions have already been thresholded
    @:note:                         Both elements of every pair are converted with int() before being compared

    @:returns num_correct:          Int value of correct predictions. Dividing this by the number of pairs would give
                                    the accuracy for the group
    """
    return sum(1 for pair in prediction_label_pairs if int(pair[0]) == int(pair[1]))

#######################################################################################################################

def get_num_false_positives(prediction_label_pairs):
    """Determines the number of false positives in a group

    @:param prediction_label_pairs:   List of (prediction, label) tuples

    @:note:                           Assumes predictions have already been thresholded

    @:returns false_positives:        The number of false positives (prediction == 1, label == 0)
    """
    # Both elements are converted before the comparison so malformed pairs fail regardless of their class.
    return sum(
        1
        for pair in prediction_label_pairs
        if (int(pair[0]), int(pair[1])) == (1, 0)
    )

#######################################################################################################################

def get_false_positive_rate(prediction_label_pairs):
    """Determines the rate of false positives in a group

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns FPR:                      The number of false positives divided by the number of labelled negatives.
                                        Will return 0 to avoid divide by 0, but in practice there should be no
                                        instances of no labelled negatives.
    """
    decoded = [(int(pair[0]), int(pair[1])) for pair in prediction_label_pairs]

    # Predictions made for the cases whose label is 0
    negative_predictions = [prediction for prediction, label in decoded if label == 0]
    if not negative_predictions:
        return 0

    return negative_predictions.count(1) / len(negative_predictions)

#######################################################################################################################

def get_num_true_negatives(prediction_label_pairs):
    """Determines the number of true negatives in a group

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns true_negatives:           The number of true negatives (prediction == 0, label == 0)
    """
    outcomes = [(int(pair[0]), int(pair[1])) for pair in prediction_label_pairs]
    return outcomes.count((0, 0))

#######################################################################################################################

def get_true_negative_rate(prediction_labels_pairs):
    """Determines the rate of true negatives in a group

    @:param prediction_labels_pairs:    List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns TNR:                      1 - false_positive_rate, as computed by get_false_positive_rate
    """
    false_positive_rate = get_false_positive_rate(prediction_labels_pairs)
    return 1 - false_positive_rate

#######################################################################################################################

def get_num_false_negatives(prediction_label_pairs):
    """Determines the number of false negatives in a group

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns false_negatives:          The number of false negatives (prediction == 0, label == 1)
    """
    missed = [
        pair
        for pair in prediction_label_pairs
        if (int(pair[0]), int(pair[1])) == (0, 1)
    ]
    return len(missed)

#######################################################################################################################

def get_false_negative_rate(prediction_label_pairs):
    """Determines the rate of false negatives in a group

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns FNR:                      The number of false negatives divided by the number of labelled positives.
                                        Will return 0 to avoid divide by 0, but in practice there should be no
                                        instances of no labelled positives.
    """
    decoded = [(int(pair[0]), int(pair[1])) for pair in prediction_label_pairs]

    labelled_positives = sum(1 for _, label in decoded if label == 1)
    if labelled_positives == 0:
        return 0

    false_negatives = sum(1 for prediction, label in decoded if label == 1 and prediction == 0)
    return false_negatives / labelled_positives

#######################################################################################################################

def get_num_true_positives(prediction_label_pairs):
    """Determines the number of true positives in a group

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns true_positives:           The number of true positives (prediction == 1, label == 1)
    """
    outcomes = [(int(pair[0]), int(pair[1])) for pair in prediction_label_pairs]
    return outcomes.count((1, 1))

#######################################################################################################################

def get_true_positive_rate(category):
    """Determines the rate of true positives in a group

    @:param category:     List of (prediction, label) tuples

    @:note:               Assumes predictions have already been thresholded

    @:returns TPR:        1 - false_negative_rate, as computed by get_false_negative_rate
    """
    false_negative_rate = get_false_negative_rate(category)
    return 1 - false_negative_rate

#######################################################################################################################

def get_num_predicted_positives(prediction_label_pairs):
    """Determines the number of samples that have a positive prediction

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns predicted_positives:      Number of samples with a positive prediction; labels are not inspected
    """
    predicted_classes = [int(pair[0]) for pair in prediction_label_pairs]
    return predicted_classes.count(1)

#######################################################################################################################

def get_positive_predictive_value(prediction_label_pairs):
    """Determines the positive predictive value of a group, defined as the number of true positives divided by the
    number of predicted positives

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded

    @:returns PPV:                      true positives / predicted positives, or 0 when nothing was predicted
                                        positive
    """
    num_hits = get_num_true_positives(prediction_label_pairs)
    num_flagged = get_num_predicted_positives(prediction_label_pairs)

    return num_hits / num_flagged if num_flagged != 0 else 0

#######################################################################################################################

def calculate_Fscore(prediction_label_pairs):
    """Calculates the Fscore (or harmonic mean) of a group. Used as a substitute for accuracy when data is skewed

    @:param prediction_label_pairs:     List of (prediction, label) tuples

    @:note:                             Assumes predictions have already been thresholded
    @:note:                             Precision is taken from get_positive_predictive_value and recall from
                                        get_true_positive_rate

    @:returns Fscore:                   Harmonic mean, defined as 2 * (precision * recall) / (precision + recall).
                                        Returns 0.0 when precision + recall is 0, to avoid dividing by 0
    """
    precision = get_positive_predictive_value(prediction_label_pairs)
    recall = get_true_positive_rate(prediction_label_pairs)

    denominator = precision + recall
    # Both are 0 when, for example, nothing is predicted positive while labelled positives exist.
    if denominator == 0:
        return 0.0

    return 2 * (precision * recall / denominator)

#######################################################################################################################

def get_ROC_data(prediction_label_pairs, group):
    """Computes the points of a ROC curve for one group by sweeping the threshold from 0.01 to 1.00

    @:param prediction_label_pairs:     List of (prediction, label) tuples with real-valued predictions
    @:param group:                      Value returned unchanged as the third element of the result (plot_ROC_data
                                        uses it as the curve label)

    @:returns ROC_data:                 Tuple (true_positives, false_positives, group), where the two lists hold the
                                        TPR and FPR for each of the 100 thresholds in increasing threshold order
    """
    true_positives = []
    false_positives = []
    for step in range(1, 101):
        threshold = step / 100.0
        # apply_threshold builds a new list and leaves its input untouched, so no defensive copy is needed.
        thresholded = apply_threshold(prediction_label_pairs, threshold)
        true_positives.append(get_true_positive_rate(thresholded))
        false_positives.append(get_false_positive_rate(thresholded))

    return (true_positives, false_positives, group)

#######################################################################################################################

def plot_ROC_data(ROC_data_list):
    """Plots one ROC curve per entry on the current matplotlib figure and shows it

    @:param ROC_data_list:      Iterable of (TPR list, FPR list, label) entries, such as those returned by
                                get_ROC_data

    @:note:                     Both axes are limited to [0.0, 1.0]; FPR is drawn on x and TPR on y
    """
    for roc_entry in ROC_data_list:
        # Entry layout is (TPR, FPR, label); FPR goes on the x axis.
        plt.plot(roc_entry[1], roc_entry[0], label=roc_entry[2])

    plt.legend()
    current_axes = plt.gca()
    current_axes.set_xlim([0.0, 1.0])
    current_axes.set_ylim([0.0, 1.0])
    plt.ylabel("True Positive Rate")
    plt.xlabel("False Positive Rate")

    plt.show()

#######################################################################################################################

def apply_financials(data, group_level=False):
    """Sums the fixed per-case financial values over all thresholded predictions

    @:param data:           With group_level=False, a dictionary mapping groups to lists of (prediction, label)
                            tuples; with group_level=True, a single list of (prediction, label) tuples
    @:param group_level:    Whether data is one group's list rather than the dictionary of all groups

    @:note:                 Assumes predictions have already been thresholded

    @:returns total:        Float sum of count * value over true positives, true negatives, false positives and
                            false negatives
    """
    if group_level:
        all_pairs = data
    else:
        # Flatten every group into one fresh list; the per-group lists are left untouched.
        all_pairs = []
        for group_pairs in data.values():
            all_pairs += group_pairs

    # (number of cases, value per case) for each outcome category
    breakdown = (
        (get_num_true_positives(all_pairs), -60076),
        (get_num_true_negatives(all_pairs), 23088),
        (get_num_false_positives(all_pairs), -110076),
        (get_num_false_negatives(all_pairs), -202330),
    )

    total = 0.0
    for num_cases, value_per_case in breakdown:
        total += num_cases * value_per_case

    return total

# In [0]:
# from utils import *
#######################################################################################################################
# YOU MUST FILL OUT YOUR SECONDARY OPTIMIZATION METRIC (either accuracy or cost)!
# The metric chosen must be the same for all 5 methods.
#
# Chosen Secondary Optimization Metric: accuracy
#######################################################################################################################

def enforce_demographic_parity(categorical_results, epsilon):
    """Determines the thresholds such that each group has equal positive prediction rates within a tolerance value
    epsilon, and chooses the most accurate solution of those that satisfy this constraint.

    @:param categorical_results:        Dictionary mapping each group to its list of (prediction, label) tuples
    @:param epsilon:                    Largest allowed absolute difference between the positive prediction rates of
                                        any two groups

    @:note: Candidate thresholds are 0.00, 0.01, ..., 0.99 for every group.
    @:note: Works for any set of groups present in categorical_results.
    @:note: Accuracy is measured with get_num_correct, which compares predictions and labels as integers.
    @:note: The original notes state that for the Naive Bayes Classifier and SVM a nontrivial solution should exist
            with epsilon=0.02.

    @:returns demographic_parity_data:  Dictionary mapping each group to its thresholded (prediction, label) tuples,
                                        or None when no combination of thresholds satisfies the constraint
    @:returns thresholds:               Dictionary mapping each group to its chosen threshold, or None when no
                                        combination of thresholds satisfies the constraint
    """
    candidate_thresholds = [step / 100 for step in range(100)]

    # For every group, record (positive prediction rate, number correct, threshold) at each candidate threshold.
    stats_by_group = {}
    for group, pairs in categorical_results.items():
        group_stats = []
        for threshold in candidate_thresholds:
            thresholded = apply_threshold(pairs, threshold)
            rate = get_num_predicted_positives(thresholded) / len(thresholded)
            group_stats.append((rate, get_num_correct(thresholded), threshold))
        stats_by_group[group] = group_stats

    # Every feasible assignment has a smallest rate, and all of its other rates lie within epsilon above it.
    # Anchoring a window at each observed rate therefore covers every feasible assignment. Inside a window the
    # groups are independent, and total accuracy is the sum of correct counts over a fixed number of cases, so
    # each group simply takes its most accurate threshold.
    observed_rates = sorted({entry[0] for group_stats in stats_by_group.values() for entry in group_stats})

    best_correct = -1
    thresholds = None
    for low in observed_rates:
        chosen = {}
        total_correct = 0
        for group, group_stats in stats_by_group.items():
            in_window = [entry for entry in group_stats if entry[0] >= low and entry[0] - low <= epsilon]
            if not in_window:
                break
            _, correct, threshold = max(in_window, key=lambda entry: entry[1])
            chosen[group] = threshold
            total_correct += correct
        else:
            if total_correct > best_correct:
                best_correct = total_correct
                thresholds = chosen

    if thresholds is None:
        return None, None

    demographic_parity_data = {
        group: apply_threshold(pairs, thresholds[group])
        for group, pairs in categorical_results.items()
    }
    return demographic_parity_data, thresholds
                        
                                    
#     return None, None

#######################################################################################################################

def enforce_equal_opportunity(categorical_results, epsilon):
    """Determines thresholds such that all groups have equal TPR within some tolerance value epsilon, and chooses the
    most accurate solution of those that satisfy this constraint.

    @:param categorical_results:        Dictionary mapping each group to its list of (prediction, label) tuples
    @:param epsilon:                    Largest allowed absolute difference between the TPRs of any two groups

    @:note: Candidate thresholds are 0.00, 0.01, ..., 0.99 for every group.
    @:note: Works for any set of groups present in categorical_results.
    @:note: Accuracy is measured with get_num_correct, which compares predictions and labels as integers.
    @:note: The original notes state that for the Naive Bayes Classifier and SVM a non-trivial solution should exist
            with epsilon=0.01.

    @:returns equal_opportunity_data:   Dictionary mapping each group to its thresholded (prediction, label) tuples,
                                        or None when no combination of thresholds satisfies the constraint
    @:returns thresholds:               Dictionary mapping each group to its chosen threshold, or None when no
                                        combination of thresholds satisfies the constraint
    """
    candidate_thresholds = [step / 100 for step in range(100)]

    # For every group, record (TPR, number correct, threshold) at each candidate threshold. Every threshold keeps
    # its own entry: two thresholds can share a TPR and still differ in accuracy.
    stats_by_group = {}
    for group, pairs in categorical_results.items():
        group_stats = []
        for threshold in candidate_thresholds:
            thresholded = apply_threshold(pairs, threshold)
            rate = get_true_positive_rate(thresholded)
            group_stats.append((rate, get_num_correct(thresholded), threshold))
        stats_by_group[group] = group_stats

    # Every feasible assignment has a smallest rate, and all of its other rates lie within epsilon above it.
    # Anchoring a window at each observed rate therefore covers every feasible assignment. Inside a window the
    # groups are independent, and total accuracy is the sum of correct counts over a fixed number of cases, so
    # each group simply takes its most accurate threshold.
    observed_rates = sorted({entry[0] for group_stats in stats_by_group.values() for entry in group_stats})

    best_correct = -1
    thresholds = None
    for low in observed_rates:
        chosen = {}
        total_correct = 0
        for group, group_stats in stats_by_group.items():
            in_window = [entry for entry in group_stats if entry[0] >= low and entry[0] - low <= epsilon]
            if not in_window:
                break
            _, correct, threshold = max(in_window, key=lambda entry: entry[1])
            chosen[group] = threshold
            total_correct += correct
        else:
            if total_correct > best_correct:
                best_correct = total_correct
                thresholds = chosen

    if thresholds is None:
        return None, None

    equal_opportunity_data = {
        group: apply_threshold(pairs, thresholds[group])
        for group, pairs in categorical_results.items()
    }
    return equal_opportunity_data, thresholds

#     return None, None

#######################################################################################################################

def enforce_maximum_profit(categorical_results):
    """Determines which thresholds to use to achieve the maximum accuracy with the given data, choosing each group's
    threshold independently.

    @:param categorical_results:    Dictionary mapping each group to its list of (prediction, label) tuples

    @:note: Candidate thresholds are 0.00, 0.01, ..., 0.99; on ties the lowest threshold is kept.
    @:note: Works for any set of groups present in categorical_results.
    @:note: Despite the name, the quantity maximized here is accuracy (via get_num_correct), not cost.

    @:returns mp_data:              Dictionary mapping each group to its thresholded (prediction, label) tuples
    @:returns thresholds:           Dictionary mapping each group to its chosen threshold
    """
    candidate_thresholds = [step / 100 for step in range(100)]

    mp_data = {}
    thresholds = {}
    for group, pairs in categorical_results.items():
        # Start below any possible count so a group whose best accuracy is 0 still receives a threshold.
        best_correct = -1
        for threshold in candidate_thresholds:
            thresholded = apply_threshold(pairs, threshold)
            # The group size is fixed, so comparing correct counts is the same as comparing accuracies.
            correct = get_num_correct(thresholded)
            if correct > best_correct:
                best_correct = correct
                thresholds[group] = threshold
                mp_data[group] = thresholded

    return mp_data, thresholds
#     return None, None

#######################################################################################################################

def enforce_predictive_parity(categorical_results, epsilon):
    """Determines thresholds such that all groups have the same PPV within a tolerance value epsilon, and returns
    the most accurate solution of those that satisfy this constraint.

    @:param categorical_results:        Dictionary mapping each group to its list of (prediction, label) tuples
    @:param epsilon:                    Largest allowed absolute difference between the PPVs of any two groups

    @:note: Candidate thresholds are 0.00, 0.01, ..., 0.99 for every group.
    @:note: Works for any set of groups present in categorical_results.
    @:note: Accuracy is measured with get_num_correct, which compares predictions and labels as integers.

    @:returns predictive_parity_data:   Dictionary mapping each group to its thresholded (prediction, label) tuples,
                                        or None when no combination of thresholds satisfies the constraint
    @:returns thresholds:               Dictionary mapping each group to its chosen threshold, or None when no
                                        combination of thresholds satisfies the constraint
    """
    candidate_thresholds = [step / 100 for step in range(100)]

    # For every group, record (PPV, number correct, threshold) at each candidate threshold. Every threshold keeps
    # its own entry: two thresholds can share a PPV and still differ in accuracy.
    stats_by_group = {}
    for group, pairs in categorical_results.items():
        group_stats = []
        for threshold in candidate_thresholds:
            thresholded = apply_threshold(pairs, threshold)
            rate = get_positive_predictive_value(thresholded)
            group_stats.append((rate, get_num_correct(thresholded), threshold))
        stats_by_group[group] = group_stats

    # Every feasible assignment has a smallest rate, and all of its other rates lie within epsilon above it.
    # Anchoring a window at each observed rate therefore covers every feasible assignment. Inside a window the
    # groups are independent, and total accuracy is the sum of correct counts over a fixed number of cases, so
    # each group simply takes its most accurate threshold.
    observed_rates = sorted({entry[0] for group_stats in stats_by_group.values() for entry in group_stats})

    best_correct = -1
    thresholds = None
    for low in observed_rates:
        chosen = {}
        total_correct = 0
        for group, group_stats in stats_by_group.items():
            in_window = [entry for entry in group_stats if entry[0] >= low and entry[0] - low <= epsilon]
            if not in_window:
                break
            _, correct, threshold = max(in_window, key=lambda entry: entry[1])
            chosen[group] = threshold
            total_correct += correct
        else:
            if total_correct > best_correct:
                best_correct = total_correct
                thresholds = chosen

    if thresholds is None:
        return None, None

    predictive_parity_data = {
        group: apply_threshold(pairs, thresholds[group])
        for group, pairs in categorical_results.items()
    }
    return predictive_parity_data, thresholds
#     return None, None

    ###################################################################################################################

def enforce_single_threshold(categorical_results):
    """Applies a single threshold to all groups, and returns the most accurate solution.

    @:param categorical_results:    Dictionary mapping each group to its list of (prediction, label) tuples

    @:note: Candidate thresholds are 0.00, 0.01, ..., 0.99; on ties the lowest threshold is kept.
    @:note: Works for any set of groups present in categorical_results; thresholds has exactly those groups as keys.

    @:returns single_threshold_data:    Dictionary mapping each group to its thresholded (prediction, label) tuples
    @:returns thresholds:               Dictionary mapping each group to the one shared threshold
    """
    # Start below any possible accuracy so a threshold is always selected, even when the best accuracy is 0.
    best_accuracy = -1.0
    best_threshold = None
    single_threshold_data = {}

    for step in range(100):
        threshold = step / 100
        thresholded = {
            group: apply_threshold(pairs, threshold)
            for group, pairs in categorical_results.items()
        }
        total_accuracy = get_total_accuracy(thresholded)
        if total_accuracy > best_accuracy:
            best_accuracy = total_accuracy
            best_threshold = threshold
            single_threshold_data = thresholded

    thresholds = {group: best_threshold for group in categorical_results}

    return single_threshold_data, thresholds

#     return None, None

def compare_probs(p1, p2, epsilon):
    """Tells whether two rates are within epsilon of each other

    @:param p1:         First value
    @:param p2:         Second value
    @:param epsilon:    Largest absolute difference that still counts as equal (inclusive)

    @:returns:          True when abs(p1 - p2) is at most epsilon, False otherwise
    """
    gap = abs(p1 - p2)
    return gap <= epsilon

# In [0]:
# from Postprocessing import *
# from utils import *
from datetime import datetime
import copy

def _group_accuracy(cases):
    """Fraction of a group's cases that get_num_correct counts as correct."""
    return get_num_correct(cases) / len(cases)


def _group_positive_rate(cases):
    """Fraction of a group's cases that get_num_predicted_positives counts as positive."""
    return get_num_predicted_positives(cases) / len(cases)


def _standard_group_metrics():
    """Return the (label, function) pairs printed for every fairness criterion."""
    return [
        ("Accuracy", _group_accuracy),
        ("FPR", get_false_positive_rate),
        ("FNR", get_false_negative_rate),
        ("TPR", get_true_positive_rate),
        ("TNR", get_true_negative_rate),
    ]


def _print_results_section(header, group_data, thresholds, metrics):
    """Print one results section: per-group metrics, thresholds, cost and total accuracy."""
    print(header)
    for label, metric_fn in metrics:
        print("")
        for group in group_data.keys():
            print(label + " for " + str(group) + ": " + str(metric_fn(group_data[group])))

    print("")
    for group in thresholds.keys():
        print("Threshold for " + str(group) + ": " + str(thresholds[group]))

    print("")
    total_cost = apply_financials(group_data)
    print("Total cost: ")
    print('${:,.0f}'.format(total_cost))
    total_accuracy = get_total_accuracy(group_data)
    print("Total accuracy: " + str(total_accuracy))
    print("-----------------------------------------------------------------")


def report_results(data):
    """Run every post-processing criterion on `data` and print a report for each.

    The criteria are tried in this order: demographic parity, equal opportunity,
    maximum profit, predictive parity, single threshold. Each one receives its own
    deep copy of `data`. A criterion whose enforce_* function returns None as its
    data is announced but produces no results section.

    @:param data: Dictionary keyed by group, passed (deep-copied) to each enforce_*
    function

    @:returns: None; everything is written to standard output. The elapsed time is
    always printed last, in seconds.
    """
    begin = datetime.now()

    def predictive_parity_metrics():
        # Predictive parity additionally reports PPV right after accuracy
        metrics = _standard_group_metrics()
        metrics.insert(1, ("PPV", get_positive_predictive_value))
        return metrics

    # (announcement, section header, enforce call, metric list builder).
    # Lambdas keep every lookup lazy so work happens section by section.
    sections = [
        ("Attempting to enforce demographic parity...",
         "--------------------DEMOGRAPHIC PARITY RESULTS--------------------",
         lambda: enforce_demographic_parity(copy.deepcopy(data), 0.02),
         lambda: [("Probability of positive prediction", _group_positive_rate)]
         + _standard_group_metrics()),
        ("Attempting to enforce equal opportunity...",
         "--------------------EQUAL OPPORTUNITY RESULTS--------------------",
         lambda: enforce_equal_opportunity(copy.deepcopy(data), 0.01),
         _standard_group_metrics),
        ("Attempting to enforce maximum profit...",
         "--------------------MAXIMUM PROFIT RESULTS--------------------",
         lambda: enforce_maximum_profit(copy.deepcopy(data)),
         _standard_group_metrics),
        ("Attempting to enforce predictive parity...",
         "--------------------PREDICTIVE PARITY RESULTS--------------------",
         lambda: enforce_predictive_parity(copy.deepcopy(data), 0.01),
         predictive_parity_metrics),
        ("Attempting to enforce single threshold...",
         "--------------------SINGLE THRESHOLD RESULTS--------------------",
         lambda: enforce_single_threshold(copy.deepcopy(data)),
         _standard_group_metrics),
    ]

    last_position = len(sections) - 1
    for position, (announcement, header, enforce, build_metrics) in enumerate(sections):
        print(announcement)
        group_data, thresholds = enforce()
        if group_data is None:
            continue

        _print_results_section(header, group_data, thresholds, build_metrics())
        # Sections are separated by a blank line; the final one runs straight
        # into the timing line
        if position != last_position:
            print("")

    # Report the duration even when the last criterion could not be enforced, and
    # as a number of seconds rather than an H:MM:SS timedelta
    elapsed = (datetime.now() - begin).total_seconds()
    print("Postprocessing took approximately: " + str(elapsed) + " seconds")




In [0]:
import numpy as np
import csv
import random

def preprocess(metrics, recalculate=False, causal=False):
    """Build vectorized training and test sets restricted to the chosen metrics.

    The cleaned CSV data is either split afresh (recalculate=True, or when the
    cached .npy files cannot be loaded) or read back from the COMPAS_*.npy files
    in the working directory. Decile scores of both sets are saved to
    COMPAS_train_decile_scores.npy and COMPAS_test_decile_scores.npy.

    @:param metrics:     List of column titles to keep and convert to numeric codes
    @:param recalculate: When True, ignore the cached .npy files and split again
    @:param causal:      Forwarded to split_data

    @:returns: training_data, training_labels, test_data, test_labels (numpy
    arrays), reduced_categories (titles of the kept columns) and mappings (per
    metric: original value -> numeric code)
    """
    categories, data = clean_data()
    if recalculate:
        training_data, training_labels, test_data, test_labels = split_data(data, categories, 0.2, causal=causal)
        print("Recalculating data...")
    else:
        try:
            training_data = np.load("COMPAS_train_data.npy")
            # Labels are cached as strings; writing int(...) back into a string
            # array would silently turn them into strings again, so cast the array
            training_labels = np.load("COMPAS_train_labels.npy").astype(int)
            test_data = np.load("COMPAS_test_data.npy")
            test_labels = np.load("COMPAS_test_labels.npy").astype(int)
            data = np.concatenate((training_data, test_data))
            print("Loaded training data")

        except (OSError, ValueError):
            # Missing, unreadable or malformed cache files: fall back to a new split
            training_data, training_labels, test_data, test_labels = split_data(data, categories, 0.2, causal=causal)
            print("Could not locate data...")

    used_metrics = metrics
    training_data, reduced_categories, training_predictions = reduce_data(categories, training_data, used_metrics)
    np.save("COMPAS_train_decile_scores", training_predictions)
    test_data, reduced_categories, test_predictions = reduce_data(categories, test_data, used_metrics)
    np.save("COMPAS_test_decile_scores", test_predictions)
    mappings = determine_mappings(data, used_metrics)
    # The vectorize_* helpers modify their arguments in place
    vectorize_data(training_data, reduced_categories, metrics, mappings)
    vectorize_data(test_data, reduced_categories, metrics, mappings)
    vectorize_labels(training_labels)
    vectorize_labels(test_labels)

    training_data = np.array(training_data)
    test_data = np.array(test_data)
    training_labels = np.array(training_labels)
    test_labels = np.array(test_labels)

    return training_data, training_labels, test_data, test_labels, reduced_categories, mappings

#######################################################################################################################

def metric_vs_recid(metric):
    """Print, for every value of `metric`, the percentage of rows marked as recidivist.

    Reads compas-scores-two-years.csv from the working directory. Rows whose
    recidivism column (index 52) is "-1" are ignored.

    @:param metric: String matched as a substring against the column titles; when
    several titles match, the last one is used

    @:returns: None; one "<value>: <percentage>" line per value is printed,
    followed by a blank line. Prints a message and returns early when no column
    title contains `metric`.
    """
    is_recid = 52

    # Read-only access is enough; newline="" is what the csv module expects
    with open("compas-scores-two-years.csv", "r", newline="") as compas_data:
        reader = csv.reader(compas_data)
        categories = next(reader, [])

        index = -1
        for i, title in enumerate(categories):
            if metric in title:
                index = i

        if index == -1:
            print("Couldn't find metric: " + metric)
            return

        recid_counts = {}
        totals = {}
        # Iterating the reader directly stops cleanly at end of file (also for a
        # header-only file) without hiding real errors behind a bare except
        for row in reader:
            if row[is_recid] == "-1":
                continue
            value = row[index]
            recid_counts[value] = recid_counts.get(value, 0) + int(row[is_recid])
            totals[value] = totals.get(value, 0) + 1

    for value in recid_counts:
        print(str(value) + ": " + str(recid_counts[value] * 100 / totals[value]))
    print("")

#######################################################################################################################

def clean_data():
    """Load the COMPAS CSV and return a shuffled, class-balanced list of rows.

    Reads compas-scores-two-years.csv from the working directory. Rows whose
    recidivism column (index 52) is "-1" are dropped. The larger class ("0" versus
    everything else) is randomly down-sampled to the size of the smaller one.

    @:returns: Tuple (categories, data). categories is the CSV header row, data is
    a list of rows (each a list of strings) in random order.
    """
    is_recid = 52
    pos_data = []
    neg_data = []

    # Read-only access is enough; newline="" is what the csv module expects
    with open("compas-scores-two-years.csv", "r", newline="") as compas_data:
        reader = csv.reader(compas_data)
        categories = next(reader, [])
        # Iterating the reader directly stops cleanly at end of file (also for a
        # header-only file) without hiding real errors behind a bare except
        for row in reader:
            label = row[is_recid]
            if label == "-1":
                continue
            if label == "0":
                neg_data.append(row)
            else:
                pos_data.append(row)

    if len(pos_data) < len(neg_data):
        data = pos_data + random.sample(neg_data, len(pos_data))
    else:
        data = neg_data + random.sample(pos_data, len(neg_data))

    random.shuffle(data)

    return categories, data

#######################################################################################################################

def split_data(data, categories, percent_test, causal=False):
    """Split `data` into a training set and a class-balanced test set.

    The last rows of `data` form the test set. `data` is reshuffled in place until
    the test set holds as many "0" labels (column 52) as other labels. The four
    resulting lists are also saved to COMPAS_train_data.npy,
    COMPAS_train_labels.npy, COMPAS_test_data.npy and COMPAS_test_labels.npy.

    @:param data:         List of rows; column 52 holds the label
    @:param categories:   List of column titles (only used when causal is True)
    @:param percent_test: Fraction of the rows to place in the test set
    @:param causal:       When True, the data is first augmented through
    enforce_causal_discrimination for "race" / "Caucasian"

    @:note: An odd test size can never be balanced, so it is rounded down to the
    next even number.

    @:returns: training_data, training_labels, test_data, test_labels
    @:raises ValueError: when the test set would be empty or the data does not hold
    enough rows of each class to fill a balanced test set
    """
    if causal:
        data = enforce_causal_discrimination(data, categories, "race", "Caucasian")

    is_recid = 52

    sample_size = int(percent_test * len(data))
    # zeros == ones is impossible for an odd count and would loop forever
    sample_size -= sample_size % 2
    if sample_size == 0:
        # data[:-0] is empty and data[-0:] is everything, so refuse rather than
        # return a split with no training rows
        raise ValueError("percent_test is too small to produce a balanced test set")

    zeros_available = sum(1 for row in data if row[is_recid] == "0")
    ones_available = len(data) - zeros_available
    if min(zeros_available, ones_available) < sample_size // 2:
        raise ValueError("not enough rows of each class for a balanced test set")

    while True:
        training_data = data[:-sample_size]
        test_data = data[-sample_size:]

        test_labels = [row[is_recid] for row in test_data]
        zeros = sum(1 for label in test_labels if label == "0")
        ones = len(test_labels) - zeros

        if zeros == ones:
            break
        random.shuffle(data)

    training_labels = [row[is_recid] for row in training_data]

    np.save("COMPAS_train_data", training_data)
    np.save("COMPAS_train_labels", training_labels)
    np.save("COMPAS_test_data", test_data)
    np.save("COMPAS_test_labels", test_labels)
    return training_data, training_labels, test_data, test_labels

#######################################################################################################################

def vectorize_data(data, categories, metrics, mappings):
    """Replace, in place, the value of every listed metric with its numeric code.

    @:param data:       List of rows, modified in place
    @:param categories: List of column titles matching the rows of `data`
    @:param metrics:    List of metric names; each is matched as a substring
    against the titles and the first matching column is used (the last column
    when nothing matches)
    @:param mappings:   Dictionary metric -> {original value -> numeric code}

    @:returns: None
    """
    for metric in metrics:
        column = next(
            (position for position, title in enumerate(categories) if metric in title),
            -1,
        )

        for record in data:
            record[column] = mappings[metric][record[column]]

#######################################################################################################################

def vectorize_labels(labels):
    """Convert every entry of `labels` to int, writing the result back in place."""
    for position, raw_label in enumerate(labels):
        labels[position] = int(raw_label)

#######################################################################################################################

def reduce_data(categories, data, keep_metrics):
    """Keep only the requested columns of `data` and pull out the decile scores.

    @:param categories:   List of column titles matching the rows of `data`
    @:param data:         List of rows
    @:param keep_metrics: List of column titles to keep, in the desired order

    @:returns: Tuple (reduced_data, reduced_categories, predictions). reduced_data
    holds the kept columns of each row, reduced_categories their titles, and
    predictions the value of the last column whose title contains "decile_score"
    (the last column of the row when no title matches).
    """
    kept_columns = [categories.index(metric) for metric in keep_metrics]

    # No break: the last matching title wins
    score_column = -1
    for position, title in enumerate(categories):
        if "decile_score" in title:
            score_column = position

    reduced_data = [[record[column] for column in kept_columns] for record in data]
    predictions = [record[score_column] for record in data]
    reduced_categories = [categories[column] for column in kept_columns]

    return reduced_data, reduced_categories, predictions

#######################################################################################################################

def determine_mappings(data, keep_metrics):
    """Assign a numeric code to every value each kept metric takes in `data`.

    Column positions come from the header of compas-scores-two-years.csv in the
    working directory. For each metric, the distinct values found in `data` are
    sorted and numbered from 0.

    @:param data:         List of rows laid out like the rows of the CSV file
    @:param keep_metrics: List of metric names; each is matched as a substring
    against the header titles and the first matching column is used

    @:returns: Dictionary metric -> {value -> numeric code}, or None (after
    printing a message) when a metric matches no header title
    """
    # Only the header is needed; read-only access is enough
    with open("compas-scores-two-years.csv", "r", newline="") as compas_data:
        categories = next(csv.reader(compas_data), [])

    mappings = {}
    for metric in keep_metrics:
        # Reset per metric: otherwise a missing metric would silently reuse the
        # column found for the previous one
        index = -1
        for i, title in enumerate(categories):
            if metric in title:
                index = i
                break

        if index == -1:
            print("Couldn't find metric: " + metric)
            return

        possible_values = {row[index] for row in data}
        mappings[metric] = {value: code for code, value in enumerate(sorted(possible_values))}

    return mappings

#######################################################################################################################

def enforce_causal_discrimination(data, categories, reference_metric, reference_value):
    """Return `data` plus, for each row outside the reference class, a twin inside it.

    @:param data:             List of rows (each a list); not modified
    @:param categories:       List of column titles matching the rows of `data`
    @:param reference_metric: Title of the column to rewrite, i.e. 'race'
    @:param reference_value:  Value written into that column of every twin row

    @:returns: New list holding the original rows followed by one copy of each row
    whose reference column differs from reference_value, with that column set to
    reference_value
    """
    column = categories.index(reference_metric)
    augmented_data = list.copy(data)

    counterfactuals = []
    for record in data:
        if record[column] == reference_value:
            continue
        twin = list.copy(record)
        twin[column] = reference_value
        counterfactuals.append(twin)

    augmented_data.extend(counterfactuals)
    return augmented_data





In [72]:
from sklearn import svm
# from Preprocessing import preprocess
# from Report_Results import report_results
import numpy as np
# from utils import *


def SVM_classification(metrics):
    """Fit a linear support vector regressor and score the whole data set with it.

    The model is fitted on the training split returned by preprocess (cached
    data, no causal augmentation) and then predicts on the training and test
    splits concatenated, in that order.

    @:param metrics: List of column titles used as features

    @:returns: data, predictions, labels, categories, mappings, where data and
    labels are the concatenated splits and predictions the model output for data
    """
    train_x, train_y, test_x, test_y, categories, mappings = preprocess(
        metrics, recalculate=False, causal=False
    )

    np.random.seed(42)
    # The regularization constant C is the reciprocal of the number of test rows
    regressor = svm.LinearSVR(max_iter=5000, C=1.0 / float(len(test_x)))
    regressor.fit(train_x, train_y)

    data = np.concatenate((train_x, test_x))
    labels = np.concatenate((train_y, test_y))

    return data, regressor.predict(data), labels, categories, mappings

#######################################################################################################################

# Feature columns handed to the SVM model
metrics = ["sex", "age_cat", 'race', 'c_charge_degree', 'priors_count']

# Fit the model, then group its (prediction, label) pairs by race
data, predictions, labels, categories, mappings = SVM_classification(metrics)
race_cases = get_cases_by_metric(data, categories, "race", mappings, predictions, labels)

# Print the post-processing report for the race groups
report_results(race_cases)



Loaded training data
Attempting to enforce demographic parity...
--------------------DEMOGRAPHIC PARITY RESULTS--------------------

Probability of positive prediction for African-American: 0.6167061611374408
Probability of positive prediction for Caucasian: 0.6087357736081206
Probability of positive prediction for Hispanic: 0.6127886323268206
Probability of positive prediction for Other: 0.5976331360946746

Accuracy for African-American: 0.6252962085308057
Accuracy for Caucasian: 0.6334973854198708
Accuracy for Hispanic: 0.5968028419182948
Accuracy for Other: 0.6005917159763313

FPR for African-American: 0.4901694915254237
FPR for Caucasian: 0.47523838818824976
FPR for Hispanic: 0.513595166163142
FPR for Other: 0.4975609756097561

FNR for African-American: 0.28511309836927934
FNR for Caucasian: 0.25776684097200864
FNR for Hispanic: 0.24568965517241378
FNR for Other: 0.24812030075187969

TPR for African-American: 0.7148869016307207
TPR for Caucasian: 0.7422331590279914
TPR for Hispanic

In [10]:
from numpy.random import seed
seed(4940)
import tensorflow #import set_random_seed
# set_random_seed(80)

import numpy as np
from keras import regularizers
from keras.layers import Dense
from keras.models import Sequential
# from Preprocessing import preprocess
# from Report_Results import report_results
# from utils import *


def neural_network_classification(metrics):
    """Train a small feed-forward network and score the whole data set with it.

    The network has two relu layers (2 * len(metrics) and 30 units, both with an
    L2 kernel regularizer of 0.1) and one sigmoid output unit. It is trained on
    the training split returned by preprocess, with the test split as validation
    data, and then predicts on both splits concatenated, training rows first.

    @:param metrics: List of column titles used as features

    @:returns: data, predictions, labels, categories, mappings, where data and
    labels are the concatenated splits and predictions a 1-D array of model
    outputs for data
    """
    train_x, train_y, test_x, test_y, categories, mappings = preprocess(metrics)

    hidden_activation = "relu"
    feature_count = len(metrics)

    model = Sequential()
    model.add(
        Dense(
            feature_count * 2,
            activation=hidden_activation,
            kernel_regularizer=regularizers.l2(0.1),
            input_shape=(feature_count, ),
        )
    )
    model.add(
        Dense(30, activation=hidden_activation, kernel_regularizer=regularizers.l2(0.1))
    )
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss="binary_crossentropy", optimizer='adam')
    model.fit(
        train_x,
        train_y,
        validation_data=(test_x, test_y),
        epochs=30,
        batch_size=300,
        verbose=0,
    )

    data = np.concatenate((train_x, test_x))
    labels = np.concatenate((train_y, test_y))

    # Drop the trailing unit axis of the model output
    predictions = np.squeeze(model.predict(data), axis=1)

    return data, predictions, labels, categories, mappings

#######################################################################################################################


#######################################################################################################################


# Feature columns handed to the neural network
metrics = ["sex", "age_cat", "race", 'c_charge_degree', 'priors_count']

# Train a single network, then group its (prediction, label) pairs by race
data, predictions, labels, categories, mappings = neural_network_classification(metrics)
race_cases = get_cases_by_metric(data, categories, "race", mappings, predictions, labels)

# Print the post-processing report for the race groups
report_results(race_cases)


Loaded training data
Attempting to enforce demographic parity...
--------------------DEMOGRAPHIC PARITY RESULTS--------------------

Probability of positive prediction for African-American: 0.3728813559322034
Probability of positive prediction for Caucasian: 0.3646308113035551
Probability of positive prediction for Hispanic: 0.3805309734513274
Probability of positive prediction for Other: 0.3611940298507463

Accuracy for African-American: 0.552482902170681
Accuracy for Caucasian: 0.5925250683682771
Accuracy for Hispanic: 0.6300884955752213
Accuracy for Other: 0.6537313432835821

FPR for African-American: 0.29343365253077974
FPR for Caucasian: 0.2964169381107492
FPR for Hispanic: 0.2882882882882883
FPR for Other: 0.25742574257425743

FNR for African-American: 0.5660178853235139
FNR for Caucasian: 0.5486542443064182
FNR for Hispanic: 0.4870689655172414
FNR for Other: 0.48120300751879697

TPR for African-American: 0.43398211467648606
TPR for Caucasian: 0.4513457556935818
TPR for Hispanic:

In [74]:
from sklearn.naive_bayes import MultinomialNB
import numpy as np
# from Preprocessing import preprocess
# from Report_Results import report_results
# from utils import *


def naive_bayes_classification(metrics):
    """Fit a multinomial naive Bayes model and score the whole data set with it.

    The model is fitted on the training split returned by preprocess and then
    evaluated on the training and test splits concatenated, in that order.

    @:param metrics: List of column titles used as features

    @:returns: data, predictions, labels, categories, mappings, where data and
    labels are the concatenated splits and predictions a list holding, per row,
    the second column of predict_proba
    """
    train_x, train_y, test_x, test_y, categories, mappings = preprocess(metrics)

    classifier = MultinomialNB()
    classifier.fit(train_x, train_y)

    data = np.concatenate((train_x, test_x))
    labels = np.concatenate((train_y, test_y))

    class_predictions = classifier.predict_proba(data)
    predictions = [class_predictions[row][1] for row in range(len(labels))]

    return data, predictions, labels, categories, mappings


# Feature columns handed to the naive Bayes model
metrics = ["race", "sex", "age", 'c_charge_degree', 'priors_count', 'c_charge_desc']
# Fit the model, then group its (prediction, label) pairs by race
data, predictions, labels, categories, mappings = naive_bayes_classification(metrics)
race_cases = get_cases_by_metric(data, categories, "race", mappings, predictions, labels)

# Print the post-processing report for the race groups
report_results(race_cases)

Loaded training data
Attempting to enforce demographic parity...
--------------------DEMOGRAPHIC PARITY RESULTS--------------------

Probability of positive prediction for African-American: 0.591824644549763
Probability of positive prediction for Caucasian: 0.5733620424484774
Probability of positive prediction for Hispanic: 0.5772646536412078
Probability of positive prediction for Other: 0.5857988165680473

Accuracy for African-American: 0.6170023696682464
Accuracy for Caucasian: 0.6291910181482621
Accuracy for Hispanic: 0.6252220248667851
Accuracy for Other: 0.6420118343195266

FPR for African-American: 0.4711864406779661
FPR for Caucasian: 0.4441710243002153
FPR for Hispanic: 0.459214501510574
FPR for Other: 0.45365853658536587

FNR for African-American: 0.3145712782745923
FNR for Caucasian: 0.29744693940326056
FNR for Hispanic: 0.2543103448275862
FNR for Other: 0.21052631578947367

TPR for African-American: 0.6854287217254077
TPR for Caucasian: 0.7025530605967394
TPR for Hispanic: 0