In [22]:
import numpy as np
import pandas as pd
import cv2 as cv2
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, roc_auc_score, accuracy_score, f1_score, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import time
import scipy.optimize as optim
import copy
import random
import pickle
from IPython.display import Markdown, display
import seaborn as sns
import matplotlib.patches as patches
from tabulate import tabulate
import sys

# Learning fair representations (LFR)

In [23]:
raw_data = pd.read_csv('https://raw.githubusercontent.com/propublica/compas-analysis/master/compas-scores-two-years.csv')

In [24]:
raw_data.head()

Unnamed: 0,id,name,first,last,compas_screening_date,sex,dob,age,age_cat,race,...,v_decile_score,v_score_text,v_screening_date,in_custody,out_custody,priors_count.1,start,end,event,two_year_recid
0,1,miguel hernandez,miguel,hernandez,2013-08-14,Male,1947-04-18,69,Greater than 45,Other,...,1,Low,2013-08-14,2014-07-07,2014-07-14,0,0,327,0,0
1,3,kevon dixon,kevon,dixon,2013-01-27,Male,1982-01-22,34,25 - 45,African-American,...,1,Low,2013-01-27,2013-01-26,2013-02-05,0,9,159,1,1
2,4,ed philo,ed,philo,2013-04-14,Male,1991-05-14,24,Less than 25,African-American,...,3,Low,2013-04-14,2013-06-16,2013-06-16,4,0,63,0,1
3,5,marcu brown,marcu,brown,2013-01-13,Male,1993-01-21,23,Less than 25,African-American,...,6,Medium,2013-01-13,,,1,0,1174,0,0
4,6,bouthy pierrelouis,bouthy,pierrelouis,2013-03-26,Male,1973-01-22,43,25 - 45,Other,...,1,Low,2013-03-26,,,2,0,1102,0,0


## Data Cleaning

In [25]:
processed_data = raw_data.loc[raw_data['race'].isin(["African-American", "Caucasian"])]

processed_data = processed_data[['sex', 'age', 'age_cat', 'race', 'decile_score', 'juv_fel_count', 'juv_misd_count', 'juv_other_count',
            'priors_count', 'days_b_screening_arrest', 'c_jail_in', 'c_jail_out', 'c_charge_degree', 'is_recid', 
             'score_text', 'two_year_recid']]

# If the charge date of a defendants Compas scored crime was not within 30 days from when the person was arrested, 
# we can assume that because of data quality reasons, that we do not have the right offense.

processed_data = processed_data.loc[processed_data['days_b_screening_arrest'] <= 30]
processed_data = processed_data.loc[processed_data['days_b_screening_arrest'] >= -30]

# The recidivist flag (is_recid) should be -1 if we could not find a compas case at all.

processed_data = processed_data.loc[processed_data['is_recid'] != -1]
# Ordinary traffic offenses (c_charge_degree = 'O') will not result in Jail time and hence are removed 
# (only two of them).

processed_data = processed_data.loc[processed_data['c_charge_degree'] != 'O']
# score_text shouldn't be 'N/A'

processed_data = processed_data.loc[processed_data['score_text'] != 'N/A']
processed_data['length_of_stay'] = (pd.to_datetime(processed_data['c_jail_out'])-pd.to_datetime(processed_data['c_jail_in'])).apply(lambda x: x.days)
processed_data = processed_data.drop(columns=['c_jail_in', 'c_jail_out'])

In [26]:
# replace the values of the sensitive attribute race as follows: Caucasian -> 1, African-American -> 0
processed_data = processed_data.replace({'race': 'Caucasian'}, 1)
processed_data = processed_data.replace({'race': 'African-American'}, 0)
# replace the values of sex as follows
processed_data = processed_data.replace({'sex': 'Male'}, 1)
processed_data = processed_data.replace({'sex': 'Female'}, 0)

# replace the values of age_cat as follows
processed_data = processed_data.replace({'age_cat': 'Less than 25'}, 0)
processed_data = processed_data.replace({'age_cat': '25 - 45'}, 1)
processed_data = processed_data.replace({'age_cat': 'Greater than 45'}, 2)

# replace the values of c_charge_degree as follows
processed_data = processed_data.replace({'c_charge_degree': 'F'}, 0)
processed_data = processed_data.replace({'c_charge_degree': 'M'}, 1)

# replace the values of score_text as follows
processed_data = processed_data.replace({'score_text': 'Low'}, 0)
processed_data = processed_data.replace({'score_text': 'Medium'}, 1)
processed_data = processed_data.replace({'score_text': 'High'}, 2)

In [27]:
# check whether there are NaN values in the final dataset as well as the number of unique values per column

unique_NAN_df = pd.DataFrame(columns=['column name', '# of unique values', '# of NaN values'])
for item in processed_data.columns:
    unique_NAN_df = unique_NAN_df.append({
        'column name': item, 
        '# of unique values': len(processed_data[item].unique()),
        '# of NaN values': sum(processed_data[item].isna() == True)}, ignore_index = True)
    
unique_NAN_df = unique_NAN_df.style.hide_index()
unique_NAN_df

column name,# of unique values,# of NaN values
sex,2,0
age,62,0
age_cat,3,0
race,2,0
decile_score,10,0
juv_fel_count,9,0
juv_misd_count,10,0
juv_other_count,8,0
priors_count,36,0
days_b_screening_arrest,56,0


In [28]:
# move two_year_recid to the end

cols = list(processed_data.columns.values)
cols.pop(cols.index('two_year_recid'))
processed_data = processed_data[cols+['two_year_recid']]
# move race to the first column

race_column = processed_data.pop('race')
processed_data.insert(0, 'race', race_column)

processed_data = processed_data.drop(columns=['age', 'juv_fel_count', 'juv_misd_count', 'juv_other_count'])

In [29]:
processed_data.to_csv("../processed-compas-scores-two-years.csv", index=False)

In [30]:
data = np.array(processed_data)
y = np.array(data[:,-1]).flatten()
data = data[:,:-1]
sensitive = data[:,0]
data = preprocessing.scale(data)
data = data[:,1:]

# Split data into sensitive and nonsensitive data (sensitive --> race: Caucasian)

sensitive_idx = np.array(np.where(sensitive==1))[0].flatten()
nonsensitive_idx = np.array(np.where(sensitive!=1))[0].flatten()
data_sensitive = data[sensitive_idx,:]
data_nonsensitive = data[nonsensitive_idx,:]
y_sensitive = y[sensitive_idx]
y_nonsensitive = y[nonsensitive_idx]

In [31]:
# split sensitive data into training, validation, and testing sets

X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(data_sensitive, y_sensitive, test_size= 0.2, random_state=42)
X_train_s, X_valid_s, y_train_s, y_valid_s = train_test_split(X_train_s, y_train_s, test_size = 0.25, random_state=42)
# split non-sensitive data into training, validation, and testing sets

X_train_n, X_test_n, y_train_n, y_test_n = train_test_split(data_nonsensitive, y_nonsensitive, test_size= 0.2, random_state=42)
X_train_n, X_valid_n, y_train_n, y_valid_n = train_test_split(X_train_n, y_train_n, test_size = 0.25, random_state=42)
# create final training, validation, and testing sets

X_train = np.concatenate((X_train_s, X_train_n))
X_valid = np.concatenate((X_valid_s, X_valid_n))
X_test = np.concatenate((X_test_s, X_test_n))

Y_train = np.concatenate((y_train_s, y_train_n))
Y_valid = np.concatenate((y_valid_s, y_valid_n))
Y_test = np.concatenate((y_test_s, y_test_n))

## LFR Model

In [32]:
# this function returns the distance matrix
def distances(X, v, alpha, N, P, k):
    dists = np.zeros((N, k))
    for i in range(N):
        for p in range(P):
            for j in range(k):    
                dists[i, j] += (X[i, p] - v[j, p]) * (X[i, p] - v[j, p]) * alpha[p]
    return dists

# this function returns the M_nk
def M_nk(dists, N, k):
    M_nk = np.zeros((N, k))
    exp = np.zeros((N, k))
    denom = np.zeros(N)
    for i in range(N):
        for j in range(k):
            exp[i, j] = np.exp(-1 * dists[i, j])
            denom[i] += exp[i, j]
        for j in range(k):
            if denom[i]:
                M_nk[i, j] = exp[i, j] / denom[i]
            else:
                M_nk[i, j] = exp[i, j] / 1e-6
    return M_nk
 
# this function returns the M_k array
def M_k(M_nk, N, k):
    M_k = np.zeros(k)
    for j in range(k):
        for i in range(N):
            M_k[j] += M_nk[i, j]
        M_k[j] /= N
    return M_k

# this function reconstructs of X to x_n_hat and L_x
def x_n_hat(X, M_nk, v, N, P, k):
    x_n_hat = np.zeros((N, P))
    L_x = 0.0
    for i in range(N):
        for p in range(P):
            for j in range(k):
                x_n_hat[i, p] += M_nk[i, j] * v[j, p]
            L_x += (X[i, p] - x_n_hat[i, p]) * (X[i, p] - x_n_hat[i, p])
    return x_n_hat, L_x

# this function returns a list of prediction
def yhat(M_nk, y, w, N, k):
    yhat = np.zeros(N)
    L_y = 0.0
    for i in range(N):
        for j in range(k):
            yhat[i] += M_nk[i, j] * w[j]
        yhat[i] = 1e-6 if yhat[i] <= 0 else yhat[i]
        yhat[i] = 0.999 if yhat[i] >= 1 else yhat[i]
        L_y += -1 * y[i] * np.log(yhat[i]) - (1.0 - y[i]) * np.log(1.0 - yhat[i])
    return yhat, L_y


# this function returns the objective function we want to minimize
def LFR_objective(params, data_sensitive, data_nonsensitive, y_sensitive, 
        y_nonsensitive,  k=10, A_x = 1e-4, A_y = 0.1, A_z = 1000):
    LFR_objective.iters += 1 
    Ns, P = data_sensitive.shape
    Nns, _ = data_nonsensitive.shape
    
    alpha0 = params[:P]
    alpha1 = params[P : 2 * P]
    w = params[2 * P : (2 * P) + k]
    v = np.matrix(params[(2 * P) + k:]).reshape((k, P))
        
    dists_sensitive = distances(data_sensitive, v, alpha0, Ns, P, k)
    dists_nonsensitive = distances(data_nonsensitive, v, alpha1, Nns, P, k)

    M_nk_sensitive = M_nk(dists_sensitive, Ns, k)
    M_nk_nonsensitive = M_nk(dists_nonsensitive, Nns, k)
    
    M_k_sensitive = M_k(M_nk_sensitive, Ns, k)
    M_k_nonsensitive = M_k(M_nk_nonsensitive, Nns, k)
    
    L_z = 0.0
    for j in range(k):
        L_z += abs(M_k_sensitive[j] - M_k_nonsensitive[j])

    x_n_hat_sensitive, L_x_sen = x_n_hat(data_sensitive, M_nk_sensitive, v, Ns, P, k)
    x_n_hat_nonsensitive, L_x_nsen = x_n_hat(data_nonsensitive, M_nk_nonsensitive, v, Nns, P, k)
    L_x = L_x_sen + L_x_nsen

    yhat_sensitive, L_y_sen = yhat(M_nk_sensitive, y_sensitive, w, Ns, k)
    yhat_nonsensitive, L_y_nsen = yhat(M_nk_nonsensitive, y_nonsensitive, w, Nns, k)
    L_y = L_y_sen + L_y_nsen

    objective = A_x * L_x + A_y * L_y + A_z * L_z

    return objective

LFR_objective.iters = 0

def LFR(X_train_s, X_train_n, y_train_s, y_train_n, K=10, A_x = 1e-4, A_y = 0.1, A_z = 1000, iter = 100):
    rez = np.random.uniform(size=X_train_s.shape[1] * 2 + K + X_train_s.shape[1] * K)
    bnd = []
    for i, k2 in enumerate(rez):
        if i < X_train_s.shape[1] * 2 or i >= X_train_s.shape[1] * 2 + K:
            bnd.append((None, None))
        else:
            bnd.append((0, 1))
    
    # minimize the metric by parameters alpha, w and v
    para, min_L, d = optim.fmin_l_bfgs_b(LFR_objective, x0=rez, epsilon=1e-5, 
                                         args=(X_train_s, X_train_n, y_train_s, y_train_n, K, A_z, A_x, A_y), 
                                         bounds = bnd, approx_grad=True, 
                                         maxfun=iter, maxiter=iter)
    
    return para

## Evaluation Matrices

In [33]:
# this function defines the threshold for y_n_hat to be 0 or 1
def predic_category(y):
    for i in range(len(y)):
        if y[i] >= 0.5:
            y[i] = 1
        else:
            y[i] = 0
    return y

# this function calculate y_n_hat by using the best parameters
def predict(params, data_sensitive, data_nonsensitive, k=10):
    
    Ns, P = data_sensitive.shape
    Nns, _ = data_nonsensitive.shape
    
    # form parameters in new forms
    alpha0 = params[:P]
    alpha1 = params[P : 2 * P]
    w = params[2 * P : (2 * P) + k]
    v = np.matrix(params[(2 * P) + k:]).reshape((k, P))
    
    dists_sensitive = distances(data_sensitive, v, alpha0, Ns, P, k)
    dists_nonsensitive = distances(data_nonsensitive, v, alpha1, Nns, P, k)

    M_nk_sensitive = M_nk(dists_sensitive, Ns, k)
    M_nk_nonsensitive = M_nk(dists_nonsensitive, Nns, k)
    
    M_k_sensitive = M_k(M_nk_sensitive, Ns, k)
    M_k_nonsensitive = M_k(M_nk_nonsensitive, Nns, k)
    
    # make predictions for sensitive data
    yhat_sensitive = np.zeros(Ns)
    for i in range(Ns):
        for j in range(k):
            yhat_sensitive[i] += M_nk_sensitive[i, j] * w[j]
        yhat_sensitive[i] = 1e-6 if yhat_sensitive[i] <= 0 else yhat_sensitive[i]
        yhat_sensitive[i] = 0.999 if yhat_sensitive[i] >= 1 else yhat_sensitive[i]
        
    # make predictions for nonsensitive data
    yhat_nonsensitive = np.zeros(Nns)
    for i in range(Nns):
        for j in range(k):
            yhat_nonsensitive[i] += M_nk_nonsensitive[i, j] * w[j]
        yhat_nonsensitive[i] = 1e-6 if yhat_nonsensitive[i] <= 0 else yhat_nonsensitive[i]
        yhat_nonsensitive[i] = 0.999 if yhat_nonsensitive[i] >= 1 else yhat_nonsensitive[i]
        
    final_y_s = predic_category(yhat_sensitive)
    final_y_n = predic_category(yhat_nonsensitive)
    
    return final_y_s, final_y_n

def calc_accuracy(y_sen, y_nsen, y_sen_label, y_nsen_label):
    y_sen_df = pd.DataFrame(y_sen)
    y_nsen_df = pd.DataFrame(y_nsen)
    y_label = pd.DataFrame(y_sen_label).append(pd.DataFrame(y_nsen_label))
    y_df = y_sen_df.append(y_nsen_df)
    
    acc_sen = accuracy_score(y_sen_label, y_sen_df)
    acc_nsen = accuracy_score(y_nsen_label, y_nsen_df)
    total_accuracy = accuracy_score(y_label, y_df)
    
    return acc_sen, acc_nsen, total_accuracy

def calc_calibration(acc_sen, acc_nsen):
    return abs(acc_sen - acc_nsen)

def get_model_performance(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    matrix = confusion_matrix(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    return accuracy, matrix, f1

def plot_model_performance(y_pred_s, y_pred_n, y_pred, y_true_s, y_true_n, y_true):
    accuracy_s, matrix_s, f1_s = get_model_performance(y_true_s, y_pred_s)

    display(Markdown('#### Sensitive data (Caucasians):'))
    print(f'Accuracy: {accuracy_s}')
    print(f'F1 score: {f1_s}')
    
    accuracy_n, matrix_n, f1_n = get_model_performance(y_true_n, y_pred_n)

    display(Markdown('#### Nonsensitive data (African-Americans):'))
    print(f'Accuracy: {accuracy_n}')
    print(f'F1 score: {f1_n}')
    
    accuracy, matrix, f1 = get_model_performance(y_true, y_pred)

    display(Markdown('#### All data:'))
    print(f'Accuracy: {accuracy}')
    print(f'F1 score: {f1}')

    fig = plt.figure(figsize=(15, 6))
    ax = fig.add_subplot(1, 3, 1)
    sns.heatmap(matrix_s, annot=True, cmap='Blues', fmt='g')
    plt.title('Confusion Matrix (sensitive data)')
    
    ax = fig.add_subplot(1, 3, 2)
    sns.heatmap(matrix_n, annot=True, cmap='Blues', fmt='g')
    plt.title('Confusion Matrix (nonsensitive data)')
    
    ax = fig.add_subplot(1, 3, 3)
    sns.heatmap(matrix, annot=True, cmap='Blues', fmt='g')
    plt.title('Confusion Matrix (all data)')
    
def equal_opportunity_difference(y_test_s, y_test_n, pred_test_s, pred_test_n):
    tpr_s = 0
    for i in range(len(y_test_s)):
        if y_test_s[i] == 1 and pred_test_s[i] == 1:
            tpr_s += 1 
    tpr_s = tpr_s/len(y_test_s)
    tpr_n = 0
    for i in range(len(y_test_n)):
        if y_test_n[i] == 1 and pred_test_n[i] == 1:
            tpr_n += 1 
    tpr_n = tpr_n/len(y_test_n)
    
    equal_opportunity_difference = tpr_s - tpr_n
    
    return equal_opportunity_difference

def avg_abs_odds_difference(y_test_s, y_test_n, pred_test_s, pred_test_n):
    tpr_s = 0
    for i in range(len(y_test_s)):
        if y_test_s[i] == 1 and pred_test_s[i] == 1:
            tpr_s += 1 
    tpr_s = tpr_s/len(y_test_s)
    tpr_n = 0
    for i in range(len(y_test_n)):
        if y_test_n[i] == 1 and pred_test_n[i] == 1:
            tpr_n += 1 
    tpr_n = tpr_n/len(y_test_n)
    
    fpr_s = 0
    for i in range(len(y_test_s)):
        if y_test_s[i] == 0 and pred_test_s[i] == 1:
            fpr_s += 1 
    fpr_s = fpr_s/len(y_test_s)
    fpr_n = 0
    for i in range(len(y_test_n)):
        if y_test_n[i] == 0 and pred_test_n[i] == 1:
            fpr_n += 1 
    fpr_n = fpr_n/len(y_test_n)
    
    avg_abs_odds_diff = 0.5*(abs(fpr_s - fpr_n) + abs(tpr_s - tpr_n))
    
    return avg_abs_odds_diff

def fair_metrics(pred_test_s, pred_test_n, pred_test, y_test_s, y_test_n, y_test):
    
    cols = ['calibration', 'equal_opportunity_difference', 'average_abs_odds_difference',  'disparate_impact']
    obj_fairness = [[0,0,0,1]]
    
    fair_metrics = pd.DataFrame(data=obj_fairness, index=['objective'], columns=cols)
    
    acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_test_s, pred_test_n, y_test_s, y_test_n)
    
    calibration = acc_sen - acc_nsen
    
    equal_opp_diff = equal_opportunity_difference(y_test_s, y_test_n, pred_test_s, pred_test_n)
    
    avg_abs_odds_diff = avg_abs_odds_difference(y_test_s, y_test_n, pred_test_s, pred_test_n)
    
    disparate_impact = acc_sen/acc_nsen
    
    row = pd.DataFrame([[calibration, equal_opp_diff, avg_abs_odds_diff, disparate_impact]],
                           columns  = cols,
                           index = ['Race']
                          )
    
    fair_metrics = fair_metrics.append(row)
    fair_metrics = fair_metrics.replace([-np.inf, np.inf], 2)
    
    return fair_metrics

def plot_fair_metrics(fair_metrics):
    fig, ax = plt.subplots(figsize=(20,4), ncols=5, nrows=1)

    plt.subplots_adjust(
        left    =  0.125, 
        bottom  =  0.1, 
        right   =  0.9, 
        top     =  0.9, 
        wspace  =  .5, 
        hspace  =  1.1
    )

    y_title_margin = 1.2

    plt.suptitle("Fairness metrics", y = 1.09, fontsize=20)
    sns.set(style="dark")

    cols = fair_metrics.columns.values
    obj = fair_metrics.loc['objective']
    size_rect = [0.2,0.2,0.2,0.4]
    rect = [-0.1,-0.1,-0.1,0.8]
    bottom = [-1,-1,-1,0]
    top = [1,1,1,2]
    bound = [[-0.1,0.1],[-0.1,0.1],[-0.1,0.1],[0.8,1.2]]

    display(Markdown("### Check bias metrics :"))
    display(Markdown("A model can be considered bias if just one of these four metrics show that this model is biased."))
    for attr in fair_metrics.index[1:len(fair_metrics)].values:
        display(Markdown("#### For the %s attribute :"%attr))
        check = [bound[i][0] < fair_metrics.loc[attr][i] < bound[i][1] for i in range(0,4)]
        display(Markdown("With default thresholds, bias against unprivileged group detected in **%d** out of 4 metrics"%(4 - sum(check))))

    for i in range(0,4):
        plt.subplot(1, 4, i+1)
        ax = sns.barplot(x=fair_metrics.index[1:len(fair_metrics)], y=fair_metrics.iloc[1:len(fair_metrics)][cols[i]])
        
        for j in range(0,len(fair_metrics)-1):
            a, val = ax.patches[j], fair_metrics.iloc[j+1][cols[i]]
            marg = -0.2 if val < 0 else 0.1
            ax.text(a.get_x()+a.get_width()/4, a.get_y()+a.get_height()+marg, round(val, 3), fontsize=15,color='black')

        plt.ylim(bottom[i], top[i])
        plt.setp(ax.patches, linewidth=0)
        ax.add_patch(patches.Rectangle((-5,rect[i]), 10, size_rect[i], alpha=0.3, facecolor="green", linewidth=1, linestyle='solid'))
        plt.axhline(obj[i], color='black', alpha=0.3)
        plt.title(cols[i])
        ax.set_ylabel('')    
        ax.set_xlabel('')

def compare_models(pred_1_test_s, pred_1_test_n, pred_2_test_s, pred_2_test_n, y_test_s, y_test_n, y_PR_test_s, y_PR_test_n,
                  fair_metrics_1, fair_metrics_2, model1, model2):
    acc_1_sen, acc_1_nsen, total_accuracy_1 = calc_accuracy(pred_1_test_s, pred_1_test_n, y_test_s, y_test_n)
    acc_2_sen, acc_2_nsen, total_accuracy_2 = calc_accuracy(pred_2_test_s, pred_2_test_n, y_PR_test_s, y_PR_test_n)

    calibration_1 = fair_metrics_1.iloc[1]['calibration']
    equal_opp_diff_1 = fair_metrics_1.iloc[1]['equal_opportunity_difference']
    avg_abs_odds_diff_1 = fair_metrics_1.iloc[1]['average_abs_odds_difference']
    disparate_impact_1 = fair_metrics_1.iloc[1]['disparate_impact']

    calibration_2 = fair_metrics_2.iloc[1]['calibration']
    equal_opp_diff_2 = fair_metrics_2.iloc[1]['equal_opportunity_difference']
    avg_abs_odds_diff_2 = fair_metrics_2.iloc[1]['average_abs_odds_difference']
    disparate_impact_2 = fair_metrics_2.iloc[1]['disparate_impact']
    
    print(tabulate([['accuracy', total_accuracy_1, total_accuracy_2], 
                ['calibration', calibration_1, calibration_2],
                ['equal_opportunity_difference', equal_opp_diff_1, equal_opp_diff_2],
                ['average_abs_odds_difference', avg_abs_odds_diff_1, avg_abs_odds_diff_2],
                ['disparate_impact', disparate_impact_1, disparate_impact_2]], headers=['metric', model1, model2]))
 

## Implementing Model

In [34]:
iter_max = 500

model_train_time = []
train_Accuracy = []
val_Accuracy = []
train_Calibration = []
val_Calibration = []

best_accuracy = 0

for i in range(100, iter_max+100, 100):

    #model training
    start = time.time()
    #random.seed(1024); np.random.seed(1024)
    final_parameters = LFR(X_train_s, X_train_n, y_train_s, y_train_n, 10, 1e-4, 0.1, 1000, iter = i)
    model_train_time.append(time.time() - start)

    #Train set accuracy and calibration
    pred_train_s, pred_train_n = predict(final_parameters, X_train_s, X_train_n, 10)
    acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_train_s, pred_train_n, y_train_s, y_train_n)
    train_Accuracy.append(total_accuracy)

    calibration = calc_calibration(acc_sen, acc_nsen)
    train_Calibration.append(calibration)

    #Validation set accuracy and calibration
    pred_val_s, pred_val_n = predict(final_parameters, X_valid_s, X_valid_n, 10)
    acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_val_s, pred_val_n, y_valid_s, y_valid_n)
    val_Accuracy.append(total_accuracy)

    calibration = calc_calibration(acc_sen, acc_nsen)
    val_Calibration.append(calibration)

    if total_accuracy > best_accuracy:
        best_accuracy = total_accuracy
        best_model = copy.deepcopy(final_parameters)

    print("Finished for " + str(i) + " iterations in " + str(time.time() - start) + " secs")

Finished for 100 iterations in 991.5338418483734 secs
Finished for 200 iterations in 621.6066381931305 secs
Finished for 300 iterations in 895.771886587143 secs
Finished for 400 iterations in 1628.4575157165527 secs
Finished for 500 iterations in 2639.538587808609 secs


In [35]:
filename = 'best_model.sav'
pickle.dump(best_model, open(filename, 'wb'))

iterations = [i for i in range(100, iter_max+100, 100)]

print(iterations)
print(model_train_time)
print(train_Accuracy)
print(val_Accuracy)
print(train_Calibration)
print(val_Calibration)

[100, 200, 300, 400, 500]
[989.1727020740509, 619.2910935878754, 894.5519225597382, 1625.0544106960297, 2635.091701745987]
[0.5262160454832596, 0.543903979785218, 0.5764371446620341, 0.5906506632975363, 0.3063802905874921]
[0.4895833333333333, 0.5539772727272727, 0.5785984848484849, 0.6070075757575758, 0.2793560606060606]
[0.04422853170316443, 0.12143260046498949, 0.03705137571522832, 0.0687834718519027, 0.13752281757801685]
[0.024153216002394007, 0.137366974021359, 0.08457179194643427, 0.12027979875437189, 0.09720762339386915]


In [36]:
filename = 'best_model.sav'
loaded_model = pickle.load(open(filename, 'rb'))

Training set

In [37]:
# get predictions for the training dataset

pred_train_s, pred_train_n = predict(loaded_model, X_train_s, X_train_n, 10)

# get accuracy for the training dataset

acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_train_s, pred_train_n, y_train_s, y_train_n)

print("The accuracy for Caucasians is: ", acc_sen)
print("The accuracy for African-Americans is: ", acc_nsen)
print("The total accuracy is: ", total_accuracy)

# get calibration for the training dataset

calibration = calc_calibration(acc_sen, acc_nsen)

print("The calibration is: ", calibration)

The accuracy for Caucasians is:  0.6320380650277557
The accuracy for African-Americans is:  0.563254593175853
The total accuracy is:  0.5906506632975363
The calibration is:  0.0687834718519027


Validation set

In [38]:
# get predictions for the validation dataset

pred_val_s, pred_val_n = predict(loaded_model, X_valid_s, X_valid_n, 10)

# get accuracy for the validation dataset

acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_val_s, pred_val_n, y_valid_s, y_valid_n)

print("The accuracy for Caucasians is: ", acc_sen)
print("The accuracy for African-Americans is: ", acc_nsen)
print("The total accuracy is: ", total_accuracy)

# get calibration for the validation dataset

calibration = calc_calibration(acc_sen, acc_nsen)

print("The calibration is: ", calibration)

The accuracy for Caucasians is:  0.6793349168646081
The accuracy for African-Americans is:  0.5590551181102362
The total accuracy is:  0.6070075757575758
The calibration is:  0.12027979875437189


## Evaluation

Testing set

In [39]:
# get predictions for the testing dataset

pred_test_s, pred_test_n = predict(loaded_model, X_test_s, X_test_n, 10)

# get accuracy for the testing dataset

acc_sen, acc_nsen, total_accuracy = calc_accuracy(pred_test_s, pred_test_n, y_test_s, y_test_n)

print("The accuracy for Caucasians is: ", acc_sen)
print("The accuracy for African-Americans is: ", acc_nsen)
print("The total accuracy is: ", total_accuracy)

# get calibration for the testing dataset

calibration = calc_calibration(acc_sen, acc_nsen)

print("The calibration is: ", calibration)

The accuracy for Caucasians is:  0.6603325415676959
The accuracy for African-Americans is:  0.5480314960629922
The total accuracy is:  0.5928030303030303
The calibration is:  0.11230104550470377
