In [1]:
import torch
import os
import numpy as np
from tqdm import tqdm
from sklearn.metrics import roc_curve

In [2]:
from shutil import copy2

In [3]:
import matplotlib.pyplot as plt

In [4]:
# Collect the score (column index 2, parsed as float) and the label
# (column index 3, parsed as int) from every "<race>_<gender>" CSV under
# ./sims into two flat, parallel lists covering all groups.
races = ['African','Asian','Caucasian','Indian']
genders = ['Man','Woman']
labels = []
dist = []
for race in races:
    for gender in genders:
        with open(f"./sims/{race}_{gender}_sims.csv",'r') as f:
            rows = [raw.strip().split(',') for raw in f.readlines()]
        # Keep the two lists aligned: one entry per CSV row, in file order.
        labels.extend(int(row[3]) for row in rows)
        dist.extend(float(row[2]) for row in rows)

In [5]:
# Pick the global decision threshold: the ROC operating point at which
# (true-positive rate - false-positive rate) is largest over all groups.
fpr, tpr, thresholds = roc_curve(labels, dist)
idx = (tpr - fpr).argmax()
threshold = thresholds[idx]
print(f"Threshold: {threshold}")

Threshold: 0.4119390845298767


In [6]:
# Apply the global `threshold` to every pair of each race/gender group:
#   * write one decision row per pair to ./results/<group>_results.csv
#     (first two input columns, predicted 0/1, original label column),
#   * tally the outcome counts in `accuracies[<group>]`,
#   * save the group's ROC curve (fpr, then tpr) to ./roc/<group>_roc.npy.
#
# NOTE: the 'TN' and 'FN' keys are swapped relative to the textbook names.
# 'TN' counts label-1 pairs that were rejected (score < threshold) and 'FN'
# counts label-0 pairs that were rejected. The gar/far/scores formulas in
# the metrics cell are written against exactly this naming, so the keys are
# intentionally left unchanged here.
#
# NOTE: this cell rebinds `dist`, `labels`, `fpr`, `tpr` and `thresholds`
# to per-group values; after it runs they describe only the last group.
# Re-run the loading cell before recomputing the global threshold.
races = ['African','Asian','Caucasian','Indian']
genders = ['Man','Woman']
accuracies = {}

# The output directories are not guaranteed to exist on a fresh checkout.
os.makedirs("./results", exist_ok=True)
os.makedirs("./roc", exist_ok=True)

for race in races:
    for gender in genders:
        group = f"{race}_{gender}"
        counts = {'TP':0,'TN':0,'FP':0,'FN':0}
        accuracies[group] = counts
        with open(f"./sims/{group}_sims.csv",'r') as f:
            lines = f.readlines()
        dist = []
        labels = []
        # Open the results file once per group, in 'w' mode, so re-running
        # the cell replaces the previous output instead of appending
        # duplicate rows to it.
        with open(f"./results/{group}_results.csv",'w') as results_file:
            for line in lines:
                stripped = line.strip()
                if not stripped:
                    # Tolerate blank lines (e.g. trailing newlines).
                    continue
                arr = stripped.split(',')
                score = float(arr[2])
                label = int(arr[3])
                dist.append(score)
                labels.append(label)
                # Scores strictly below the threshold are rejected (0);
                # everything else is accepted (1).
                predicted = 0 if score < threshold else 1
                results_file.write(f"{arr[0]},{arr[1]},{predicted},{arr[3]}\n")
                if predicted == 1:
                    counts['TP' if label == 1 else 'FP'] += 1
                else:
                    # Swapped naming, see the NOTE at the top of the cell.
                    counts['TN' if label == 1 else 'FN'] += 1
        fpr, tpr, thresholds = roc_curve(labels,dist)
        # Two consecutive np.save calls on one handle: readers must call
        # np.load twice on the same open file to get fpr and then tpr.
        with open(f"./roc/{group}_roc.npy",'wb') as f:
            np.save(f,fpr)
            np.save(f,tpr)

In [7]:
# Show the raw TP/TN/FP/FN tallies for every "<race>_<gender>" group.
print(accuracies)

{'African_Man': {'TP': 2360, 'TN': 579, 'FP': 590, 'FN': 2350}, 'African_Woman': {'TP': 45, 'TN': 16, 'FP': 7, 'FN': 53}, 'Asian_Man': {'TP': 1711, 'TN': 417, 'FP': 496, 'FN': 1635}, 'Asian_Woman': {'TP': 675, 'TN': 197, 'FP': 166, 'FN': 703}, 'Caucasian_Man': {'TP': 1633, 'TN': 620, 'FP': 50, 'FN': 2202}, 'Caucasian_Woman': {'TP': 508, 'TN': 239, 'FP': 9, 'FN': 739}, 'Indian_Man': {'TP': 1877, 'TN': 438, 'FP': 323, 'FN': 1994}, 'Indian_Woman': {'TP': 573, 'TN': 112, 'FP': 106, 'FN': 576}}


In [8]:
# Per-group metrics derived from the tallies in `accuracies`.
#
# NOTE: in `accuracies` the 'TN' and 'FN' keys are swapped relative to the
# textbook names: 'TN' holds label-1 pairs that were rejected and 'FN' holds
# label-0 pairs that were rejected. With that naming the formulas below are:
#   gar    = accepted label-1 pairs / all label-1 pairs
#   far    = accepted label-0 pairs / all label-0 pairs
#   scores = percentage of pairs whose decision matches the label
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float('nan')


gar = {}
far = {}
scores = {}
for race in races:
    for gender in genders:
        group = f"{race}_{gender}"
        counts = accuracies[group]
        tp, tn, fp, fn = (counts[key] for key in ('TP', 'TN', 'FP', 'FN'))
        # A group with no label-1 (or no label-0) pairs yields NaN instead
        # of aborting the whole cell with ZeroDivisionError.
        gar[group] = _safe_ratio(tp, tn + tp)
        far[group] = _safe_ratio(fp, fn + fp)
        scores[group] = _safe_ratio(100 * (fn + tp), fn + fp + tn + tp)

In [9]:
# Display the per-group `gar` values, keyed by "<race>_<gender>".
gar

{'African_Man': 0.8029942157196325,
 'African_Woman': 0.7377049180327869,
 'Asian_Man': 0.8040413533834586,
 'Asian_Woman': 0.7740825688073395,
 'Caucasian_Man': 0.7248113626276076,
 'Caucasian_Woman': 0.6800535475234271,
 'Indian_Man': 0.8107991360691145,
 'Indian_Woman': 0.8364963503649635}

In [10]:
# Display the per-group `far` values, keyed by "<race>_<gender>".
far

{'African_Man': 0.20068027210884354,
 'African_Woman': 0.11666666666666667,
 'Asian_Man': 0.2327545753167527,
 'Asian_Woman': 0.19102416570771003,
 'Caucasian_Man': 0.022202486678507993,
 'Caucasian_Woman': 0.012032085561497326,
 'Indian_Man': 0.1394044022442814,
 'Indian_Woman': 0.15542521994134897}

In [11]:
# Display the per-group `scores` (percentages), keyed by "<race>_<gender>".
scores

{'African_Man': 80.11566592957986,
 'African_Woman': 80.99173553719008,
 'Asian_Man': 78.56304296783283,
 'Asian_Woman': 79.14991384261918,
 'Caucasian_Man': 85.1276359600444,
 'Caucasian_Woman': 83.4113712374582,
 'Indian_Man': 83.57081174438687,
 'Indian_Woman': 84.05267008046818}