# Evaluation - resubmission

We regenerate the tables for the resubmission.

In [1]:
# Put the parent directory on the module search path; presumably this is where
# the project modules imported in the next cell live -- TODO confirm.
import sys
sys.path.append('../')

In [2]:
import pandas as pd
import xlsxwriter
from collections import OrderedDict, Counter

import matplotlib.pyplot as plt
plt.style.use('ggplot')

import seaborn as sns
sns.set(color_codes=True)

from evaluators import *
from evaluator import *
from config import DIR_PATH

%matplotlib inline
%pylab inline

# Evaluator classes that are instantiated and benchmarked by the cells below.
REGISTERED_EVALUATORS = [
    GenderAPIEvaluator,
    GenderAPIFullEvaluator,
    NameAPIEvaluator,
    NameAPIFullEvaluator,
    GenderGuesserEvaluator,
    GenderizeIoEvaluator,
    NamSorEvaluator,
]

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [3]:
# Color codes for tables: endpoints of the two-color scale used for the Excel
# conditional formatting (light = minimum value, dark = maximum value).
light_green = '#e5ffe5'
dark_green = '#2e992e'

In [4]:
# Maps the internal evaluator name to the service name displayed in the tables.
# reduce_table keeps only the evaluators listed here.
gender_evalautor_to_service_name = {
    'gender_api': 'Gender API',
    'gender_guesser': 'gender-guesser',
    'genderize_io': 'genderize.io',
    'name_api_full': 'NameAPI',
    'namsor': 'NamSor',
}


def reduce_table(df, by_index):
    """Restrict a benchmark table to the mapped evaluators and rename them.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose row labels (``by_index`` truthy) or column labels
        (``by_index`` falsy) are internal evaluator names.
    by_index : bool
        If truthy, the rows are filtered and renamed and come back in the
        order of ``gender_evalautor_to_service_name``. Otherwise the columns
        are filtered and renamed and come back sorted by their display name
        (plain ``sorted``, so upper-case names precede lower-case ones).

    Returns
    -------
    pandas.DataFrame
        The reduced table; the frame passed in is not modified.
    """
    ordered = df.sort_index()
    evaluator_names = list(gender_evalautor_to_service_name.keys())

    if by_index:
        kept_rows = ordered.loc[evaluator_names]
        return kept_rows.rename(index=gender_evalautor_to_service_name)

    kept_cols = ordered[evaluator_names]
    # Positional relabelling: the columns were just selected in mapping order.
    kept_cols.columns = list(gender_evalautor_to_service_name.values())
    return kept_cols[sorted(kept_cols.columns)]

## Compute errors without tuning

In [5]:
def compute_all_errors_without_tuning():
    """Collect the error metrics of every registered evaluator in one table.

    Each class in ``REGISTERED_EVALUATORS`` is instantiated with ``'all'``,
    loads its data with ``evaluated=True``, translates the API responses,
    computes the confusion matrix on its ``test_data`` and finally its errors.

    Returns
    -------
    pandas.DataFrame
        One row per evaluator (indexed by its ``gender_evaluator`` name) with
        the columns errorCoded, errorCodedWithoutNA, errorGenderBias, naCoded
        and WeightedError.
    """
    errors_by_service = {}

    for evaluator_cls in REGISTERED_EVALUATORS:
        service = evaluator_cls('all')
        service_name = service.gender_evaluator
        service.load_data(evaluated=True)
        service._translate_api_response()
        service.compute_confusion_matrix(service.test_data)
        errors_by_service[service_name] = service.compute_all_errors()

    error_table = pd.DataFrame.from_dict(errors_by_service, orient='index')
    # Columns are labelled by position; assumes compute_all_errors() yields
    # the five metrics in exactly this order -- TODO confirm.
    error_table.columns = [
        'errorCoded',
        'errorCodedWithoutNA',
        'errorGenderBias',
        'naCoded',
        'WeightedError',
    ]
    return error_table

In [13]:
# Green colormap used to shade the tables rendered in the notebook.
cm = sns.light_palette("green", as_cmap=True)

# Untuned errors of the selected services, rounded to three decimals.
df = reduce_table(compute_all_errors_without_tuning(), by_index=True).round(3)

df.style.background_gradient(cmap=cm)

Unnamed: 0,errorCoded,errorCodedWithoutNA,errorGenderBias,naCoded,WeightedError
Gender API,0.079,0.05,-0.011,0.03,0.056
gender-guesser,0.222,0.026,0.002,0.201,0.073
genderize.io,0.143,0.05,0.022,0.097,0.07
NameAPI,0.179,0.034,0.004,0.15,0.067
NamSor,0.128,0.043,0.007,0.089,0.061


In [14]:
# Same table with absolute values, so that the shading reflects the magnitude
# of each error (errorGenderBias can be negative).
df.abs().style.background_gradient(cmap=cm)

Unnamed: 0,errorCoded,errorCodedWithoutNA,errorGenderBias,naCoded,WeightedError
Gender API,0.079,0.05,0.011,0.03,0.056
gender-guesser,0.222,0.026,0.002,0.201,0.073
genderize.io,0.143,0.05,0.022,0.097,0.07
NameAPI,0.179,0.034,0.004,0.15,0.067
NamSor,0.128,0.043,0.007,0.089,0.061


In [11]:
# Export tables to Excel files
df = compute_all_errors_without_tuning()
df = reduce_table(df, by_index=True)
df = df.round(3)

# Two-color scale applied to every metric column of the exported sheet.
color_scale = {'type': '2_color_scale', 'min_color': light_green, 'max_color': dark_green}

# add colors using ExcelWriter; see http://xlsxwriter.readthedocs.io/working_with_conditional_formats.html
# The context manager writes and closes the workbook even if a step fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('../../../benchmark_paper/resubmission/tables/Table4.xlsx', engine='xlsxwriter') as writer:
    # Absolute values are written so that the shading reflects magnitude.
    df.abs().to_excel(writer, sheet_name='Sheet1')
    worksheet = writer.sheets['Sheet1']

    # Sheet row 0 holds the header and sheet column 0 the index, so the data
    # occupies rows 1..len(df) and columns 1..len(df.columns) (zero-based).
    last_row = len(df)
    for col in range(1, len(df.columns) + 1):
        worksheet.conditional_format(1, col, last_row, col, dict(color_scale))

In [None]:
# REMINDER: the Excel export contains absolute values; put the minus sign back
# on the negative entries (e.g. errorGenderBias) when exporting the table to Word.