# Evaluate `genderize.io` 

In [1]:
from genderize import Genderize
import pandas as pd
from genderize_io_evaluator import GenderizeIoEvaluator

### Can it handle surnames?

In [2]:
# Full names (given name plus surname) to probe whether surnames are handled.
full_names = ['Hans Joachim Schmidt', 'Anna Meier']
results = Genderize().get(full_names)

In [3]:
# Show the raw list of response dicts returned by the API.
print(results)

[{'name': 'Hans Joachim Schmidt', 'gender': None}, {'name': 'Anna Meier', 'gender': None}]


### Double names (where the order matters)

In [4]:
# Spelling variants of compound given names: space vs. hyphen vs. fused,
# accented vs. unaccented, plus one abbreviated form.
compound_names = [
    'Hans Joachim', 'Hans-Joachim',
    'Maria-José', 'José Maria', 'Jose Maria', 'José-Maria', 'Josémaria',
    'theo c. m',
]
results = Genderize().get(compound_names)

In [5]:
# One response dict per line.
for response in results:
    print(response)

{'name': 'Hans Joachim', 'gender': None}
{'name': 'Hans-Joachim', 'probability': 1.0, 'gender': 'male', 'count': 1}
{'name': 'Maria-José', 'probability': 1.0, 'gender': 'female', 'count': 2}
{'name': 'José Maria', 'probability': 1.0, 'gender': 'male', 'count': 3}
{'name': 'Jose Maria', 'probability': 0.99, 'gender': 'male', 'count': 125}
{'name': 'José-Maria', 'gender': None}
{'name': 'Josémaria', 'gender': None}
{'name': 'theo c. m', 'gender': None}


The examples show that the API: 

* accepts double names
* is sensitive to non-letter characters such as '-' or ' ' (cf. `Hans Joachim` and `Hans-Joachim`)
* works fine with non-ASCII characters (e.g. `é`)
* is sensitive to accents (cf. `José Maria` and `Jose Maria`)

### Names with different gender depending on ethnicity

In [6]:
# Given names whose typical gender varies by country, plus common unisex names.
ambiguous_names = ['Nicola', 'Andrea', 'Alex', 'Mika', 'Addison', 'Ash', 'Dakota']
results = Genderize().get(ambiguous_names)

In [7]:
# One response dict per line.
for response in results:
    print(response)

{'name': 'Nicola', 'probability': 0.71, 'gender': 'female', 'count': 1226}
{'name': 'Andrea', 'probability': 0.79, 'gender': 'female', 'count': 5794}
{'name': 'Alex', 'probability': 0.87, 'gender': 'male', 'count': 5856}
{'name': 'Mika', 'probability': 0.51, 'gender': 'male', 'count': 182}
{'name': 'Addison', 'probability': 0.64, 'gender': 'male', 'count': 11}
{'name': 'Ash', 'probability': 0.56, 'gender': 'male', 'count': 243}
{'name': 'Dakota', 'probability': 0.75, 'gender': 'male', 'count': 139}


These examples show that:

* names like `Andrea` or `Nicola` where the gender is highly country-specific have a higher score than common unisex names like `Mika` or `Ash`
* `Alex` is a nickname for either Alexander or Alexandra and is one of the most evenly divided gender-neutral names, yet its probability value here is quite high at 0.87 (male)

### Check for nonsense words

In [8]:
# Common English function words that are not given names.
non_names = ['the', 'a', 'with', 'an', 'I', 'my']
results = Genderize().get(non_names)

In [9]:
# One response dict per line.
for response in results:
    print(response)

{'name': 'the', 'probability': 1.0, 'gender': 'female', 'count': 1}
{'name': 'a', 'probability': 0.59, 'gender': 'male', 'count': 56}
{'name': 'with', 'gender': None}
{'name': 'an', 'probability': 0.83, 'gender': 'female', 'count': 170}
{'name': 'I', 'gender': None}
{'name': 'my', 'probability': 0.73, 'gender': 'female', 'count': 44}


Not every word that gets a gender assigned is a name. This is because the API is based on social media profile names, and such words sometimes occur as parts of those names.

### Capital letters

In [19]:
# Same name in lower case and capitalised, to check case sensitivity.
# NOTE: in the recorded run this request failed with HTTP 429 ("Request limit
# reached"), so `results` was not updated by this cell.
case_variants = ['pierre', 'Pierre']
results = Genderize().get(case_variants)

GenderizeException: ('Request limit reached', 429, {'Server': 'nginx/1.10.3 (Ubuntu)', 'Content-Type': 'application/json; charset=utf-8', 'ETag': 'W/"21-/l1SSO33EFsZ7fO3uQLHxw"', 'Content-Length': '33', 'Access-Control-Allow-Methods': 'GET', 'X-Powered-By': 'Express', 'Connection': 'keep-alive', 'Date': 'Sun, 10 Dec 2017 15:22:44 GMT', 'Access-Control-Allow-Headers': 'Content-Type', 'Access-Control-Allow-Origin': '*'})

In [None]:
# NOTE: this cell has no execution count, i.e. it was never run in the recorded
# session; it prints whatever `results` holds at the time it is executed.
for response in results:
    print(response)

## Test on zbMATH data - full 400 records

In [2]:
# Evaluator for the zbMATH test set; the data is loaded before it is inspected.
ZBMATH_CSV = "test_data/test_data_zbmath_full.csv"
zbmath = GenderizeIoEvaluator(ZBMATH_CSV)
zbmath.load_data()

In [3]:
# Preview the first rows of the loaded test data (name parts plus the ground-truth `gender`).
zbmath.test_data.head()

Unnamed: 0,first_name,middle_name,last_name,full_name,gender
0,pierre,paul,grivel,pierre paul grivel,m
1,raul,,serapioni,raul serapioni,m
2,adriano,,moura,adriano moura,m
3,ralf,,kieser,ralf kieser,m
4,teppei,,ariyoshi,teppei ariyoshi,u


In [4]:
# Obtain inferred genders for the test data. In the recorded run this queried
# the API and wrote the results to a CSV dump file (see the printed messages).
zbmath.fetch_gender()

Fetching gender data from API of service genderize_io
Saving data to dump file test_data/test_data_zbmath_full_genderize_io.csv


In [5]:
# Preview again: the data now also carries `count`, `gender_infered`, `name` and `probability`.
zbmath.test_data.head()

Unnamed: 0,first_name,middle_name,last_name,full_name,gender,count,gender_infered,name,probability
0,pierre,paul,grivel,pierre paul grivel,m,5.0,m,pierre-paul,1.0
1,raul,,serapioni,raul serapioni,m,821.0,m,raul,1.0
2,adriano,,moura,adriano moura,m,166.0,m,adriano,0.99
3,ralf,,kieser,ralf kieser,m,86.0,m,ralf,1.0
4,teppei,,ariyoshi,teppei ariyoshi,u,,u,teppei,


### Compute metrics on zbMATH data

In [6]:
# Presumably stores its result in `zbmath.confusion_matrix`, which is displayed
# right after this cell — TODO confirm against GenderizeIoEvaluator.
zbmath.compute_confusion_matrix()

In [8]:
# Rows are ground-truth labels (f/m/u); columns are predicted labels (f_pred/m_pred/u_pred).
zbmath.confusion_matrix

Unnamed: 0,f_pred,m_pred,u_pred
f,55,0,3
m,3,271,17
u,5,27,19


Show names for which human said 'm' but algorithm said 'f' 

In [9]:
# Records labelled 'm' in the ground truth but inferred as 'f'.
zbmath.compare_ground_truth_with_inference(true_gender='m', gender_infered='f')

Unnamed: 0,first_name,middle_name,last_name,full_name,gender,count,gender_infered,name,probability
65,jungmin,,ahn,jungmin ahn,m,9.0,f,jungmin,1.0
176,taro,,takimoto,taro takimoto,m,19.0,f,taro,0.58
326,yuan,,xu,yuan xu,m,69.0,f,yuan,0.71


Show names for which human said 'u' but algorithm said 'f' or 'm'

In [10]:
# Records labelled 'u' in the ground truth but inferred as 'f'.
zbmath.compare_ground_truth_with_inference(true_gender='u', gender_infered='f')

Unnamed: 0,first_name,middle_name,last_name,full_name,gender,count,gender_infered,name,probability
300,ben,da,zhou,ben da zhou,u,1.0,f,benda,1.0
315,rosario,,di-nardo,rosario di-nardo,u,343.0,f,rosario,0.75
357,cai,gui,liu,cai gui liu,u,54.0,f,cai,0.76
382,yamin,,ma,yamin ma,u,2.0,f,yamin,0.5
389,tack,wang,lee,tack wang lee,u,2.0,f,tack,0.5


In [11]:
# Records labelled 'u' in the ground truth but inferred as 'm'.
# NOTE(review): the recorded output lists 10 rows while the confusion matrix
# counts 27 such records — the display may be truncated; confirm.
zbmath.compare_ground_truth_with_inference(true_gender='u', gender_infered='m')

Unnamed: 0,first_name,middle_name,last_name,full_name,gender,count,gender_infered,name,probability
10,sushil,,singh,sushil singh,u,91.0,m,sushil,0.99
33,seiya,,haze,seiya haze,u,5.0,m,seiya,1.0
44,jae,il,lee,jae il lee,u,90.0,m,jae,0.58
66,koji,,okuguchi,koji okuguchi,u,10.0,m,koji,0.9
67,takashi,,amisaki,takashi amisaki,u,16.0,m,takashi,1.0
95,shoichi,,suzuki,shoichi suzuki,u,5.0,m,shoichi,1.0
132,makoto,,idzumi,makoto idzumi,u,28.0,m,makoto,1.0
173,lutfi,,avazpour,lutfi avazpour,u,25.0,m,lutfi,0.96
179,cuong,,le,cuong le,u,53.0,m,cuong,1.0
218,sung,hoon,hong,sung hoon hong,u,31.0,m,sung,0.84


The API returns 'm' for most of the Chinese names in the list.

In [7]:
# Prints the four error measures for the zbMATH run (see the recorded output).
zbmath.compute_all_errors()

error counting prediction as 'unknown gender' as classification errors:  0.0705521472393
error ignoring prediction as 'unknown gender' :  0.00911854103343
error counting proportion of names with unpredicted gender:  0.0573065902579
error where negative value suggestes that more women than men are missclassified:  0.00911854103343


## Test on genderizeR paper data

In [15]:
# Evaluator for the test set taken from the genderizeR paper.
GENDERIZER_CSV = "test_data/test_data_genderizeR.csv"
genderizeR = GenderizeIoEvaluator(GENDERIZER_CSV)
genderizeR.load_data()

In [16]:
# Preview the first rows of the loaded test data (raw name, name parts, ground-truth `gender`).
genderizeR.test_data.head()

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender
0,"Thayer, Ann",ann,,thayer,ann thayer,u
1,"Chiesa, Paolo",paolo,,chiesa,paolo chiesa,m
2,"Abbate, Ernesto",ernesto,,abbate,ernesto abbate,m
3,"Epstein, John H.",john,,epstein,john epstein,m
4,"Cotroneo, Margaret",margaret,,cotroneo,margaret cotroneo,f


In [18]:
# Obtain inferred genders for the test data.
# NOTE(review): the recorded output shows an HTTP 429 "Request limit reached"
# response and no dump being written, so the inferred values displayed in later
# cells presumably come from a different run — confirm before relying on them.
genderizeR.fetch_gender()

Fetching gender data from API of service genderize_io
('Request limit reached', 429, {'Server': 'nginx/1.10.3 (Ubuntu)', 'Content-Type': 'application/json; charset=utf-8', 'ETag': 'W/"21-/l1SSO33EFsZ7fO3uQLHxw"', 'Content-Length': '33', 'Access-Control-Allow-Methods': 'GET', 'X-Powered-By': 'Express', 'Connection': 'keep-alive', 'Date': 'Sun, 10 Dec 2017 14:58:30 GMT', 'Access-Control-Allow-Headers': 'Content-Type', 'Access-Control-Allow-Origin': '*'})
Saving data to dump file test_data/test_data_genderizeR_genderize_io.csv
Test data has not been evaluated yet, won't dump


In [25]:
# Preview again: the data now also carries `count`, `gender_infered` and `probability`.
genderizeR.test_data.head()

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
0,"Thayer, Ann",ann,,thayer,ann thayer,u,1818.0,f,0.99
1,"Chiesa, Paolo",paolo,,chiesa,paolo chiesa,m,781.0,m,0.99
2,"Abbate, Ernesto",ernesto,,abbate,ernesto abbate,m,381.0,m,1.0
3,"Epstein, John H.",john,,epstein,john epstein,m,9931.0,m,0.99
4,"Cotroneo, Margaret",margaret,,cotroneo,margaret cotroneo,f,1101.0,f,0.98


### Compute metrics on genderizeR paper data

In [26]:
# Presumably stores its result in `genderizeR.confusion_matrix`, which is
# displayed right after this cell — TODO confirm against GenderizeIoEvaluator.
genderizeR.compute_confusion_matrix()

In [27]:
# Rows are ground-truth labels (f/m/u); columns are predicted labels (f_pred/m_pred/u_pred).
genderizeR.confusion_matrix

Unnamed: 0,f_pred,m_pred,u_pred
f,83,3,4
m,13,303,16
u,51,94,7


In [28]:
# Records labelled 'u' in the ground truth but inferred as 'f'.
genderizeR.compare_ground_truth_with_inference(true_gender='u', gender_infered='f')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
0,"Thayer, Ann",ann,,thayer,ann thayer,u,1818.0,f,0.99
22,"Schmid, Judith Louisa",judith,louisa,schmid,judith louisa schmid,u,750.0,f,1.0
42,"Haight, Molly I.",molly,,haight,molly haight,u,780.0,f,0.99
43,"Harlow, Lisa L.",lisa,,harlow,lisa harlow,u,6394.0,f,1.0
65,"Reid, Sarah",sarah,,reid,sarah reid,u,8371.0,f,1.0
78,"Canning-Glass, Judy",judy,,canning-glass,judy canning-glass,u,1375.0,f,1.0
79,"Lo, Selina",selina,,lo,selina lo,u,176.0,f,1.0
95,"Oray, Linda Esther",linda,esther,oray,linda esther oray,u,4323.0,f,1.0
103,"Money, Anita",anita,,money,anita money,u,1519.0,f,1.0
119,"Giesecke, Robin",robin,,giesecke,robin giesecke,u,1628.0,f,0.59


The names look like typical female names. The ground-truth label 'u' therefore presumably means that the human evaluators could not find exactly those persons online, not that the names themselves are ambiguous.

In [29]:
# Records labelled 'f' in the ground truth but inferred as 'm'.
genderizeR.compare_ground_truth_with_inference(true_gender='f', gender_infered='m')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
325,"Parvati, Comrade",comrade,,parvati,comrade parvati,f,1.0,m,1.0
327,"Quick, Paddy",paddy,,quick,paddy quick,f,91.0,m,0.89
542,"Ljubesic, Nikola",nikola,,ljubesic,nikola ljubesic,f,301.0,m,0.83


**error in 'ground truth data':  'Nikola Ljubešić' is 'm':** https://scholar.google.hr/citations?user=zto4fTQAAAAJ&hl=en

In [30]:
# Records labelled 'm' in the ground truth but inferred as 'f'.
genderizeR.compare_ground_truth_with_inference(true_gender='m', gender_infered='f')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
98,"Shakeshaft, Robin",robin,,shakeshaft,robin shakeshaft,m,1628.0,f,0.59
112,"Ji, Lilien",lilien,,ji,lilien ji,m,1.0,f,1.0
285,"Bianco, Andrea",andrea,,bianco,andrea bianco,m,5794.0,f,0.79
389,"Kovacs, Kalman",kalman,,kovacs,kalman kovacs,m,19.0,f,0.79
400,"Pujol, Jean-Louis",jean,louis,pujol,jean louis pujol,m,1523.0,f,0.53
439,"Bardon, Jean-Pierre",jean,pierre,bardon,jean pierre bardon,m,1523.0,f,0.53
451,"Shupnik, Margaret A.",margaret,,shupnik,margaret shupnik,m,1101.0,f,0.98
483,"Hartmann, Gerd K.",gerd,,hartmann,gerd hartmann,m,54.0,f,0.52
499,"Franks, Sharon E. R.",sharon,,franks,sharon franks,m,2760.0,f,0.99
504,"Weary, Peyton E.",peyton,,weary,peyton weary,m,37.0,f,0.59


In [31]:
# Re-query two of the misclassified records using the hyphenated first name
# instead of only its first component (`jean`).
hyphenated_names = ['jean-louis', 'jean-pierre']
Genderize().get(hyphenated_names)

[{'count': 43, 'gender': 'male', 'name': 'jean-louis', 'probability': 1.0},
 {'count': 122, 'gender': 'male', 'name': 'jean-pierre', 'probability': 1.0}]

**The examples above show that performance can be improved when the full hyphenated first name (e.g. `jean-louis` rather than only `jean`) is used.**

**Another error in ground truth data: 'Shupnik, Margaret A.' is 'f':** https://med.virginia.edu/faculty/faculty-listing/mas3x/

In [32]:
# Run the four error computations in their original order. Each one is
# presumably responsible for setting the matching `genderizeR.error_*`
# attribute that is reported afterwards — TODO confirm.
for compute_error in (
    genderizeR.compute_error_with_unknown,
    genderizeR.compute_error_without_unknown,
    genderizeR.compute_error_unknown,
    genderizeR.compute_error_gender_bias,
):
    compute_error()

In [33]:
# Report each error measure as "<label> <value>", one per line.
error_report = [
    ("error counting prediction as 'unknown gender' as classification errors: ", genderizeR.error_with_unknown),
    ("error ignoring prediction as 'unknown gender' : ", genderizeR.error_without_unknown),
    ("error counting proportion of names with unpredicted gender: ", genderizeR.error_unknown),
    ("error where negative value suggests that more women than men are missclassified: ", genderizeR.error_gender_bias),
]
for label, value in error_report:
    print(label, value)

error counting prediction as 'unknown gender' as classification errors:  0.0932642487047
error ignoring prediction as 'unknown gender' :  0.0398009950249
error counting proportion of names with unpredicted gender:  0.0473933649289
error where negative value suggests that more women than men are missclassified:  0.0398009950249


## Test on genderizeR paper data - titles data

In [7]:
# Evaluator for the "titles" test set from the genderizeR paper.
# NOTE: this rebinds `genderizeR`, which earlier in the notebook refers to the
# evaluator for test_data/test_data_genderizeR.csv; cells run after this one
# operate on the titles data.
GENDERIZER_TITLES_CSV = "test_data/test_data_genderizeR_titles.csv"
genderizeR = GenderizeIoEvaluator(GENDERIZER_TITLES_CSV)
genderizeR.load_data()

In [35]:
# Preview the first rows of the loaded titles test data.
genderizeR.test_data.head()

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender
0,Nancy Mary Adams,nancy,mary,adams,nancy mary adams,f
1,Lloyd R Sutherland,lloyd,,sutherland,lloyd sutherland,m
2,John H. Hubbell,john,,hubbell,john hubbell,m
3,Frank Kyte,frank,,kyte,frank kyte,m
4,Nobuo Tanaka,nobuo,,tanaka,nobuo tanaka,m


In [36]:
# Obtain inferred genders. In the recorded run the results were read from an
# existing dump file rather than requested from the API (see the printed message).
genderizeR.fetch_gender()

Reading data from dump file test_data/test_data_genderizeR_titles_genderize_io.csv


In [37]:
# Preview again: the data now also carries `count`, `gender_infered` and `probability`.
genderizeR.test_data.head()

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
0,Nancy Mary Adams,nancy,mary,adams,nancy mary adams,f,2716.0,f,1.0
1,Lloyd R Sutherland,lloyd,,sutherland,lloyd sutherland,m,159.0,m,0.99
2,John H. Hubbell,john,,hubbell,john hubbell,m,9931.0,m,0.99
3,Frank Kyte,frank,,kyte,frank kyte,m,1565.0,m,1.0
4,Nobuo Tanaka,nobuo,,tanaka,nobuo tanaka,m,,u,


### Compute metrics on genderizeR paper data - titles data

In [38]:
# Presumably stores its result in `genderizeR.confusion_matrix`, which is
# displayed right after this cell — TODO confirm against GenderizeIoEvaluator.
genderizeR.compute_confusion_matrix()

In [39]:
# Rows are ground-truth labels (f/m/u); columns are predicted labels (f_pred/m_pred/u_pred).
genderizeR.confusion_matrix

Unnamed: 0,f_pred,m_pred,u_pred
f,57,6,6
m,15,398,22
u,0,0,0


In [40]:
# Records labelled 'u' in the ground truth but inferred as 'f'. Empty in the
# recorded run: the 'u' row of the confusion matrix for this data set is all zeros.
genderizeR.compare_ground_truth_with_inference(true_gender='u', gender_infered='f')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability


In [41]:
# Records labelled 'f' in the ground truth but inferred as 'm'.
genderizeR.compare_ground_truth_with_inference(true_gender='f', gender_infered='m')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
142,Kennedy Shriver,kennedy,,shriver,kennedy shriver,f,37.0,m,0.57
197,Smith Taylor,smith,,taylor,smith taylor,f,28.0,m,0.68
233,Wang Xiaotang,wang,,xiaotang,wang xiaotang,f,54.0,m,0.74
265,Carlson Muyskens,carlson,,muyskens,carlson muyskens,f,1.0,m,1.0
278,Edwin B. Newman,edwin,,newman,edwin newman,f,819.0,m,1.0
418,Levi Montalcini,levi,,montalcini,levi montalcini,f,139.0,m,0.95


In [42]:
# Records labelled 'm' in the ground truth but inferred as 'f'.
genderizeR.compare_ground_truth_with_inference(true_gender='m', gender_infered='f')

Unnamed: 0,raw_name,first_name,middle_name,last_name,full_name,gender,count,gender_infered,probability
6,Ma Ting Ying,ma,ting,ying,ma ting ying,m,251.0,f,0.62
54,Sason Shaik,sason,,shaik,sason shaik,m,1.0,f,1.0
87,Lavett Smith,lavett,,smith,lavett smith,m,1.0,f,1.0
149,Marian Cehelnik,marian,,cehelnik,marian cehelnik,m,404.0,f,0.82
191,Ashley Morris,ashley,,morris,ashley morris,m,3897.0,f,0.9
202,Aubrey Gorbman,aubrey,,gorbman,aubrey gorbman,m,113.0,f,0.82
240,Jean Glenisson,jean,,glenisson,jean glenisson,m,1523.0,f,0.53
266,Kari Sajavaara,kari,,sajavaara,kari sajavaara,m,467.0,f,0.91
284,Leslie Alan Shepard,leslie,alan,shepard,leslie alan shepard,m,1173.0,f,0.88
339,Robin Mckenzie,robin,,mckenzie,robin mckenzie,m,1628.0,f,0.59


In [43]:
# Compare the combined first + middle name (space and hyphen variants) with
# each component queried on its own.
leslie_alan_variants = ['leslie alan', 'leslie-alan', 'leslie', 'alan']
Genderize().get(leslie_alan_variants)

[{'gender': None, 'name': 'leslie alan'},
 {'gender': None, 'name': 'leslie-alan'},
 {'count': 1173, 'gender': 'female', 'name': 'leslie', 'probability': 0.88},
 {'count': 2079, 'gender': 'male', 'name': 'alan', 'probability': 1.0}]

In [44]:
# Run the four error computations in their original order. Each one is
# presumably responsible for setting the matching `genderizeR.error_*`
# attribute that is reported afterwards — TODO confirm.
for compute_error in (
    genderizeR.compute_error_with_unknown,
    genderizeR.compute_error_without_unknown,
    genderizeR.compute_error_unknown,
    genderizeR.compute_error_gender_bias,
):
    compute_error()

In [45]:
# Report each error measure as "<label> <value>", one per line.
error_report = [
    ("error counting prediction as 'unknown gender' as classification errors: ", genderizeR.error_with_unknown),
    ("error ignoring prediction as 'unknown gender' : ", genderizeR.error_without_unknown),
    ("error counting proportion of names with unpredicted gender: ", genderizeR.error_unknown),
    ("error where negative value suggests that more women than men are missclassified: ", genderizeR.error_gender_bias),
]
for label, value in error_report:
    print(label, value)

error counting prediction as 'unknown gender' as classification errors:  0.107692307692
error ignoring prediction as 'unknown gender' :  0.0441176470588
error counting proportion of names with unpredicted gender:  0.0555555555556
error where negative value suggests that more women than men are missclassified:  0.0441176470588


## From here: TODO

## Define different models based on `count` and `probability`

### Grid Search

In [46]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split