In [1]:
import pandas

print("Importing names... ")
names = pandas.read_csv("data/names.csv", usecols=["name", "n_publs", "likely_gender", "score"])
print("Names imported.")

Importing names... 
Names imported.


In [2]:
# Setting index & accessing cells: https://pythonhow.com/accessing-dataframe-columns-rows-and-cells/
names = names.set_index("name", drop = False)
print(names)

                                          name  n_publs   likely_gender  score
name                                                                          
'Maseka Lesaoana              'Maseka Lesaoana        2  not determined      0
(David) Jing Dai              (David) Jing Dai        1  not determined      0
(Max) Zong-Ming Cheng    (Max) Zong-Ming Cheng        2  not determined      0
(Sophy) Shu-Jiun Chen    (Sophy) Shu-Jiun Chen        2  not determined      0
(Zhou) Bryan Bai              (Zhou) Bryan Bai        2  not determined      0
A Clara Kanmani                A Clara Kanmani        1  not determined      0
A Lun                                    A Lun        1  not determined      0
A Min Tjoa                          A Min Tjoa      211  not determined      0
A S Akshaya                        A S Akshaya        1  not determined      0
A'ang Subiyakto                A'ang Subiyakto        2  not determined      0
A'fza Shafie                      A'fza Shafie      

In [3]:
print("Getting private key... ")
# Get private API Key for NamSor API v2 (contained in txt file)
key = ''

# Import personal key
with open("key.txt", "r") as file:
    key = file.read()

if(len(key) > 0):
    print("Got private key.")
else: 
    print("Could not find private key. Please check the file name and make sure you have an API key.")

Getting private key... 
Got private key.


In [4]:
# Trying out NamSor API v2 to get the gender of a name
# https://www.namsor.com/
# https://v2.namsor.com/NamSorAPIv2/apidoc.html
# using NamSor API v2 Python SDK
# https://github.com/namsor/namsor-python-sdk2
# licensed under GNU Affero General Public License v3.0

# Alternatives? https://genderize.io/ -> But only first name!

# Following script taken from https://github.com/namsor/namsor-python-sdk2 "Getting Started" 
# and adapted to keep key private and remove unnecessary lines.
# It tests the connection to the NamSor API

print("Setting up NamSor API v2 connection settings...")

import openapi_client
from openapi_client.rest import ApiException

# Configure API key authorization: api_key
configuration = openapi_client.Configuration()
configuration.api_key['X-API-KEY'] = key
# create an instance of the API class
pers_api_instance = openapi_client.PersonalApi(openapi_client.ApiClient(configuration))

Setting up NamSor API v2 connection settings...


In [5]:
# Formatting the names using the API's models
def createPersonalNameIn(name):
    return openapi_client.PersonalNameIn(id=name, name=name)

list_of_names = list(map(createPersonalNameIn, names.index.values))

In [9]:
print("The formatted names look like that: {}".format(list_of_names[:100]))
'''
To classify by hand: [
{'id': '(David) Jing Dai', 'name': '(David) Jing Dai'}, 
{'id': '(Max) Zong-Ming Cheng', 'name': '(Max) Zong-Ming Cheng'}, 
{'id': '(Sophy) Shu-Jiun Chen', 'name': '(Sophy) Shu-Jiun Chen'}, 
{'id': '(Zhou) Bryan Bai', 'name': '(Zhou) Bryan Bai'}]
'''

The formatted names look like that: [{'id': "'Maseka Lesaoana", 'name': "'Maseka Lesaoana"}, {'id': '(David) Jing Dai', 'name': '(David) Jing Dai'}, {'id': '(Max) Zong-Ming Cheng', 'name': '(Max) Zong-Ming Cheng'}, {'id': '(Sophy) Shu-Jiun Chen', 'name': '(Sophy) Shu-Jiun Chen'}, {'id': '(Zhou) Bryan Bai', 'name': '(Zhou) Bryan Bai'}, {'id': 'A Clara Kanmani', 'name': 'A Clara Kanmani'}, {'id': 'A Lun', 'name': 'A Lun'}, {'id': 'A Min Tjoa', 'name': 'A Min Tjoa'}, {'id': 'A S Akshaya', 'name': 'A S Akshaya'}, {'id': "A'ang Subiyakto", 'name': "A'ang Subiyakto"}, {'id': "A'fza Shafie", 'name': "A'fza Shafie"}, {'id': "A'lishia Bowman", 'name': "A'lishia Bowman"}, {'id': "A'na Wang", 'name': "A'na Wang"}, {'id': "A'zraa Afhzan Ab Rahim", 'name': "A'zraa Afhzan Ab Rahim"}, {'id': 'A-Chuan Hsueh', 'name': 'A-Chuan Hsueh'}, {'id': 'A-Hadi N. Ahmed', 'name': 'A-Hadi N. Ahmed'}, {'id': 'A-Imam Al-Sammak', 'name': 'A-Imam Al-Sammak'}, {'id': 'A-Long Jin', 'name': 'A-Long Jin'}, {'id': 'A-Min Z

'\nThe formatted names look like that: [\n{\'id\': \'(David) Jing Dai\', \'name\': \'(David) Jing Dai\'}, \n{\'id\': \'(Max) Zong-Ming Cheng\', \'name\': \'(Max) Zong-Ming Cheng\'}, \n{\'id\': \'(Sophy) Shu-Jiun Chen\', \'name\': \'(Sophy) Shu-Jiun Chen\'}, \n{\'id\': \'(Zhou) Bryan Bai\', \'name\': \'(Zhou) Bryan Bai\'}, \n{\'id\': \'A Clara Kanmani\', \'name\': \'A Clara Kanmani\'}, \n{\'id\': \'A Lun\', \'name\': \'A Lun\'}, \n{\'id\': \'A Min Tjoa\', \'name\': \'A Min Tjoa\'}, \n{\'id\': \'A S Akshaya\', \'name\': \'A S Akshaya\'}, \n{\'id\': "A\'ang Subiyakto", \'name\': "A\'ang Subiyakto"}]\n\n'

In [20]:
batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=list_of_names)

In [None]:
len(list_of_names) 

In [22]:
# Now testing the repeated calling of the API, cycling through a list of names, sending in
# one batch at a time and saving the result answer by answer.

batch_size = 1000 #1000 is the API limit given by NamSor
start = 0
end = batch_size
result = []

names_stack = list(names.index.values)

list_of_names = list(map(createPersonalNameIn, names.index.values))

while (len(names_stack) >= batch_size):
    try:
        current_batch = list(map(createPersonalNameIn, names_stack[start:end]))
        batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=current_batch)
        api_response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch_personal_name_in)
        result = result + api_response.personal_names
        
        del names_stack[start:end]
        
        # get remaining names if they are less than a batch size
        if(len(names_stack) < batch_size and len(names_stack) > 0):
            current_batch = list(map(createPersonalNameIn, names_stack))
            batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=current_batch)
            api_response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch_personal_name_in)
            result = result + api_response.personal_names
            names_stack = []
        
        print("Batch of names analyzed")
    except ApiException as e:
        print("Exception when calling PersonalApi: gender_full_batch: %s\n" % e)


print("All batches analyzed.")

{'personal_names': [{'gender_scale': -1.0,
                     'id': 'J.-L. Goffin',
                     'likely_gender': 'male',
                     'name': 'J.-L. Goffin',
                     'score': 1.9698475400442068},
                    {'gender_scale': -1.0,
                     'id': 'Brian Manuel González-Contreras',
                     'likely_gender': 'male',
                     'name': 'Brian Manuel González-Contreras',
                     'score': 11.691685856454608},
                    {'gender_scale': 1.0,
                     'id': 'Lamia Benmouffok',
                     'likely_gender': 'female',
                     'name': 'Lamia Benmouffok',
                     'score': 2.58457361242469}]}
[{'gender_scale': -1.0,
 'id': 'J.-L. Goffin',
 'likely_gender': 'male',
 'name': 'J.-L. Goffin',
 'score': 1.9698475400442068}, {'gender_scale': -1.0,
 'id': 'Brian Manuel González-Contreras',
 'likely_gender': 'male',
 'name': 'Brian Manuel González-Contreras',
 'scor

In [23]:
# Convert results (list of openapi_client.models.personal_name_gendered_out.PersonalNameGenderedOut) to (list of dictionaries)

for oapi_el in result:
    names.at[oapi_el.name, 'likely_gender'] = oapi_el.likely_gender
    names.at[oapi_el.name, 'score'] = oapi_el.score

In [24]:
names

Unnamed: 0_level_0,name,n_publs,likely_gender,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
J.-L. Goffin,J.-L. Goffin,1,male,1
Brian Manuel González-Contreras,Brian Manuel González-Contreras,3,male,11
Lamia Benmouffok,Lamia Benmouffok,2,female,2
Vani Vijayakumar,Vani Vijayakumar,1,female,4
A. R. Hale,A. R. Hale,7,male,3
Rajanik Mark Jayasuriya,Rajanik Mark Jayasuriya,1,male,6
Klaus Illgner,Klaus Illgner,16,male,7
Roberto Gómez-García,Roberto Gómez-García,39,male,9
François Sterboul,François Sterboul,1,male,9
A. Trigg,A. Trigg,1,male,1
