In [5]:
import pandas

print("Importing names... ")
n = pandas.read_csv("data/names.csv", usecols=["name", "n_publs", "likely_gender", "score"])
print("Names imported.")

Importing names... 
Names imported.


In [7]:
print("Retreiving a random sample... ")
names = n.sample(5000)
print("Sample retreived.")

Retreiving a random sample... 
Sample retreived.


In [8]:
print("Saving sample to CSV... ")
names.to_csv("data/names_smpl.csv")
print("Sample saved!")

Saving sample to CSV... 
Sample saved!


In [9]:
import pandas

print("Importing sample names... ")
names = pandas.read_csv("data/names_smpl.csv", usecols=["name", "n_publs", "likely_gender", "score"])
print("Names imported.")

Importing sample names... 
Names imported.


In [10]:
# Setting index & accessing cells: https://pythonhow.com/accessing-dataframe-columns-rows-and-cells/
names = names.set_index("name", drop = False)
print(names)

                                                            name  n_publs  \
name                                                                        
J.-L. Goffin                                        J.-L. Goffin        1   
Brian Manuel González-Contreras  Brian Manuel González-Contreras        3   
Lamia Benmouffok                                Lamia Benmouffok        2   
Vani Vijayakumar                                Vani Vijayakumar        1   
A. R. Hale                                            A. R. Hale        7   
Rajanik Mark Jayasuriya                  Rajanik Mark Jayasuriya        1   
Klaus Illgner                                      Klaus Illgner       16   
Roberto Gómez-García                        Roberto Gómez-García       39   
François Sterboul                              François Sterboul        1   
A. Trigg                                                A. Trigg        1   
Isil Aksan Kurnaz                              Isil Aksan Kurnaz        2   

In [11]:
print("Getting private key... ")
# Get private API Key for NamSor API v2 (contained in txt file)
key = ''

# Import personal key
with open("key.txt", "r") as file:
    key = file.read()

if(len(key) > 0):
    print("Got private key.")
else: 
    print("Could not find private key. Please check the file name and make sure you have an API key.")

Getting private key... 
Got private key.


In [12]:
# Trying out NamSor API v2 to get the gender of a name
# https://www.namsor.com/
# https://v2.namsor.com/NamSorAPIv2/apidoc.html
# using NamSor API v2 Python SDK
# https://github.com/namsor/namsor-python-sdk2
# licensed under GNU Affero General Public License v3.0

# Alternatives? https://genderize.io/ -> But only first name!

# Following script taken from https://github.com/namsor/namsor-python-sdk2 "Getting Started" 
# and adapted to keep key private and remove unnecessary lines.
# It tests the connection to the NamSor API

print("Testing NamSor API v2 connection...")

import openapi_client
from openapi_client.rest import ApiException

# Configure API key authorization: api_key
configuration = openapi_client.Configuration()
configuration.api_key['X-API-KEY'] = key

Testing NamSor API v2 connection...


In [13]:
# create an instance of the API class
admin_api_instance = openapi_client.AdminApi(openapi_client.ApiClient(configuration))

try:
    # Print current API usage.
    api_response = admin_api_instance.api_usage()
    print(api_response)
    print("NamSor API v2 connection successfull!")
except ApiException as e:
    print("Exception when calling AdminApi: api_usage: %s\n" % e)

{'billing_period': {'api_key': '25990686d47a400154af01a141ad3e1f',
                    'billing_status': 'OPEN',
                    'hard_limit': 5000,
                    'period_ended': 0,
                    'period_started': 1552463739841,
                    'soft_limit': 5000,
                    'stripe_current_period_end': 0,
                    'stripe_current_period_start': 0,
                    'subscription_started': 1552463739841,
                    'usage': -4999980},
 'overage_currency': None,
 'overage_excl_tax': 0.0,
 'overage_incl_tax': 0.0,
 'overage_quantity': 0,
 'subscription': {'api_key': '25990686d47a400154af01a141ad3e1f',
                  'currency': 'usd',
                  'currency_factor': 1.0,
                  'plan_base_fees_key': 'namsorapi_v2_BASIC_usd',
                  'plan_ended': 0,
                  'plan_name': 'BASIC',
                  'plan_quota': 5000,
                  'plan_started': 1552463739841,
                  'plan_status': 'O

In [14]:
# create an instance of the API class
pers_api_instance = openapi_client.PersonalApi(openapi_client.ApiClient(configuration))

In [15]:
# Testing a single name in the API
print("Getting gender of a name for testing... ")

t = names.sample(1)

print(t)

Getting gender of a name for testing... 
                name  n_publs   likely_gender  score
name                                                
D. Nalley  D. Nalley        1  not determined      0


In [16]:
testname = t.loc["D. Nalley","name"]

print("Chose to test {}. Continuing...".format(testname))

Chose to test D. Nalley. Continuing...


In [None]:
try:
    api_response = pers_api_instance.gender_full(testname)
    print(api_response)
except ApiException as e:
    print("Exception when calling AdminApi: api_usage: %s\n" % e)

print("Name {} is {} with a chance of {}.".format(testname, api_response.likely_gender, abs(api_response.gender_scale)))

In [17]:
# Testing the API for a batch of names
tst_names = names[:10]
print("Going to test the following names: {}".format(tst_names))

Going to test the following names:                                                             name  n_publs  \
name                                                                        
J.-L. Goffin                                        J.-L. Goffin        1   
Brian Manuel González-Contreras  Brian Manuel González-Contreras        3   
Lamia Benmouffok                                Lamia Benmouffok        2   
Vani Vijayakumar                                Vani Vijayakumar        1   
A. R. Hale                                            A. R. Hale        7   
Rajanik Mark Jayasuriya                  Rajanik Mark Jayasuriya        1   
Klaus Illgner                                      Klaus Illgner       16   
Roberto Gómez-García                        Roberto Gómez-García       39   
François Sterboul                              François Sterboul        1   
A. Trigg                                                A. Trigg        1   

                                  likely

In [18]:
# Formatting the names using the API's models
def createPersonalNameIn(name):
    return openapi_client.PersonalNameIn(id=name, name=name)

list_of_names = list(map(createPersonalNameIn, tst_names.index.values))

In [19]:
print(list_of_names)
print("The formatted names look like that: {}".format(list_of_names))

[{'id': 'J.-L. Goffin', 'name': 'J.-L. Goffin'}, {'id': 'Brian Manuel González-Contreras',
 'name': 'Brian Manuel González-Contreras'}, {'id': 'Lamia Benmouffok', 'name': 'Lamia Benmouffok'}, {'id': 'Vani Vijayakumar', 'name': 'Vani Vijayakumar'}, {'id': 'A. R. Hale', 'name': 'A. R. Hale'}, {'id': 'Rajanik Mark Jayasuriya', 'name': 'Rajanik Mark Jayasuriya'}, {'id': 'Klaus Illgner', 'name': 'Klaus Illgner'}, {'id': 'Roberto Gómez-García', 'name': 'Roberto Gómez-García'}, {'id': 'François Sterboul', 'name': 'François Sterboul'}, {'id': 'A. Trigg', 'name': 'A. Trigg'}]
The formatted names look like that: [{'id': 'J.-L. Goffin', 'name': 'J.-L. Goffin'}, {'id': 'Brian Manuel González-Contreras',
 'name': 'Brian Manuel González-Contreras'}, {'id': 'Lamia Benmouffok', 'name': 'Lamia Benmouffok'}, {'id': 'Vani Vijayakumar', 'name': 'Vani Vijayakumar'}, {'id': 'A. R. Hale', 'name': 'A. R. Hale'}, {'id': 'Rajanik Mark Jayasuriya', 'name': 'Rajanik Mark Jayasuriya'}, {'id': 'Klaus Illgner', 'nam

In [20]:
batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=list_of_names)

In [21]:
print("Calling API to test a batch of names... ")

try:
    api_response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch_personal_name_in)
    print(api_response)
    print("Names successfully analyzed")
except ApiException as e:
    print("Exception when calling PersonalApi: gender_full_batch: %s\n" % e)

Calling API to test a batch of names... 
{'personal_names': [{'gender_scale': -1.0,
                     'id': 'J.-L. Goffin',
                     'likely_gender': 'male',
                     'name': 'J.-L. Goffin',
                     'score': 1.9698475400442068},
                    {'gender_scale': -1.0,
                     'id': 'Brian Manuel González-Contreras',
                     'likely_gender': 'male',
                     'name': 'Brian Manuel González-Contreras',
                     'score': 11.691685856454608},
                    {'gender_scale': 1.0,
                     'id': 'Lamia Benmouffok',
                     'likely_gender': 'female',
                     'name': 'Lamia Benmouffok',
                     'score': 2.58457361242469},
                    {'gender_scale': 1.0,
                     'id': 'Vani Vijayakumar',
                     'likely_gender': 'female',
                     'name': 'Vani Vijayakumar',
                     'score': 4.938766878522

In [22]:
# Now testing the repeated calling of the API, cycling through a list of names, sending in
# one batch at a time and saving the result answer by answer.

batch_size = 3
start = 0
end = batch_size
result = []

names_stack = list(tst_names.index.values)

list_of_names = list(map(createPersonalNameIn, tst_names.index.values))

while (len(names_stack) >= batch_size):
    try:
        current_batch = list(map(createPersonalNameIn, names_stack[start:end]))
        batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=current_batch)
        api_response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch_personal_name_in)
        result = result + api_response.personal_names
        
        del names_stack[start:end]
        
        # get remaining names if they are less than a batch size
        if(len(names_stack) < batch_size and len(names_stack) > 0):
            current_batch = list(map(createPersonalNameIn, names_stack))
            batch_personal_name_in = openapi_client.BatchPersonalNameIn(personal_names=current_batch)
            api_response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch_personal_name_in)
            result = result + api_response.personal_names
            names_stack = []
        
        print(api_response)
        print(result)
        print("Batch of names analyzed")
    except ApiException as e:
        print("Exception when calling PersonalApi: gender_full_batch: %s\n" % e)


print("All batches analyzed.")

{'personal_names': [{'gender_scale': -1.0,
                     'id': 'J.-L. Goffin',
                     'likely_gender': 'male',
                     'name': 'J.-L. Goffin',
                     'score': 1.9698475400442068},
                    {'gender_scale': -1.0,
                     'id': 'Brian Manuel González-Contreras',
                     'likely_gender': 'male',
                     'name': 'Brian Manuel González-Contreras',
                     'score': 11.691685856454608},
                    {'gender_scale': 1.0,
                     'id': 'Lamia Benmouffok',
                     'likely_gender': 'female',
                     'name': 'Lamia Benmouffok',
                     'score': 2.58457361242469}]}
[{'gender_scale': -1.0,
 'id': 'J.-L. Goffin',
 'likely_gender': 'male',
 'name': 'J.-L. Goffin',
 'score': 1.9698475400442068}, {'gender_scale': -1.0,
 'id': 'Brian Manuel González-Contreras',
 'likely_gender': 'male',
 'name': 'Brian Manuel González-Contreras',
 'scor

In [23]:
# Convert results (list of openapi_client.models.personal_name_gendered_out.PersonalNameGenderedOut) to (list of dictionaries)

for oapi_el in result:
    tst_names.at[oapi_el.name, 'likely_gender'] = oapi_el.likely_gender
    tst_names.at[oapi_el.name, 'score'] = oapi_el.score

In [24]:
tst_names

Unnamed: 0_level_0,name,n_publs,likely_gender,score
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
J.-L. Goffin,J.-L. Goffin,1,male,1
Brian Manuel González-Contreras,Brian Manuel González-Contreras,3,male,11
Lamia Benmouffok,Lamia Benmouffok,2,female,2
Vani Vijayakumar,Vani Vijayakumar,1,female,4
A. R. Hale,A. R. Hale,7,male,3
Rajanik Mark Jayasuriya,Rajanik Mark Jayasuriya,1,male,6
Klaus Illgner,Klaus Illgner,16,male,7
Roberto Gómez-García,Roberto Gómez-García,39,male,9
François Sterboul,François Sterboul,1,male,9
A. Trigg,A. Trigg,1,male,1
