In [None]:
# STEP 3 A: TEST
# In this notebook, the functioning of NamSor API and NamSor API SDK for Python is tested.

In [None]:
# >>> Import 'names.csv' (ignore if you have 'names_smpl.csv')

import pandas

print("Importing names... ")
# Restrict the load to the four listed columns; any other column in the file is skipped.
n = pandas.read_csv(
    "data/names.csv",
    usecols=["name", "n_publs", "likely_gender", "score"],
)
print("Names imported.")

In [None]:
# >>> Create a sample of names (ignore if you have 'names_smpl.csv')

print("Retreiving a random sample... ")
# Draw 5000 rows at random from the full table of names.
names = n.sample(n=5000)
print("Sample retreived.")

In [None]:
# >>> Save the sample of names for later use (ignore if you have 'names_smpl.csv')

print("Saving sample to CSV... ")
# index=True is the pandas default, spelled out here: the row index is written
# as an extra first column next to the data columns.
names.to_csv("data/names_smpl.csv", index=True)
print("Sample saved!")

In [None]:
# >>> Import sample names from 'names_smpl.csv'
import pandas

print("Importing sample names... ")
# usecols keeps only the four data columns and drops anything else stored in the file.
names = pandas.read_csv(
    "data/names_smpl.csv",
    usecols=["name", "n_publs", "likely_gender", "score"],
)
print("Names imported.")

In [None]:
# Setting index & accessing cells: https://pythonhow.com/accessing-dataframe-columns-rows-and-cells/
# Use the name as row label; drop=False keeps "name" available as a regular column too.
names = names.set_index(keys="name", drop=False)
print("Some names: {}".format(names[:10]))

In [None]:
# >>> Test the NamSorAPI: 
# using NamSor API v2 Python SDK
# https://github.com/namsor/namsor-python-sdk2
# licensed under GNU Affero General Public License v3.0

# >>>A) Handling the key

# Get private API Key for NamSor API v2 (contained in txt file)
print("Getting private key... ")
key = ''

# Import personal key from file.
# A missing file is treated like an empty key so that the hint below is shown
# instead of a bare traceback.
try:
    with open("key.txt", "r") as file:
        # Strip surrounding whitespace: text files usually end with a newline,
        # which would otherwise become part of the key that is sent to the API.
        key = file.read().strip()
except FileNotFoundError:
    key = ''

if(len(key) > 0):
    print("Got private key.")
else: 
    print("Could not find private key. Please check the file name and make sure you have an API key.")

In [None]:
# >>> Test the API: B) API Set Up

# Trying out NamSor API v2 to get the gender of a name
# https://www.namsor.com/
# https://v2.namsor.com/NamSorAPIv2/apidoc.html

# Following scripts taken from https://github.com/namsor/namsor-python-sdk2 "Getting Started" 
# and adapted to keep key private and remove unnecessary lines.
# It tests the connection to the NamSor API

import openapi_client
from openapi_client.rest import ApiException

# Configure API key authorization: api_key
# The private key is registered under the 'X-API-KEY' entry of the SDK configuration;
# the API clients created from this configuration pick it up from there.
configuration = openapi_client.Configuration()
configuration.api_key['X-API-KEY'] = key

In [None]:
# >>> Test the API: C) Test the connection

print("Testing NamSor API v2 connection...")

# Create an instance of the Admin API class on top of its own API client.
admin_api_instance = openapi_client.AdminApi(
    api_client=openapi_client.ApiClient(configuration=configuration)
)

# Requesting the current API usage doubles as the connection check.
try:
    api_response = admin_api_instance.api_usage()
except ApiException as e:
    print("Exception when calling AdminApi: api_usage: {}".format(e))
else:
    print(api_response)
    print("NamSor API v2 connection successfull!")

In [None]:
# >>> Test the API: D) Test the classification of a name

# The Personal API instance is used for all name classification calls;
# it gets its own API client built from the shared configuration.
pers_api_instance = openapi_client.PersonalApi(
    api_client=openapi_client.ApiClient(configuration=configuration)
)

In [None]:
# Pick one random row of the dataframe as the test case
print("Getting gender of a name for testing... ")
t = names.sample(n=1)
print(t)

In [None]:
# Get the name of the chosen row.
# The dataframe was indexed by "name", so the index label of the single
# sampled row is the name to send to the API.
testname = t.index.values[0]

print("Chose to test {}. Continuing...".format(testname))

In [None]:
# Send the name to the API 
print("Now calling API to test a single name...")

# Reset the result before the call: if the request fails, the check below must
# see None rather than an undefined name or a value left over from an earlier cell.
result = None

try:
    api_response = pers_api_instance.gender_full(testname)
    result = api_response
    print("Received: {}".format(result))
except ApiException as e:
    # Name the call that actually failed (PersonalApi.gender_full).
    print("Exception when calling PersonalApi: gender_full: {}".format(e))

# Only summarise the classification when the call above succeeded.
if (result is not None):
    print("Name {} is {} with a score of {}.".format(testname, result.likely_gender, abs(result.gender_scale)))

In [None]:
# >>> Testing the API for a small batch of names

# Get batch of names to test.
# Take an explicit copy of the first ten rows: the results are written into
# tst_names later on, and writing into a plain slice of `names` triggers
# pandas' SettingWithCopyWarning and may not behave as intended.
tst_names = names[:10].copy()
print("Going to test the following names: {}".format(tst_names))

In [None]:
# Formatting the names using the API's models
def format_name(name):
    """Wrap a full name in the SDK's PersonalNameIn model.

    The name is used both as the record id and as the name itself, so the
    id of an answer can be used to find the name it belongs to.

    :param name: the full name to format
    :return: an openapi_client.PersonalNameIn built from the name
    """
    formatted_name = openapi_client.PersonalNameIn(name=name, id=name)
    return formatted_name

print("Formatting the names. Step 1: Formatting each name.")
# The index of tst_names holds the names, one formatted entry is built per index label.
list_of_names = [format_name(full_name) for full_name in tst_names.index.values]
print("The formatted names look like that: {}".format(list_of_names))

In [None]:
print("Formatting the names. Step 2: Formatting the batch of names.")
# The batch model bundles the individually formatted names into one request body.
batch_personal_name_in = openapi_client.BatchPersonalNameIn(
    personal_names=list_of_names,
)
print("The formatted batch looks like that: {}".format(batch_personal_name_in))

In [None]:
# Send the whole batch to the API in a single request
print("Calling API to test a batch of names... ")

try:
    api_response = pers_api_instance.gender_full_batch(
        batch_personal_name_in=batch_personal_name_in
    )
except ApiException as e:
    print("Exception when calling PersonalApi: gender_full_batch: {}".format(e))
else:
    # Reached only when the request went through
    result = api_response
    print("Names successfully analyzed. Received: {}".format(result))

In [None]:
# >>> Testing the API for a batch of batches
# Repeated calling of the API, cycling through a list of names, sending in
# one batch at a time and saving the result answer by answer.

# Preparation
batch_size = 3
start, end = 0, batch_size  # slice bounds of the batch at the front of the stack
result = []  # collects the answers of all batches

# Names still waiting to be sent; taken from the index of the test dataframe
names_stack = [full_name for full_name in tst_names.index.values]

In [None]:
def format_names(li):
    """Format several full names at once.

    :param li: list of unformatted full names
    :return: list with one formatted full name (see format_name) per input name
    """
    return [format_name(full_name) for full_name in li]

In [None]:
def format_batch(li):
    """Bundle formatted full names into one batch.

    :param li: list of formatted full names
    :return: an openapi_client.BatchPersonalNameIn holding the given names
    """
    formatted_batch = openapi_client.BatchPersonalNameIn(personal_names=li)
    return formatted_batch

In [None]:
def fullname_batch(batch):
    """Send one formatted batch of full names to the API.

    :param batch: formatted batch of full names
    :return: the API response of the gender_full_batch call
    """
    response = pers_api_instance.gender_full_batch(batch_personal_name_in=batch)
    return response

In [None]:
def call_api_full_batch(list_of_names):
    """Classify a list of unformatted names with a single API call.

    The names are formatted one by one, bundled into a batch and sent to the API.

    :param list_of_names: list of unformatted full names
    :return: the personal_names attribute of the API response, i.e. the name classifications
    """
    formatted_batch = format_batch(format_names(list_of_names))
    return fullname_batch(formatted_batch).personal_names

In [None]:
# Work through names_stack one batch at a time. The slice [start:end] always
# addresses the front of the stack; a final batch that is shorter than
# batch_size is handled by the same slice, so no separate remainder step is needed.
while (len(names_stack) > 0): # for each batch
    try:
        result = result + call_api_full_batch(names_stack[start:end])
    except ApiException as e:
        print("Exception when calling PersonalApi: gender_full_batch: {}".format(e))
        # Stop here: the failed batch is still at the front of names_stack, so
        # looping again would resend the very same request without end.
        break

    del names_stack[start:end] # delete what we looked up already
    print("Batch of names analyzed. The length of the result is now {}. The names_stack length is now {}.".format(len(result), len(names_stack)))

# Names that are still on the stack were not classified because a call failed.
if(len(names_stack) == 0):
    print("All batches analyzed.")
else:
    print("Stopped after an API error. {} names were not analyzed.".format(len(names_stack)))

In [None]:
# >>> Fill in dataframe with results
# Every element of result is an openapi_client.models.personal_name_gendered_out.PersonalNameGenderedOut.
# Its id is used as the row label of tst_names that receives the classification.

print("Adding results to dataframe...")

for classified_name in result:
    row_label = classified_name.id
    tst_names.at[row_label, 'likely_gender'] = classified_name.likely_gender
    tst_names.at[row_label, 'score'] = classified_name.score

print("Results added to dataframe. {}".format(tst_names))