# Infocard Prototype

This prototype sketches possible interfaces for pulling data from name authority databases to supplement the Primary Source Coop.

In [31]:
import os
import re
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Directory holding the JQA data files. Set the JQA_DATA_DIR environment
# variable to point somewhere else; the fallback is the original local path.
# os.path.join(..., '') guarantees a trailing separator, because the path is
# combined with file names by plain string concatenation.
abs_dir = os.path.join(
    os.environ.get('JQA_DATA_DIR', '/Users/quinn.wi/Documents/Data/JQA_pre-2021-04-14/'),
    '',
)

In [65]:
%%time

# Load the names spreadsheet (slow: the recorded run took about a minute).
names_data = pd.read_excel(abs_dir + 'DJQA_Names-List_singleSheet.xlsx', index_col=None)

# Replace each whitespace character in the column headers with "_" so the
# columns can be referenced as identifiers (e.g. in DataFrame.query).
# regex=True is explicit: pandas >= 2.0 treats the pattern as a literal string
# by default, which would leave the headers unchanged.
names_data.columns = names_data.columns.str.replace(r'\s', '_', regex=True)

# Debugging filter: uncomment to restrict the data to a single person.
# names_data = names_data.query('(Last_Name == "Randolph") & (First_Name == "John")')

# Keep only rows that carry an LC name-authority identifier.
names_data = names_data.dropna(subset=['LC_Name_Authority'])

CPU times: user 1min 4s, sys: 350 ms, total: 1min 5s
Wall time: 1min 6s


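Expected data structure (schematic: `xml_id`, `loc_id`, etc. are placeholders for the actual identifier values, so this is not literal JSON):

```text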
{xml_id: 
    {
        loc_id: {info_keys: info_values},
        snac_id: {info_keys: info_values},
        viaf_id: {info_keys: info_values},
        wiki_id: {info_keys: info_values}
    }
}
```

In [57]:
%%time

def parseLOC(identifier, timeout=30):
    """Fetch a Library of Congress name-authority record and extract key fields.

    Downloads the MADS/XML record for ``identifier`` from id.loc.gov and reads
    the authorized name, the date name part, and the gender term.

    Parameters
    ----------
    identifier : str
        LC name-authority identifier, e.g. ``"n80001490"``.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict
        ``{identifier: {"name": ..., "birthDeath": ..., "genderedTerm": ...}}``.
        A value is ``None`` when the record does not contain that element.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    xml.etree.ElementTree.ParseError
        If the response body is not well-formed XML.
    """
    # Lookup URI.
    url = f"https://id.loc.gov/authorities/names/{identifier}.madsxml.xml"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()

    # Parse XML; recover the namespace from the root tag ("{uri}localname").
    root = ET.fromstring(response.text)
    namespace = re.match(r"\{(.*)\}", str(root.tag))
    # Fall back to un-namespaced paths if the root tag carries no namespace.
    tag_prefix = "{" + namespace.group(1) + "}" if namespace else ""

    def _text(path):
        """Return the text of the first element matching ``path``, or None."""
        node = root.find(path)
        return node.text if node is not None else None

    # Gather information; not every record has dates or a gender term.
    authorized_name = f'.//{tag_prefix}name[@authority="naf"]'
    namePart = _text(f"{authorized_name}/{tag_prefix}namePart")
    birthDeathDate = _text(f'{authorized_name}/{tag_prefix}namePart[@type="date"]')
    genderedTerm = _text(f".//{tag_prefix}genderTerm")

    return {identifier: {"name": namePart, "birthDeath": birthDeathDate, "genderedTerm": genderedTerm}}
    

def parseSNAC(identifier, timeout=30):
    """Fetch a SNAC constellation and extract the preferred name and life dates.

    Downloads the constellation JSON for ``identifier`` from
    snaccooperative.org, takes the first name entry, and pulls a
    ``YYYY-YYYY`` date range out of that name string.

    Parameters
    ----------
    identifier : str
        SNAC identifier, e.g. ``"84910652"``.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    dict
        ``{identifier: {"name": ..., "birthDeath": ...}}``. ``name`` is
        ``None`` when the record has no name entries; ``birthDeath`` is
        ``None`` when the name contains no ``YYYY-YYYY`` range.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    # Lookup URI & get JSON format.
    url = f"https://snaccooperative.org/download/{identifier}?type=constellation_json"
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    constellation = response.json()

    # The first name entry is used as the display name.
    name_entries = constellation.get('nameEntries') or []
    namePart = name_entries[0].get('original') if name_entries else None

    # Life dates are embedded in the name string, e.g. "Child, Lydia Maria, 1802-1880".
    date_match = re.search(r'\d{4}-\d{4}', namePart) if namePart else None
    birthDeathDate = date_match.group(0) if date_match else None

    return {identifier: {'name': namePart, 'birthDeath': birthDeathDate}}

CPU times: user 4 µs, sys: 1 µs, total: 5 µs
Wall time: 7.15 µs


In [58]:
%%time

# Sample identifiers for a single person, Lydia Maria Child.
locID = 'n80001490'   # Library of Congress name-authority ID
snacID = "84910652"   # SNAC ID

# Display the SNAC lookup result as the cell output.
# To check the LOC record instead, call parseLOC(locID) here.
parseSNAC(snacID)

CPU times: user 50.6 ms, sys: 16.5 ms, total: 67.1 ms
Wall time: 4.49 s


{'84910652': {'name': 'Child, Lydia Maria, 1802-1880',
  'birthDeath': '1802-1880'}}