## Making TSVs from Enslaved TTL Files for individuals that exist on both Enslaved and Wikipedia

### Each TSV should contain the triples from the Enslaved.org Wikibase as tab-separated columns:
Subject -> Predicate -> Object


In [8]:
import json
from tqdm import tqdm # type: ignore
import os
import requests # type: ignore
from icecream import ic # type: ignore

In [41]:
# Cache of entity ID -> English label, filled by findQID to avoid redundant API calls
foundQIDs = {}
# Entity ID -> number of times findQID answered from the cache
QIDCount = {}

# Cache of property ID -> label, pre-seeded from a local TSV (ID in the first
# column, label in the second) to avoid redundant API calls
props = {}
with open("enslavedPropList.tsv", "r") as f:
    for line in f:
        line = line.strip().split("\t")
        # Skip blank or single-column rows (e.g. a trailing empty line), which
        # would otherwise raise IndexError on line[1].
        if len(line) < 2:
            continue
        props[line[0]] = line[1]
# Property ID -> number of times findProperty answered from the cache
PropsCount = {}

# Stores any errors to then print out at the end
errors = []

# Base query URL for Enslaved; an entity or property ID is appended to it
baseURL = "https://lod.enslaved.org/w/api.php?action=wbgetentities&format=json&ids="

# Entity ID used for the manual check below
testEntity = "Q490761"
#ic(requests.get(baseURL + testEntity).json()["entities"][testEntity]["labels"]["en"]["value"])

In [42]:
def findQID(str):
    """Return the English label of an Enslaved entity ID, caching the result.

    Cached IDs are answered from ``foundQIDs`` and their hit counter in
    ``QIDCount`` is incremented. Unknown IDs are fetched from the API at
    ``baseURL``; on success the label is cached and its counter starts at 0.

    Args:
        str: Entity ID such as ``"Q490761"``. (The name shadows the builtin;
            it is kept so existing callers are unaffected.)

    Returns:
        The English label, or ``None`` when the lookup fails. Every failure
        is described in the module-level ``errors`` list.
    """
    if str in foundQIDs:
        QIDCount[str] = QIDCount.get(str, 0) + 1
        return foundQIDs[str]

    try:
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(baseURL + str, timeout=30)
    except requests.RequestException as exc:
        errors.append(f"Error: {exc} for {str}")
        return None

    if response.status_code != 200:
        errors.append(f"Error: {response.status_code} for {str}")
        return None

    try:
        label = response.json()["entities"][str]["labels"]["en"]["value"]
    except (KeyError, TypeError, ValueError):
        # A 200 response can still lack the label: unknown ID, no English
        # label, or a body that is not the expected JSON structure.
        errors.append(f"Error: no English label for {str}")
        return None

    foundQIDs[str] = label
    QIDCount[str] = 0
    return label
        
#ic(findQID(testEntity))

In [43]:
# Looks up a property label in the props cache, falling back to the Enslaved API
def findProperty(str):
    """Return the label of an Enslaved property ID, caching the result.

    Cached IDs are answered from ``props`` and their hit counter in
    ``PropsCount`` is incremented. Unknown IDs are fetched from the API at
    ``baseURL``; on success the English label is cached and its counter
    starts at 0.

    Args:
        str: Property ID such as ``"P31"``. (The name shadows the builtin;
            it is kept so existing callers are unaffected.)

    Returns:
        The property label, or ``None`` when the lookup fails. Every failure
        is described in the module-level ``errors`` list.
    """
    if str in props:
        PropsCount[str] = PropsCount.get(str, 0) + 1
        return props[str]

    try:
        # A timeout keeps one stalled request from hanging the whole run.
        response = requests.get(baseURL + str, timeout=30)
    except requests.RequestException as exc:
        errors.append(f"findProperty Error: {exc} for {str}")
        return None

    if response.status_code != 200:
        errors.append(f"findProperty Error: {response.status_code} for {str}")
        return None

    try:
        label = response.json()["entities"][str]["labels"]["en"]["value"]
    except (KeyError, TypeError, ValueError):
        # A 200 response can still lack the label: unknown ID, no English
        # label, or a body that is not the expected JSON structure.
        errors.append(f"findProperty Error: no English label for {str}")
        return None

    props[str] = label
    PropsCount[str] = 0
    return label
        
# Manual check: resolve the label for property P31 and print it with icecream.
ic(findProperty("P31"))

In [129]:

# Print the number of confirmed matches. NOTE(review): confirmedMatches is
# assigned in the loading cell further down this file, so that cell has to
# have been run before this one - confirm the intended cell order.
print(len(confirmedMatches))

Confirmed: Q491029 - Absalom Jones
Confirmed: Q490677 - Amanda America Dickson
Confirmed: Q490803 - Amos Fortune
Confirmed: Q490534 - Anthony Burns
Confirmed: Q490364 - Archer Alexander
Confirmed: Q491587 - Bethany Veney
Confirmed: Q490720 - Charlotte Dupuy
Confirmed: Q490499 - Clara Brown
Confirmed: Q490642 - Titus Colonel Tye Corlies
Confirmed: Q134124 - Cudjoe
Confirmed: Q490821 - David George
Confirmed: Q490385 - Elijah Abel Able
Confirmed: Q491066 - Elizabeth Hobbs Keckley
Confirmed: Q491039 - Elizabeth Key
Confirmed: Q490490 - George Freeman Bragg Jr
Confirmed: Q490952 - George Moses Horton
Confirmed: Q490641 - Hannah Crafts
Confirmed: Q490972 - Harriet Ann Jacobs
Confirmed: Q491299 - Harriet Powers
Confirmed: Q491539 - Harriet Ross Tubman
Confirmed: Q491594 - Harry Washington
Confirmed: Q490462 - Henry Walton Bibb
Confirmed: Q490515 - Henry Box Brown
Confirmed: Q490831 - Henry Highland Garnet
Confirmed: Q491095 - Jermain Wesley Loguen
Confirmed: Q490991 - John Jea
Confirmed: Q49

In [130]:
import rdflib
from rdflib import URIRef
from rdflib.namespace import Namespace

# URI prefixes used by the Enslaved TTL data for entities and direct properties
entity = "https://lod.enslaved.org/entity/"
prop = "https://lod.enslaved.org/prop/direct/"


# Load the confirmed matches as QID -> name from the tab-separated match list.
# Columns used: [1] confirmation flag ("x"), [3] QID, [4] name.
confirmedMatches = {}
with open("confirmedMatchesv2.txt", "r") as qList:
    for line in qList:
        line = line.strip().split("\t")
        # Index 4 is read below, so a row needs at least five columns.
        if len(line) >= 5 and line[1] == "x":
            qid = line[3]
            name = line[4]
            print(f"Confirmed: {qid} - {name}")
            confirmedMatches[qid] = name
print(len(confirmedMatches))

# Write one TSV per confirmed individual: subject label, property label, object.
for qid in tqdm(confirmedMatches):
    filename = confirmedMatches[qid].replace(" ", "_")

    # Parse before opening the output so a failed download does not leave an
    # empty TSV behind.
    g = rdflib.Graph()
    g.parse(f"https://lod.enslaved.org/wiki/Special:EntityData/{qid}.ttl", format="ttl")

    # Labels and literals may contain non-ASCII text, so fix the encoding
    # instead of relying on the platform default.
    with open(f"../EnslavedTSV/{filename}.tsv", "w", encoding="utf-8") as f:
        for s, p, o in g.triples((URIRef(f"{entity}{qid}"), None, None)):
            subject = findQID(s.split("/")[-1])

            # Only direct-property predicates are exported.
            predicate = None
            if p.startswith(prop):
                predicate = findProperty(p.split("/")[-1])

            if o.startswith(f"{entity}Q"):
                # Object is another entity: replace its URI with its label.
                obj = findQID(o.split("/")[-1])
            elif o.startswith(f"{entity}statement/"):
                # Statement nodes are not exported.
                obj = None
            else:
                obj = o

            # Skip rows with a missing part, including failed lookups (None),
            # which would otherwise be written as the text "None". The object
            # is compared via str() because rdflib literal truthiness follows
            # the literal's value and would drop values such as 0.
            if not subject or not predicate or obj is None or str(obj) == "":
                continue

            f.write(f"{subject}\t{predicate}\t{obj}\n")


Confirmed: Q491029 - Absalom Jones
Confirmed: Q490677 - Amanda America Dickson
Confirmed: Q490803 - Amos Fortune
Confirmed: Q490534 - Anthony Burns
Confirmed: Q490364 - Archer Alexander
Confirmed: Q491587 - Bethany Veney
Confirmed: Q490720 - Charlotte Dupuy
Confirmed: Q490499 - Clara Brown
Confirmed: Q490642 - Titus Colonel Tye Corlies
Confirmed: Q134124 - Cudjoe
Confirmed: Q490821 - David George
Confirmed: Q490385 - Elijah Abel Able
Confirmed: Q491066 - Elizabeth Hobbs Keckley
Confirmed: Q491039 - Elizabeth Key
Confirmed: Q490490 - George Freeman Bragg Jr
Confirmed: Q490952 - George Moses Horton
Confirmed: Q490641 - Hannah Crafts
Confirmed: Q490972 - Harriet Ann Jacobs
Confirmed: Q491299 - Harriet Powers
Confirmed: Q491539 - Harriet Ross Tubman
Confirmed: Q491594 - Harry Washington
Confirmed: Q490462 - Henry Walton Bibb
Confirmed: Q490515 - Henry Box Brown
Confirmed: Q490831 - Henry Highland Garnet
Confirmed: Q491095 - Jermain Wesley Loguen
Confirmed: Q490991 - John Jea
Confirmed: Q49

  0%|          | 0/57 [00:00<?, ?it/s]

100%|██████████| 57/57 [00:36<00:00,  1.55it/s]
