In [2]:
import requests   # best library to manage HTTP transactions
from bs4 import BeautifulSoup # web-scraping library
import json
from time import sleep
import csv
import math
from fuzzywuzzy import fuzz # fuzzy logic matching
from fuzzywuzzy import process
import xml.etree.ElementTree as et # library to traverse XML tree
import urllib
import datetime
import string
from pathlib import Path

# ---------------
# Configuration data
# ---------------

# IRI of the named graph that is queried and then updated
graph_name = (
    'https://github.com/HeardLibrary/linked-data/blob/'
    '29e5d02aaf00cb890792d7dee73707603a506b3e/json_schema/bluffton_presidents.csv'
)
# Serialization requested from the endpoint for the constructed triples
accept_media_type = 'text/turtle'
sparql_endpoint = 'https://sparql.vanderbilt.edu/sparql'
# Headers sent with the query request. A 'Content-Type': 'application/sparql-query'
# entry is not sent; it is kept here only as a disabled option.
request_header_dictionary = dict(Accept=accept_media_type)

# Location of the file that holds the endpoint password
directory = 'home'  # 'home' or 'working'
filename = 'sparql_vanderbilt_edu_password.txt'

# ---------------
# Function definitions
# ---------------

# Load password from local drive
# value of directory should be either 'home' or 'working'
def load_credential(filename, directory):
    """Read a credential (such as the endpoint password) from a local text file.

    Parameters
    ----------
    filename : str
        Name of the credential file. When ``directory`` is not ``'home'`` the
        value is used as the path as-is, so it is resolved against the current
        working directory.
    directory : str
        ``'home'`` to look in the user's home directory. Any other value is
        treated as ``'working'``; to read from the working directory, pass a
        value other than ``'home'``.

    Returns
    -------
    str
        The complete file contents, unmodified (nothing is stripped, so a
        trailing newline in the file is part of the returned value).

    Raises
    ------
    SystemExit
        If the file cannot be opened or read (any ``OSError``); a hint naming
        the file and directory is printed first.

    Errors that are not I/O failures (for example a ``UnicodeDecodeError`` for
    a file that is not UTF-8) are left to propagate rather than being reported
    as "file not found".
    """
    if directory == 'home':
        # Path.home() gives the home directory; supposed to work for Win and Mac
        credential_path = Path.home() / filename
    else:
        # Normalise the label so the hint printed on failure names the right place
        directory = 'working'
        credential_path = Path(filename)
    try:
        with open(credential_path, 'rt', encoding='utf-8') as file_object:
            return file_object.read()
    except OSError:
        print(filename + ' file not found - is it in your ' + directory + ' directory?')
        # Raise SystemExit directly instead of calling the interactive-only exit() helper
        raise SystemExit

def retrieve_direct_statements(sparql_endpoint):
    """Run a CONSTRUCT query that derives "direct" property triples and return the response body.

    For every ``?item ?p ?statement`` / ``?statement ?ps ?value`` pair in the
    graph named by the module-level ``graph_name``, where ``?ps`` is in the
    ``http://www.wikidata.org/prop/statement/`` namespace, the query constructs
    ``?item <http://www.wikidata.org/prop/direct/ID> ?value``.

    Parameters
    ----------
    sparql_endpoint : str
        URL of the SPARQL endpoint; the query is sent with HTTP GET in the
        ``query`` parameter, using the module-level ``request_header_dictionary``
        as request headers.

    Returns
    -------
    str
        The response body as text.

    Raises
    ------
    requests.HTTPError
        If the endpoint answers with a 4xx/5xx status, so that an error page is
        never handed on as if it were graph data.
    """
    # 39 is the length of the prop/statement/ namespace and 29 the length of the
    # prop/ namespace, so substr(..., 40) and substr(..., 30) yield the local property id.
    # NOTE(review): ?id is bound twice (once from ?ps, once from ?p). Presumably the
    # intent is to require that both IRIs carry the same property id, but SPARQL 1.1
    # restricts BIND to variables not already used in the group - confirm the target
    # endpoint accepts this and behaves as intended.
    query = '''
construct {?item ?directProp ?value.}
from <''' + graph_name + '''>
where {
  ?item ?p ?statement.
  ?statement ?ps ?value.
  filter(substr(str(?ps),1,39)="http://www.wikidata.org/prop/statement/")
  bind(substr(str(?ps),40) as ?id)
  bind(substr(str(?p),30) as ?id)
  bind(iri(concat("http://www.wikidata.org/prop/direct/", ?id)) as ?directProp)
  }
'''
    response = requests.get(sparql_endpoint, params={'query' : query}, headers=request_header_dictionary)
    # Fail loudly on an HTTP error instead of returning the error page
    response.raise_for_status()
    return response.text

def perform_sparql_update(sparql_endpoint, pwd, update_command):
    """POST a SPARQL Update command to the endpoint and print the outcome.

    The request authenticates as user 'admin' with the password ``pwd`` and
    sends ``update_command`` as the body with the ``application/sparql-update``
    content type. The status code and final URL are printed on one line,
    followed by the response body. Nothing is returned.
    """
    # SPARQL Update must be sent with HTTP POST
    response = requests.post(
        sparql_endpoint,
        data=update_command,
        headers={'Content-Type': 'application/sparql-update'},
        auth=('admin', pwd),
    )
    print(f'{response.status_code} {response.url}')
    print(response.text)


In [3]:
# ---------------
# Fetch the direct-property statements entailed by the Wikibase model from the endpoint
# ---------------

# The password is only needed for the later update, but it is loaded first so that a
# missing credential file stops the run before any request is made.
pwd = load_credential(filename=filename, directory=directory)

# Text of the constructed triples, as returned by the endpoint
graph_text = retrieve_direct_statements(sparql_endpoint=sparql_endpoint)
print('constructed triples retrieved')

constructed triples retrieved


In [4]:
# Remove prefixes from the response Turtle; they are not necessary since IRIs are unabbreviated.
# Every line whose first character is '@' is dropped, empty lines are dropped as well,
# and each kept line is terminated with '\n'.
graph_text_list = graph_text.split('\n')
graph_text = ''.join(
    line + '\n'
    for line in graph_text_list
    if line and line[0] != '@'  # explicit empty-line check instead of swallowing IndexError
)

In [5]:
# Add the constructed triples to the named graph with a SPARQL 1.1 UPDATE (INSERT DATA)

update_command = ''.join([
    'INSERT DATA\n',
    '{ GRAPH <', graph_name, '> { \n',
    graph_text,
    '\n}}',
])

# Prints the HTTP status line and the endpoint's response body
perform_sparql_update(sparql_endpoint, pwd, update_command)

print()
print('done')

200 https://sparql.vanderbilt.edu/sparql
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"><html><head><meta http-equiv="Content-Type" content="text&#47;html;charset=UTF-8"><title>blazegraph&trade; by SYSTAP</title
></head
><body<p>totalElapsed=0ms, elapsed=0ms, connFlush=0ms, batchResolve=0, whereClause=0ms, deleteClause=0ms, insertClause=0ms</p
><hr><p>COMMIT: totalElapsed=251ms, commitTime=1598157003429, mutationCount=40</p
></html
>

done
