# RDF, d3, SPARQL
* Creates an RDF dump of the Elasticsearch index and converts it to `{subject: "", predicate: "", object: ""}` format to be plotted in d3.
* Experiments with SPARQL queries in rdflib.

In [21]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("..")

from heritageconnector import datastore

from rdflib import Graph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph

import matplotlib.pyplot as plt

import networkx as nx
from networkx.readwrite import json_graph

import json

import pandas as pd
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

from IPython.display import display

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Create graph from ES dump

In [66]:
# Build the graph from the Elasticsearch dump using the project's datastore helper
# (presumably returns an rdflib Graph, since it is queried with SPARQL below).
g = datastore.es_to_rdflib_graph()
# Trailing expression: the notebook displays the size of the graph as the cell output.
len(g)

10070

## 2. Convert to JSON file

In [45]:
# File the d3-ready triples are written to (relative path, opened for writing in the export cell).
export_path = "triples.json"

In [48]:
# Convert the rdflib graph into a networkx multidigraph so it can be
# serialised with networkx's JSON helpers.
g_nx = rdflib_to_networkx_multidigraph(g)
# Uncomment for a quick plot of the converted graph:
#nx.draw(g_nx, with_labels=False)

In [49]:
# Export for d3: take the edge list from networkx's node-link representation
# and reshape every edge into a {subject, predicate, object} record.
links = json_graph.node_link_data(g_nx)['links']

# For each edge, 'source' and 'target' are its endpoints and 'key' is the
# edge key, which is used here as the predicate.
rdf_links = [
    {'subject': edge['source'], 'predicate': edge['key'], 'object': edge['target']}
    for edge in links
]

# Write the records out as a single JSON array.
with open(export_path, "w") as out_file:
    json.dump(rdf_links, out_file)


## 3. SPARQL queries

In [53]:
# Find every item whose xsd:gender value is the literal "Female".
# NOTE: the query text declares no PREFIX for xsd, so it relies on that
# prefix already being known to the graph when the query is run.
sparql_query = """
SELECT * WHERE {
    ?item xsd:gender "Female"
}

"""

res = g.query(sparql_query)

# Print the number of matching rows, then show all of them as the cell output.
print(len(res))
list(res)

23


[(rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp19797')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp19558')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17288')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp20094')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17013')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp18982')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp23732')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp16715')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp20120')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp18161')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp15383')),
 (rdflib.term.URIRef('https://collection.sciencemuseum

In [61]:
# Get all people with links: match items that have an owl:sameAs link and are
# also the foaf:maker of some object, and construct a graph from the matched
# triple patterns.
sparql_query = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
CONSTRUCT WHERE {
    ?item owl:sameAs ?wdItem .
    ?object foaf:maker ?item.
}
"""

res = g.query(sparql_query)

# Print the size of the result, then show its contents as the cell output.
print(len(res))
list(res)

721


[(rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/objects/co835'),
  rdflib.term.URIRef('http://xmlns.com/foaf/0.1/maker'),
  rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp40549')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/objects/co1215'),
  rdflib.term.URIRef('http://xmlns.com/foaf/0.1/maker'),
  rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp23126')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/objects/co719'),
  rdflib.term.URIRef('http://xmlns.com/foaf/0.1/maker'),
  rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp100723')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/objects/co298'),
  rdflib.term.URIRef('http://xmlns.com/foaf/0.1/maker'),
  rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp44014')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/objects/co808'),
  rdfl

In [85]:
# Get all triples whose predicate is one of a fixed set of values
# (owl:sameAs or foaf:maker), listed with a VALUES clause.
#
# The SELECT clause must project ?o -- the variable bound by the triple
# pattern. A differently named variable (e.g. ?0, digit zero) is still valid
# SPARQL but is never bound, which would leave the third column empty.
query = """PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
SELECT ?s ?p ?o WHERE {
    VALUES ?p {owl:sameAs foaf:maker}
    ?s ?p ?o.
}
"""

res = g.query(query)

print(len(res))
# Materialise the rows once so they can be reused after this cell.
allres = list(res)

# Reshape each (s, p, o) row into the subject/predicate/object record format;
# as the trailing expression this is what the notebook displays.
[{"subject": s, "predicate": p, "object": o} for s, p, o in allres]


1042


[{'subject': rdflib.term.URIRef('http://www.wikidata.org/entity/Q7966280'),
  'predicate': rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  'object': rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp20473')},
 {'subject': rdflib.term.URIRef('http://www.wikidata.org/entity/Q437466'),
  'predicate': rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  'object': rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp92760')},
 {'subject': rdflib.term.URIRef('http://www.wikidata.org/entity/Q595756'),
  'predicate': rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  'object': rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp109228')},
 {'subject': rdflib.term.URIRef('http://www.wikidata.org/entity/Q8750'),
  'predicate': rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  'object': rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp1072')},
 {'subject': rdfl