# RDF, d3, SPARQL
* Creates an RDF dump of the Elasticsearch index and converts it to `{subject: "", predicate: "", object: ""}` format to be plotted in d3.
* Experiments with SPARQL queries against the resulting graph using rdflib.

In [21]:
%load_ext autoreload
%autoreload 2

import sys
# Put the parent directory on the import path *before* importing
# heritageconnector; presumably the package lives one level above this
# notebook (TODO confirm against the repository layout).
sys.path.append("..")

from heritageconnector import datastore

from rdflib import Graph
from rdflib.extras.external_graph_libs import rdflib_to_networkx_multidigraph

import matplotlib.pyplot as plt

import networkx as nx
from networkx.readwrite import json_graph

import json

import pandas as pd
# Lift pandas' display limits so wide cells and all columns are shown in full.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

from IPython.display import display

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## 1. Create graph from ES dump

In [44]:
# Build the graph `g` with the project's Elasticsearch-to-rdflib helper;
# every later cell in this notebook works from `g`.
g = datastore.es_to_rdflib_graph()
# Size of the graph (for an rdflib Graph this is the number of triples),
# displayed as the cell output.
len(g)

10070

## 2. Convert to json file

In [45]:
# Destination of the JSON triples file written by the export cell below;
# a relative path, so it resolves against the notebook's working directory.
export_path = "triples.json"

In [48]:
# Convert the rdflib graph to a networkx multidigraph; the JSON export cell
# below reads its edges from `g_nx`.
g_nx = rdflib_to_networkx_multidigraph(g)
# Optional quick visual check of the graph, left disabled:
#nx.draw(g_nx, with_labels=False)

In [49]:
# and then to json for d3
# Read the triples straight off the multigraph's edges rather than indexing
# json_graph.node_link_data(g_nx)['links']: networkx announces that the default
# name of that key becomes "edges" in 3.6, which would turn the lookup into a
# KeyError. Each edge is (source, target, key) and the edge key carries the
# predicate, exactly as the 'key' field of the node-link export did.
rdf_links = [
    {'subject': s, 'predicate': p, 'object': o}
    for s, o, p in g_nx.edges(keys=True)
]

# Write the list of {subject, predicate, object} records for d3 to load.
with open(export_path, "w") as f:
    json.dump(rdf_links, f)


## 3. SPARQL queries

In [53]:
# select all women
# The xsd prefix is declared explicitly so the query does not depend on which
# prefixes the graph happens to have bound; the IRI is the one the gender
# predicate resolves to in this graph (XMLSchema#gender).
sparql_query = """
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT * WHERE {
    ?item xsd:gender "Female"
}
"""

res = g.query(sparql_query)

# Number of matching rows, then the rows themselves as the cell output.
print(len(res))
list(res)

23


[(rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp19797')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp19558')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17288')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp20094')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17013')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp18982')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp23732')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp16715')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp20120')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp18161')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp15383')),
 (rdflib.term.URIRef('https://collection.sciencemuseum

In [55]:
# get all people with links: for every item that has both an owl:sameAs link
# and a gender, CONSTRUCT returns those two triples.
# xsd is declared explicitly alongside owl so the query does not depend on
# which prefixes the graph happens to have bound.
sparql_query = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
CONSTRUCT WHERE {
    ?item owl:sameAs ?wdItem .
    ?item xsd:gender ?gender .
}
"""

res = g.query(sparql_query)

# Number of constructed triples, then the triples themselves as the cell output.
print(len(res))
list(res)

256


[(rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp19433'),
  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  rdflib.term.URIRef('http://www.wikidata.org/entity/Q1351687')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp18696'),
  rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#gender'),
  rdflib.term.Literal('Male')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17392'),
  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  rdflib.term.URIRef('http://www.wikidata.org/entity/Q2156577')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp14308'),
  rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#gender'),
  rdflib.term.Literal('Female')),
 (rdflib.term.URIRef('https://collection.sciencemuseumgroup.org.uk/people/cp17871'),
  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#sameAs'),
  rdflib.term.URIRef('http://www.wikidata.org/entity/Q1998075