# Seeking the population to be studied using Wikidata


Wikidata provides the [occupation (P106)](https://www.wikidata.org/wiki/Property:P106) property that seems to represent a good entry point to define the perimeter of the population of astronomers and other related professions.

We will start from the following occupations, then explore the related ones.


See also ["field of work" (Property:P101)](http://www.wikidata.org/entity/P101), ["position held" (Property:P39)](http://www.wikidata.org/entity/P39)

---

In [1]:
from SPARQLWrapper import SPARQLWrapper, SPARQLWrapper2, JSON, TURTLE, XML, RDFXML

In [2]:
import csv
import sqlite3 as sql
from datetime import datetime
from importlib import reload
import pprint
import json
import ast

In [3]:
import sparql_functions as spqf
# reload(spqf)

## Storing, documenting and executing SPARQL queries

In [4]:
# Path of the SQLite database holding the 'query' and 'result' tables used below
# (relative path: resolved against the kernel's working directory)
db = 'db_stores/sparql_queries.db'

In [5]:
### Inspect existing SPARQL queries

# List key, label, description and endpoint of every stored query so that a
# pk_query can be chosen for the cells below.
cn = sql.connect(db)
try:
    c = cn.cursor()
    c.execute('SELECT pk_query, label, description, sparql_endpoint FROM query')
    r_all = c.fetchall()
finally:
    # Close the connection even when the SELECT fails, so the database file
    # is not left open in the kernel.
    cn.close()

# Preview at most the first 200 rows
r_all[:200]

[(1,
  'Properties of astronomers',
  "Get and count the outgoing and incoming properties of people having astronomer as an occupation:\n\n?person  wdt:P106  wd:Q11063\n\nCette requête ne marche pas lorsque elle est exécutée depuis le SPARQLWrapper. Je l'ai donc exécutée directement dans l'éditeur de requêtes de Wikidata, téléchargé le fichier JSON verbeux du résultat et produit une ligne de la table 'result'. Elle est horodatée.",
  'https://query.wikidata.org/sparql'),
 (2,
  'Properties of physician (Q39631)',
  "Get and count the outgoing and incoming properties of people having physician as an occupation:\n\n?person  wdt:P106  wd:Q39631\t\n\nCette requête ne marche pas lorsque elle est exécutée depuis le SPARQLWrapper. Je l'ai donc exécutée directement dans l'éditeur de requêtes de Wikidata, téléchargé le fichier JSON verbeux du résultat et produit une ligne de la table 'result'. Elle est horodatée.",
  'https://query.wikidata.org/sparql'),
 (3,
  'Find astronomers and insert basi

### Execute a specific SPARQL query

In [6]:
### Define the line of the database to be used (insert the query into the database first)

pk_query = 2

# Fetch the full row of the chosen query; later cells read the description
# (rc[2]), the endpoint (rc[4]) and the SPARQL text (rc[5]) from it.
cn = sql.connect(db)
try:
    c = cn.cursor()
    c.execute('SELECT * FROM query WHERE pk_query = ?', [pk_query])
    rc = c.fetchone()
finally:
    # Release the database even if the SELECT raises
    cn.close()

In [7]:
# Print the description, the endpoint and the SPARQL text of the selected
# query, separated by dashed rules
print("\n-----\n".join([rc[2], rc[4], rc[5]]))

Get and count the outgoing and incoming properties of people having physician as an occupation:

?person  wdt:P106  wd:Q39631	

Cette requête ne marche pas lorsque elle est exécutée depuis le SPARQLWrapper. Je l'ai donc exécutée directement dans l'éditeur de requêtes de Wikidata, téléchargé le fichier JSON verbeux du résultat et produit une ligne de la table 'result'. Elle est horodatée.
-----
https://query.wikidata.org/sparql
-----
PREFIX  bd:   <http://www.bigdata.com/rdf#>
PREFIX  wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX  wikibase: <http://wikiba.se/ontology#>
PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>
PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  wd:   <http://www.wikidata.org/entity/>

SELECT  ?prop ?propLabel ?direction ?eff
WHERE
  { { SELECT  ?prop ?direction (count(*) AS ?eff)
      WHERE
        { {   { ?person  ?p                    ?o .
                ?prop    wikibase:directClaim  ?p
                BIND("out" AS ?direction)
             

In [8]:
### Execute the SPARQL query
# rc[4] is the endpoint URL and rc[5] the SPARQL text of the row selected above
qr = spqf.get_json_sparql_result(rc[4],rc[5])


<class 'dict'>


In [9]:
# Materialise the rows produced by the project helper, report how many there
# are, then preview the first 50
r = list(spqf.sparql_result_to_list(qr))
print(len(r))
r[:50]

1089


[['http://www.wikidata.org/entity/P106', 'occupation', 'out', '20496'],
 ['http://www.wikidata.org/entity/P569', 'date of birth', 'out', '11422'],
 ['http://www.wikidata.org/entity/P570', 'date of death', 'out', '10755'],
 ['http://www.wikidata.org/entity/P31', 'instance of', 'out', '10595'],
 ['http://www.wikidata.org/entity/P735', 'given name', 'out', '10567'],
 ['http://www.wikidata.org/entity/P21', 'sex or gender', 'out', '10423'],
 ['http://www.wikidata.org/entity/P214', 'VIAF ID', 'out', '9115'],
 ['http://www.wikidata.org/entity/P1343',
  'described by source',
  'out',
  '8024'],
 ['http://www.wikidata.org/entity/P1871', 'CERL Thesaurus ID', 'out', '7868'],
 ['http://www.wikidata.org/entity/P227', 'GND ID', 'out', '7359'],
 ['http://www.wikidata.org/entity/P7859',
  'WorldCat Identities ID',
  'out',
  '7054'],
 ['http://www.wikidata.org/entity/P19', 'place of birth', 'out', '6558'],
 ['http://www.wikidata.org/entity/P213', 'ISNI', 'out', '5975'],
 ['http://www.wikidata.org/ent

In [None]:
# Inspect the first 10 lines
# A slice stops after ten bindings; the previous counter-based loop kept
# iterating over the whole result set without printing anything more.
for binding in qr['results']['bindings'][:10]:
    print(binding)

###  Store and retrieve the JSON result of the former SPARQL query

In [37]:
### OPTIONAL

# Add the result of the query executed above as a new row of the 'result'
# table, linked to its query through fk_query.
cn = sql.connect(db)
try:
    c = cn.cursor()
    # The result is stored as its Python string representation (str(qr))
    values = (pk_query, str(qr))
    c.execute("INSERT INTO result (fk_query, result) VALUES (?,?)", values)

    ## Activate to persist INSERT
    ## DISABLED !!! ## cn.commit()
finally:
    # Close the connection even if the INSERT fails. While the commit above
    # stays disabled, nothing is persisted.
    cn.close()

In [10]:
### Define the row of the database to be searched (inspect the database first)

pk_result = ['2']
cn = sql.connect(db)
try:
    c = cn.cursor()
    c.execute('SELECT * FROM result WHERE pk_result = ?', pk_result)
    # NOTE: from here on `qr` is the fetched database row, no longer the
    # dict returned by the live query further up.
    qr = c.fetchone()
finally:
    # Release the database even if the SELECT raises
    cn.close()

In [13]:
### Transform string to dict
## Doc.: https://stackoverflow.com/questions/988228/convert-a-string-representation-of-a-dictionary-to-a-dictionary
# qr[3] is the stored string representation of the result dict
d = ast.literal_eval(qr[3])

# Start from an empty list: if the conversion fails, the preview below must
# not silently show the rows left in `r` by an earlier cell.
r = []
try:
    r = list(spqf.sparql_result_to_list(d))
    print(len(r))
except Exception as e:
    print(e)
r[:100]

901


[['http://www.wikidata.org/entity/P106', 'occupation', 'out', '15619'],
 ['http://www.wikidata.org/entity/P569', 'date of birth', 'out', '8715'],
 ['http://www.wikidata.org/entity/P570', 'date of death', 'out', '8089'],
 ['http://www.wikidata.org/entity/P31', 'instance of', 'out', '8019'],
 ['http://www.wikidata.org/entity/P21', 'sex or gender', 'out', '7934'],
 ['http://www.wikidata.org/entity/P735', 'given name', 'out', '7669'],
 ['http://www.wikidata.org/entity/P214', 'VIAF ID', 'out', '6581'],
 ['http://www.wikidata.org/entity/P1343',
  'described by source',
  'out',
  '6551'],
 ['http://www.wikidata.org/entity/P19', 'place of birth', 'out', '5439'],
 ['http://www.wikidata.org/entity/P7859',
  'WorldCat Identities ID',
  'out',
  '5343'],
 ['http://www.wikidata.org/entity/P227', 'GND ID', 'out', '5242'],
 ['http://www.wikidata.org/entity/P1871', 'CERL Thesaurus ID', 'out', '5180'],
 ['http://www.wikidata.org/entity/P27',
  'country of citizenship',
  'out',
  '4921'],
 ['http://ww

In [103]:
### Pass the variable holding the query text to setQuery, which defines the request.
#  Then set the result format (JSON), send the query to the endpoint
# and inspect the type of the result: the JSON answer has been converted into a 'dict'.
# NOTE(review): `sparql` and `query` are only assigned in cells further down,
# so this cell depends on them already existing in the kernel.

sparql.setQuery(query)
sparql.setReturnFormat(JSON)
rc = sparql.query().convert()
type(rc)

dict

In [94]:
# Query endpoint of the 'test' repository (hard-coded private network address)
sparql = SPARQLWrapper("http://192.168.0.21:7200/repositories/test")

In [8]:
# Query endpoint of the 'early-modern-astronomy' repository on another host.
# NOTE(review): run in file order, this replaces the `sparql` object created
# in the previous cell; keep only the endpoint that is actually wanted.
sparql = SPARQLWrapper("http://10.211.55.14:7200/repositories/early-modern-astronomy")

In [29]:
# Select the annotations (?q ?z) attached to quoted triples << ?s ?p ?o >>
# inside the named graph <http://my_test_graph.org>
query_3 = """
SELECT * { GRAPH <http://my_test_graph.org> {
  << ?s ?p ?o >> ?q ?z .
}
}
"""

In [17]:
# Overwrites the previous query_3: select triples from every named graph ?g.
# NOTE(review): "?s?q" has no space between the two variables; presumably
# "?s ?q ?z" is meant. Confirm the endpoint parses it as intended.
query_3 = """
SELECT * { GRAPH ?g {
  ?s?q ?z .
}
}
"""

In [95]:
# Select at most 10 persons whose occupation (wdt:P106) is wd:Q11063 in the
# named graph <http://my_project.org/original/wikidata>
query = """PREFIX  wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX  wd:   <http://www.wikidata.org/entity/>
PREFIX bd: <http://www.bigdata.com/rdf#>
SELECT ?person 
WHERE { 
    
    {SELECT ?person
        WHERE {
    {
GRAPH <http://my_project.org/original/wikidata> {
# astronomers in the graph
?person  wdt:P106  wd:Q11063.
            }
        }
   }
LIMIT 10}}"""

In [96]:
# Reset `rc` before the attempt: if the request fails, the except branch only
# prints the error, and the following cells must not go on working with a
# value left in `rc` by an earlier cell.
rc = None
try:
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    sparql.setMethod('POST')
    # NOTE(review): credentials are hard-coded in the notebook; move them to
    # environment variables or a prompt before sharing this file.
    sparql.setCredentials("admin", "root")
    rc = sparql.query().convert()
except Exception as e:
    print(e)

In [97]:
# Convert the endpoint answer with the project helper, print the number of
# rows and preview the first 10
rl = spqf.sparql_result_to_list(rc)
print(len(rl))
rl[:10]

10


[['http://www.wikidata.org/entity/Q10400374'],
 ['http://www.wikidata.org/entity/Q11380799'],
 ['http://www.wikidata.org/entity/Q11427609'],
 ['http://www.wikidata.org/entity/Q12191704'],
 ['http://www.wikidata.org/entity/Q15066880'],
 ['http://www.wikidata.org/entity/Q15688159'],
 ['http://www.wikidata.org/entity/Q15691867'],
 ['http://www.wikidata.org/entity/Q15915482'],
 ['http://www.wikidata.org/entity/Q28106362'],
 ['http://www.wikidata.org/entity/Q28591139']]

In [25]:
# Display the raw result currently held in `rc`.
# NOTE(review): the output saved with this cell lists the variables
# g, s, p, o, q, z, which the `query` defined above does not select; it
# appears to come from an earlier run and should be refreshed or cleared.
rc

{'head': {'vars': ['g', 's', 'p', 'o', 'q', 'z']},
 'results': {'bindings': [{'p': {'type': 'uri',
     'value': 'https://ontome.net/property/1599'},
    'q': {'type': 'uri', 'value': 'http://purl.org/dc/terms/source'},
    's': {'type': 'uri',
     'value': 'urn:uuid:ba9656af-92cc-41ab-a3d8-9ccc6f1dad8f'},
    'g': {'type': 'uri', 'value': 'http://myontology.org/g1'},
    'z': {'type': 'uri', 'value': 'http://dbpedia.org/'},
    'o': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Quebec'}},
   {'p': {'type': 'uri', 'value': 'https://ontome.net/property/1599'},
    'q': {'type': 'uri', 'value': 'http://myontology.org/certainty'},
    's': {'type': 'uri',
     'value': 'urn:uuid:ba9656af-92cc-41ab-a3d8-9ccc6f1dad8f'},
    'g': {'type': 'uri', 'value': 'http://myontology.org/g1'},
    'z': {'datatype': 'http://www.w3.org/2001/XMLSchema#integer',
     'type': 'literal',
     'value': '2'},
    'o': {'type': 'uri', 'value': 'http://dbpedia.org/resource/Quebec'}},
   {'p': {'type': '

In [89]:
# SPARQL Update: for the persons recorded with wdt:P106 wd:Q11063 in the local
# graph <http://my_project.org/original/wikidata>, fetch every occupation and
# its label from the Wikidata endpoint through SERVICE and insert them into
# the same graph.
# NOTE(review): the run saved further down ended with an HTTP 500 from the
# endpoint ("Unkown record type: 83").
insert = """

PREFIX  wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX  wikibase: <http://wikiba.se/ontology#>
PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>
PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  wd:   <http://www.wikidata.org/entity/>
PREFIX bd: <http://www.bigdata.com/rdf#>

INSERT {GRAPH <http://my_project.org/original/wikidata> {
?person  wdt:P106  ?occupation.
?occupation rdfs:label ?occupationLabel.
}
}
WHERE {
SERVICE <https://query.wikidata.org/sparql> {
SELECT DISTINCT ?person ?occupation ?occupationLabel
WHERE
  { 
      ?person  wdt:P106  ?occupation.

    SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en,nl,fr". }
}
}
{
GRAPH <http://my_project.org/original/wikidata> {
# astronomers in the graph
?person  wdt:P106  wd:Q11063.
}
}
}
"""

In [66]:
# Wrapper for the 'early-modern-astronomy' repository; the '/statements' path
# is presumably the repository's update endpoint (confirm against the server)
sparul = SPARQLWrapper("http://192.168.0.21:7200/repositories/early-modern-astronomy/statements")

In [90]:
# Wrapper for the 'test' repository.
# NOTE(review): run in file order, this replaces the `sparul` object created
# in the previous cell, so the update below is sent to 'test'.
sparul = SPARQLWrapper("http://192.168.0.21:7200/repositories/test/statements")

In [91]:
# Send the update held in `insert` to the `sparul` endpoint with POST and
# print the raw body of the response; any error is printed, not raised.
try:
    sparul.setQuery(insert)
    # NOTE(review): credentials are hard-coded in the notebook; move them to
    # environment variables or a prompt before sharing this file.
    sparul.setCredentials("admin", "root")
    sparul.setMethod('POST')
    results = sparul.query()
    print(results.response.read())
except Exception as e:
    print(e)

EndPointInternalError: endpoint returned code 500 and response. 

Response:
b'Unkown record type: 83'


### Execute a specific SPARUL query

In [136]:
### Define the line of the database to be used (insert the query into the database first)

pk_query = 4

# Fetch the full row of the chosen query
cn = sql.connect(db)
try:
    c = cn.cursor()
    c.execute('SELECT * FROM query WHERE pk_query = ?', [pk_query])
    rc = c.fetchone()
finally:
    # Release the database even if the SELECT raises
    cn.close()

# Description, endpoint and SPARQL text, separated by dashed rules
print(rc[2] +  "\n-----\n" + rc[4] +  "\n-----\n" + rc[5])

Get all occupations from Wikidata for the astronomers stored in the local triplestore
-----
http://192.168.0.21:7200/repositories/early-modern-astronomy/statements
-----
PREFIX  bd:   <http://www.bigdata.com/rdf#>
PREFIX  wd:   <http://www.wikidata.org/entity/>
PREFIX  wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX  wikibase: <http://wikiba.se/ontology#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX ontocl: <https://ontome.net/class/>

INSERT { GRAPH <http://my_project.org/original/wikidata> { 
        ?person wdt:P106 ?occupation. 
        ?occupation  a ontocl:636.
        ontocl:636 rdfs:label 'Occupation (SDHSS Soc)'.
    ?occupation rdfs:label ?label}}
WHERE
  { SERVICE <https://query.wikidata.org/sparql>
      { ?person  wdt:P106  ?occupation.
        OPTIONAL {?occupation rdfs:label ?label. FILTER (lang(?label) in ('e', 'fr'))}
        }
    { SELECT  ?person
      WHERE
        { GRAPH <http://my_project.org/original/wikidata>
              { ?person  wdt:P10

In [137]:
# Target the endpoint stored with the selected query (rc[4])
sparul = SPARQLWrapper(rc[4])

In [134]:
# NOTE(review): run in file order, this cell replaces the `sparul` built from
# rc[4] in the previous cell, so the update below would go to the 'test'
# repository instead of the endpoint stored with the query.
repo = "http://192.168.0.21:7200/repositories/test/statements"
sparul = SPARQLWrapper(repo)

In [138]:
# Send the stored update text (rc[5]) to the `sparul` endpoint with POST and
# print the raw body of the response; any error is printed, not raised.
try:
    sparul.setQuery(rc[5])
    # NOTE(review): credentials are hard-coded in the notebook; move them to
    # environment variables or a prompt before sharing this file.
    sparul.setCredentials("admin", "root")
    sparul.setMethod('POST')
    results = sparul.query()
    print(results.response.read())
except Exception as e:
    print(e)

b''


In [None]:
# SPARQL text kept as a Python string so that this code cell is valid Python;
# as bare SPARQL it would raise a SyntaxError and stop "Run All".
# The query counts, per property and direction ("out"/"in"), the direct claims
# attached to persons selected by occupation (wdt:P106), ordered by count.
# NOTE(review): bd: and hint: are used without a PREFIX declaration; this
# presumably relies on prefixes predefined by the Wikidata Query Service.
# NOTE(review): the "out" branch filters on wd:Q170790 while the two "in"
# branches use wd:Q11063 and wd:Q155647; confirm the asymmetry is intended.
query_properties = """PREFIX  wdt:  <http://www.wikidata.org/prop/direct/>
PREFIX  wikibase: <http://wikiba.se/ontology#>
PREFIX  xsd:  <http://www.w3.org/2001/XMLSchema#>
PREFIX  rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX  wd:   <http://www.wikidata.org/entity/>

SELECT  ?prop ?propLabel ?direction ?eff

{
    {
  SELECT   ?prop ?direction  (COUNT(*) as ?eff) # ?pLabel
 WHERE
  { 
  hint:Query hint:optimizer "None" .
      { ?person  wdt:P106  wd:Q170790.
       ?person ?p ?o.
       ?prop wikibase:directClaim ?p .  
       BIND("out" AS ?direction)
      }
  UNION
       {
       ?person  wdt:P106  wd:Q11063.
       ?s ?p ?person.
       ?prop wikibase:directClaim ?p .  
       BIND("in" AS ?direction)
      }
    UNION
       {
       ?person  wdt:P106  wd:Q155647.
       ?s ?p ?person.
       ?prop wikibase:directClaim ?p .  
       BIND("in" AS ?direction)
      }
  }
  
GROUP BY ?prop ?direction # ?pLabel
               }
SERVICE wikibase:label {bd:serviceParam wikibase:language "en" }
}


ORDER BY DESC(?eff)
"""

