for @MPica

# Requêtes test sur Maritime History et export Turtle

### Imports

In [19]:
import os
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from SPARQLWrapper import SPARQLWrapper, JSON
import rdflib
import csv
import re

### FONCTION : faire un DataFrame Pandas à partir des résultats

In [33]:
def query_to_df(spql_queried):
    """Run a prepared SPARQL query and tabulate its bindings.

    Parameters
    ----------
    spql_queried
        Object exposing ``queryAndConvert()`` whose result can be indexed as
        ``result["results"]["bindings"]`` (a sequence of mappings from variable
        name to a mapping that holds a ``'value'`` entry).

    Returns
    -------
    pandas.DataFrame or None
        One column per variable seen in any binding, in order of first
        appearance, and one row per binding. A variable missing from a given
        binding is filled with the string ``'None'``. If anything raises while
        querying or converting, the error is printed and ``None`` is returned.
    """
    try:
        bindings = spql_queried.queryAndConvert()["results"]["bindings"]

        # First pass: discover every variable name so that all columns end up
        # with the same length, whatever subset each binding carries.
        columns = {}
        for row in bindings:
            for name in row:
                columns.setdefault(name, [])

        # Second pass: fill the columns row by row.
        for row in bindings:
            for name, values in columns.items():
                values.append(row[name]['value'] if name in row else 'None')

        return pd.DataFrame(columns)

    except Exception as e:
        # Best-effort: report the failure and let the caller receive None.
        print("The query has a problem. Here is the error:\n\t", e)
        return None

## Exploration de Maritime History

### Enregistrer le point d'accès et le format de sortie

In [21]:
# SPARQL endpoint queried by the cells below. Results are requested as JSON,
# which is the shape query_to_df indexes (result["results"]["bindings"]).
mh_endpoint = SPARQLWrapper("https://sparql.geovistory.org/api_v1_project_84760")
mh_endpoint.setReturnFormat(JSON)
# Namespace declarations prepended to every query string below. The queries
# visible in this notebook only use the rdf, rdfs, owl and ontome prefixes.
mh_prefixes = """
    PREFIX onto: <http://www.ontotext.com/>
    PREFIX ont: <http://purl.org/net/ns/ontology-annot#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX xml: <http://www.w3.org/XML/1998/namespace>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX geo: <http://www.opengis.net/ont/geosparql#>
    PREFIX time: <http://www.w3.org/2006/time#>
    PREFIX ontome: <https://ontome.net/ontology/>
    PREFIX geov: <http://geovistory.org/resource/>
"""

### Voir quelles classes on peut y trouver

Merci à @atterebf pour cette requête.

In [4]:
# Class inventory. The inner SELECT counts instances per (class, label) for
# classes declared as owl:Class; the outer SELECT groups by class and count and
# joins the labels of each group with ' / '. Results are ordered by descending
# count, and the count is also returned as a string (?str_eff).
# Gotcha: FILTER(strlen(?label)) rejects rows whose label is unbound or empty,
# so classes without an rdfs:label are dropped even though the label pattern is
# OPTIONAL.
mh_endpoint.setQuery(mh_prefixes + """
    SELECT  (STR(?eff) as ?str_eff) (GROUP_CONCAT(?label; SEPARATOR = ' / ')  as ?cct_label)  ?class
    WHERE {
    
    {
        SELECT (COUNT(*) as ?eff) ?class ?label
    WHERE {
      ?inst a ?class.
      ### comment next line to have all classes
      ?class a owl:Class.
          OPTIONAL { ?class rdfs:label ?label}

    }
    GROUP BY ?class ?label

    }
    FILTER(strlen(?label))

    }
    GROUP BY ?class ?eff
    ORDER BY DESC(?eff)
""")

# Executes the query set above; query_to_df prints the error and returns None
# if the request or the conversion fails.
class_summary = query_to_df(mh_endpoint)

In [5]:
# Save the class inventory and show it in the cell output.
# DataFrame.to_csv does not create missing parent directories, so the target
# folder is created first; otherwise this cell raises OSError on a fresh
# checkout where info_data/ does not exist yet.
os.makedirs("info_data", exist_ok=True)
class_summary.to_csv("info_data/class_summary.csv")
print(class_summary)

   str_eff                           cct_label  \
0    14303    Person Appellation in a Language   
1     8189                         Ship Voyage   
2     8174                           Time-Span   
3     7614                              Person   
4     7217           Appellation in a Language   
5     5278                  Geographical Place   
6     5269                            Presence   
7     1855                                Ship   
8       28                  Annotation in Text   
9       28                          Definition   
10      10  Type of manifestation product type   
11       8                         VOC Chamber   
12       5         Expression / Source Content   
13       4              Bibliographical Record   
14       4                  Expression portion   
15       3                 Entity Quality Type   
16       3             Expression Portion Type   
17       3             Geographical Place Type   
18       3        Manifestation Singleton Type   


### Voir quels prédicats les instances de ces classes peuvent avoir

In [6]:
# For every subject whose type is declared as an owl:Class, return each
# predicate it uses (?p) with that predicate's English label (?pl).
# There is no DISTINCT: one row comes back per matching triple, which is what
# lets the next cell count occurrences with value_counts().
mh_endpoint.setQuery(mh_prefixes + """
    SELECT ?pl ?p
    WHERE {
    ?sc a owl:Class .
    ?s rdf:type ?sc ;
        ?p ?o .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
""")

# Executes the query set above; the result is None if the query failed.
what_predicates = query_to_df(mh_endpoint)

In [7]:
# Number of result rows per (predicate URI, English label) pair, most frequent
# first (value_counts sorts by descending count).
sum_what_preds = what_predicates[["p", "pl"]].value_counts()
print(sum_what_preds)
# to_csv does not create missing parent directories; make sure info_data/
# exists so the export does not raise OSError on a fresh checkout.
os.makedirs("info_data", exist_ok=True)
sum_what_preds.to_csv("info_data/entity_predicates.csv")

p                                                pl                                    
http://www.w3.org/1999/02/22-rdf-syntax-ns#type  has type                                  58005
http://www.w3.org/2000/01/rdf-schema#label       has label                                 58005
https://ontome.net/ontology/p1111                is appellation for language of            21521
https://ontome.net/ontology/p1113                refers to name                            21520
https://ontome.net/ontology/p1111i               has appellation for language              21519
https://ontome.net/ontology/p4i                  is time-span of                            8174
https://ontome.net/ontology/p1338i               had carried out                            8166
https://ontome.net/ontology/p1338                was carried out by                         8166
https://ontome.net/ontology/p4                   has time-span                              8164
https://ontome.net/ontology/p1335      

### Get all ship names and predicates

In [8]:
# For every instance of ontome:c522 (the class this section treats as ships),
# return the instance (?s), its label (?sl) and the English label (?pl) of each
# predicate it is the subject of. One row per matching triple, so predicates
# used several times by the same ship appear several times.
mh_endpoint.setQuery(mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c522 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
""")

# Executes the query set above; the result is None if the query failed.
ship_preds = query_to_df(mh_endpoint)

In [9]:
# Number of result rows per (ship URI, ship label, predicate label) triple,
# most frequent first.
sum_ship_preds = ship_preds.value_counts()
print(sum_ship_preds)
# to_csv does not create missing parent directories; make sure info_data/
# exists so the export does not raise OSError on a fresh checkout.
os.makedirs("info_data", exist_ok=True)
sum_ship_preds.to_csv("info_data/ship_names_and_predicates.csv")

s                                       sl                   pl                          
http://geovistory.org/resource/i178921  Meijenburg           had carried out                 21
http://geovistory.org/resource/i179754  Zutphen              had carried out                 19
http://geovistory.org/resource/i179061  Oranje               had carried out                 18
http://geovistory.org/resource/i178340  Enkhuizen            had carried out                 18
http://geovistory.org/resource/i178558  Hof Van Zeeland      had carried out                 17
                                                                                             ..
http://geovistory.org/resource/i178623  Huis Te Kraaiestein  has label                        1
                                                             has appellation for language     1
                                                             had carried out                  1
http://geovistory.org/resource/i178622  Huis T

In [10]:
# Headline figures for the ship query: number of distinct subjects, number of
# result rows, and the sorted distinct predicate labels.
ship_count = np.unique(ship_preds['s']).size
ship_pred_labels = list(np.unique(ship_preds['pl']))

print(f"Il y a {ship_count} navires dans la base de données Maritime History.")
print(f"Il y a {len(ship_preds)} informations sur ces navires.")
print(f"Ces informations sont des types suivants :\n\t{ship_pred_labels}")

Il y a 1855 navires dans la base de données Maritime History.
Il y a 13786 informations sur ces navires.
Ces informations sont des types suivants :
	['had carried out', 'has appellation for language', 'has label', 'has type', 'is annotated by']


### Get all person names and predicates

In [11]:
# For every instance of ontome:c21 (the class this section treats as persons),
# return the instance (?s), its label (?sl) and the English label (?pl) of each
# predicate it is the subject of. One row per matching triple.
mh_endpoint.setQuery(mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c21 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
""")

# Executes the query set above; the result is None if the query failed.
person_preds = query_to_df(mh_endpoint)

In [12]:
# Number of result rows per (person URI, person label, predicate label) triple,
# most frequent first.
sum_person_preds = person_preds.value_counts()
print(sum_person_preds)
# to_csv does not create missing parent directories; make sure info_data/
# exists so the export does not raise OSError on a fresh checkout.
os.makedirs("info_data", exist_ok=True)
sum_person_preds.to_csv("info_data/person_names_and_predicates.csv")

s                                      sl                  pl                          
http://geovistory.org/resource/i87622  Willem Simonse      has appellation for language    7
http://geovistory.org/resource/i88746  Cornelis Stevensen  has appellation for language    7
http://geovistory.org/resource/i91714  Amon Roulofse       has appellation for language    7
http://geovistory.org/resource/i84896  Pieter Doolhagen    has appellation for language    7
http://geovistory.org/resource/i86690  Jan Nijster         has appellation for language    6
                                                                                          ..
http://geovistory.org/resource/i87698  Jacob Clauwers      had as participant              1
http://geovistory.org/resource/i87697  Simon Jobse         has type                        1
                                                           has label                       1
                                                           had as participa

In [13]:
# Headline figures for the person query: number of distinct subjects, number
# of result rows, and the sorted distinct predicate labels.
person_count = np.unique(person_preds['s']).size
person_pred_labels = list(np.unique(person_preds['pl']))

print(f"Il y a {person_count} personnes dans la base de données Maritime History.")
print(f"Il y a {len(person_preds)} informations sur ces personnes.")
print(f"Ces informations sont des types suivants :\n\t{person_pred_labels}")

Il y a 7614 personnes dans la base de données Maritime History.
Il y a 37265 informations sur ces personnes.
Ces informations sont des types suivants :
	['had as participant', 'has appellation for language', 'has label', 'has type', 'is annotated by']


### Get all voyages and predicates

In [14]:
# For every instance of ontome:c523 (the class this section treats as voyages),
# return the instance (?s), its label (?sl) and the English label (?pl) of each
# predicate it is the subject of. One row per matching triple.
mh_endpoint.setQuery(mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c523 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
""")

# Executes the query set above; the result is None if the query failed.
voyage_preds = query_to_df(mh_endpoint)

In [15]:
# Number of result rows per (voyage URI, voyage label, predicate label) triple,
# most frequent first.
sum_voy_preds = voyage_preds.value_counts()
print(sum_voy_preds)
# to_csv does not create missing parent directories; make sure info_data/
# exists so the export does not raise OSError on a fresh checkout.
os.makedirs("info_data", exist_ok=True)
sum_voy_preds.to_csv("info_data/voyage_names_and_predicates.csv")

s                                       sl                                                 pl                 
http://geovistory.org/resource/i151443  Fort Rammekens NL, Sri Lanka LK, Duinenburg        participated in        172
http://geovistory.org/resource/i157150  Fort Rammekens NL, Jakarta ID, Walcheren           participated in        154
http://geovistory.org/resource/i156015  Fort Rammekens NL, Jakarta ID, Sloterdijk          participated in        149
http://geovistory.org/resource/i149984  Fort Rammekens NL, Jakarta ID, Admiraal De Ruyter  participated in        144
http://geovistory.org/resource/i155016  Fort Rammekens NL, Jakarta ID, Pallas              participated in        144
                                                                                                                 ... 
http://geovistory.org/resource/i152613  Texel NL, Jakarta ID, Hollandia                    had arrival place        1
                                                               

In [16]:
# Headline figures for the voyage query: number of distinct subjects, number
# of result rows, and the sorted distinct predicate labels.
voyage_count = np.unique(voyage_preds['s']).size
voyage_pred_labels = list(np.unique(voyage_preds['pl']))

print(f"Il y a {voyage_count} voyages dans la base de données Maritime History.")
print(f"Il y a {len(voyage_preds)} informations sur ces voyages.")
print(f"Ces informations sont des types suivants :\n\t{voyage_pred_labels}")

Il y a 8189 voyages dans la base de données Maritime History.
Il y a 64239 informations sur ces voyages.
Ces informations sont des types suivants :
	['had arrival place', 'had departure place', 'has label', 'has set up', 'has time-span', 'has type', 'participated in', 'was carried out by']


### Exporter les objets pertinents

In [36]:
# Flat export of voyages (instances of ontome:c523) with their label, the
# labels reached from the time-span via p150 and p151, the ship and its name,
# a place and its label, and (optionally) participants with their appellation.
# Per the predicate listing printed earlier in this notebook, p4 is
# "has time-span" and p1338 is "was carried out by".
# NOTE(review): p1335 is presumably the departure place (variable ?dplace),
# p150/p151 presumably the time-span bounds, and p1359 presumably the
# participation link; confirm these against the ontology.
# Every pattern outside OPTIONAL is required, so a voyage lacking any of them
# is absent from the result; a voyage matching several ?dude values yields
# one row per match.
mh_endpoint.setQuery(mh_prefixes + """
    SELECT ?instance ?instlabel ?eob ?boe ?ship ?shipname ?dplace ?dplabel ?dude ?participant
    WHERE {
    ?instance rdf:type ontome:c523 ;
        rdfs:label ?instlabel ;
        ontome:p4 ?tspan ;
        ontome:p1335 ?dplace ;
        ontome:p1338 ?ship .
  ?ship rdfs:label ?shipname .
  ?tspan ontome:p151/rdfs:label ?boe ;
    ontome:p150/rdfs:label ?eob .
  ?dplace rdfs:label ?dplabel .
  OPTIONAL {
    ?instance ontome:p1359 ?dude .
    ?dude ontome:p1111i/rdfs:label ?participant .
  }
}
""")

# Executes the query set above. query_to_df fills variables missing from a row
# (here the OPTIONAL ?dude / ?participant) with the string 'None', and returns
# None if the query failed, in which case the to_csv call below raises.
complete = query_to_df(mh_endpoint)
# Written to the current working directory, with the DataFrame index as the
# first column (to_csv default).
complete.to_csv("output_voyages.csv")