for @MPica

# Requêtes test sur Maritime History et export Turtle

### Imports

In [None]:
import os
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from SPARQLWrapper import SPARQLWrapper, SPARQLWrapper2, JSON, TURTLE, XML, RDFXML
import csv
import re

### FONCTION : faire un DataFrame Pandas à partir des résultats

In [None]:
def query_to_df(spql_queried):
    """Run a prepared SPARQL query and return its result rows as a DataFrame.

    Args:
        spql_queried: An object exposing ``queryAndConvert()`` whose result
            is a SPARQL JSON results mapping, i.e. it contains
            ``["results"]["bindings"]`` and, optionally, ``["head"]["vars"]``.

    Returns:
        A ``pandas.DataFrame`` with one column per variable and one row per
        binding. Every cell holds the binding's ``'value'`` as returned by
        the endpoint; a variable that is unbound in a row is stored as the
        string ``'None'``. Columns declared in ``head.vars`` come first, so
        an empty result set still has the expected columns.
        If anything goes wrong, the error is printed and ``None`` is
        returned.
    """
    try:
        spql_return = spql_queried.queryAndConvert()
        bindings = spql_return["results"]["bindings"]

        # Seed the columns from the declared variables so that a query with
        # zero rows does not produce a column-less frame.
        declared = spql_return.get("head", {}).get("vars", [])
        columns = {var: [] for var in declared}

        # Also pick up any variable that only shows up inside the bindings.
        for row in bindings:
            for var in row:
                columns.setdefault(var, [])

        # Fill every column for every row, padding unbound variables.
        for row in bindings:
            for var, values in columns.items():
                values.append(row[var]['value'] if var in row else 'None')

        return pd.DataFrame(columns)

    except Exception as e:
        # Best-effort helper for interactive use: report and hand back None.
        print("The query has a problem. Here is the error:\n\t", e)
        return None

## Exploration de Maritime History

### Enregistrer le point d'accès et le format de sortie

In [None]:
# Namespace prefixes that get prepended to the text of each query.
mh_prefixes = """
    PREFIX onto: <http://www.ontotext.com/>
    PREFIX ont: <http://purl.org/net/ns/ontology-annot#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX owl: <http://www.w3.org/2002/07/owl#>
    PREFIX xml: <http://www.w3.org/XML/1998/namespace>
    PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
    PREFIX geo: <http://www.opengis.net/ont/geosparql#>
    PREFIX time: <http://www.w3.org/2006/time#>
    PREFIX ontome: <https://ontome.net/ontology/>
    PREFIX geov: <http://geovistory.org/resource/>
"""

# SPARQL access point for the exploration; results are requested as JSON.
mh_endpoint = SPARQLWrapper("https://sparql.geovistory.org/api_v1_project_84760")
mh_endpoint.setReturnFormat(JSON)

### Voir quelles classes on peut y trouver

Merci à @atterebf pour cette requête.

In [None]:
# Inner query: count instances per (class, label) for resources typed as
# owl:Class. Outer query: keep rows with a non-empty label, join the labels
# of a class with ' / ' and sort by descending instance count.
class_count_query = mh_prefixes + """
    SELECT  (STR(?eff) as ?str_eff) (GROUP_CONCAT(?label; SEPARATOR = ' / ')  as ?cct_label)  ?class
    WHERE {
    
    {
        SELECT (COUNT(*) as ?eff) ?class ?label
    WHERE {
      ?inst a ?class.
      ### comment next line to have all classes
      ?class a owl:Class.
          OPTIONAL { ?class rdfs:label ?label}

    }
    GROUP BY ?class ?label

    }
    FILTER(strlen(?label))

    }
    GROUP BY ?class ?eff
    ORDER BY DESC(?eff)
"""
mh_endpoint.setQuery(class_count_query)

# Run the query and collect the rows into a DataFrame.
class_summary = query_to_df(mh_endpoint)

In [None]:
# Save the class overview to disk and show it.
# The target folder may not exist yet (e.g. on a fresh checkout); create it
# first so that to_csv does not fail on a missing parent directory.
os.makedirs("info_data", exist_ok=True)
class_summary.to_csv("info_data/class_summary.csv")
print(class_summary)

### Voir quels prédicats les instances de ces classes peuvent avoir

In [None]:
# For every subject typed with an owl:Class, return each predicate it uses
# together with that predicate's English label (one row per matching triple).
predicate_query = mh_prefixes + """
    SELECT ?pl ?p
    WHERE {
    ?sc a owl:Class .
    ?s rdf:type ?sc ;
        ?p ?o .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
"""
mh_endpoint.setQuery(predicate_query)

# Run the query and collect the rows into a DataFrame.
what_predicates = query_to_df(mh_endpoint)

In [None]:
# Count how often each (predicate, label) pair occurs, show it, and save it.
sum_what_preds = what_predicates[["p", "pl"]].value_counts()
print(sum_what_preds)
# The target folder may not exist yet; create it so that to_csv does not
# fail on a missing parent directory.
os.makedirs("info_data", exist_ok=True)
sum_what_preds.to_csv("info_data/entity_predicates.csv")

### Get all ship names and predicates

In [None]:
# For every instance of ontome:c522 (the "ship" class of this section),
# return the instance, its label and the English label of each predicate
# attached to it.
ship_query = mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c522 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
"""
mh_endpoint.setQuery(ship_query)

# Run the query and collect the rows into a DataFrame.
ship_preds = query_to_df(mh_endpoint)

In [None]:
# Count identical (subject, label, predicate label) rows, show and save them.
sum_ship_preds = ship_preds.value_counts()
print(sum_ship_preds)
# The target folder may not exist yet; create it so that to_csv does not
# fail on a missing parent directory.
os.makedirs("info_data", exist_ok=True)
sum_ship_preds.to_csv("info_data/ship_names_and_predicates.csv")

In [None]:
# Report: number of distinct subjects, number of result rows, and the sorted
# list of distinct predicate labels found for the ship query.
print(f"Il y a {np.unique(ship_preds['s']).size} navires dans la base de données Maritime History.")
print(f"Il y a {ship_preds.shape[0]} informations sur ces navires.")
print(f"Ces informations sont des types suivants :\n\t{list(np.unique(ship_preds['pl']))}")

### Get all person names and predicates

In [None]:
# For every instance of ontome:c21 (the "person" class of this section),
# return the instance, its label and the English label of each predicate
# attached to it.
person_query = mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c21 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
"""
mh_endpoint.setQuery(person_query)

# Run the query and collect the rows into a DataFrame.
person_preds = query_to_df(mh_endpoint)

In [None]:
# Count identical (subject, label, predicate label) rows, show and save them.
sum_person_preds = person_preds.value_counts()
print(sum_person_preds)
# The target folder may not exist yet; create it so that to_csv does not
# fail on a missing parent directory.
os.makedirs("info_data", exist_ok=True)
sum_person_preds.to_csv("info_data/person_names_and_predicates.csv")

In [None]:
# Report: number of distinct subjects, number of result rows, and the sorted
# list of distinct predicate labels found for the person query.
print(f"Il y a {np.unique(person_preds['s']).size} personnes dans la base de données Maritime History.")
print(f"Il y a {person_preds.shape[0]} informations sur ces personnes.")
print(f"Ces informations sont des types suivants :\n\t{list(np.unique(person_preds['pl']))}")

### Get all voyages and predicates

In [None]:
# For every instance of ontome:c523 (the "voyage" class of this section),
# return the instance, its label and the English label of each predicate
# attached to it.
voyage_query = mh_prefixes + """
    SELECT ?s ?sl ?pl
    WHERE {
    ?s rdf:type ontome:c523 ;
        ?p ?o ;
        rdfs:label ?sl .
    ?p rdfs:label ?pl .
    FILTER (lang(?pl) = "en")
    }
"""
mh_endpoint.setQuery(voyage_query)

# Run the query and collect the rows into a DataFrame.
voyage_preds = query_to_df(mh_endpoint)

In [None]:
# Count identical (subject, label, predicate label) rows, show and save them.
sum_voy_preds = voyage_preds.value_counts()
print(sum_voy_preds)
# The target folder may not exist yet; create it so that to_csv does not
# fail on a missing parent directory.
os.makedirs("info_data", exist_ok=True)
sum_voy_preds.to_csv("info_data/voyage_names_and_predicates.csv")

In [None]:
# Report: number of distinct subjects, number of result rows, and the sorted
# list of distinct predicate labels found for the voyage query.
print(f"Il y a {np.unique(voyage_preds['s']).size} voyages dans la base de données Maritime History.")
print(f"Il y a {voyage_preds.shape[0]} informations sur ces voyages.")
print(f"Ces informations sont des types suivants :\n\t{list(np.unique(voyage_preds['pl']))}")

### Exporter les objets pertinents

In [None]:
# For every instance of ontome:c523, fetch its label, the labels reached
# through its ontome:p4 node (via p150 and p151), the resource linked by
# ontome:p1335 with its label, and the resource linked by ontome:p1338 with
# its label. The ontome:p1359 resource and the label reached through
# ontome:p1111i are optional.
export_query = mh_prefixes + """
    SELECT ?instance ?instlabel ?eob ?boe ?ship ?shipname ?dplace ?dplabel ?dude ?participant
    WHERE {
    ?instance rdf:type ontome:c523 ;
        rdfs:label ?instlabel ;
        ontome:p4 ?tspan ;
        ontome:p1335 ?dplace ;
        ontome:p1338 ?ship .
  ?ship rdfs:label ?shipname .
  ?tspan ontome:p151/rdfs:label ?boe ;
    ontome:p150/rdfs:label ?eob .
  ?dplace rdfs:label ?dplabel .
  OPTIONAL {
    ?instance ontome:p1359 ?dude .
    ?dude ontome:p1111i/rdfs:label ?participant .
  }
}
"""
mh_endpoint.setQuery(export_query)

# Run the query and write the resulting table to a CSV file.
complete = query_to_df(mh_endpoint)
complete.to_csv("output_voyages.csv")