In [32]:
import os
import warnings
from functools import partial

import pandas as pd
import plotly.express as px
import sparql_dataframe

In [33]:
# SPARQL query template used with the CÉSAR endpoint (see `get_data_cesar`).
#
# The text is passed through `str.format(piece_uri=...)` in `_get_data`, so
# `{piece_uri}` is the only placeholder and every literal SPARQL brace is
# doubled (`{{` / `}}`).
#
# For the performances linked to the given work, the query returns one row per
# group with these columns:
#   ?dates - YEAR() of the performance time-span's ?start value
#   ?oc    - count(?dates) within the group
#   ?lieux - ?parent_label, i.e. the label of the place the performance
#            location falls within; the whole location part is OPTIONAL, so
#            this column may be unbound
# Rows are grouped by ?dates, ?parent and ?parent_label and ordered by ?dates.
#
# NOTE(review): ?loc_name is bound but only referenced by the commented-out
# group_concat projection inside the query.
Q_CESAR_TEMPLATE = """

        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX xml: <http://www.w3.org/XML/1998/namespace>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX ecrm: <http://erlangen-crm.org/current/>
        PREFIX crmpc: <http://dramabase/ontology/crm_property_classes/>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX efrbroo: <http://erlangen-crm.org/efrbroo/>
        PREFIX dram: <http://dramabase/ontology/>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
          PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>



SELECT ?dates (count(?dates) as ?oc) (?parent_label as ?lieux)
                # (group_concat(distinct(?loc_name); separator = ", ") as ?lieux)
WHERE 

{{
                   ?performance dram:R66_included_performed_version_of <{piece_uri}> . 
                         ?performance a efrbroo:F31_Performance ;
                                                      ecrm:P4_has_time-span ?pts .
                            ?pts dram:P82a_begin_of_the_begin ?start .
  
                                    OPTIONAL {{
   ?performance ecrm:P7_took_place_at ?location .
   ?location  ecrm:P87_is_identified_by ?loc_appelation .
   ?loc_appelation rdfs:label ?loc_name .
    
   
    OPTIONAL {{
      
      
      ?location ecrm:P89_falls_within ?parent .
     ?parent ecrm:P87_is_identified_by ?parent_appelation .
      ?parent_appelation rdfs:label ?parent_label . 
    
    }} 
    
  }}
  
  BIND (YEAR( ?start) as ?dates)
                        
}}
GROUP BY ?dates ?parent  ?parent_label #?location
ORDER BY ?dates
"""

In [34]:
# SPARQL query template used with the RCF endpoint (see `get_data_rcf`).
#
# The text is passed through `str.format(piece_uri=...)` in `_get_data`, so
# `{piece_uri}` is the only placeholder and the literal SPARQL braces are
# doubled (`{{` / `}}`).
#
# Returns one row per year with the same column names as the CÉSAR query:
#   ?dates - year(?date) of the day (?journee) holding the representation
#   ?oc    - number of ?representation bindings in that year
#   ?lieux - the constant string "Paris"
Q_RCF_TEMPLATE = """
PREFIX rcf: <http://rcf.logilab.fr/>

SELECT (?annee as ?dates) (COUNT(?representation) as ?oc) ("Paris" as ?lieux) WHERE {{
    ?representation rcf:aPourPiece <{piece_uri}>.
    ?journee rcf:aPourRepresentation ?representation.
    ?journee rcf:aPourDate ?date.
    BIND(year(?date) as ?annee)
}} GROUP BY ?annee ORDER BY ?annee
"""

In [35]:
def _get_data(endpoint, template, origin, piece_uri):
    """Run the SPARQL query for one work and tag the rows with their source.

    Parameters
    ----------
    endpoint : str
        URL of the SPARQL endpoint to query.
    template : str
        Query text containing a ``{piece_uri}`` placeholder; it is filled in
        with ``str.format``, so literal braces must be doubled.
    origin : str
        Value written to the ``origin`` column of every returned row.
    piece_uri : str
        URI of the work, substituted into ``template``.

    Returns
    -------
    The object returned by ``sparql_dataframe.get`` (expected to be a
    ``pandas.DataFrame``), with an extra ``origin`` column.
    """
    query = template.format(piece_uri=piece_uri)
    # Requests to CÉSAR generate warnings (original author's note). Silence
    # them only while the request runs: a bare ``simplefilter("ignore")``
    # would stay installed and hide every later warning in the kernel.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        result = sparql_dataframe.get(endpoint, query)
    result["origin"] = origin
    return result

In [36]:
# Per-source fetchers: `_get_data` with the endpoint URL, query template and
# `origin` label already bound; each one takes only the work's URI.
get_data_cesar = partial(
    _get_data, "https://cesar2.huma-num.fr/sparql/", Q_CESAR_TEMPLATE, "CÉSAR"
)
get_data_rcf = partial(
    _get_data, "https://rcf-sparql.demo.logilab.fr/sparql/", Q_RCF_TEMPLATE, "RCF"
)


def get_data(cesar_uri, rcf_uri, include_cesar=False):
    """Fetch the yearly performance counts for one work.

    Parameters
    ----------
    cesar_uri : str
        URI of the work in CÉSAR. Only used when ``include_cesar`` is true.
    rcf_uri : str
        URI of the work in RCF.
    include_cesar : bool, default False
        When false (the default, matching the notebook's current behaviour)
        only RCF is queried. When true, the CÉSAR rows are fetched as well and
        stacked on top of the RCF rows with ``pd.concat``.

    Returns
    -------
    The rows produced by the per-source fetchers, each carrying an ``origin``
    column identifying the source.
    """
    if include_cesar:
        return pd.concat((get_data_cesar(cesar_uri), get_data_rcf(rcf_uri)))
    return get_data_rcf(rcf_uri)

In [37]:
# Location of the CÉSAR/RCF work-alignment CSV. The default is the original
# author's local path; set the CESARE_RCF_ALIGNMENTS_CSV environment variable
# to run the notebook on another machine without editing this cell.
ALIGNMENTS_CSV = os.environ.get(
    "CESARE_RCF_ALIGNMENTS_CSV",
    "/home/schabot/downloads/cesare_rcf_works_alignments.csv",
)

# Map each work name (the CSV's "name" column) to its row as a Series; later
# cells read the `cesare_uri` and `rcf_uri` fields of these rows. Built in one
# expression so PIECES is never temporarily bound to a DataFrame.
PIECES = dict(pd.read_csv(ALIGNMENTS_CSV).set_index("name").iterrows())

In [38]:
from ipywidgets import interact


@interact
def plot_data(piece=sorted(PIECES)):
    """Plot performances per year and place for the work picked in the dropdown.

    ``piece`` is a key of ``PIECES``; the sorted key list used as the default
    is what ``interact`` turns into the dropdown options. The figure is a
    scatter of ``dates`` against ``lieux``, sized by ``oc`` and coloured by
    ``origin``, with a marginal histogram on each axis.
    """
    alignment = PIECES[piece]
    performances = get_data(alignment.cesare_uri, alignment.rcf_uri)
    scatter_options = dict(
        x="dates",
        y="lieux",
        size="oc",
        color="origin",
        marginal_x="histogram",
        marginal_y="histogram",
    )
    return px.scatter(performances, **scatter_options)

interactive(children=(Dropdown(description='piece', options=('Amphitryon', 'Andromaque', 'Andromède', 'Ariane'…

In [50]:
# Preview of the data for a single work: the first ten rows rendered as CSV
# text, with the `oc` column given a descriptive header.
piece = "Phèdre et Hippolyte ou Phèdre"
alignment = PIECES[piece]
df = get_data(alignment.cesare_uri, alignment.rcf_uri)
preview = df.head(10).rename(columns={"oc": "nombre_de_representations"})
preview.to_csv(index=False)

'dates,nombre_de_representations,lieux,origin\n1680,6,Paris,RCF\n1681,4,Paris,RCF\n1682,4,Paris,RCF\n1683,5,Paris,RCF\n1684,5,Paris,RCF\n1685,6,Paris,RCF\n1686,7,Paris,RCF\n1687,9,Paris,RCF\n1688,7,Paris,RCF\n1689,6,Paris,RCF\n'