In [None]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd

# SPARQL endpoint that the query below is sent to (Wikidata Query Service).
endpoint_url = "https://query.wikidata.org/sparql"

# Query poets (not cities): one row per ?poet / French ?name pair, with the
# earliest birth date, the earliest death date (if any), and the French labels
# of the poet's works and movements concatenated into comma-separated strings.
#   - wdt:P106 wd:Q49757 / wdt:P1412 wd:Q150 restrict the set; presumably
#     "occupation: poet" and "language spoken or written: French" -- confirm
#     against Wikidata.
#   - wdt:P569 (birth date) is required; wdt:P570 (death date), wdt:P800
#     (works) and wdt:P135 (movements) are OPTIONAL, so those output variables
#     may be missing from a result binding.
#   - MIN(...) collapses several recorded dates into one value per poet.
# NOTE(review): the SERVICE wikibase:label clause looks unused, since every
# label is bound explicitly through rdfs:label + a LANG filter -- confirm
# before removing.
# NOTE(review): LIMIT 11 is applied without an ORDER BY, so which poets come
# back is presumably not guaranteed to be stable between runs.
query = """SELECT DISTINCT ?poet ?name (MIN(?date_birth_raw) AS ?date_birth) (MIN(?date_death_raw) AS ?date_death) 
                (GROUP_CONCAT(DISTINCT ?workLabel; separator=", ") AS ?works) 
                (GROUP_CONCAT(DISTINCT ?movementLabel; separator=", ") AS ?movements)  
WHERE {
  ?poet wdt:P106 wd:Q49757.
  ?poet wdt:P1412 wd:Q150.
  ?poet wdt:P569 ?date_birth_raw.  # Date de naissance obligatoire
  OPTIONAL { ?poet wdt:P570 ?date_death_raw. } # Date de décès optionnelle
  OPTIONAL { 
    ?poet wdt:P800 ?work. 
    ?work rdfs:label ?workLabel.
    FILTER (LANG(?workLabel) = "fr") 
  } # Œuvres principales optionnelles avec leur label en français
  OPTIONAL { 
    ?poet wdt:P135 ?movement. 
    ?movement rdfs:label ?movementLabel.
    FILTER (LANG(?movementLabel) = "fr") 
  } # Mouvements artistiques optionnels avec leur label en français

  ?poet rdfs:label ?name.

  FILTER (LANG(?name) = "fr")
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
GROUP BY ?poet ?name


LIMIT 11
"""


def get_results(endpoint_url, query):
    """Send a SPARQL query to an endpoint and return the converted response.

    Args:
        endpoint_url: URL of the SPARQL endpoint to query.
        query: SPARQL query text to execute.

    Returns:
        Whatever ``SPARQLWrapper``'s ``query().convert()`` yields once the
        return format has been set to ``JSON``.
    """
    # Identify this client with the running interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(
        endpoint_url,
        agent=f"WDQS-example Python/{major}.{minor}",
    )
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


# Output columns, in the order the row tuples are built below.
column_names = ["poet", "name", "date_birth", "date_death", "works", "movements"]

# Variables bound by every result row (the query requires them).
required_keys = ("poet", "name", "date_birth")

# Variables coming from OPTIONAL clauses, paired with the fallback used when
# the variable is absent from a binding.
optional_defaults = (("date_death", None), ("works", ""), ("movements", ""))

array = []
results = get_results(endpoint_url, query)

for result in results["results"]["bindings"]:

    # TODO: try the Wikimedia pageviews API for each poet, e.g.
    # https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/fr.wikipedia.org/all-access/user/Henri_Hertz/daily/20250201/20250207

    # A missing required variable raises KeyError; a missing optional one
    # silently falls back to its default.
    required_values = [result[key]["value"] for key in required_keys]
    optional_values = [
        result.get(key, {}).get("value", fallback)
        for key, fallback in optional_defaults
    ]
    array.append(tuple(required_values + optional_values))

dataframe = pd.DataFrame(array, columns=column_names)

# Every column is cast to a text dtype; the two date columns therefore keep the
# timestamp text returned by the endpoint instead of becoming datetimes.
# NOTE(review): pandas presumably stores these as Python strings rather than
# fixed-width arrays, so the widths below likely do not truncate -- confirm.
text_dtypes = {
    **dict.fromkeys(column_names[:4], "<U200"),  # entity URI, name, both dates
    "works": "<U1000",  # comma-separated work labels
    "movements": "<U500",  # comma-separated movement labels
}
dataframe = dataframe.astype(dtype=text_dtypes)

# Bare expression so the notebook renders the frame as the cell output.
dataframe

Unnamed: 0,poet,name,date_birth,date_death,works,movements
0,http://www.wikidata.org/entity/Q3131267,Henri Hertz,1875-06-17T00:00:00Z,1966-10-11T00:00:00Z,,
1,http://www.wikidata.org/entity/Q124713352,fr,1985-07-25T00:00:00Z,,,
2,http://www.wikidata.org/entity/Q55836844,Eugène Tourneux,1809-10-06T00:00:00Z,1867-06-26T00:00:00Z,,
3,http://www.wikidata.org/entity/Q3171429,Jean Cortot,1925-02-14T00:00:00Z,2018-12-28T00:00:00Z,,
4,http://www.wikidata.org/entity/Q85808013,Hippolyte Stupuy,1832-06-18T00:00:00Z,1900-05-01T00:00:00Z,,
5,http://www.wikidata.org/entity/Q3123819,Gérard Caramaro,1953-05-14T00:00:00Z,,,
6,http://www.wikidata.org/entity/Q447941,Alioune Badara Bèye,1945-09-28T00:00:00Z,2024-12-01T00:00:00Z,,
7,http://www.wikidata.org/entity/Q2958880,Charles Derennes,1882-08-04T00:00:00Z,1930-01-01T00:00:00Z,Le Bestiaire sentimental,
8,http://www.wikidata.org/entity/Q25982527,André Malartre,1921-04-24T00:00:00Z,1995-09-13T00:00:00Z,,
9,http://www.wikidata.org/entity/Q116474792,François Peyrey,1873-01-01T00:00:00Z,1934-06-01T00:00:00Z,,
