In [40]:
import requests

def get_wikipedia_views(
    article: str,
    year: str,
    project: str = "fr.wikipedia.org",
    timeout: float = 10.0,
):
    """Return the total page views of a Wikipedia article over one calendar year.

    Calls the Wikimedia "pageviews/per-article" REST endpoint with monthly
    granularity from January 1st to December 31st of ``year`` and sums the
    ``views`` field of every returned item.

    :param article: Article title; spaces are turned into underscores and the
        result is percent-encoded before being placed in the URL path.
    :param year: Four-digit year, e.g. ``"2024"``.
    :param project: Wikimedia project domain to query.
    :param timeout: Seconds to wait for the HTTP response before giving up.
    :return: Sum of the monthly view counts, or ``0`` when the API does not
        answer with HTTP 200 (an error line is printed in that case).

    Network failures (including timeouts) are not caught here and propagate
    from ``requests.get``.
    """
    # Local import keeps the function usable even if only this cell is re-run.
    from urllib.parse import quote

    base_url = "https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article"
    access = "all-access"  # desktop + mobile
    agent = "user"  # excludes bots and spiders
    granularity = "monthly"  # one item per month; the items are summed below

    start_date = f"{year}0101"  # first day of the year
    end_date = f"{year}1231"  # last day of the year

    # The title is a single path segment: without encoding, characters such as
    # "/", "?", "#" or "%" would change the route or truncate the URL.
    formatted_article = quote(format_article_title(article), safe="")
    url = f"{base_url}/{project}/{access}/{agent}/{formatted_article}/{granularity}/{start_date}/{end_date}"

    # NOTE(review): this header imitates a desktop browser; consider replacing
    # it with a descriptive client identifier — confirm against Wikimedia's
    # User-Agent policy.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"
    }

    # A timeout prevents a stalled connection from hanging the notebook cell.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        print(f"Erreur {response.status_code}: Impossible de récupérer les données.")
        return 0

    # Tolerate a 200 payload without "items" instead of raising KeyError.
    items = response.json().get("items", [])
    return sum(item["views"] for item in items)


def format_article_title(name: str) -> str:
    """Return ``name`` with every space character replaced by an underscore."""
    words = name.split(" ")
    return "_".join(words)

# Smoke test: total 2024 views of the "Harry Potter" article.
print(get_wikipedia_views(article="Harry Potter", year="2024"))

470002


In [38]:
import sys
from SPARQLWrapper import SPARQLWrapper, JSON
import pandas as pd


endpoint_url = "https://query.wikidata.org/sparql"

# Select poets (wdt:P106 wd:Q49757) linked to French via wdt:P1412 wd:Q150,
# keeping only entities that have a birth date and a French label.
# For each poet the query returns: the entity URI, the French label, the
# earliest birth and death dates, and comma-separated French labels of the
# linked works (P800) and movements (P135).
# NOTE(review): the `SERVICE wikibase:label` clause looks redundant because all
# labels are bound explicitly through rdfs:label — confirm before removing.
# NOTE(review): LIMIT is applied without ORDER BY, so which rows come back is
# presumably left to the endpoint — confirm if reproducibility matters.
query = """SELECT DISTINCT ?poet ?name (MIN(?date_birth_raw) AS ?date_birth) (MIN(?date_death_raw) AS ?date_death) 
                (GROUP_CONCAT(DISTINCT ?workLabel; separator=", ") AS ?works) 
                (GROUP_CONCAT(DISTINCT ?movementLabel; separator=", ") AS ?movements)  
WHERE {
  ?poet wdt:P106 wd:Q49757.
  ?poet wdt:P1412 wd:Q150.
  ?poet wdt:P569 ?date_birth_raw.  # Date de naissance obligatoire
  OPTIONAL { ?poet wdt:P570 ?date_death_raw. } # Date de décès optionnelle
  OPTIONAL { 
    ?poet wdt:P800 ?work. 
    ?work rdfs:label ?workLabel.
    FILTER (LANG(?workLabel) = "fr") 
  } # Œuvres principales optionnelles avec leur label en français
  OPTIONAL { 
    ?poet wdt:P135 ?movement. 
    ?movement rdfs:label ?movementLabel.
    FILTER (LANG(?movementLabel) = "fr") 
  } # Mouvements artistiques optionnels avec leur label en français

  ?poet rdfs:label ?name.

  FILTER (LANG(?name) = "fr")
  
  SERVICE wikibase:label { bd:serviceParam wikibase:language "fr". }
}
GROUP BY ?poet ?name


LIMIT 11
"""


def get_results(endpoint_url, query):
    """Run a SPARQL query and return the converted JSON response.

    :param endpoint_url: URL of the SPARQL endpoint to query.
    :param query: SPARQL query text.
    :return: Whatever ``SPARQLWrapper`` produces when converting the response
        with the JSON return format.
    """
    # Identify the client with the running interpreter's major.minor version.
    major, minor = sys.version_info[0], sys.version_info[1]
    client = SPARQLWrapper(endpoint_url, agent=f"WDQS-example Python/{major}.{minor}")
    client.setQuery(query)
    client.setReturnFormat(JSON)
    response = client.query()
    return response.convert()


# Run the SPARQL query, then attach to every returned poet the 2024 page-view
# total of the Wikipedia article whose title equals the poet's label.
array = []
results = get_results(endpoint_url, query)

for result in results["results"]["bindings"]:
    name = result["name"]["value"]
    # One HTTP request per poet.
    nbr_views = get_wikipedia_views(name, "2024")
    print(nbr_views)
    # Example endpoint for a manual check:
    # https://wikimedia.org/api/rest_v1/metrics/pageviews/per-article/fr.wikipedia.org/all-access/user/Henri_Hertz/daily/20250201/20250207

    # date_death, works and movements may be absent from a binding.
    date_death = result.get("date_death", {}).get("value")
    works = result.get("works", {}).get("value", "")
    movements = result.get("movements", {}).get("value", "")
    row = (
        result["poet"]["value"],
        name,
        result["date_birth"]["value"],
        date_death,
        works,
        movements,
        nbr_views,
    )
    array.append(row)

# Column name -> dtype; the key order also fixes the column order, which must
# match the field order of the tuples built above.
# NOTE(review): the date columns are cast to strings, not datetimes, and a
# missing date_death (None) may become the literal text "None" — confirm.
column_dtypes = {
    "poet": "<U200",        # entity URI
    "name": "<U200",        # poet label
    "date_birth": "<U200",  # birth date, kept as a string
    "date_death": "<U200",  # death date, kept as a string (may be missing)
    "works": "<U1000",      # comma-separated work labels
    "movements": "<U500",   # comma-separated movement labels
    "popularity": "int32",  # summed page views
}
dataframe = pd.DataFrame(array, columns=list(column_dtypes))
dataframe = dataframe.astype(column_dtypes)

dataframe

498
155
177
2024
Erreur 404: Impossible de récupérer les données.
0
Erreur 404: Impossible de récupérer les données.
0
3335
577
123
Erreur 404: Impossible de récupérer les données.
0
4514


Unnamed: 0,poet,name,date_birth,date_death,works,movements,popularity
0,http://www.wikidata.org/entity/Q3131267,Henri Hertz,1875-06-17T00:00:00Z,1966-10-11T00:00:00Z,,,498
1,http://www.wikidata.org/entity/Q124713352,fr,1985-07-25T00:00:00Z,,,,155
2,http://www.wikidata.org/entity/Q55836844,Eugène Tourneux,1809-10-06T00:00:00Z,1867-06-26T00:00:00Z,,,177
3,http://www.wikidata.org/entity/Q3171429,Jean Cortot,1925-02-14T00:00:00Z,2018-12-28T00:00:00Z,,,2024
4,http://www.wikidata.org/entity/Q85808013,Hippolyte Stupuy,1832-06-18T00:00:00Z,1900-05-01T00:00:00Z,,,0
5,http://www.wikidata.org/entity/Q3123819,Gérard Caramaro,1953-05-14T00:00:00Z,,,,0
6,http://www.wikidata.org/entity/Q447941,Alioune Badara Bèye,1945-09-28T00:00:00Z,2024-12-01T00:00:00Z,,,3335
7,http://www.wikidata.org/entity/Q2958880,Charles Derennes,1882-08-04T00:00:00Z,1930-01-01T00:00:00Z,Le Bestiaire sentimental,,577
8,http://www.wikidata.org/entity/Q25982527,André Malartre,1921-04-24T00:00:00Z,1995-09-13T00:00:00Z,,,123
9,http://www.wikidata.org/entity/Q116474792,François Peyrey,1873-01-01T00:00:00Z,1934-06-01T00:00:00Z,,,0


In [39]:
# Convert the DataFrame to JSON and write it to disk
import json

def save_dataframe_to_json(dataframe: pd.DataFrame, filename: str, orient: str = "records", indent: int = 4):
    """
    Serialize a pandas DataFrame to JSON and write it to a file.

    :param dataframe: The DataFrame to serialize.
    :param filename: Path of the output file (e.g. "data.json").
    :param orient: JSON layout passed to ``DataFrame.to_json``
        (e.g. "records", "split", "index", "columns", "values").
    :param indent: Number of spaces used to indent the JSON.
    """
    # Serialize first so the target file is only opened once the JSON exists.
    # force_ascii=False keeps accented characters readable instead of escaped.
    serialized = dataframe.to_json(force_ascii=False, indent=indent, orient=orient)

    with open(filename, mode="w", encoding="utf-8") as handle:
        handle.write(serialized)
# Write the enriched poets table next to the notebook.
save_dataframe_to_json(dataframe, filename="data.json")