In [None]:
import time
from datetime import time
import pandas as pd
from datetime import datetime, timedelta
import os
import json

In [None]:
from modules.scrapping_functions import init_driver, fetch_json_event

# Récupération sur OpenAgenda

In [None]:
# Scrape OpenAgenda (Nantes Métropole agenda) month by month for a fixed list
# of venues and store each (month, venue) response as a JSON file.

# The import cell runs `import time` and then `from datetime import time`,
# which rebinds the name `time` to the `datetime.time` class; `time.sleep`
# would therefore raise AttributeError. Import `sleep` directly so this cell
# does not depend on which binding won.
from time import sleep

# IDs of the venues to query (a single venue may have several IDs).
location_uids = [
    "47538434", "21547859", "95545941", "41936666", "34310089",
    "57179132", "57196237", "79753849", "91702963", "27572337", "45334118"
]

# First day to collect; collection runs until the end of that calendar year.
start_date = datetime(2025, 2, 15)
api_base_url = "https://openagenda.com/fr/nantesmetropole?timings%5Bgte%5D={start}&timings%5Blte%5D={end}&timings%5Btz%5D=Europe%2FParis&locationUid%5B0%5D={location}"

MAX_ATTEMPTS = 3      # fetch attempts per (month, venue) before giving up
RETRY_DELAY_S = 5     # pause between two failed attempts
REQUEST_DELAY_S = 3   # pause between two venues, to avoid hammering the site

# NOTE(review): a later cell reads its JSON files from "data/raw/json_data",
# not from this folder — confirm whether the files are moved manually.
os.makedirs("json_data", exist_ok=True)

driver = init_driver()

# Kept for backward compatibility with other cells; not used by this cell.
dataframes = []

try:
    month_cursor = start_date

    # Loop over the months until December of the start year.
    while month_cursor.year == start_date.year:
        # The first period starts on `start_date` itself; every following
        # period starts on the first day of its month.
        if month_cursor == start_date:
            first_day = start_date
        else:
            first_day = datetime(month_cursor.year, month_cursor.month, 1)

        # Day 28 exists in every month and 28 + 4 days always lands in the
        # following month; snapping to day 1 gives the next month's start.
        next_month = (first_day.replace(day=28) + timedelta(days=4)).replace(day=1)
        last_day = next_month - timedelta(seconds=1)

        # NOTE(review): these datetimes are naive and the "Z" suffix marks
        # them as UTC, while the URL also sends timings[tz]=Europe/Paris —
        # confirm the intended timezone.
        start_iso = first_day.isoformat() + "Z"
        end_iso = last_day.isoformat() + "Z"

        for location_uid in location_uids:
            url = api_base_url.format(start=start_iso, end=end_iso, location=location_uid)

            print(f"Collecting data for {first_day.strftime('%Y-%m')} - Location {location_uid}...")

            # Try up to MAX_ATTEMPTS times before giving up on this venue.
            response_json = None
            for attempt in range(1, MAX_ATTEMPTS + 1):
                response_json = fetch_json_event(url, driver)
                if response_json:
                    break
                if attempt < MAX_ATTEMPTS:
                    print(f"Tentative {attempt} échouée, nouvelle tentative...")
                    sleep(RETRY_DELAY_S)
                else:
                    # Report the give-up instead of skipping silently.
                    print(f"Tentative {attempt} échouée, abandon pour le lieu {location_uid}.")

            if response_json:
                file_name = f"json_data/{first_day.strftime('%Y-%m')}_{location_uid}.json"
                with open(file_name, "w", encoding="utf-8") as f:
                    json.dump(response_json, f, indent=4, ensure_ascii=False)

            sleep(REQUEST_DELAY_S)

        month_cursor = next_month
finally:
    # Always close the Selenium driver, even if a request raised.
    driver.quit()


📡 Fetching data for 2025-12 - Location 47538434...
📡 Fetching data for 2025-12 - Location 21547859...
📡 Fetching data for 2025-12 - Location 95545941...
📡 Fetching data for 2025-12 - Location 41936666...
📡 Fetching data for 2025-12 - Location 34310089...
📡 Fetching data for 2025-12 - Location 57179132...
📡 Fetching data for 2025-12 - Location 57196237...
📡 Fetching data for 2025-12 - Location 79753849...
📡 Fetching data for 2025-12 - Location 91702963...
📡 Fetching data for 2025-12 - Location 27572337...
📡 Fetching data for 2025-12 - Location 45334118...


In [None]:
# Flatten every scraped JSON file into one row per event and save the result
# as a single CSV file.

# Folder containing the JSON files.
# NOTE(review): the scraping cell writes its files to "json_data/", not to
# this folder — confirm how the files get here.
folder_path = "data/raw/json_data"

# Project-relative output path (same file name as before) instead of an
# absolute, user-specific path, so the notebook runs on any machine.
# NOTE(review): assumes the working directory is the project root, as
# `folder_path` already does — confirm.
output_csv_path = os.path.join("data", "preprocessed", "event_data_nanntes_last_version.csv")


def _as_dict(value):
    """Return `value` if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _event_to_row(event, month, location_uid):
    """Flatten one event node of "@graph" into a flat dict (one CSV row).

    Args:
        event: dict describing one event.
        month: month label taken from the file name.
        location_uid: venue ID taken from the file name.

    Returns:
        A dict with one entry per output column. Missing keys become "";
        a null or non-object "location", "address" or "geo" is treated as
        empty instead of raising.
    """
    location = _as_dict(event.get("location"))
    address = _as_dict(location.get("address"))
    geo = _as_dict(location.get("geo"))

    return {
        "event_id": event.get("@id", ""),
        "name": event.get("name", ""),
        "description": event.get("description", ""),
        "startDate": event.get("startDate", ""),
        "endDate": event.get("endDate", ""),
        "eventAttendanceMode": event.get("eventAttendanceMode", ""),
        "eventStatus": event.get("eventStatus", ""),
        "image": event.get("image", ""),
        "url": event.get("url", ""),
        "location_name": location.get("name", ""),
        "address": address.get("streetAddress", ""),
        "city": address.get("addressLocality", ""),
        "region": address.get("addressRegion", ""),
        "postalCode": address.get("postalCode", ""),
        "country": address.get("addressCountry", ""),
        "latitude": geo.get("latitude", ""),
        "longitude": geo.get("longitude", ""),
        "month": month,
        "location_uid": location_uid,
    }


event_rows = []

# Read all JSON files; sorted so the row order is the same on every run.
for file_name in sorted(os.listdir(folder_path)):
    stem, extension = os.path.splitext(file_name)
    if extension != ".json":
        continue

    # The month and the location_uid are encoded in the file name
    # as "<month>_<location_uid>.json".
    parts = stem.split("_")
    if len(parts) < 2:
        print(f"Fichier ignoré (nom inattendu) : {file_name}")
        continue
    month, location_uid = parts[0], parts[1]

    file_path = os.path.join(folder_path, file_name)
    with open(file_path, "r", encoding="utf-8") as f:
        json_data = json.load(f)

    # Events are listed under "@graph"; a file without it contributes no rows.
    graph = _as_dict(json_data).get("@graph") or []
    if not isinstance(graph, list):
        graph = []

    for event in graph:
        if isinstance(event, dict):
            event_rows.append(_event_to_row(event, month, location_uid))

df_clean = pd.DataFrame(event_rows)

# Create the output folder if needed so that to_csv cannot fail on it.
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)
df_clean.to_csv(output_csv_path, index=False, encoding="utf-8")

print("Extraction et sauvegarde des événements terminées !")


Extraction et sauvegarde des événements terminées !
