In [25]:
import time
from datetime import time
import pandas as pd
from datetime import datetime, timedelta
import os
import json
import uuid

In [2]:
from modules.scrapping_functions import init_driver, fetch_json_event

# Récupération sur OpenAgenda

In [None]:
# Liste des ID de lieux (plusieurs ID possible pour un seul lieu)
location_uids = [
    "47538434", "21547859", "95545941", "41936666", "34310089",
    "57179132", "57196237", "79753849", "91702963", "27572337", "45334118"
]

start_date = datetime(2025, 2, 15)
api_base_url = "https://openagenda.com/fr/nantesmetropole?timings%5Bgte%5D={start}&timings%5Blte%5D={end}&timings%5Btz%5D=Europe%2FParis&locationUid%5B0%5D={location}"

os.makedirs("json_data", exist_ok=True)

driver = init_driver()

dataframes = []

# Boucle sur les mois jusqu’en décembre
while start_date.year == 2025:
    first_day = start_date if start_date.month == 2 else datetime(start_date.year, start_date.month, 1)
    next_month = first_day.replace(day=28) + timedelta(days=4)  
    last_day = next_month.replace(day=1) - timedelta(seconds=1)  

    for location_uid in location_uids:
        start_iso = first_day.isoformat() + "Z"
        end_iso = last_day.isoformat() + "Z"
        url = api_base_url.format(start=start_iso, end=end_iso, location=location_uid)

        print(f"Collecting data for {first_day.strftime('%Y-%m')} - Location {location_uid}...")

        # Ajout de 3 tentatives avant d’abandonner
        for attempt in range(3):
            response_json = fetch_json_event(url, driver)
            if response_json:
                break
            print(f"Tentative {attempt + 1} échouée, nouvelle tentative...")
            time.sleep(5) 

        if response_json:
            file_name = f"json_data/{first_day.strftime('%Y-%m')}_{location_uid}.json"
            with open(file_name, "w", encoding="utf-8") as f:
                json.dump(response_json, f, indent=4, ensure_ascii=False)


        time.sleep(3)

    start_date = next_month  

# Fermer Selenium après toutes les requêtes
driver.quit()


📡 Fetching data for 2025-12 - Location 47538434...
📡 Fetching data for 2025-12 - Location 21547859...
📡 Fetching data for 2025-12 - Location 95545941...
📡 Fetching data for 2025-12 - Location 41936666...
📡 Fetching data for 2025-12 - Location 34310089...
📡 Fetching data for 2025-12 - Location 57179132...
📡 Fetching data for 2025-12 - Location 57196237...
📡 Fetching data for 2025-12 - Location 79753849...
📡 Fetching data for 2025-12 - Location 91702963...
📡 Fetching data for 2025-12 - Location 27572337...
📡 Fetching data for 2025-12 - Location 45334118...


# Data cleaning

In [None]:
# Dossier contenant les fichiers JSON
folder_path = "AI_model_urban_mobility/data/raw/json_data"

event_rows = []

# Lire tous les fichiers JSON
for file_name in os.listdir(folder_path):
    if file_name.endswith(".json"):  
        file_path = os.path.join(folder_path, file_name)
        
        with open(file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)

        # Extraire le mois et le location_uid depuis le nom du fichier
        parts = file_name.replace(".json", "").split("_")
        location_uid = parts[1]  

        # Vérifier si @graph contient des événements
        if "@graph" in json_data and len(json_data["@graph"]) > 0:
            for event in json_data["@graph"]:
                event_data = {
                    "event_id": event.get("@id", ""),
                    "name": event.get("name", ""),
                    "description": event.get("description", ""),
                    "startDate": event.get("startDate", ""),
                    "endDate": event.get("endDate", ""),
                    "eventAttendanceMode": event.get("eventAttendanceMode", ""),
                    "eventStatus": event.get("eventStatus", ""),
                    "image": event.get("image", ""),
                    "url": event.get("url", ""),
                    "location_name": event.get("location", {}).get("name", ""),
                    "address": event.get("location", {}).get("address", {}).get("streetAddress", ""),
                    "city": event.get("location", {}).get("address", {}).get("addressLocality", ""),
                    "region": event.get("location", {}).get("address", {}).get("addressRegion", ""),
                    "postalCode": event.get("location", {}).get("address", {}).get("postalCode", ""),
                    "country": event.get("location", {}).get("address", {}).get("addressCountry", ""),
                    "latitude": event.get("location", {}).get("geo", {}).get("latitude", ""),
                    "longitude": event.get("location", {}).get("geo", {}).get("longitude", ""),
                    "location_uid": location_uid
                }
                event_rows.append(event_data)

df_clean = pd.DataFrame(event_rows)

print("Extraction et sauvegarde des événements terminées !")


Extraction et sauvegarde des événements terminées !


In [18]:
df_clean.head()

Unnamed: 0,event_id,name,description,startDate,endDate,eventAttendanceMode,eventStatus,image,url,location_name,address,city,region,postalCode,country,latitude,longitude,location_uid
0,https://openagenda.com/nantesmetropole/events/...,Football : FC Nantes / Paris SG,Match - 29e journée - Ligue 1 - Saison 2024/2025,2025-04-13T17:00:00+02:00,2025-04-13T19:00:00+02:00,https://schema.org/OfflineEventAttendanceMode,https://schema.org/EventScheduled,https://cdn.openagenda.com/main/04f31a30c11f4e...,https://openagenda.com/nantesmetropole/events/...,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,Pays de la Loire,44300,FR,47.25867,-1.527812,95545941
1,https://openagenda.com/nantesmetropole/events/...,Football : FC Nantes / Toulouse FC,Match - 31e journée - Ligue 1 - Saison 2024/2025,2025-04-27T17:00:00+02:00,2025-04-27T19:00:00+02:00,https://schema.org/OfflineEventAttendanceMode,https://schema.org/EventScheduled,https://cdn.openagenda.com/main/e0644760d0e54b...,https://openagenda.com/nantesmetropole/events/...,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,Pays de la Loire,44300,FR,47.25867,-1.527812,95545941
2,https://openagenda.com/nantesmetropole/events/...,Blandine Lehout - La Vie de ta mère,Blandine Lehout - La Vie de ta mère,2025-10-01T20:00:00+02:00,2025-10-01T22:30:00+02:00,https://schema.org/OfflineEventAttendanceMode,https://schema.org/EventScheduled,https://cdn.openagenda.com/main/102d0d4b300344...,https://openagenda.com/nantesmetropole/events/...,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,Pays de la Loire,44000,FR,47.213314,-1.544058,47538434
3,https://openagenda.com/nantesmetropole/events/...,Alexandre Kominek - Bâtard sensible,Alexandre Kominek - Bâtard sensible,2025-03-01T20:30:00+01:00,2025-03-01T22:00:00+01:00,https://schema.org/OfflineEventAttendanceMode,https://schema.org/EventScheduled,https://cdn.openagenda.com/main/b97f52494c2b45...,https://openagenda.com/nantesmetropole/events/...,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,Pays de la Loire,44000,FR,47.213314,-1.544058,47538434
4,https://openagenda.com/nantesmetropole/events/...,Comédie Le Clan des divorcées,Comédie Le Clan des divorcées,2025-03-02T15:00:00+01:00,2025-03-02T16:40:00+01:00,https://schema.org/OfflineEventAttendanceMode,https://schema.org/EventScheduled,https://cdn.openagenda.com/main/f77c73aed44949...,https://openagenda.com/nantesmetropole/events/...,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,Pays de la Loire,44000,FR,47.213314,-1.544058,47538434


In [19]:
df_clean = df_clean.drop_duplicates()

In [20]:
column_to_drop = ['event_id', 'eventAttendanceMode', 'eventStatus', 'image', 'url', 'region', 'country']

In [21]:
df_clean = df_clean.drop(columns=column_to_drop)

In [22]:
df_clean.head()

Unnamed: 0,name,description,startDate,endDate,location_name,address,city,postalCode,latitude,longitude,location_uid
0,Football : FC Nantes / Paris SG,Match - 29e journée - Ligue 1 - Saison 2024/2025,2025-04-13T17:00:00+02:00,2025-04-13T19:00:00+02:00,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,44300,47.25867,-1.527812,95545941
1,Football : FC Nantes / Toulouse FC,Match - 31e journée - Ligue 1 - Saison 2024/2025,2025-04-27T17:00:00+02:00,2025-04-27T19:00:00+02:00,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,44300,47.25867,-1.527812,95545941
2,Blandine Lehout - La Vie de ta mère,Blandine Lehout - La Vie de ta mère,2025-10-01T20:00:00+02:00,2025-10-01T22:30:00+02:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47.213314,-1.544058,47538434
3,Alexandre Kominek - Bâtard sensible,Alexandre Kominek - Bâtard sensible,2025-03-01T20:30:00+01:00,2025-03-01T22:00:00+01:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47.213314,-1.544058,47538434
4,Comédie Le Clan des divorcées,Comédie Le Clan des divorcées,2025-03-02T15:00:00+01:00,2025-03-02T16:40:00+01:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47.213314,-1.544058,47538434


In [23]:
df_clean['coordinates_geo'] = df_clean.apply(lambda x: "{" + str(x['longitude']) + ", " + str(x['latitude']) + "}", axis=1)

In [28]:
df_clean.drop(columns=['latitude', 'longitude'], inplace = True)

In [29]:
df_clean.head()

Unnamed: 0,name,description,startDate,endDate,location_name,address,city,postalCode,location_uid,coordinates_geo,event_id
0,Football : FC Nantes / Paris SG,Match - 29e journée - Ligue 1 - Saison 2024/2025,2025-04-13T17:00:00+02:00,2025-04-13T19:00:00+02:00,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,44300,95545941,"{-1.527812, 47.25867}",926afe1f-3bcc-4cc1-a2bd-3597dd36d86d
1,Football : FC Nantes / Toulouse FC,Match - 31e journée - Ligue 1 - Saison 2024/2025,2025-04-27T17:00:00+02:00,2025-04-27T19:00:00+02:00,Stade de la Beaujoire Louis Fonteneau,"330 Route de Saint Joseph, Nantes",Nantes,44300,95545941,"{-1.527812, 47.25867}",19bb79da-1c20-41a2-a53d-c45bcae5a0ce
2,Blandine Lehout - La Vie de ta mère,Blandine Lehout - La Vie de ta mère,2025-10-01T20:00:00+02:00,2025-10-01T22:30:00+02:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47538434,"{-1.544058, 47.213314}",da51b672-08bc-49e6-bcd6-516090c48e77
3,Alexandre Kominek - Bâtard sensible,Alexandre Kominek - Bâtard sensible,2025-03-01T20:30:00+01:00,2025-03-01T22:00:00+01:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47538434,"{-1.544058, 47.213314}",209c09e0-8d45-4429-ab15-7e1bf979f9c1
4,Comédie Le Clan des divorcées,Comédie Le Clan des divorcées,2025-03-02T15:00:00+01:00,2025-03-02T16:40:00+01:00,Cité internationale des Congrès,"5 Rue de Valmy, Nantes",Nantes,44000,47538434,"{-1.544058, 47.213314}",2eae861f-2e7f-4ff4-8c77-8f72b719fb23


In [26]:
df_clean['event_id'] = [str(uuid.uuid4()) for _ in range(len(df_clean))]

In [None]:
#df_clean.to_csv("AI_model_urban_mobility/data/preprocessed/event_data_nantes_last_version.csv", index=False, encoding="utf-8")
