In [4]:
# ===============================================
# üå¶Ô∏è G√©n√©ration des donn√©es externes de Paris
# (M√©t√©o, jours f√©ri√©s, vacances scolaires, √©v√©nements)
# ===============================================

import pandas as pd
import requests
from datetime import datetime, timedelta
import os

# ------------------------------------------------------------
# üìÅ 0Ô∏è‚É£ Cr√©ation du dossier de sortie
# ------------------------------------------------------------
output_dir = "external_data"
os.makedirs(output_dir, exist_ok=True)

# ------------------------------------------------------------
# 1Ô∏è‚É£ Grille temporelle horaire
# ------------------------------------------------------------
start = datetime(2023, 10, 1)
end = datetime(2024, 11, 11, 23)
hours = pd.date_range(start=start, end=end, freq="H")

features = pd.DataFrame({"time": hours})
features["date"] = features["time"].dt.date
features["hour"] = features["time"].dt.hour

# ------------------------------------------------------------
# 2Ô∏è‚É£ Donn√©es m√©t√©o (Open-Meteo)
# ------------------------------------------------------------
print("T√©l√©chargement des donn√©es m√©t√©o...")

try:
    meteo_params = {
        "latitude": 48.8566,
        "longitude": 2.3522,
        "start_date": start.strftime("%Y-%m-%d"),
        "end_date": end.strftime("%Y-%m-%d"),
        "hourly": "temperature_2m,precipitation,cloud_cover,wind_speed_10m",
        "timezone": "Europe/Paris",
    }

    r = requests.get("https://archive-api.open-meteo.com/v1/archive", params=meteo_params)
    r.raise_for_status()
    meteo = pd.DataFrame(r.json()["hourly"])
    meteo["time"] = pd.to_datetime(meteo["time"])
    meteo.to_csv(os.path.join(output_dir, "meteo_raw.csv"), index=False)

    features = features.merge(meteo, on="time", how="left")
except Exception as e:
    print("‚ö†Ô∏è Erreur t√©l√©chargement m√©t√©o :", e)

# ------------------------------------------------------------
# 3Ô∏è‚É£ Jours f√©ri√©s
# ------------------------------------------------------------
print("T√©l√©chargement des jours f√©ri√©s...")

try:
    feries_url = "https://etalab.github.io/jours-feries-france-data/csv/jours_feries_metropole.csv"
    feries = pd.read_csv(feries_url)
    feries["date"] = pd.to_datetime(feries["date"]).dt.date
    feries["is_holiday"] = 1
    feries.to_csv(os.path.join(output_dir, "jours_feries.csv"), index=False)

    features = features.merge(feries[["date", "is_holiday"]], on="date", how="left").fillna({"is_holiday": 0})
except Exception as e:
    print("‚ö†Ô∏è Erreur t√©l√©chargement jours f√©ri√©s :", e)
    features["is_holiday"] = 0

# ------------------------------------------------------------
# 4Ô∏è‚É£ Vacances scolaires (Zone C)
# ------------------------------------------------------------
print("T√©l√©chargement des vacances scolaires...")

try:
    vac_url = "https://data.education.gouv.fr/explore/dataset/fr-en-calendrier-scolaire/download/?format=csv"
    vac = pd.read_csv(vac_url, sep=";", engine="python", on_bad_lines="skip")
    vac = vac[vac["zones"].str.contains("Zone C", na=False)]
    vac["date_debut"] = pd.to_datetime(vac["date_debut"], errors="coerce")
    vac["date_fin"] = pd.to_datetime(vac["date_fin"], errors="coerce")
    vac = vac.dropna(subset=["date_debut", "date_fin"])
    vac.to_csv(os.path.join(output_dir, "vacances_zoneC.csv"), index=False)

    def in_vacation(date):
        for _, row in vac.iterrows():
            if row["date_debut"].date() <= date <= row["date_fin"].date():
                return 1
        return 0

    features["is_vacation"] = features["date"].apply(in_vacation)
except Exception as e:
    print("‚ö†Ô∏è Erreur t√©l√©chargement vacances :", e)
    features["is_vacation"] = 0

# ------------------------------------------------------------
# 5Ô∏è‚É£ √âv√©nements publics (Paris)
# ------------------------------------------------------------
print("T√©l√©chargement des √©v√©nements...")

try:
    events_url = "https://opendata.paris.fr/explore/dataset/que-faire-a-paris-/download/?format=csv"
    events = pd.read_csv(events_url, sep=";", engine="python", on_bad_lines="skip", low_memory=False)
    events["date_start"] = pd.to_datetime(events["date_start"], errors="coerce")
    events["date_end"] = pd.to_datetime(events["date_end"], errors="coerce")
    events = events.dropna(subset=["date_start", "date_end"])
    events = events[["date_start", "date_end", "address_text"]]
    events.to_csv(os.path.join(output_dir, "evenements_raw.csv"), index=False)

    def has_event(date):
        for _, row in events.iterrows():
            if row["date_start"].date() <= date <= row["date_end"].date():
                return 1
        return 0

    features["has_event"] = features["date"].apply(has_event)
except Exception as e:
    print("‚ö†Ô∏è Erreur t√©l√©chargement √©v√©nements :", e)
    features["has_event"] = 0

# ------------------------------------------------------------
# 6Ô∏è‚É£ Sauvegarde finale
# ------------------------------------------------------------
output_path = os.path.join(output_dir, "external_features_paris.csv")
features.to_csv(output_path, index=False)
print(f"\n‚úÖ Donn√©es externes fusionn√©es sauvegard√©es dans : {output_path}\n")

# Aper√ßu
print(features.head())


T√©l√©chargement des donn√©es m√©t√©o...
T√©l√©chargement des jours f√©ri√©s...
T√©l√©chargement des vacances scolaires...
‚ö†Ô∏è Erreur t√©l√©chargement vacances : 'date_debut'
T√©l√©chargement des √©v√©nements...
‚ö†Ô∏è Erreur t√©l√©chargement √©v√©nements : The 'low_memory' option is not supported with the 'python' engine

‚úÖ Donn√©es externes fusionn√©es sauvegard√©es dans : external_data\external_features_paris.csv

                 time        date  hour  temperature_2m  precipitation  \
0 2023-10-01 00:00:00  2023-10-01     0            14.2            0.0   
1 2023-10-01 01:00:00  2023-10-01     1            13.9            0.0   
2 2023-10-01 02:00:00  2023-10-01     2            13.5            0.0   
3 2023-10-01 03:00:00  2023-10-01     3            13.2            0.0   
4 2023-10-01 04:00:00  2023-10-01     4            12.8            0.0   

   cloud_cover  wind_speed_10m  is_holiday  is_vacation  has_event  
0            0             7.5         0.0            0     