In [2]:
import pandas as pd
import numpy as np
import joblib
from datetime import datetime, timedelta

# Load the observed weather records and turn the "date" column into real
# timestamps so that rows can be ordered chronologically.
df = pd.read_csv("weather_data_umoa.csv")
df = df.assign(date=pd.to_datetime(df["date"]))

# Keep one row per city: after sorting by date, the last row of each
# "city" group is that city's most recent record.
last_entries = (
    df.sort_values(by="date")
    .groupby("city")
    .tail(1)
)

# Load the persisted model and the three encoder objects (city, country,
# weather description), in that order.
# NOTE: joblib.load unpickles its input, so these files must come from a
# trusted source.
model, enc_city, enc_country, enc_desc = (
    joblib.load(artifact_path)
    for artifact_path in (
        "meteo_model.pkl",
        "city_encoder.pkl",
        "country_encoder.pkl",
        "desc_encoder.pkl",
    )
)

# Build the model inputs: for every city the encoders know, one row per
# hour over the next 7 days x 24 hours, starting at the current whole hour.
# NOTE(review): datetime.now() is naive local time — confirm this matches
# the time base of the training data.
future_data = []
start_date = datetime.now().replace(minute=0, second=0, microsecond=0)

# The forecast timestamps are the same for every city, so the 168
# consecutive hourly steps are computed once rather than once per city.
forecast_times = [start_date + timedelta(hours=step) for step in range(7 * 24)]

for _, row in last_entries.iterrows():
    try:
        city_enc = enc_city.transform([row["city"]])[0]
        country_enc = enc_country.transform([row["country"]])[0]
    except ValueError as e:
        # A label the encoder was not fitted on cannot be encoded, so the
        # city is reported and skipped (assumes scikit-learn LabelEncoder
        # style encoders, which raise ValueError for unseen labels).
        # Any other error is unexpected and is left to propagate.
        print(f"❌ Ville ignorée : {row['city']} ({row['country']}) — {e}")
        continue

    # One input row per forecast hour; "city", "country" and "date" are
    # carried along only to label the predictions afterwards.
    future_data.extend(
        {
            "latitude": row["latitude"],
            "longitude": row["longitude"],
            "hour": dt.hour,
            "day": dt.day,
            "month": dt.month,
            "year": dt.year,
            "city_enc": city_enc,
            "country_enc": country_enc,
            "city": row["city"],
            "country": row["country"],
            "date": dt,
        }
        for dt in forecast_times
    )

# Assemble the generated rows into a DataFrame and run the model on the
# feature columns.
df_inputs = pd.DataFrame(future_data)
if df_inputs.empty:
    # With no generated rows the DataFrame has no columns, and the feature
    # selection below would fail with an opaque KeyError; fail early with
    # an explicit message instead.
    raise ValueError(
        "Aucune ville connue du modèle : aucune prévision à générer."
    )

# The feature columns are selected explicitly, in this fixed order, before
# being passed to the model.
X_pred = df_inputs[
    [
        "latitude",
        "longitude",
        "hour",
        "day",
        "month",
        "year",
        "city_enc",
        "country_enc",
    ]
]
predictions = model.predict(X_pred)

# Name the columns of the raw prediction matrix (one column per target).
pred_df = pd.DataFrame(predictions, columns=[
    "temp", "humidity", "wind_speed", "wind_deg", "visibility",
    "sunrise_sec", "sunset_sec", "description_enc"
])

# Decode the predicted description code back to its text label.
# The prediction may be fractional, so it is rounded to the nearest code
# (a plain int cast would truncate 2.9 to 2) and clipped to the codes the
# encoder knows, so inverse_transform cannot fail on an out-of-range value.
# Assumes enc_desc exposes `classes_` like a scikit-learn LabelEncoder.
desc_codes = (
    np.rint(pred_df["description_enc"])
    .clip(0, len(enc_desc.classes_) - 1)
    .astype(int)
)
pred_df["description"] = enc_desc.inverse_transform(desc_codes)

# The *_sec columns are read as a number of seconds and rendered as HH:MM.
# NOTE(review): whether they are seconds since midnight or since the epoch
# is not visible here; only the time-of-day part is kept either way.
for label in ("sunrise", "sunset"):
    pred_df[label] = pd.to_datetime(
        pred_df[f"{label}_sec"], unit="s"
    ).dt.strftime("%H:%M")

# Copy the identifying columns over from the inputs; both frames share the
# same default row index, so the values line up row by row.
pred_df["date"] = df_inputs["date"]
pred_df["city"] = df_inputs["city"]
pred_df["country"] = df_inputs["country"]

# Final column selection and order for the exported file.
final_df = pred_df[[
    "date", "city", "country", "temp", "humidity", "wind_speed",
    "wind_deg", "visibility", "sunrise", "sunset", "description",
]]

# Write the forecast to CSV without the row index.
final_df.to_csv("previsions_meteo_7jours_horaire.csv", index=False)
print("✅ Fichier généré : previsions_meteo_7jours_horaire.csv")


❌ Ville ignorée : Léona (SN) — y contains previously unseen labels: 'Léona'
❌ Ville ignorée : Cotonou (BJ) — y contains previously unseen labels: 'Cotonou'
✅ Fichier généré : previsions_meteo_7jours_horaire.csv
