In [None]:
import requests
import datetime
import pandas as pd

# Extract ~5 years of daily weather data for 5 tourist cities around the world
# from the NASA POWER daily point API and store everything in one CSV file.

# City name -> (latitude, longitude).
cities = {
    "Madrid": (40.4168, -3.7038),
    "Lima": (-12.0464, -77.0428),
    "Los Angeles": (34.0522, -118.2437),
    "Paris": (48.8566, 2.3522),
    "Ciudad de Mexico": (19.4326, -99.1332)
}

end_date = datetime.date.today()
start_date = end_date - datetime.timedelta(days=5*365)  # ~5 years

# API parameter code -> column name used in the output CSV.
# The insertion order here fixes the column order of the CSV.
column_names = {
    "T2M_MAX": "temp_max",                   # Maximum temperature
    "T2M_MIN": "temp_min",                   # Minimum temperature
    "T2M": "temp_mean",                      # Mean temperature
    "RH2M": "humidity",                      # Relative humidity
    "WS2M": "wind_speed",                    # Wind at 2 m
    "PRECTOTCORR": "precip_mm",              # Precipitation
    "ALLSKY_SFC_SW_DWN": "solar_radiation",  # Solar radiation
    "ALLSKY_KT": "cloud_index",              # Cloudiness (clearness index)
}
parameters = list(column_names)

OUTPUT_CSV = "weather_data2.csv"
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs the cell forever

# Collect one frame per city and concatenate once at the end instead of
# re-copying the accumulated frame on every iteration.
frames = []

for city, (lat, lon) in cities.items():
    url = (
        "https://power.larc.nasa.gov/api/temporal/daily/point"
        f"?parameters={','.join(parameters)}"
        f"&community=AG"
        f"&longitude={lon}&latitude={lat}"
        f"&start={start_date.strftime('%Y%m%d')}"
        f"&end={end_date.strftime('%Y%m%d')}"
        f"&format=JSON"
    )

    print(f"📡 Consultando datos para {city}...")
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Network failure / timeout: report it and move on to the next city,
        # mirroring how non-200 responses are handled below.
        print(f"❌ Error al consultar {city}: {exc}")
        continue

    if response.status_code != 200:
        print(f"❌ Error al consultar {city}: {response.status_code}")
        continue

    data = response.json()
    daily = data["properties"]["parameter"]  # {parameter code: {YYYYMMDD: value}}

    # Building the frame from the dict-of-dicts aligns every parameter on its
    # date key, so a parameter with a missing or reordered date yields NaN in
    # that row instead of silently shifting values against the wrong date.
    df = (
        pd.DataFrame(daily)[parameters]
        .rename(columns=column_names)
        .rename_axis("date")
        .reset_index()
    )

    df["date"] = pd.to_datetime(df["date"], format="%Y%m%d")
    df["city"] = city  # tag every row with its city

    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when every
# request failed.
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Expose the positional index as a 1-based "id" column.
all_data = all_data.reset_index().rename(columns={"index": "id"})
all_data["id"] = all_data["id"] + 1

all_data.to_csv(OUTPUT_CSV, index=False)
# Report the file that was actually written (the message previously named a
# different file than the one passed to to_csv).
print(f"\n✅ Datos guardados en {OUTPUT_CSV}")
print(all_data.head())


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error

# ------------------------------
# LOAD DATASET AND DROP MISSING VALUES
# ------------------------------
# Read the file the extraction cell actually writes ("weather_data2.csv");
# reading "weather_data.csv" fails (or silently uses a stale file) on a fresh
# Restart & Run All because nothing in this notebook produces it.
WEATHER_CSV = "weather_data2.csv"
MISSING_SENTINEL = -999  # value treated as "no data" and removed below

df = pd.read_csv(WEATHER_CSV)
df['date'] = pd.to_datetime(df['date'])
# Turn the sentinel into NaN, then drop every row with at least one NaN.
df = df.replace(MISSING_SENTINEL, np.nan).dropna()

# ------------------------------
# ENCODE CITY + EXTRACT DATE PARTS
# ------------------------------
# le_city is kept at module level: the prediction function and the pickle
# export both need the fitted encoder.
le_city = LabelEncoder()
df = df.assign(
    city_encoded=le_city.fit_transform(df['city']),
    year=df['date'].dt.year,
    month=df['date'].dt.month,
    day=df['date'].dt.day,
)

# ------------------------------
# VARIABLES
# ------------------------------
# Features: encoded city plus calendar date parts.
X = df[['city_encoded', 'year', 'month', 'day']]
# Targets: the column order here defines the column order of model.predict().
y = df[['temp_max', 'temp_min', 'temp_mean', 'humidity',
        'wind_speed', 'precip_mm', 'solar_radiation', 'cloud_index']]

# ------------------------------
# SEPARATE DATASET IN TRAIN AND TEST
# ------------------------------
# NOTE(review): this is a random 80/20 split, so test days are interleaved
# with training days — confirm that is the intended evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ------------------------------
# TRAIN WITH RANDOM FOREST
# ------------------------------
rf_model = RandomForestRegressor(
    n_estimators=500,
    max_depth=15,
    random_state=42,
    n_jobs=-1
)
rf_model.fit(X_train, y_train)

# ------------------------------
# TEST MODEL
# ------------------------------
y_pred = rf_model.predict(X_test)
# With scikit-learn's default multioutput setting both metrics are averaged
# over the eight targets, so the RMSE mixes different units.
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"R²: {r2:.3f}")
print(f"RMSE: {rmse:.3f}")

# ------------------------------
# PREDICTION FUNCTION
# ------------------------------
def predecir_fecha_ciudad_rf(ciudad, fecha):
    """Predict weather values for one city on one date with the fitted forest.

    Args:
        ciudad: City label; it is encoded with the module-level ``le_city``,
            so an unseen label is expected to make ``le_city.transform`` raise.
        fecha: Anything ``pd.to_datetime`` accepts (e.g. ``"2025-10-05"``).

    Returns:
        dict with float values under the keys ``max_temp``, ``min_temp``,
        ``pro_temp``, ``pro_rain`` and ``cloud_cover``, taken from positions
        0, 1, 2, 5 and 7 of the single row predicted by ``rf_model``.
    """
    ts = pd.to_datetime(fecha)

    # Single-row feature frame; column names and order must match training.
    features = pd.DataFrame(
        [[le_city.transform([ciudad])[0], ts.year, ts.month, ts.day]],
        columns=['city_encoded', 'year', 'month', 'day'],
    )

    row = rf_model.predict(features)[0]

    # (output key, position in the predicted row)
    selected = (
        ('max_temp', 0),
        ('min_temp', 1),
        ('pro_temp', 2),
        ('pro_rain', 5),      # precip_mm
        ('cloud_cover', 7),   # cloud_index
    )
    return {key: float(row[pos]) for key, pos in selected}


In [None]:
# Example prediction. City and date live in variables so the printed label
# always matches the arguments actually passed to the model (the label used to
# show a different date than the one that was predicted).
ciudad_ejemplo = "Madrid"
fecha_ejemplo = "2025-10-05"

resultado = predecir_fecha_ciudad_rf(ciudad_ejemplo, fecha_ejemplo)
print(f"\nPredicción Random Forest para {ciudad_ejemplo} {fecha_ejemplo}:")
print(resultado)

In [None]:
import pickle

# Save the model and its encoder in a single .pkl file for use in the backend.
# Both objects are stored in one dict, under the keys "model" and
# "label_encoder".
modelo_completo = dict(model=rf_model, label_encoder=le_city)

with open("rf_weather_full.pkl", mode="wb") as archivo_pkl:
    pickle.dump(modelo_completo, archivo_pkl)

print("Modelo y LabelEncoder guardados en rf_weather_full.pkl")
