In [None]:
import os
from pathlib import Path
import json
import joblib
import numpy as np
import datetime
import pandas as pd

# Cargar el archivo CSV
df = pd.read_csv("power_yucatan_25yrs.csv")

# Convertir la columna 'date' desde formato yyyymmdd
df['date'] = pd.to_datetime(df['date'].astype(str), format="%Y%m%d")

# Extraer variables temporales
df['DOY'] = df['date'].dt.dayofyear
df['MONTH'] = df['date'].dt.month
df['YEAR'] = df['date'].dt.year


# Transformar la precipitación para mejorar el aprendizaje
df['PRECTOTCORR_log'] = np.log1p(df['PRECTOTCORR'])  # log(1 + x)

# Verifica columnas disponibles
print(df.columns)
df.head()

Index(['date', 'lat', 'lon', 'PRECTOTCORR', 'T2M_MAX', 'T2M_MIN', 'RH2M',
       'WS10M', 'ALLSKY_SFC_SW_DWN', 'DOY', 'MONTH', 'YEAR',
       'PRECTOTCORR_log'],
      dtype='object')


Unnamed: 0,date,lat,lon,PRECTOTCORR,T2M_MAX,T2M_MIN,RH2M,WS10M,ALLSKY_SFC_SW_DWN,DOY,MONTH,YEAR,PRECTOTCORR_log
0,1998-01-01,18.0,-90.0,2.67,22.79,13.84,91.55,1.26,13.56,1,1,1998,1.300192
1,1998-01-02,18.0,-90.0,10.93,24.5,14.65,94.3,1.01,11.17,2,1,1998,2.479056
2,1998-01-03,18.0,-90.0,5.84,26.57,19.78,94.18,1.17,9.82,3,1,1998,1.922788
3,1998-01-04,18.0,-90.0,1.03,27.19,20.47,91.01,1.05,9.81,4,1,1998,0.708036
4,1998-01-05,18.0,-90.0,0.25,27.67,20.96,89.5,1.04,12.98,5,1,1998,0.223144


In [38]:
# Entradas: solo fecha y coordenadas
input_features = ['DOY', 'MONTH', 'YEAR', 'lat', 'lon']

# Salidas: variables climáticas
output_features = ['PRECTOTCORR_log', 'T2M_MAX', 'T2M_MIN', 'RH2M', 'WS10M', 'ALLSKY_SFC_SW_DWN']

X = df[input_features].values
y = df[output_features].values

In [39]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Dividir
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Escalar
scaler_input = MinMaxScaler()
scaler_output = MinMaxScaler()

X_train_scaled = scaler_input.fit_transform(X_train)
X_test_scaled = scaler_input.transform(X_test)

y_train_scaled = scaler_output.fit_transform(y_train)
y_test_scaled = scaler_output.transform(y_test)

In [51]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping

model = Sequential()
model.add(Dense(128, input_shape=(X_train_scaled.shape[1],)))
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.3))

model.add(Dense(64))
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.3))

model.add(Dense(32))
model.add(LeakyReLU(alpha=0.1))

model.add(Dense(6))  # 6 salidas climáticas

model.compile(optimizer='adam', loss='mae')
model.summary()
model.compile(optimizer='adam', loss='mae')
model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [52]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(
    X_train_scaled, y_train_scaled,
    epochs=20,
    batch_size=8,
    validation_data=(X_test_scaled, y_test_scaled),
    callbacks=[early_stop],
    verbose=1
)

Epoch 1/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m279s[0m 6ms/step - loss: 0.0768 - val_loss: 0.0762
Epoch 2/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m291s[0m 5ms/step - loss: 0.0740 - val_loss: 0.0745
Epoch 3/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 3ms/step - loss: 0.0735 - val_loss: 0.0746
Epoch 4/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m463s[0m 10ms/step - loss: 0.0733 - val_loss: 0.0755
Epoch 5/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 8ms/step - loss: 0.0732 - val_loss: 0.0748
Epoch 6/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m228s[0m 5ms/step - loss: 0.0731 - val_loss: 0.0775
Epoch 7/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m129s[0m 3ms/step - loss: 0.0730 - val_loss: 0.0756
Epoch 8/20
[1m44742/44742[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 4ms/step - loss: 0.0730 - val_loss

In [53]:
# Predicción
y_pred_scaled = model.predict(X_test_scaled)

# Inversión de escala
y_test_inv = scaler_output.inverse_transform(y_test_scaled)
y_pred_inv = scaler_output.inverse_transform(y_pred_scaled)

# Inversión de logaritmo solo para PRECTOTCORR
y_pred_inv[:, 0] = np.expm1(y_pred_inv[:, 0])
y_test_inv[:, 0] = np.expm1(y_test_inv[:, 0])


# Métricas
from sklearn.metrics import mean_absolute_error, r2_score

mae = mean_absolute_error(y_test_inv, y_pred_inv)
r2 = r2_score(y_test_inv, y_pred_inv)

print(f"📉 MAE total: {mae:.2f}")
print(f"📈 R² global: {r2:.2f} → {(r2*100):.1f}% de ajuste")

[1m2797/2797[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 824us/step
📉 MAE total: 2.48
📈 R² global: 0.44 → 44.0% de ajuste


In [54]:
for i, var in enumerate(output_features):
    mae_var = mean_absolute_error(y_test_inv[:, i], y_pred_inv[:, i])
    r2_var = r2_score(y_test_inv[:, i], y_pred_inv[:, i])
    print(f"🔍 {var}: MAE = {mae_var:.2f} | R² = {r2_var:.2f} → {(r2_var*100):.1f}%")

🔍 PRECTOTCORR_log: MAE = 2.83 | R² = -0.01 → -1.3%
🔍 T2M_MAX: MAE = 1.76 | R² = 0.44 → 44.4%
🔍 T2M_MIN: MAE = 1.27 | R² = 0.63 → 62.7%
🔍 RH2M: MAE = 6.00 | R² = 0.54 → 53.6%
🔍 WS10M: MAE = 0.50 | R² = 0.59 → 58.9%
🔍 ALLSKY_SFC_SW_DWN: MAE = 2.53 | R² = 0.45 → 45.5%


In [55]:
def predecir_por_fecha_coordenadas(fecha_str, lat, lon, modelo, scaler_in, scaler_out):
    fecha = pd.to_datetime(fecha_str, format="%Y%m%d")
    entrada = pd.DataFrame([{
        'DOY': fecha.dayofyear,
        'MONTH': fecha.month,
        'YEAR': fecha.year,
        'lat': lat,
        'lon': lon
    }])

    entrada_scaled = scaler_in.transform(entrada.values)
    pred_scaled = modelo.predict(entrada_scaled)
    pred_real = scaler_out.inverse_transform(pred_scaled)

    resultado = dict(zip(output_features, pred_real[0]))
    print(f"📍 Predicción para {fecha_str} en ({lat}, {lon}):")
    for var, val in resultado.items():
        print(f"🔮 {var}: {val:.2f}")
    return resultado

In [56]:
predecir_por_fecha_coordenadas("20251004", 20.97, -89.62, model, scaler_input, scaler_output)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
📍 Predicción para 20251004 en (20.97, -89.62):
🔮 PRECTOTCORR_log: 1.16
🔮 T2M_MAX: 30.18
🔮 T2M_MIN: 23.95
🔮 RH2M: 85.72
🔮 WS10M: 1.42
🔮 ALLSKY_SFC_SW_DWN: 18.96


{'PRECTOTCORR_log': np.float32(1.1603078),
 'T2M_MAX': np.float32(30.175667),
 'T2M_MIN': np.float32(23.953657),
 'RH2M': np.float32(85.718605),
 'WS10M': np.float32(1.415863),
 'ALLSKY_SFC_SW_DWN': np.float32(18.962738)}

In [None]:
# --- Guardar modelo y scalers automáticamente (ejecuta DESPUES de entrenar) ---
# Esta celda intenta detectar objetos comunes y guardarlos en models/
os.makedirs("models", exist_ok=True)

def _auto_save_rna4_artifacts(prefix="models"):
    """
    Intenta detectar variables habituales y guardarlas:
    busca en globals(): model_rna4, model, scaler_input, scaler_output, scaler_in, scaler_out
    """
    g = globals()
    saved = {}
    # posibles nombres de modelos y scalers
    posibles_modelos = ["model_rna4", "model", "modelo", "modelo_rna4"]
    posibles_scalers_in = ["scaler_input", "scaler_in", "scalerX", "scaler_input_rna4", "scaler_in_rna4"]
    posibles_scalers_out = ["scaler_output", "scaler_out", "scalerY", "scaler_output_rna4", "scaler_out_rna4"]

    for name in posibles_modelos:
        if name in g:
            joblib.dump(g[name], f"{prefix}/modelo_rna4.joblib")
            saved["model"] = name
            break

    for name in posibles_scalers_in:
        if name in g:
            joblib.dump(g[name], f"{prefix}/scaler_in_rna4.joblib")
            saved["scaler_in"] = name
            break

    for name in posibles_scalers_out:
        if name in g:
            joblib.dump(g[name], f"{prefix}/scaler_out_rna4.joblib")
            saved["scaler_out"] = name
            break

    return saved


In [None]:
 #--- Funciones operativas para usar el modelo ya guardado (NO reentrena) ---

# Rutas por defecto (ajusta si es necesario)
DEFAULT_MODEL_PATH = "models/modelo_rna4.joblib"
DEFAULT_SCALER_IN = "models/scaler_in_rna4.joblib"
DEFAULT_SCALER_OUT = "models/scaler_out_rna4.joblib"
LUGARES_JSON = "data/lugares.json"
PREDICCIONES_RNA4_JSON = "data/predicciones_rna4.json"

def parse_fecha(fecha_str):
    if isinstance(fecha_str, datetime.date):
        return fecha_str
    if "-" in fecha_str:
        return datetime.datetime.strptime(fecha_str, "%Y-%m-%d").date()
    return datetime.datetime.strptime(fecha_str, "%Y%m%d").date()

def convertir_fecha_a_numero(fecha_date, fecha_base=None):
    if fecha_base is None:
        fecha_base = datetime.date(1998, 1, 1)
    return (fecha_date - fecha_base).days

def cargar_lugares(path=LUGARES_JSON):
    p = Path(path)
    if not p.exists():
        raise FileNotFoundError(f"No existe el archivo de lugares en {path}")
    with p.open("r", encoding="utf-8") as f:
        return json.load(f)

def cargar_modelo_rna4(model_path=DEFAULT_MODEL_PATH, scaler_in_path=DEFAULT_SCALER_IN, scaler_out_path=DEFAULT_SCALER_OUT):
    model = joblib.load(model_path)
    scaler_in = joblib.load(scaler_in_path) if Path(scaler_in_path).exists() else None
    scaler_out = joblib.load(scaler_out_path) if Path(scaler_out_path).exists() else None
    return model, scaler_in, scaler_out

def _map_model_output_to_variables(y_pred):
    y = np.asarray(y_pred)
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):
        # un solo valor por muestra
        return {"PRECTOTCORR": float(y.ravel()[0])}
    vals = y[0] if y.ndim == 2 else y
    vals = list(vals)
    while len(vals) < 6:
        vals.append(0.0)
    return {
        "PRECTOTCORR": float(vals[0]),
        "T2M_MAX": float(vals[1]),
        "T2M_MIN": float(vals[2]),
        "RH2M": float(vals[3]),
        "WS10M": float(vals[4]),
        "ALLSKY_SFC_SW_DWN": float(vals[5])
    }

def predecir_por_fecha_lugares(fecha_str, model, scaler_in=None, scaler_out=None,
                               lugares_path=LUGARES_JSON, output_file=PREDICCIONES_RNA4_JSON,
                               fecha_base=None):
    """
    Recorre todos los lugares en lugares_path y predice usando model (ya entrenado).
    Sobrescribe output_file con la lista de dicts resultantes.
    """
    fecha_date = parse_fecha(fecha_str)
    dias = convertir_fecha_a_numero(fecha_date, fecha_base)

    lugares = cargar_lugares(lugares_path)
    resultados = []

    for lugar in lugares:
        lat = float(lugar.get("lat"))
        lon = float(lugar.get("lon"))
        X = np.array([[dias, lat, lon]], dtype=float)
        X_model = scaler_in.transform(X) if scaler_in is not None else X
        y_pred = model.predict(X_model)

        # Si hay scaler_out, intentar invertir (si aplica)
        if scaler_out is not None:
            try:
                y_unscaled = scaler_out.inverse_transform(y_pred)
            except Exception:
                y_unscaled = y_pred
        else:
            y_unscaled = y_pred

        variables = _map_model_output_to_variables(y_unscaled)

        resultados.append({
            "id": lugar.get("id", lugar.get("nombre")),
            "nombre": lugar.get("nombre"),
            "lat": lat,
            "lon": lon,
            "category": lugar.get("category"),
            "fecha": fecha_date.isoformat(),
            "dias_desde_base": int(dias),
            "variables": variables
        })

    outp = Path(output_file)
    outp.parent.mkdir(parents=True, exist_ok=True)
    with outp.open("w", encoding="utf-8") as f:
        json.dump(resultados, f, ensure_ascii=False, indent=2)

    return resultados

# %%
# EJEMPLO DE USO (no lo ejecutes hasta que hayas guardado tus modelos con joblib)
# model, s_in, s_out = cargar_modelo_rna4()
# res = predecir_por_fecha_lugares("2025-10-04", model, s_in, s_out)
# print(len(res), res[:2])

In [None]:
#model.save("modelo_clima_mlp.h5")



In [None]:
#from tensorflow.keras.models import load_model
#modelo_cargado = load_model("modelo_clima_mlp.h5")