In [None]:
!pip install xgboost
!pip install lightgbm
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.8-cp312-cp312-manylinux2014_x86_64.whl (99.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m99.2/99.2 MB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.8


In [None]:
!pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/275.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283834 sha256=8c051dc4dc0e4aefea0601414e69365975d7ca803fde0bd1a9e47a7882cb32c6
  Stored in directory: /root/.cache/pip/wheels/e7/5d/0e/4b4fff9a47468fed5633211fb3b76d1db43fe806a17fb7486a
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


# **Código para compartir**

In [None]:
# ============================
# Predicción con Varios Modelos + SHAP y LIME
# ============================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

import xgboost as xgb
import lightgbm as lgb
import catboost as cb

# =======================
# 1. Load the dataset
# =======================
file_path = "nueva_base_con_rezagosfinal.xlsx"

# Read worksheet "Sheet1" and discard every row containing a missing value.
df = pd.read_excel(file_path, sheet_name="Sheet1").dropna()

# ==============================
# 2. Target and predictors
# ==============================
# Target: the ITCRM column.
y = df["ITCRM"]

# Predictors: every other column except those with a datetime dtype.
X = df.drop(columns=["ITCRM"]).select_dtypes(exclude=["datetime", "datetime64[ns]"])

# ==============================
# 3. Train / test split
# ==============================
# shuffle=False keeps the row order, so the last 20 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# ==============================
# 4. Model definitions
# ==============================
# Four tree-ensemble regressors, all seeded with 42. The dictionary keys are the
# display names used for the prediction columns and the metrics table.
modelos = dict(
    RandomForest=RandomForestRegressor(n_estimators=300, max_depth=10, random_state=42),
    XGBoost=xgb.XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.1, random_state=42),
    LightGBM=lgb.LGBMRegressor(n_estimators=300, max_depth=10, learning_rate=0.1, random_state=42),
    CatBoost=cb.CatBoostRegressor(n_estimators=300, depth=8, learning_rate=0.1, random_state=42, verbose=0),
)

resultados = {}  # model name -> dict with the test-set metrics
predicciones = pd.DataFrame(index=y_test.index)  # one column of test predictions per model

# ==============================
# 5. Training and evaluation
# ==============================
for nombre, modelo in modelos.items():
    print(f"\nEntrenando {nombre}...")
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)

    # Keep the test-set predictions of this model.
    predicciones[nombre] = y_pred

    # Error metrics on the test set (MAPE is stored as returned by scikit-learn).
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    resultados[nombre] = dict(MAE=mae, RMSE=rmse, MAPE=mape, R2=r2)

    print(f"{nombre} -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}, R²: {r2:.4f}")

# ==============================
# 6. Feature importances
# ==============================
# Build one long table (Variable, Importancia, Modelo) with the importances of
# every fitted model. Per-model frames are collected in a list and concatenated
# once, with a fresh index so the result has no duplicated row labels.
# NOTE(review): each library reports importances on its own scale, so the values
# are presumably not comparable across models -- confirm before reading them jointly.
_tablas_importancia = []

for nombre, modelo in modelos.items():
    try:
        if nombre == "CatBoost":
            imp = modelo.get_feature_importance()
        else:
            imp = modelo.feature_importances_
        _tablas_importancia.append(pd.DataFrame({
            "Variable": X.columns,
            "Importancia": imp,
            "Modelo": nombre
        }))
    except Exception as e:
        # Best effort: a model without importances is skipped, but the reason is reported
        # instead of being silently swallowed.
        print(f"No se pudo obtener la importancia de variables para {nombre}: {e}")

if _tablas_importancia:
    importancias = pd.concat(_tablas_importancia, axis=0, ignore_index=True)
else:
    # Keep the expected columns so downstream plotting/export code finds them.
    importancias = pd.DataFrame(columns=["Variable", "Importancia", "Modelo"])

# ==============================
# 7. Approximate interval for CatBoost
# ==============================
# Band of +/- 1.96 standard deviations of the test-set residuals around each
# CatBoost prediction; both bounds are stored as extra prediction columns.
if "CatBoost" in modelos:
    y_pred_cat = predicciones["CatBoost"].values
    residuales = y_test.values - y_pred_cat
    sigma = np.std(residuales)

    ic_inf, ic_sup = y_pred_cat - 1.96 * sigma, y_pred_cat + 1.96 * sigma

    for columna, valores in (("CatBoost_IC_inf", ic_inf), ("CatBoost_IC_sup", ic_sup)):
        predicciones[columna] = valores

    print(f"\nIntervalo de confianza (aprox) CatBoost con σ={sigma:.4f}")

# ==============================
# 8. Joint plot: actual series vs. every model
# ==============================
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(y_test.values, label="Real", marker="o", linewidth=2)

for nombre in modelos:
    ax.plot(predicciones[nombre].values, label=nombre, linestyle="--")

ax.set_title("Comparación de Modelos - Predicción ITCRM")
ax.set_xlabel("Observaciones")
ax.set_ylabel("ITCRM")
ax.legend()
ax.grid(True)
plt.show()

# ==============================
# 9. Feature importances (plot)
# ==============================
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=importancias, x="Importancia", y="Variable", hue="Modelo", ax=ax)
ax.set_title("Importancia de Variables por Modelo")
plt.show()

# ==============================
# 10. Pick the best model by R²
# ==============================
# One row per model, one column per metric; the winner is the row with the largest R2.
resultados_df = pd.DataFrame(resultados).transpose()
mejor_modelo_nombre = resultados_df["R2"].idxmax()
mejor_modelo = modelos[mejor_modelo_nombre]

print(f"\nMejor modelo según R²: {mejor_modelo_nombre}")

# ==============================
# 11. Interpretability with SHAP (best model only)
# ==============================
import shap
interpretabilidad = {}  # sheet name -> interpretability table

try:
    # LightGBM is explained through its underlying booster; every other model is
    # handed to TreeExplainer directly.
    if mejor_modelo_nombre == "LightGBM":
        explainer = shap.TreeExplainer(mejor_modelo.booster_)
    elif mejor_modelo_nombre in ("XGBoost", "CatBoost", "RandomForest"):
        explainer = shap.TreeExplainer(mejor_modelo)
    else:
        explainer = None

    shap_values = explainer.shap_values(X_test) if explainer is not None else None

    if shap_values is not None:
        # Global importance = mean absolute SHAP value of each feature over the test rows.
        importancia_media = np.abs(shap_values).mean(axis=0)
        shap_importancia = pd.DataFrame(
            {"Variable": X.columns, "SHAP_Importancia": importancia_media}
        ).sort_values(by="SHAP_Importancia", ascending=False)

        interpretabilidad["SHAP"] = shap_importancia

        # Bar summary plot saved to disk (not displayed).
        shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
        plt.title(f"SHAP Importancia Global - {mejor_modelo_nombre}")
        plt.tight_layout()
        plt.savefig("shap_summary.png")
        plt.close()

except Exception as e:
    print(f"Error con SHAP: {e}")

# ==============================
# 12. Interpretability with LIME (best model only)
# ==============================
from lime.lime_tabular import LimeTabularExplainer


def _predictor_con_columnas(modelo, columnas):
    """Return a predict function that restores column names before predicting.

    LIME passes perturbed samples as a bare 2-D array, while the models were
    fitted on a DataFrame; wrapping the array keeps the feature names consistent.
    """
    def _predecir(datos):
        return modelo.predict(pd.DataFrame(datos, columns=columnas))
    return _predecir


try:
    # random_state fixes LIME's sampling so the explanations are reproducible.
    lime_explainer = LimeTabularExplainer(
        training_data=np.array(X_train),
        feature_names=list(X_train.columns),
        mode="regression",
        random_state=42
    )
    predecir = _predictor_con_columnas(mejor_modelo, X_train.columns)

    lime_resultados = []
    for i in range(min(5, len(X_test))):  # explain at most the first 5 test rows
        exp = lime_explainer.explain_instance(
            data_row=X_test.iloc[i].values,
            predict_fn=predecir
        )
        temp = pd.DataFrame(exp.as_list(), columns=["Variable", "Efecto"])
        temp["Observacion"] = i
        lime_resultados.append(temp)

    lime_df = pd.concat(lime_resultados, axis=0)
    interpretabilidad["LIME"] = lime_df

except Exception as e:
    print(f"Error con LIME: {e}")

# ==============================
# 13. Save all results to Excel
# ==============================
with pd.ExcelWriter("resultados_modelos.xlsx") as writer:
    # Metrics table (index = model name).
    resultados_df.to_excel(writer, sheet_name="Metricas")

    # Actual values next to every model's predictions and the CatBoost bounds.
    salida = pd.concat(
        [y_test.reset_index(drop=True), predicciones.reset_index(drop=True)],
        axis=1,
    ).rename(columns={"ITCRM": "Real"})
    salida.to_excel(writer, sheet_name="Predicciones", index=False)

    # Feature importances.
    importancias.to_excel(writer, sheet_name="Importancias", index=False)

    # SHAP and LIME sheets are written only when the corresponding table exists.
    for hoja in ("SHAP", "LIME"):
        if hoja in interpretabilidad:
            interpretabilidad[hoja].to_excel(writer, sheet_name=hoja, index=False)

print("\nResultados guardados en 'resultados_modelos.xlsx' con SHAP y LIME para el mejor modelo")


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

import xgboost as xgb
import lightgbm as lgb
import catboost as cb
import shap
from lime.lime_tabular import LimeTabularExplainer

# =======================
# 1. Load the dataset
# =======================
file_path = "nueva_base_con_rezagosfinal.xlsx"

# Read worksheet "Sheet1" and discard every row containing a missing value.
df = pd.read_excel(file_path, sheet_name="Sheet1").dropna()

# ==============================
# 2. Target and predictors
# ==============================
# Target: the ITCRM column.
y = df["ITCRM"]

# Predictors: every other column except those with a datetime dtype.
X = df.drop(columns=["ITCRM"]).select_dtypes(exclude=["datetime", "datetime64[ns]"])

# ==============================
# 3. Train / test split
# ==============================
# shuffle=False keeps the row order, so the last 20 % of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# ==============================
# 4. Model definitions
# ==============================
# Four tree-ensemble regressors, all seeded with 42. The dictionary keys are the
# display names used for the prediction columns and the metrics table.
modelos = dict(
    RandomForest=RandomForestRegressor(n_estimators=300, max_depth=10, random_state=42),
    XGBoost=xgb.XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.1, random_state=42),
    LightGBM=lgb.LGBMRegressor(n_estimators=300, max_depth=10, learning_rate=0.1, random_state=42),
    CatBoost=cb.CatBoostRegressor(n_estimators=300, depth=8, learning_rate=0.1, random_state=42, verbose=0),
)

resultados = {}  # model name -> dict with the test-set metrics
predicciones = pd.DataFrame(index=y_test.index)  # one column of test predictions per model

# ==============================
# 5. Training and evaluation
# ==============================
for nombre, modelo in modelos.items():
    print(f"\nEntrenando {nombre}...")
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)

    # Keep the test-set predictions of this model.
    predicciones[nombre] = y_pred

    # Error metrics on the test set (MAPE is stored as returned by scikit-learn).
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    mape = mean_absolute_percentage_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    resultados[nombre] = dict(MAE=mae, RMSE=rmse, MAPE=mape, R2=r2)

    print(f"{nombre} -> MAE: {mae:.4f}, RMSE: {rmse:.4f}, MAPE: {mape:.4f}, R²: {r2:.4f}")

# ==============================
# 6. Feature importances
# ==============================
# Build one long table (Variable, Importancia, Modelo) with the importances of
# every fitted model. Per-model frames are collected in a list and concatenated
# once, with a fresh index so the result has no duplicated row labels.
# NOTE(review): each library reports importances on its own scale, so the values
# are presumably not comparable across models -- confirm before reading them jointly.
_tablas_importancia = []

for nombre, modelo in modelos.items():
    try:
        if nombre == "CatBoost":
            imp = modelo.get_feature_importance()
        else:
            imp = modelo.feature_importances_
        _tablas_importancia.append(pd.DataFrame({
            "Variable": X.columns,
            "Importancia": imp,
            "Modelo": nombre
        }))
    except Exception as e:
        # Best effort: a model without importances is skipped, but the reason is reported
        # instead of being silently swallowed.
        print(f"No se pudo obtener la importancia de variables para {nombre}: {e}")

if _tablas_importancia:
    importancias = pd.concat(_tablas_importancia, axis=0, ignore_index=True)
else:
    # Keep the expected columns so downstream plotting/export code finds them.
    importancias = pd.DataFrame(columns=["Variable", "Importancia", "Modelo"])

# ==============================
# 7. Approximate interval for CatBoost
# ==============================
# Band of +/- 1.96 standard deviations of the test-set residuals around each
# CatBoost prediction; both bounds are stored as extra prediction columns.
if "CatBoost" in modelos:
    y_pred_cat = predicciones["CatBoost"].values
    residuales = y_test.values - y_pred_cat
    sigma = np.std(residuales)

    ic_inf, ic_sup = y_pred_cat - 1.96 * sigma, y_pred_cat + 1.96 * sigma

    for columna, valores in (("CatBoost_IC_inf", ic_inf), ("CatBoost_IC_sup", ic_sup)):
        predicciones[columna] = valores

    print(f"\nIntervalo de confianza (aprox) CatBoost con σ={sigma:.4f}")

# ==============================
# 8. Joint plot: actual series vs. every model
# ==============================
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(y_test.values, label="Real", marker="o", linewidth=2)

for nombre in modelos:
    ax.plot(predicciones[nombre].values, label=nombre, linestyle="--")

ax.set_title("Comparación de Modelos - Predicción ITCRM")
ax.set_xlabel("Observaciones")
ax.set_ylabel("ITCRM")
ax.legend()
ax.grid(True)
plt.show()

# ==============================
# 9. Feature importances (plot)
# ==============================
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=importancias, x="Importancia", y="Variable", hue="Modelo", ax=ax)
ax.set_title("Importancia de Variables por Modelo")
plt.show()

# ==============================
# 10. Pick the best model by R²
# ==============================
# One row per model, one column per metric; the winner is the row with the largest R2.
resultados_df = pd.DataFrame(resultados).transpose()
mejor_modelo_nombre = resultados_df["R2"].idxmax()
mejor_modelo = modelos[mejor_modelo_nombre]

print(f"\nMejor modelo según R²: {mejor_modelo_nombre}")

# ==============================
# 11. Interpretability with SHAP (every model)
# ==============================
interpretabilidad = {}  # sheet name -> interpretability table
shap_importancias = pd.DataFrame()

for nombre, modelo in modelos.items():
    try:
        # LightGBM is explained through its underlying booster; every other model
        # is handed to TreeExplainer directly.
        if nombre == "LightGBM":
            explainer = shap.TreeExplainer(modelo.booster_)
        elif nombre in ("XGBoost", "CatBoost", "RandomForest"):
            explainer = shap.TreeExplainer(modelo)
        else:
            explainer = None

        shap_values = explainer.shap_values(X_test) if explainer is not None else None
        if shap_values is None:
            continue

        # Global importance = mean absolute SHAP value of each feature over the test rows.
        importancia_media = np.abs(shap_values).mean(axis=0)
        shap_importancia = pd.DataFrame(
            {"Variable": X.columns, "SHAP_Importancia": importancia_media, "Modelo": nombre}
        ).sort_values(by="SHAP_Importancia", ascending=False)

        shap_importancias = pd.concat([shap_importancias, shap_importancia], axis=0)

        # One bar summary plot per model, saved to disk (not displayed).
        shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
        plt.title(f"SHAP Importancia Global - {nombre}")
        plt.tight_layout()
        plt.savefig(f"shap_summary_{nombre}.png")
        plt.close()

    except Exception as e:
        print(f"Error con SHAP para {nombre}: {e}")

interpretabilidad["SHAP"] = shap_importancias

# ==============================
# 12. Interpretability with LIME (every model)
# ==============================
def _predictor_con_columnas(modelo, columnas):
    """Return a predict function that restores column names before predicting.

    LIME passes perturbed samples as a bare 2-D array, while the models were
    fitted on a DataFrame; wrapping the array keeps the feature names consistent.
    """
    def _predecir(datos):
        return modelo.predict(pd.DataFrame(datos, columns=columnas))
    return _predecir


try:
    # random_state fixes LIME's sampling so the explanations are reproducible.
    lime_explainer = LimeTabularExplainer(
        training_data=np.array(X_train),
        feature_names=list(X_train.columns),
        mode="regression",
        random_state=42
    )

    lime_resultados = []
    for nombre, modelo in modelos.items():
        predecir = _predictor_con_columnas(modelo, X_train.columns)
        # A failure in one model is reported and skipped so the explanations
        # already produced for the other models are not lost.
        try:
            for i in range(min(5, len(X_test))):  # at most the first 5 test rows per model
                exp = lime_explainer.explain_instance(
                    data_row=X_test.iloc[i].values,
                    predict_fn=predecir
                )
                temp = pd.DataFrame(exp.as_list(), columns=["Variable", "Efecto"])
                temp["Observacion"] = i
                temp["Modelo"] = nombre
                lime_resultados.append(temp)

                # LIME plot saved to disk; the figure is closed by handle to free memory.
                fig = exp.as_pyplot_figure()
                plt.title(f"LIME Explicación para {nombre} - Observación {i+1}")
                plt.tight_layout()
                plt.savefig(f"lime_{nombre}_obs{i+1}.png")
                plt.close(fig)
        except Exception as e:
            print(f"Error con LIME para {nombre}: {e}")

    # The LIME table is registered only when at least one explanation succeeded.
    if lime_resultados:
        lime_df = pd.concat(lime_resultados, axis=0)
        interpretabilidad["LIME"] = lime_df

except Exception as e:
    print(f"Error con LIME: {e}")

# ==============================
# 13. Save all results to Excel
# ==============================
with pd.ExcelWriter("resultados_modelos.xlsx") as writer:
    # Metrics table (index = model name).
    resultados_df.to_excel(writer, sheet_name="Metricas")

    # Actual values next to every model's predictions and the CatBoost bounds.
    salida = pd.concat(
        [y_test.reset_index(drop=True), predicciones.reset_index(drop=True)],
        axis=1,
    ).rename(columns={"ITCRM": "Real"})
    salida.to_excel(writer, sheet_name="Predicciones", index=False)

    # Feature importances.
    importancias.to_excel(writer, sheet_name="Importancias", index=False)

    # SHAP and LIME sheets are written only when the corresponding table exists.
    for hoja in ("SHAP", "LIME"):
        if hoja in interpretabilidad:
            interpretabilidad[hoja].to_excel(writer, sheet_name=hoja, index=False)

print("\nResultados guardados en 'resultados_modelos.xlsx' con SHAP y LIME para todos los modelos, incluyendo gráficos guardados como PNG.")

#########################################################################################

In [None]:
# ===========================
# VECM con train-test split, pronósticos y métricas
# ===========================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from statsmodels.tsa.vector_ar.vecm import coint_johansen, VECM

# ===========================
# 1. Load and prepare the data
# ===========================
df = pd.read_excel("baseVECMfinal.xlsx")

tcr = "ITCER"
fundamentales = [
    "IPC", "energia", "agua", "gasliquido",
    "X", "M", "RIN",
    "activa", "pasivaahorro", "pasivafijo", "libor3", "FEDFUNDS",
    "EGRESOSCORRIENTES", "INGRESOSCORRIENTES", "EGRESOSCAPITAL", "INGRESOSCAPITAL",
    "Oro", "Petroleo1", "Zinc", "Plata", "Estano",
    "temperatura", "precipitation", "drought"
]

# Rows with any missing value are dropped. The explicit .copy() makes model_df an
# independent frame, so adding the ln_ columns below never writes into a view of df.
model_df = df[[tcr] + fundamentales].dropna().copy()

# Variables that enter the model in natural logarithms.
log_vars = ["ITCER","IPC","energia","agua","gasliquido","X","M","RIN",
            "Oro","Petroleo1","Zinc","Plata","Estano"]

# The logarithm of a non-positive value is not finite; report such columns up front
# instead of letting -inf/NaN flow silently into the estimation.
no_positivas = [var for var in log_vars if (model_df[var] <= 0).any()]
if no_positivas:
    print(f"Advertencia: valores no positivos en {no_positivas}; su logaritmo no es finito.")

for var in log_vars:
    model_df["ln_" + var] = np.log(model_df[var])

# Final system: logged series plus the remaining series in levels.
Y = model_df[["ln_ITCER","ln_IPC","ln_X","ln_M","ln_RIN","ln_Oro","ln_Petroleo1",
              "ln_Zinc","ln_Plata","ln_Estano","activa","pasivaahorro","pasivafijo",
              "libor3","FEDFUNDS","EGRESOSCORRIENTES","INGRESOSCORRIENTES",
              "EGRESOSCAPITAL","INGRESOSCAPITAL","temperatura","precipitation","drought"]]

# ===========================
# 2. Train / test split
# ===========================
# Chronological split: the first 80 % of rows are used for estimation.
train_size = int(0.8 * len(Y))
train = Y.iloc[:train_size]
test = Y.iloc[train_size:]

# ===========================
# 3. Cointegration test on the training sample
# ===========================
johansen_test = coint_johansen(train, det_order=0, k_ar_diff=2)
# NOTE(review): statsmodels documents critical values for at most 12 series, while
# this system has more columns -- confirm the printed critical values are usable.
print("Trace test:", johansen_test.lr1)
print("Critical values:", johansen_test.cvt)

# ===========================
# 4. VECM estimation on the training sample
# ===========================
# NOTE(review): coint_rank is fixed at 1 and is not derived from the test above.
vecm = VECM(train, k_ar_diff=2, coint_rank=1, deterministic="co")
vecm_res = vecm.fit()
print(vecm_res.summary())

# ===========================
# 5. Forecasts
# ===========================
# Both forecasts come from the model fitted on the training sample.

# Forecast over the test horizon, labelled with the test index.
n_test = len(test)
forecast_test = vecm_res.predict(steps=n_test)
forecast_test_df = pd.DataFrame(forecast_test, index=test.index, columns=Y.columns)

# Extended 78-step forecast; it keeps pandas' default integer index.
forecast_78 = vecm_res.predict(steps=78)
forecast_78_df = pd.DataFrame(forecast_78, columns=Y.columns)

# ===========================
# 6. Save to Excel
# ===========================
with pd.ExcelWriter("pronosticos_VECM.xlsx") as writer:
    for hoja, tabla in (("Forecast_Test", forecast_test_df), ("Forecast_78", forecast_78_df)):
        tabla.to_excel(writer, sheet_name=hoja)

# ===========================
# 7. Metrics on the key variable (ln_ITCER)
# ===========================
# Put actual and forecast values side by side and keep only rows where both exist.
aligned = pd.concat(
    [test["ln_ITCER"], forecast_test_df["ln_ITCER"]],
    axis=1,
    keys=["y_true", "y_pred"],
).dropna()

y_true = aligned["y_true"].values
y_pred = aligned["y_pred"].values

# Error metrics
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_true, y_pred)

# MAPE as a fraction; division warnings are silenced for this computation only.
with np.errstate(divide='ignore', invalid='ignore'):
    mape = np.mean(np.abs((y_true - y_pred) / y_true))

r2 = r2_score(y_true, y_pred)

for linea in (
    f"R²: {r2:.4f}",
    f"RMSE: {rmse:.6f}",
    f"MSE: {mse:.6f}",
    f"MAE: {mae:.6f}",
    f"MAPE: {mape*100:.4f}%",
):
    print(linea)

# ===========================
# 8. Comparison plot
# ===========================
# Training series, actual test values and the forecast on one axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(train.index, train["ln_ITCER"], label="Train", color="blue")
ax.plot(aligned.index, aligned["y_true"], label="Test Real", color="black")
ax.plot(aligned.index, aligned["y_pred"], label="Pronóstico", linestyle="--", color="red")
ax.set_title("Pronóstico VECM vs Valores Reales (ln_ITCER)")
ax.set_xlabel("Tiempo")
ax.set_ylabel("ln_ITCER")
ax.legend()
ax.grid(True)
plt.show()


In [None]:
# ===========================
# ECM (Regresión Lineal) con Train-Test, Pronósticos y Métricas
# ===========================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# ===========================
# 1. Load and prepare the data
# ===========================
df = pd.read_excel("baseVECMfinal.xlsx")

tcr = "ITCER"
fundamentales = [
    "IPC", "energia", "agua", "gasliquido",
    "X", "M", "RIN",
    "activa", "pasivaahorro", "pasivafijo", "libor3", "FEDFUNDS",
    "EGRESOSCORRIENTES", "INGRESOSCORRIENTES", "EGRESOSCAPITAL", "INGRESOSCAPITAL",
    "Oro", "Petroleo1", "Zinc", "Plata", "Estano",
    "temperatura", "precipitation", "drought"
]

# Rows with any missing value are dropped. The explicit .copy() makes model_df an
# independent frame, so adding the ln_ columns below never writes into a view of df.
model_df = df[[tcr] + fundamentales].dropna().copy()

# Variables that enter the model in natural logarithms.
log_vars = ["ITCER","IPC","energia","agua","gasliquido","X","M","RIN",
            "Oro","Petroleo1","Zinc","Plata","Estano"]

# Every logged column becomes a regressor (or the target), so a non-positive value
# would put -inf/NaN into the regression; fail early with an explicit message.
no_positivas = [var for var in log_vars if (model_df[var] <= 0).any()]
if no_positivas:
    raise ValueError(f"No se puede aplicar logaritmo: valores no positivos en {no_positivas}")

for var in log_vars:
    model_df["ln_" + var] = np.log(model_df[var])

# Target and regressors.
Y = model_df["ln_ITCER"]
# Drop the target and the level version of every logged variable, so each variable
# enters the regression once (in logs) instead of both in levels and in logs.
X = model_df.drop(columns=log_vars + ["ln_ITCER"])

# ===========================
# 2. Train / test split
# ===========================
# Chronological split: the first 80 % of rows are used for estimation.
train_size = int(0.8 * len(X))
X_train, Y_train = X.iloc[:train_size], Y.iloc[:train_size]
X_test, Y_test = X.iloc[train_size:], Y.iloc[train_size:]

# ===========================
# 3. Linear regression fit
# ===========================
model = LinearRegression()
model.fit(X_train, Y_train)

# ===========================
# 4. Predictions on the test sample
# ===========================
Y_pred = model.predict(X_test)

# ===========================
# 5. Metrics
# ===========================
mse = mean_squared_error(Y_test, Y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(Y_test, Y_pred)
r2 = r2_score(Y_test, Y_pred)

# MAPE as a fraction; division warnings are silenced for this computation only.
with np.errstate(divide='ignore', invalid='ignore'):
    mape = np.mean(np.abs((Y_test.values - Y_pred) / Y_test.values))

for linea in (
    f"R²: {r2:.4f}",
    f"RMSE: {rmse:.6f}",
    f"MSE: {mse:.6f}",
    f"MAE: {mae:.6f}",
    f"MAPE: {mape*100:.4f}%",
):
    print(linea)

# ===========================
# 6. Save to Excel
# ===========================
# Actual vs. predicted values on the test sample, indexed like Y_test.
results_df = pd.DataFrame(
    {"Real": Y_test.values, "Pronosticado": Y_pred},
    index=Y_test.index,
)

# Single-row table with the metrics computed above.
metricas_df = pd.DataFrame({
    "R2":[r2], "RMSE":[rmse], "MSE":[mse], "MAE":[mae], "MAPE":[mape]
})

with pd.ExcelWriter("pronosticos_ECM.xlsx") as writer:
    results_df.to_excel(writer, sheet_name="Forecast_Test")
    metricas_df.to_excel(writer, sheet_name="Metrics", index=False)

# ===========================
# 7. Comparison plot
# ===========================
# Training series, actual test values and the predictions on one axis.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(Y_train.index, Y_train, label="Train", color="blue")
ax.plot(Y_test.index, Y_test, label="Test Real", color="black")
ax.plot(Y_test.index, Y_pred, label="Pronóstico", linestyle="--", color="red")
ax.set_title("Regresión Lineal (ECM) - Pronóstico vs Real (ln_ITCER)")
ax.set_xlabel("Tiempo")
ax.set_ylabel("ln_ITCER")
ax.legend()
ax.grid(True)
plt.show()
