In [2]:
import sys
from pathlib import Path

# Notebook location: the kernel's current working directory.
NOTEBOOK_DIR = Path.cwd()

# Project root = nearest directory (the cwd or one of its ancestors) that
# contains utils/paths.py, which later cells import from. This keeps the
# notebook working when the kernel is started from the project root or from a
# nested folder. If no such directory is found, fall back to the original
# assumption: the notebook lives one level below the root (in Notebooks/).
PROJECT_ROOT = next(
    (
        candidate
        for candidate in (NOTEBOOK_DIR, *NOTEBOOK_DIR.parents)
        if (candidate / "utils" / "paths.py").is_file()
    ),
    NOTEBOOK_DIR.parent,
)

# Put the project root first on sys.path (once) so `utils.*` can be imported.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("Proyecto raíz detectado:", PROJECT_ROOT)

Proyecto raíz detectado: c:\Users\cathe\Didier Jesus\EduFinance\EduFinance_Simulator


In [3]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

from pathlib import Path
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error

import matplotlib.pyplot as plt

# Rutas
from utils.paths import DATA_DIR, FIG_DIR
from utils.loader import load_csv

# Output locations for model results and figures.
MODELS_DIR = PROJECT_ROOT / "models_results"
ARIMA_RESULTS_DIR = MODELS_DIR / "Arima_results"
ARIMA_FIG_DIR = FIG_DIR / "Arima_fig_results"

# Create every output directory (and any missing parents); no-op if present.
for output_dir in (MODELS_DIR, ARIMA_RESULTS_DIR, ARIMA_FIG_DIR):
    output_dir.mkdir(parents=True, exist_ok=True)


In [5]:
# Load the price table and convert its index to timestamps.
prices = load_csv(DATA_DIR / "time_series" / "prices.csv")
prices.index = pd.to_datetime(prices.index)

# Every column except one named "date" (case-insensitive) is a ticker.
tickers = [column for column in prices.columns if not column.lower() == "date"]
(len(prices), tickers)


(1931, ['BTC-USD', 'EUNL.DE', 'QQQ', 'TSLA', 'V', 'VOO', 'XAR', 'XRP-USD'])

In [6]:
def evaluate_arima_model(series, order, train_size=30):
    """
    Fit an ARIMA model on all but the last ``train_size`` observations and
    score its multi-step forecast against that held-out tail.

    Parameters
    ----------
    series : pandas.Series or other sliceable 1-D sequence
        Observations in chronological order.
    order : tuple
        ARIMA ``(p, d, q)`` order, passed straight to ``ARIMA``.
    train_size : int, default 30
        Despite its name, this is the number of trailing observations held
        out for validation, which is also the forecast horizon. The name is
        kept so existing callers keep working.

    Returns
    -------
    dict or None
        On success a dict with keys ``order``, ``rmse``, ``mae``, ``mape``
        (percent), ``aic``, ``bic``, ``valid`` (held-out observations) and
        ``forecast`` (predictions, re-labelled with the index of ``valid``
        when ``valid`` is a pandas Series).
        ``None`` when the split is impossible or fitting/forecasting fails.

    Notes
    -----
    MAPE divides by the actual values, so it is infinite/undefined when the
    validation window contains zeros.
    """
    try:
        # A usable split needs at least one training and one validation point.
        if not 0 < train_size < len(series):
            return None

        # Train / validation split: the last `train_size` points are held out.
        train = series[:-train_size]
        valid = series[-train_size:]

        # Fit the model and forecast over the held-out horizon.
        model_fit = ARIMA(train, order=order).fit()
        forecast = model_fit.forecast(steps=train_size)

        # Compare step k of the forecast with observation k of the hold-out.
        # Working on plain arrays avoids pandas index alignment: the forecast
        # may come back on a different index than `valid`, and aligned
        # arithmetic would then produce NaNs instead of errors.
        actual_values = np.asarray(valid, dtype=float)
        predicted_values = np.asarray(forecast, dtype=float)

        # Give the forecast the same labels as the hold-out so callers can
        # combine or plot both without alignment surprises.
        if isinstance(valid, pd.Series):
            forecast = pd.Series(
                predicted_values,
                index=valid.index,
                name=getattr(forecast, "name", None),
            )

        # Error metrics.
        rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
        mae = mean_absolute_error(actual_values, predicted_values)
        mape = np.mean(np.abs((actual_values - predicted_values) / actual_values)) * 100

        return {
            "order": order,
            "rmse": rmse,
            "mae": mae,
            "mape": mape,
            "aic": model_fit.aic,
            "bic": model_fit.bic,
            "valid": valid,
            "forecast": forecast
        }

    except Exception:
        # Best effort: a failed fit just drops this candidate order.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate, so a long grid search can still be interrupted.
        return None


In [7]:
# Candidate ARIMA orders explored by the grid search.
p_values = list(range(3))       # autoregressive terms: 0, 1, 2
d_values = list(range(1, 3))    # differencing orders: 1, 2
q_values = list(range(3))       # moving-average terms: 0, 1, 2

# Accumulators filled by the search: best result per ticker and summary rows.
best_models = dict()
all_metrics = list()


In [8]:
# Number of trailing observations held out for validation.
# evaluate_arima_model calls this parameter `train_size`.
VALID_SIZE = 30

# Start from empty accumulators so re-running this cell does not append
# duplicate rows to the summary.
best_models.clear()
all_metrics.clear()

for tk in tickers:
    print(f"\n=== Procesando {tk} ===")
    series = prices[tk].dropna()

    best_score = np.inf
    best_model = None

    # Exhaustive search over every (p, d, q) combination.
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                result = evaluate_arima_model(series, order, train_size=VALID_SIZE)

                # Failed fits are reported as None; skip them.
                if result is None:
                    continue

                # Keep the candidate with the lowest MAPE. A NaN MAPE never
                # compares as smaller, so such candidates are ignored.
                if result["mape"] < best_score:
                    best_score = result["mape"]
                    best_model = result

    # Every candidate may have failed (e.g. series too short); skip the
    # ticker instead of crashing on best_model[...] below.
    if best_model is None:
        print(f"Sin modelo válido para {tk}; se omite.")
        continue

    print(f"Mejor modelo para {tk}: {best_model['order']}  | MAPE={best_model['mape']:.3f}%")

    best_models[tk] = best_model
    all_metrics.append({
        "ticker": tk,
        "order": best_model["order"],
        "rmse": best_model["rmse"],
        "mae": best_model["mae"],
        "mape": best_model["mape"],
        "aic": best_model["aic"],
        "bic": best_model["bic"],
        "train_size": len(series) - VALID_SIZE,
        "valid_size": VALID_SIZE
    })



=== Procesando BTC-USD ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para BTC-USD: (0, 1, 0)  | MAPE=3.513%

=== Procesando EUNL.DE ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para EUNL.DE: (1, 2, 2)  | MAPE=1.189%

=== Procesando QQQ ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para QQQ: (0, 2, 1)  | MAPE=0.843%

=== Procesando TSLA ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para TSLA: (0, 2, 2)  | MAPE=3.502%

=== Procesando V ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para V: (2, 1, 2)  | MAPE=2.601%

=== Procesando VOO ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para VOO: (0, 2, 1)  | MAPE=0.673%

=== Procesando XAR ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para XAR: (0, 1, 0)  | MAPE=1.943%

=== Procesando XRP-USD ===


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._

Mejor modelo para XRP-USD: (2, 1, 1)  | MAPE=4.479%




In [9]:
# Summary table: one row per ticker, lowest MAPE first.
metrics_path = ARIMA_RESULTS_DIR / "ARIMA_all_metrics.csv"
metrics_df = pd.DataFrame(all_metrics)
metrics_df = metrics_df.sort_values(by="mape")

# Persist without the positional index, then display the table.
metrics_df.to_csv(metrics_path, index=False)

metrics_df


Unnamed: 0,ticker,order,rmse,mae,mape,aic,bic,train_size,valid_size
5,VOO,"(0, 2, 1)",4.911867,3.943731,0.673095,10716.075028,10727.173193,1901,30
2,QQQ,"(0, 2, 1)",6.19967,4.816919,0.842847,11225.004623,11236.102788,1901,30
1,EUNL.DE,"(1, 2, 2)",1.414059,1.243876,1.189251,3977.863784,4000.060114,1901,30
6,XAR,"(0, 1, 0)",4.78959,4.179092,1.943456,7537.62826,7543.17787,1901,30
4,V,"(2, 1, 2)",10.763754,8.853065,2.600909,9872.03024,9899.778285,1901,30
3,TSLA,"(0, 2, 2)",14.000949,11.218253,3.501747,13319.009313,13335.656561,1901,30
0,BTC-USD,"(0, 1, 0)",4909.431384,3968.872656,3.512971,33169.007218,33174.556827,1901,30
7,XRP-USD,"(2, 1, 1)",0.164383,0.132767,4.478736,-4641.640577,-4619.442141,1901,30


In [10]:
for tk, model in best_models.items():

    # Pair actual and predicted values by position (forecast step k versus
    # hold-out observation k). Building the frame from plain arrays avoids
    # pandas index alignment: if the forecast carries different labels than
    # the hold-out, alignment would yield NaN rows and the index assignment
    # would fail with a length mismatch.
    df_compare = pd.DataFrame(
        {
            "actual": np.asarray(model["valid"]),
            "predicted": np.asarray(model["forecast"]),
        },
        index=model["valid"].index,
    )

    # Per-observation absolute error and absolute percentage error.
    # The "mape (%)" column name is kept for existing consumers of the CSV.
    df_compare["abs_error"] = np.abs(df_compare["actual"] - df_compare["predicted"])
    df_compare["mape (%)"] = (df_compare["abs_error"] / df_compare["actual"]) * 100

    df_compare.to_csv(ARIMA_RESULTS_DIR / f"{tk}_comparison.csv")


In [11]:
for tk, model in best_models.items():
    valid = model["valid"]
    forecast = model["forecast"]

    # One figure per ticker: held-out actuals versus the ARIMA forecast.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.plot(valid.index, valid, label="Actual", linewidth=2)
    ax.plot(valid.index, forecast, label="Predicción", linestyle="--", linewidth=2)
    ax.set_title(f"{tk} — ARIMA {model['order']} (últimos 30 días)")
    ax.legend()
    ax.grid(True)

    # Save to disk and close the figure so figures do not pile up in memory.
    fig.savefig(ARIMA_FIG_DIR / f"{tk}_forecast.png", dpi=300)
    plt.close(fig)
