In [2]:
import sys, pathlib
import pandas as pd
import numpy as np
import joblib

# Ruta al proyecto
PROJECT_ROOT = pathlib.Path().resolve().parent
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from src import config as cfg
from src import evol_utils as eu
from sklearn.preprocessing import StandardScaler

# === Paths dinámicos ===
if cfg.MODEL_TYPE == "lstm":
    MODEL_PATH = cfg.MODELS / cfg.LSTM_MODEL_NAME
    DATA_PATH  = cfg.DATA / "processed" / "lstm_data.pkl"
elif cfg.MODEL_TYPE == "lstm5d":
    MODEL_PATH = cfg.MODELS / cfg.LSTM5D_MODEL_NAME
    DATA_PATH  = cfg.DATA / "processed" / "lstm5d_data.pkl"
elif cfg.MODEL_TYPE == "gru5d":
    MODEL_PATH = cfg.MODELS / cfg.GRU5D_MODEL_NAME
    DATA_PATH = cfg.DATA / "processed" / "gru5d_data.pkl"
elif cfg.MODEL_TYPE == "xgb":
    MODEL_PATH = cfg.MODELS / cfg.XGB_MODEL_NAME
    DATA_PATH  = cfg.DATA / "processed" / "xgb_data.pkl"
elif cfg.MODEL_TYPE == "cnn5d":
    MODEL_PATH = cfg.MODELS / "cnn5d.keras"
    DATA_PATH = cfg.DATA / "processed" / "cnn5d_data.pkl"
    scaler_cnn5d = joblib.load(cfg.MODELS / "scaler_X_cnn5d.pkl")


else:
    raise ValueError(f"Modelo '{cfg.MODEL_TYPE}' no soportado")

PRICES_PATH = cfg.DATA / "raw" / "prices.parquet"

print(f"🧠 Modelo activo: {cfg.MODEL_TYPE}")

🧠 Modelo activo: lstm


In [4]:
# === Datos ===
df_prices = pd.read_parquet(PRICES_PATH).sort_index()
lstm_data = joblib.load(DATA_PATH)
tickers = lstm_data["tickers"]
df_prices = df_prices[tickers]
df_ret = np.log(df_prices / df_prices.shift(1)).dropna()

# Momentum + features
ret5 = df_ret.rolling(5).sum()
vol5 = df_ret.rolling(5).std()
momentum = (ret5 / vol5).shift(1)
df_feat = pd.concat([df_ret.shift(1), momentum], axis=1).dropna()

print("✅ df_feat shape:", df_feat.shape)

# === Cargar modelo y escalador ===
if cfg.MODEL_TYPE in ["lstm", "lstm5d","cnn5d"]:
    from tensorflow import keras
    model = keras.models.load_model(MODEL_PATH, compile=False)
elif cfg.MODEL_TYPE == "gru5d":
    from tensorflow import keras
    model = keras.models.load_model(cfg.MODELS / cfg.GRU5D_MODEL_NAME, compile=False)
    DATA_PATH = cfg.DATA / "processed" / "gru5d_data.pkl"
elif cfg.MODEL_TYPE == "xgb":
    model = joblib.load(MODEL_PATH)
else:
    raise ValueError(f"Modelo '{cfg.MODEL_TYPE}' no soportado")

# Solo para lstm5d
if cfg.MODEL_TYPE == "lstm5d":
    scaler_lstm5d = joblib.load(cfg.MODELS / "scaler_X_lstm5d.pkl")

    print(f"✅ Modelo CNN cargado y listo: {model.input_shape} → {model.output_shape}")

✅ df_feat shape: (1495, 80)


In [6]:
# === rebalanceo / main.py (fragmento relevante) ===

def rebalancear_en_fecha(fecha, df_feat, model, w_prev=None):
    try:
        idx = df_feat.index.get_loc(fecha)
        ventana = df_feat.iloc[idx - cfg.WINDOW: idx]

        if cfg.MODEL_TYPE == "lstm":
            ventana = ventana.iloc[:, :len(tickers)]
            scaler = StandardScaler()
            X_input = scaler.fit_transform(ventana.values)
            X_input = np.expand_dims(X_input, 0)
            r_hat = model.predict(X_input, verbose=0)[0]

        elif cfg.MODEL_TYPE == "lstm5d":
            X_input = scaler_lstm5d.transform(ventana.values).reshape(1, cfg.WINDOW, -1)
            r_hat = model.predict(X_input, verbose=0)[0]

        elif cfg.MODEL_TYPE == "cnn5d":
            X_input = scaler_cnn5d.transform(ventana.values).reshape(1, cfg.WINDOW, -1)
            r_hat = model.predict(X_input, verbose=0)[0]

        elif cfg.MODEL_TYPE == "gru5d":
            ventana_ret = df_ret.iloc[idx - cfg.WINDOW: idx]
            ret5 = df_ret.rolling(5).sum()
            vol5 = df_ret.rolling(5).std()
            momentum = (ret5 / (vol5 + 1e-6)).shift(1)
            ventana_mom = momentum.loc[ventana_ret.index]
            ventana = pd.concat([ventana_ret, ventana_mom], axis=1)
            scaler = StandardScaler()
            X_input = scaler.fit_transform(ventana.values).reshape(1, cfg.WINDOW, -1)
            r_hat = model.predict(X_input, verbose=0)[0]

        elif cfg.MODEL_TYPE == "xgb":
            scaler = StandardScaler()
            X_input = scaler.fit_transform(ventana.values)
            r_hat = np.array([
                model[i].predict(X_input[-1].reshape(1, -1))[0]
                for i in range(X_input.shape[1])
            ])
        else:
            raise ValueError("Tipo de modelo no reconocido")

        fecha_ret = df_feat.index[idx]
        ventana_ret = df_ret.loc[fecha_ret - pd.Timedelta(days=cfg.WINDOW*2):fecha_ret]
        Sigma = ventana_ret[-cfg.WINDOW:].cov().values

        if r_hat.shape[0] != Sigma.shape[0]:
            print(f"⚠️ Dim mismatch {fecha.date()}")
            return None

        # Optimización con penalización (solo dentro del solver)
        res = eu.resolver_optimizacion(r_hat, Sigma, w_prev=w_prev)
        w_star = eu.elegir_w_star(res, r_hat, Sigma, w_prev=w_prev)


        # Turnover medido fuera para logging
        turnover = np.sum(np.abs(w_star - w_prev)) if w_prev is not None else 1.0

        ret_bruto = df_ret.iloc[idx: idx + cfg.REBAL_FREQ].values @ w_star
        ret_neto = ret_bruto.sum() - turnover * cfg.COST_TRADE

        return {
            "fecha": fecha,
            "ret_bruto": ret_bruto.sum(),
            "ret_neto": ret_neto,
            "turnover": turnover,
            "w_star": w_star
        }

    except Exception as e:
        print(f"ERROR {fecha.date()}: {e}")
        return None


In [8]:
# === Bucle de back-test (corregido) ===

fechas = df_feat.loc[cfg.START_BACKTEST:].index
resultados = []
w_prev = None                       # ← 1) sin cartera previa en t₀

for i in range(cfg.WINDOW, len(fechas) - cfg.REBAL_FREQ, cfg.REBAL_FREQ):
    fecha = fechas[i]
    out = rebalancear_en_fecha(fecha, df_feat, model, w_prev=w_prev)

    if out is not None:
        resultados.append(out)

        # 2) turnover correcto para el log
        turno_log = 0.0 if w_prev is None else np.sum(np.abs(out["w_star"] - w_prev))

        print(f"✅ {fecha.date()} | Retorno bruto {out['ret_bruto']:.4%} | "
              f"neto {out['ret_neto']:.4%} | turnover {turno_log:.2%}")

        w_prev = out["w_star"]      # guarda la cartera para el próximo ciclo
    else:
        print(f"⚠️ {fecha.date()} | Resultado nulo")


✅ 2019-04-24 | Retorno bruto 3.3329% | neto 3.1329% | turnover 0.00%
✅ 2019-05-10 | Retorno bruto 1.0716% | neto 0.9785% | turnover 46.55%
✅ 2019-05-30 | Retorno bruto 0.3522% | neto 0.2524% | turnover 49.90%
✅ 2019-06-18 | Retorno bruto -1.9611% | neto -2.0570% | turnover 47.97%
✅ 2019-07-09 | Retorno bruto 3.4546% | neto 3.3604% | turnover 47.09%
✅ 2019-07-25 | Retorno bruto 1.1135% | neto 1.0154% | turnover 49.05%
✅ 2019-08-13 | Retorno bruto -0.9476% | neto -1.0370% | turnover 44.68%
✅ 2019-08-29 | Retorno bruto 3.0448% | neto 2.9492% | turnover 47.80%
✅ 2019-09-18 | Retorno bruto 0.8127% | neto 0.7242% | turnover 44.21%
✅ 2019-10-04 | Retorno bruto 2.1206% | neto 2.0364% | turnover 42.09%
✅ 2019-10-23 | Retorno bruto 1.1815% | neto 1.0859% | turnover 47.82%
✅ 2019-11-08 | Retorno bruto 0.7826% | neto 0.6835% | turnover 49.60%
✅ 2019-11-27 | Retorno bruto 1.3007% | neto 1.2172% | turnover 41.74%
✅ 2019-12-18 | Retorno bruto 0.1033% | neto 0.0086% | turnover 47.39%
✅ 2020-01-10 | Re

In [16]:
import pandas as pd
import joblib

res_df = pd.DataFrame(resultados).set_index("fecha")
joblib.dump(res_df, cfg.RESULT / f"backtest_{cfg.MODEL_TYPE}.pkl")
print("✅ Backtest guardado:", cfg.RESULT / f"backtest_{cfg.MODEL_TYPE}.pkl")
print(pd.DataFrame(resultados).columns)


✅ Backtest guardado: C:\Users\ferra\Documents\TFM\results\backtest_lstm.pkl
Index(['fecha', 'ret_bruto', 'ret_neto', 'turnover', 'w_star'], dtype='object')
