# ACCIONES

In [None]:
# ================================================
# Pair Trading en Colab (sin pip, 100% compatible)
# Fuente de datos: Stooq (CSV HTTP)
# ================================================

import io
import time
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Intentar usar statsmodels (si no está, seguimos sin Engle–Granger)
try:
    import statsmodels.api as sm
    from statsmodels.tsa.stattools import coint
    HAS_SM = True
except Exception:
    HAS_SM = False
    sm = None

In [None]:
# Notebook-wide display defaults for plots and DataFrame output.
plt.rcParams["figure.figsize"] = (10, 4.5)  # default figure size (width, height)
plt.rcParams["axes.grid"] = True  # draw a grid on every axes by default
# Render floats with thousands separators and six decimal places.
pd.set_option("display.float_format", lambda x: f"{x:,.6f}")

In [None]:
# ------------------------------
# 1) Loader robusto desde Stooq
# ------------------------------
def stooq_url(ticker: str, interval: str = "d") -> str:
    """Return the Stooq CSV download URL for ``ticker``.

    The ticker is stripped of surrounding whitespace and lower-cased before
    being placed in the query string (e.g. ``" AAPL "`` -> ``aapl``).
    ``interval`` is inserted verbatim; the original notes list ``'d'`` (daily),
    ``'w'`` (weekly) and ``'m'`` (monthly).
    """
    symbol = ticker.strip().lower()
    template = "https://stooq.com/q/d/l/?s={}&i={}"
    return template.format(symbol, interval)

def fetch_stooq_series(ticker: str, start=None, end=None, tries: int = 3, pause: float = 1.5) -> pd.Series:
    """Download the daily ``Close`` series for ``ticker`` from Stooq, with retries.

    Parameters
    ----------
    ticker : symbol passed to ``stooq_url``.
    start, end : optional inclusive date bounds; anything ``pd.to_datetime``
        accepts. ``None`` leaves that side unbounded.
    tries : maximum number of download attempts.
    pause : base delay in seconds; the wait after attempt ``k`` (1-based) is
        ``pause * k``. No wait happens after the final attempt.

    Returns
    -------
    pd.Series of floats indexed by date (ascending), named ``ticker.upper()``.

    Raises
    ------
    RuntimeError
        If no attempt produced a non-empty series. The message carries the
        last failure reason and the exception is chained to the underlying
        error when there was one.
    """
    url = stooq_url(ticker, "d")
    last_err = None
    for attempt in range(tries):
        try:
            raw = pd.read_csv(url)
            # Expected layout: Date,Open,High,Low,Close,Volume
            if "Date" in raw.columns and "Close" in raw.columns:
                raw["Date"] = pd.to_datetime(raw["Date"], errors="coerce", utc=False)
                raw = raw.dropna(subset=["Date"]).sort_values("Date")
                closes = raw.set_index("Date")["Close"].astype(float)
                if start is not None:
                    closes = closes[closes.index >= pd.to_datetime(start)]
                if end is not None:
                    closes = closes[closes.index <= pd.to_datetime(end)]
                closes.name = ticker.upper()
                if not closes.empty:
                    return closes
                # Record why this attempt failed so the final error is informative.
                last_err = ValueError("sin datos en el rango solicitado")
            else:
                last_err = ValueError(
                    f"respuesta sin columnas Date/Close (columnas: {list(raw.columns)})"
                )
        except Exception as e:
            # Best-effort download: remember the failure and retry.
            last_err = e
        # Linear back-off between attempts; pointless after the last one.
        if attempt < tries - 1:
            time.sleep(pause * (attempt + 1))
    raise RuntimeError(f"Stooq fallo para {ticker}. Último error: {last_err}") from last_err

def load_prices_stooq(tickers, start=None, end=None) -> pd.DataFrame:
    """Download ``Close`` prices for several tickers and align them in one frame.

    Parameters
    ----------
    tickers : iterable of ticker symbols, or a single symbol as a string.
    start, end : optional date bounds forwarded to ``fetch_stooq_series``.

    Returns
    -------
    pd.DataFrame with one column per ticker (named by ``fetch_stooq_series``),
    forward-filled and with any row that still has a missing value dropped.

    Raises
    ------
    ValueError
        If ``tickers`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    symbols = list(tickers)
    if not symbols:
        raise ValueError("load_prices_stooq: se requiere al menos un ticker")

    series = [fetch_stooq_series(t, start=start, end=end) for t in symbols]
    # Forward-fill assumes chronological order; sorting is a no-op when the
    # concatenated index is already ordered.
    df = pd.concat(series, axis=1).sort_index()
    # Gaps may not be simultaneous across tickers: forward-fill them, then drop
    # the leading rows where some ticker has no data yet.
    df = df.ffill().dropna(how="any")
    return df

def regress_y_on_x(y: pd.Series, x: pd.Series):
    """Fit ``y = const + beta * x`` by ordinary least squares.

    Uses statsmodels when the module-level flag ``HAS_SM`` is true, otherwise
    the closed-form simple-regression formulas computed with numpy.
    Observations where either value is NaN are ignored in both paths.

    Returns
    -------
    (const, beta) : tuple of floats.
        In the statsmodels path ``const`` falls back to 0.0 and ``beta`` to 1.0
        if the fitted result does not report that parameter. In the numpy path
        ``beta`` falls back to 1.0 when ``x`` has zero variance.
    """
    if HAS_SM:
        # Fixed regressor name so the parameter lookup works even if x.name is None.
        X = sm.add_constant(x.rename("x"))
        # Regress on the design matrix WITH the constant; fitting on the bare
        # series would force the line through the origin.
        res = sm.OLS(y, X, missing="drop").fit()
        const = float(res.params.get("const", 0.0))
        beta = float(res.params.get("x", 1.0))
        return const, beta

    xv = np.asarray(x, dtype=float)
    yv = np.asarray(y, dtype=float)
    keep = ~(np.isnan(xv) | np.isnan(yv))
    xv, yv = xv[keep], yv[keep]
    # Use raw sums of squares for numerator and denominator so both share the
    # same normalisation (mixing ddof=1 and ddof=0 biases the slope).
    dx = xv - xv.mean()
    sxx = float(dx @ dx)
    sxy = float(dx @ (yv - yv.mean()))
    beta = sxy / sxx if sxx > 0 else 1.0
    const = float(yv.mean() - beta * xv.mean())
    return const, beta

def build_spread(y: pd.Series, x: pd.Series) -> pd.Series:
    """Return the residual of ``y`` after removing its linear fit on ``x``.

    The intercept and slope come from ``regress_y_on_x(y, x)``; the result is
    ``y - (intercept + slope * x)``.
    """
    intercept, slope = regress_y_on_x(y, x)
    fitted = intercept + slope * x
    return y - fitted

def zscore(series: pd.Series, win: int = 60) -> pd.Series:
    """Return the rolling z-score of ``series`` over a ``win``-observation window.

    Each value is ``(series - rolling_mean) / rolling_std``. A value is produced
    once the window holds at least ``max(10, win // 3)`` observations, capped at
    ``win`` so short windows remain valid. Earlier positions are NaN.

    The returned series is named ``"zscore"``.
    """
    # rolling() needs an integer min_periods that does not exceed the window:
    # use floor division instead of win/3 and cap the floor of 10 at win.
    min_obs = min(win, max(10, win // 3))
    window = series.rolling(win, min_periods=min_obs)
    z = (series - window.mean()) / window.std()
    z.name = "zscore"
    return z

### Señales de backtest

def signal_from_z(z: pd.Series, z_in = 2.0, z_out = 0.5) -> pd.Series:
    """Turn a z-score series into a position series of -1, 0 or +1.

    State machine, evaluated from the second observation onward (the first
    position is always 0):

    * flat: go short (-1) when ``z > z_in``; go long (+1) when ``z < -z_in``.
    * in a position: go flat when ``abs(z) < z_out``, otherwise hold.

    NaN z-scores fail every comparison, so the previous state is carried.
    The result shares ``z``'s index and is named ``"position"``.
    """
    states = np.zeros(len(z), dtype=int)
    current = 0  # position carried over from the previous observation
    for idx, value in enumerate(z.to_numpy()[1:], start=1):
        if current != 0:
            if abs(value) < z_out:
                current = 0
        else:
            if value > z_in:
                current = -1
            if value < -z_in:
                current = +1
        states[idx] = current
    return pd.Series(states, index=z.index, name="position")

def pnl_from_spread(spread: pd.Series, position: pd.Series,
                    cost_bps_per_leg: float = 5.0, legs: int = 2) -> pd.DataFrame:
    """Compute per-period and cumulative P&L of trading ``spread`` with ``position``.

    The position is lagged by one period before being applied, so the signal
    observed at ``t`` earns the spread change from ``t`` to ``t + 1``.

    Parameters
    ----------
    spread : spread level per period.
    position : desired position per period.
    cost_bps_per_leg : transaction cost per leg, in basis points.
    legs : number of legs traded on each position change.

    Returns
    -------
    pd.DataFrame with columns ``spread``, ``dS`` (spread change),
    ``positition`` (legacy misspelled name, kept for existing callers),
    ``position_eff`` (lagged position), ``turns`` (absolute change in the
    lagged position), ``costs``, ``pnl_raw``, ``pnl_net``, ``pnl_acum``
    (cumulative net P&L) and ``position`` (correctly spelled copy of the input
    position).
    """
    dS = spread.diff()
    # Trade on the previous period's signal; the first period is flat.
    pos_eff = position.shift(1).fillna(0).astype(int)
    pnl_raw = pos_eff * dS

    # A flip from -1 to +1 counts as two turns.
    turns = pos_eff.diff().abs().fillna(0)
    # Cost per unit of turnover, subtracted directly from P&L in spread units.
    cost_per_turn = (cost_bps_per_leg / 1e4) * legs
    costs = turns * cost_per_turn

    pnl_net = pnl_raw - costs
    out = pd.DataFrame({
        "spread": spread,
        "dS": dS,
        # Misspelled key kept so code that already reads it does not break.
        "positition": position,
        "position_eff": pos_eff,
        "turns": turns,
        "costs": costs,
        "pnl_raw": pnl_raw,
        "pnl_net": pnl_net,
        "pnl_acum": pnl_net.cumsum(),
        # Correctly spelled column, appended last to keep the original order.
        "position": position,
    })
    return out

In [None]:
# Parametros

START = "2018-05-21"
END = None
WIN_Z = 60
Z_IN, Z_OUT