
# Cuaderno de estudio: *Quantopian* (offline)
Este cuaderno replica las ideas de las lecciones de Quantopian con:
- mini-API para backtesting simple,
- investigación y estrategia de *pairs*,
- bandas de Bollinger (ejercicio + solución),
- demo estilo *pipelines* (factores, filtros, clasificadores).


In [None]:

import math, numpy as np, pandas as pd, matplotlib.pyplot as plt
# Seed NumPy's global RNG so the synthetic series generated below are reproducible.
np.random.seed(42)
# Limit how many rows pandas prints when a DataFrame/Series is displayed.
pd.options.display.max_rows = 10



## 1) Datos sintéticos


In [None]:

import pandas as pd, numpy as np, math
from datetime import timedelta

def make_synth_prices(name, start_price, dates, drift=0.08, vol=0.25, common=None, idiosyn=None):
    """Simulate a daily price path driven by a two-factor log-return model.

    Each daily shock is ``0.7 * common + 0.3 * idiosyn``; log-returns use a
    time step of 1/252 and the usual ``drift - vol**2 / 2`` correction.

    Args:
        name: Name given to the returned Series.
        start_price: Price assigned to the first date.
        dates: Index for the path; its length sets the number of observations.
        drift: Annualised drift.
        vol: Annualised volatility.
        common: Shock array shared across assets; drawn from N(0, 1) with the
            global NumPy RNG when omitted.
        idiosyn: Asset-specific shock array; drawn from N(0, 1) when omitted.

    Returns:
        ``pd.Series`` of prices indexed by ``dates``.
    """
    n_obs = len(dates)
    # Draw order (common first, then idiosyncratic) is kept so seeded runs reproduce.
    if common is None:
        common = np.random.normal(0, 1, n_obs)
    if idiosyn is None:
        idiosyn = np.random.normal(0, 1, n_obs)

    step = 1 / 252.0
    blended_shock = 0.7 * common + 0.3 * idiosyn
    log_returns = (drift - 0.5 * vol**2) * step + vol * np.sqrt(step) * blended_shock

    # The first return is discarded: the first date carries start_price itself.
    level = start_price
    path = [level]
    for log_ret in log_returns[1:]:
        # Prices are floored at 0.5, which makes the path order-dependent.
        level = max(0.5, level * math.exp(log_ret))
        path.append(level)
    return pd.Series(path, index=dates, name=name)

# Business-day calendar covering 2016-2018 that indexes every synthetic series.
dates = pd.bdate_range("2016-01-01","2018-12-31", freq="C")
# AAL and UAL are built from the same common shock series (weighted 0.7 inside
# make_synth_prices), which is what makes them a usable pair for section 3.
common_pair = np.random.normal(0,1,len(dates))
aal = make_synth_prices("AAL", 50, dates, drift=0.05, vol=0.28, common=common_pair)
ual = make_synth_prices("UAL", 47, dates, drift=0.055, vol=0.27, common=common_pair)
def rand_asset(tag, p, drift, vol): 
    """Build a series on the global ``dates`` with freshly drawn common and idiosyncratic shocks."""
    return make_synth_prices(tag, p, dates, drift=drift, vol=vol)
# NOTE: these calls consume the global NumPy RNG in order; reordering them changes the data.
jnj  = rand_asset("JNJ", 100, 0.06, 0.18)
aapl = rand_asset("AAPL",  95, 0.12, 0.35)
amzn = rand_asset("AMZN", 300, 0.18, 0.50)
# One column per ticker, aligned on the shared date index.
prices = pd.concat([aal, ual, jnj, aapl, amzn], axis=1)
prices.head()



## 2) Mini-API tipo Quantopian


In [None]:

class Context:
    """Mutable account state passed to the strategy on every backtest step."""

    def __init__(self, initial_cash=1_000_000.0):
        starting_cash = float(initial_cash)
        self.initial_cash = starting_cash
        self.cash = starting_cash
        # asset -> signed number of shares held
        self.positions = dict()
        # record name -> list of recorded values
        self._records = dict()
        # Portfolio value per processed bar, and the matching dates.
        self._equity_curve = list()
        self._dates = list()

    @property
    def portfolio_value(self):
        """Most recent marked-to-market value, or the initial cash before any mark."""
        try:
            return self._last_pv
        except AttributeError:
            # `_last_pv` only exists once the portfolio has been marked to market.
            return self.initial_cash

def record(context, **kv):
    """Append each keyword value to the list stored under its name in ``context._records``."""
    store = context._records
    for key, value in kv.items():
        if key not in store:
            store[key] = []
        store[key].append(value)

def order_target_percent(context, data, asset, target_pct, price,
                         commission_rate=0.0005, min_commission=1.0):
    """Trade ``asset`` so its position is about ``target_pct`` of portfolio value.

    The target share count is truncated toward zero, so neither a long nor a
    short target is ever exceeded, and a target of 0 closes the position
    exactly. Commission is always a cost: it is deducted from cash on buys
    *and* on sells. Buys that cash cannot cover are scaled down to the largest
    affordable size (commission included); sells and shorts are not cash-limited.

    Args:
        context: Object exposing ``portfolio_value``, ``cash`` and ``positions``.
        data: Unused; kept for interface compatibility.
        asset: Key of the asset in ``context.positions``.
        target_pct: Desired position as a fraction of ``context.portfolio_value``
            (negative for a short).
        price: Execution price per share.
        commission_rate: Proportional commission on traded notional.
        min_commission: Minimum commission charged per order.

    Returns:
        None. ``context.cash`` and ``context.positions`` are updated in place;
        nothing happens if the price is not a positive finite number or the
        resulting order size is zero.
    """
    # A non-positive or NaN price cannot be traded (and would break the division).
    if not (np.isfinite(price) and price > 0):
        return
    target_val = target_pct * context.portfolio_value
    if not np.isfinite(target_val):
        return

    held = context.positions.get(asset, 0.0)
    # int() truncates toward zero; working from target shares (rather than
    # flooring the value delta) avoids overshooting negative targets.
    target_shares = int(target_val / price)
    shares_delta = int(target_shares - held)
    if shares_delta == 0:
        return

    def fee(n_shares):
        return max(min_commission, commission_rate * abs(n_shares) * price)

    if shares_delta > 0 and shares_delta * price + fee(shares_delta) > context.cash:
        # First guess assumes the proportional fee applies; the loop then
        # corrects for the minimum fee and for float rounding.
        cap = int(context.cash / (price * (1.0 + commission_rate)))
        shares_delta = max(0, min(shares_delta, cap))
        while shares_delta > 0 and shares_delta * price + fee(shares_delta) > context.cash:
            shares_delta -= 1
        if shares_delta == 0:
            return

    # Notional is signed (negative for sells); the fee is always paid.
    context.cash -= shares_delta * price + fee(shares_delta)
    context.positions[asset] = held + shares_delta

def mark_to_market(context, data_row):
    """Value the portfolio at the prices in ``data_row`` and cache the result.

    Args:
        context: Object exposing ``cash`` and ``positions`` (asset -> shares).
        data_row: Mapping from asset to its current price.

    Returns:
        Cash plus the value of every position; also stored on
        ``context._last_pv``.
    """
    total = context.cash
    holdings = context.positions
    for symbol in holdings:
        total += holdings[symbol] * data_row[symbol]
    context._last_pv = total
    return total

def run_daily_backtest(prices_df, algo_step, start=None, end=None, initial_cash=1_000_000.0):
    """Run ``algo_step`` once per row of ``prices_df`` and track portfolio value.

    Args:
        prices_df: DataFrame of prices, one row per date and one column per asset.
        algo_step: Callable ``algo_step(context, row, date)`` invoked on every
            row before the portfolio is marked to market.
        start: Optional label; rows before it are dropped.
        end: Optional label; rows after it are dropped.
        initial_cash: Starting cash for the new ``Context``.

    Returns:
        Tuple ``(context, equity, rec)``: the final context, the per-date
        equity Series, and a dict mapping each recorded name to a Series.
        Each recorded value is indexed by the date of the step that recorded
        it, so strategies may record on only some bars (e.g. skip warm-up).
    """
    context = Context(initial_cash)
    if start is not None:
        prices_df = prices_df.loc[start:]
    if end is not None:
        prices_df = prices_df.loc[:end]

    # Per record name, the date on which each value was appended. Indexing
    # records with the full date list would raise a length mismatch whenever
    # a strategy skips `record` on some bars.
    record_dates = {}
    for date, row in prices_df.iterrows():
        algo_step(context, row, date)
        pv = mark_to_market(context, row)
        context._dates.append(date)
        context._equity_curve.append(pv)
        for key, values in context._records.items():
            stamps = record_dates.setdefault(key, [])
            # Everything appended since the previous bar belongs to this date.
            stamps.extend([date] * (len(values) - len(stamps)))

    rec = {k: pd.Series(v, index=record_dates[k], name=k) for k, v in context._records.items()}
    equity = pd.Series(context._equity_curve, index=context._dates, name="equity", dtype=float)
    return context, equity, rec



## 3) Investigación Pairs (AAL vs UAL)


In [None]:

# Correlation of the two price series over the whole sample.
pair = prices[['AAL','UAL']].dropna()
corr = pair.corr().iloc[0,1]
print("Correlación AAL-UAL:", round(corr,3))
# Raw price spread and its static z-score (full-sample mean and std).
spread = pair['AAL'] - pair['UAL']
def zscore(x): return (x-x.mean())/x.std()
zs = zscore(spread)

# Spread with its mean, then the static z-score with +/-1 reference lines.
plt.figure(figsize=(10,4)); spread.plot(); plt.axhline(spread.mean()); plt.title("Spread AAL-UAL"); plt.show()
plt.figure(figsize=(10,4)); zs.plot(); plt.axhline(0); plt.axhline(1, linestyle='--'); plt.axhline(-1, linestyle='--'); plt.title("Z-score (estático)"); plt.show()

# Rolling z-score: the 1-day mean (the spread itself) measured against its
# 30-day rolling mean, in units of the 30-day rolling std. The first 29
# values are NaN because the 30-day window is not yet full.
ma1 = spread.rolling(1).mean()
ma30 = spread.rolling(30).mean()
sd30 = spread.rolling(30).std()
z_rolling = (ma1 - ma30)/sd30

plt.figure(figsize=(10,4)); z_rolling.plot(); plt.axhline(0); plt.axhline(1, linestyle='--'); plt.axhline(-1, linestyle='--'); plt.title("Z-score rolling (30)"); plt.show()


**Interpretation:**  
- The spread oscillates around its mean, which is what a mean-reversion strategy needs.  
- The Z-score normalizes deviations from that mean: values > +1 suggest shorting AAL and buying UAL; values < -1 suggest the opposite.  

We expect the Z-score to revert toward 0 over time, i.e. the spread to revert toward its mean.



### 3.1 Estrategia Pairs (simplificada)


In [None]:

def run_pairs_backtest(prices_df, up=1.0, dn=1.0, z_exit=0.1, initial_cash=1_000_000.0,
                       asset_a="AAL", asset_b="UAL", window=30):
    """Backtest a z-score mean-reversion strategy on the spread of two assets.

    The spread is ``asset_a - asset_b``. Its z-score compares the 1-day
    rolling mean with the ``window``-day rolling mean, scaled by the
    ``window``-day rolling std. When flat, the strategy shorts the spread
    (-50% ``asset_a``, +50% ``asset_b``) if ``z > up`` and goes long the
    spread if ``z < -dn``; an open trade is closed once ``|z| < z_exit``.

    Args:
        prices_df: DataFrame containing columns ``asset_a`` and ``asset_b``.
        up: Z-score above which the spread is shorted.
        dn: Magnitude of the negative z-score below which the spread is bought.
        z_exit: Absolute z-score under which an open trade is closed.
        initial_cash: Starting cash.
        asset_a: First leg of the spread.
        asset_b: Second leg of the spread.
        window: Look-back of the rolling mean and std.

    Returns:
        Tuple ``(ctx, equity, rec)`` as returned by ``run_daily_backtest``.
        ``rec`` holds ``z`` (NaN during warm-up) and ``signal``, the position
        state after each bar: -1 short spread, +1 long spread, 0 flat.
    """
    pair_px = prices_df[[asset_a, asset_b]].dropna()
    spr = pair_px[asset_a] - pair_px[asset_b]
    ma_fast = spr.rolling(1).mean()
    ma_slow = spr.rolling(window).mean()
    sd_slow = spr.rolling(window).std()
    z = (ma_fast - ma_slow) / sd_slow
    state = {"leg": 0}

    def algo(context, row, date):
        zz = z.get(date, np.nan)
        # No trading while the rolling window is still warming up.
        if not np.isnan(zz):
            if state["leg"] == 0:
                if zz > up:
                    order_target_percent(context, row, asset_a, -0.5, row[asset_a])
                    order_target_percent(context, row, asset_b, 0.5, row[asset_b])
                    state["leg"] = -1
                elif zz < -dn:
                    order_target_percent(context, row, asset_a, 0.5, row[asset_a])
                    order_target_percent(context, row, asset_b, -0.5, row[asset_b])
                    state["leg"] = +1
            elif abs(zz) < z_exit:
                order_target_percent(context, row, asset_a, 0.0, row[asset_a])
                order_target_percent(context, row, asset_b, 0.0, row[asset_b])
                state["leg"] = 0
        # Record every key on every bar so each record has exactly one value
        # per processed date, whatever branch was taken above.
        record(context, z=zz, signal=state["leg"])

    ctx, equity, rec = run_daily_backtest(pair_px, algo, initial_cash=initial_cash)
    return ctx, equity, rec

# Run the pairs strategy with +/-1 entry thresholds and a 0.1 exit threshold.
ctx, equity, rec = run_pairs_backtest(prices, up=1.0, dn=1.0, z_exit=0.1)
# Summary statistics computed from the equity curve:
#   - total return relative to the first equity value,
#   - maximum drawdown relative to the running peak,
#   - mean/std of daily returns scaled by sqrt(252), with no risk-free rate.
stats = pd.Series({
    "Retorno total": equity.iloc[-1]/equity.iloc[0]-1,
    "Max DD": (equity/equity.cummax()-1).min(),
    "Sharpe≈": np.sqrt(252)*np.mean(equity.pct_change().dropna())/np.std(equity.pct_change().dropna())
})
print(stats.apply(lambda x: round(x,4)))

plt.figure(figsize=(10,4)); equity.plot(); plt.title("Equity curve – Pairs"); plt.show()
# Plot the recorded z-score only if the strategy recorded one.
z_series = rec.get("z", pd.Series(dtype=float))
if len(z_series)>0:
    plt.figure(figsize=(10,3)); z_series.plot(); plt.axhline(0); plt.axhline(1, linestyle='--'); plt.axhline(-1, linestyle='--'); plt.title("Z-score y señales"); plt.show()



## 4) Bandas de Bollinger (ejercicio + solución)


### 🎯 Goal of Bollinger Bands

Bollinger Bands are used to detect **overbought** or **oversold** conditions:
- **Upper band = SMA + k×STD** → price above this may be too expensive.
- **Lower band = SMA − k×STD** → price below this may be too cheap.

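The strategy is to **enter trades when price crosses a band** (short above the upper band, long below the lower band)  
and exit when it returns to the SMA. The implementation below only flips between long and short at the bands; the SMA exit is left as exercise 2 in section 6.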


In [None]:

def bollinger_backtest(prices_df, asset="JNJ", lookback=20, k=2.0, initial_cash=1_000_000.0):
    """Backtest a Bollinger-band reversal strategy on a single asset.

    Bands are ``SMA +/- k * STD`` over ``lookback`` bars. The strategy targets
    -100% when price closes above the upper band and +100% when it closes
    below the lower band; it holds the current side otherwise (there is no
    exit at the SMA).

    Args:
        prices_df: DataFrame containing a column named ``asset``.
        asset: Column to trade.
        lookback: Window of the rolling mean and std.
        k: Band width in standard deviations.
        initial_cash: Starting cash.

    Returns:
        Tuple ``(ctx, equity, rec, bands)``. ``rec`` holds ``price``,
        ``upper``, ``lower``, ``ma`` and ``boll_signal`` (the side held after
        each bar: -1, 0 or +1), one value per bar with NaN bands during
        warm-up. ``bands`` is a DataFrame of price/upper/lower/ma restricted
        to bars where the bands are defined.
    """
    # Bands and backtest use the same cleaned frame, so NaN prices never
    # reach the strategy or the mark-to-market step.
    px = prices_df[[asset]].dropna()
    close = px[asset]
    sma = close.rolling(lookback).mean()
    std = close.rolling(lookback).std()
    upper = sma + k * std
    lower = sma - k * std
    state = {"side": 0}

    def algo(context, row, date):
        p = row[asset]
        hi = upper.get(date, np.nan)
        lo = lower.get(date, np.nan)
        mid = sma.get(date, np.nan)
        # No trading until the rolling window is full.
        if not (np.isnan(hi) or np.isnan(lo)):
            if p > hi and state["side"] != -1:
                order_target_percent(context, row, asset, -1.0, p)
                state["side"] = -1
            elif p < lo and state["side"] != +1:
                order_target_percent(context, row, asset, +1.0, p)
                state["side"] = +1
        # Record every key on every bar so each record has exactly one value
        # per processed date.
        record(context, price=p, upper=hi, lower=lo, ma=mid, boll_signal=state["side"])

    ctx, equity, rec = run_daily_backtest(px, algo, initial_cash=initial_cash)
    empty = pd.Series(dtype=float)
    bands = pd.DataFrame({
        "price": rec.get("price", empty),
        "upper": rec.get("upper", empty),
        "lower": rec.get("lower", empty),
        "ma": rec.get("ma", empty),
    }).dropna()  # drop warm-up rows where the bands are still NaN
    return ctx, equity, rec, bands

# Run the Bollinger strategy on JNJ with a 20-bar window and 2-sigma bands.
ctx_b, eq_b, rec_b, bands = bollinger_backtest(prices, asset="JNJ", lookback=20, k=2.0)
# Same summary statistics as for the pairs strategy: total return, maximum
# drawdown, and mean/std of daily returns scaled by sqrt(252).
stats_b = pd.Series({
    "Retorno total": eq_b.iloc[-1]/eq_b.iloc[0]-1,
    "Max DD": (eq_b/eq_b.cummax()-1).min(),
    "Sharpe≈": np.sqrt(252)*np.mean(eq_b.pct_change().dropna())/np.std(eq_b.pct_change().dropna())
})
print(stats_b.apply(lambda x: round(x,4)))
plt.figure(figsize=(10,4)); eq_b.plot(); plt.title("Equity curve – Bollinger (JNJ)"); plt.show()
# Plot price with its moving average and bands when any rows were recorded.
if len(bands): 
    ax = bands[['price','ma','upper','lower']].plot(figsize=(10,4))
    ax.set_title("Bandas de Bollinger – JNJ")
    plt.show()



## 5) Demo estilo Pipelines (factores, filtros, clasificadores, máscara)



### 🔧 Pipeline Demo

Pipelines allow:
- **Factors**: numerical scores (e.g., momentum, P/E ratio).
- **Filters**: boolean masks to select securities.
- **Classifiers**: grouping assets into categories.

In practice, pipelines help build systematic screens:  
"Select the top 50 stocks by momentum, in the Technology sector, with low volatility."


In [None]:

# Synthetic universe: 200 assets sharing one common shock series, each with its
# own idiosyncratic shocks, start price, drift and volatility.
# NOTE: every draw below uses the global NumPy RNG, so statement order matters.
n_assets = 200
symbols = [f"SYM{i:03d}" for i in range(n_assets)]
common = np.random.normal(0,1,len(dates))
universe = {}
for s in symbols:
    idio = np.random.normal(0,1,len(dates))
    start = np.random.uniform(2, 50)
    universe[s] = make_synth_prices(s, start, dates, drift=np.random.uniform(0.02,0.15), vol=np.random.uniform(0.15,0.5), common=common, idiosyn=idio)
univ_df = pd.DataFrame(universe)

# Exchange "classifier": a random NYSE/NASDAQ label per symbol, drawn from a
# separate seeded RandomState so it does not consume the global RNG.
rng = np.random.RandomState(123)
exchanges = pd.Series(rng.choice(["NYSE","NASDAQ"], size=n_assets), index=symbols, name="exchange")

# Up to 252 rows of history ending at the as-of date, and the last close per symbol.
asof_date = "2017-01-03"
px_asof = univ_df.loc[:asof_date].tail(252)
latest_close = px_asof.tail(1).T[px_asof.index[-1]]
latest_close.name = "latest_close"

# Price mask: only symbols whose latest close is below 5.0.
mask_small_price = latest_close < 5.0

# Latest value of the `win`-bar simple moving average.
def sma(series, win): return series.rolling(win).mean().iloc[-1]

# Factors are computed only for symbols that pass the mask.
sma10 = {}; sma30 = {}
for sym in symbols:
    if not mask_small_price.get(sym, False): 
        continue
    s = px_asof[sym]
    sma10[sym] = sma(s, 10)
    sma30[sym] = sma(s, 30)

sma10 = pd.Series(sma10, name="sma10")
sma30 = pd.Series(sma30, name="sma30")
# Symbols without SMA values (those outside the mask) end up with NaN and are dropped.
factors = pd.concat([latest_close, sma10, sma30], axis=1).dropna()

# Relative gap between the 10-bar and 30-bar SMA.
pct_diff = (factors['sma10'] - factors['sma30'])/factors['sma30']
factors['pct_diff'] = pct_diff

# Screen: positive pct_diff, latest close under 5.0, and listed on NYSE.
screen = (factors['pct_diff']>0) & (factors['latest_close']<5.0) & (exchanges.reindex(factors.index)=="NYSE")
# Keep the 15 highest pct_diff values and display the first 10.
longlist = factors[screen].sort_values('pct_diff', ascending=False).head(15)
longlist.head(10)



## 6) Ejercicios sugeridos
1. **Pairs**: añade stop por *drawdown*, iguala exposición por beta o volatilidad.
2. **Bollinger**: define salida al cruzar SMA y compara resultados.
3. **Pipelines**: agrega un "sector" aleatorio y filtra por sector + máscara de volatilidad.
4. **Costes**: añade *slippage* y experimenta con el modelo de comisiones que ya aplica `order_target_percent`.
