In [112]:
import yfinance as yf
import numpy as np
import pandas as pd

#### **1) Download 6 months historical data for AAPL**

In [113]:
prices = yf.download("AAPL", start="2023-07-23", end="2024-01-23")['Close']
prices

  prices = yf.download("AAPL", start="2023-07-23", end="2024-01-23")['Close']
[*********************100%***********************]  1 of 1 completed


Ticker,AAPL
Date,Unnamed: 1_level_1
2023-07-24,190.438339
2023-07-25,191.297897
2023-07-26,192.167328
2023-07-27,190.902695
2023-07-28,193.481369
...,...
2024-01-16,181.912048
2024-01-17,180.970932
2024-01-18,186.865250
2024-01-19,189.767853


#### **2) Obtain the spread percentage during that day**

In [114]:
df = pd.read_csv("aapl_tick_ba.csv")
df.head()

Unnamed: 0,Local time,Ask,Bid,AskVolume,BidVolume
0,22.01.2024 08:30:00.227 GMT-0600,192.303,192.257,0.012,0.012
1,22.01.2024 08:30:00.332 GMT-0600,192.463,192.277,0.012,0.012
2,22.01.2024 08:30:00.483 GMT-0600,192.343,192.277,0.012,0.012
3,22.01.2024 08:30:00.584 GMT-0600,192.344,192.306,0.012,0.012
4,22.01.2024 08:30:00.744 GMT-0600,192.333,192.297,0.012,0.012


In [115]:
df['spread'] = df['Ask'] - df['Bid']
daily_spread = df['spread'].mean()
daily_spread

np.float64(0.02297702668905871)

**Interpretación:** El spread porcentual indica la diferencia relativa entre el precio de compra (bid) y el precio de venta (ask) en relación con el precio medio (midquote). Un spread más bajo sugiere un mercado más líquido y eficiente, mientras que un spread más alto puede indicar menor liquidez o mayor incertidumbre en el mercado. En este caso, el costo de liquidez implícito en el mercado de AAPL durante ese día fue de aproximadamente 0.012% del precio del activo --> un valor bastante bajo, lo que sugiere que AAPL es un activo con alta liquidez y eficiencia en el mercado.

#### **3) Implement Richard Roll's Spread model**

In [116]:
p = prices['AAPL']
diffs = p.diff().dropna()

cov = diffs.cov(diffs.shift(1))
roll_spread = 2 * np.sqrt(np.abs(cov))

roll_spread

np.float64(1.8930845467528457)

#### **4) Find the window that fits best considering the spread percentage**

In [117]:
goal = daily_spread

def roll_spread_rolling(prices, window):
    d = prices.diff()
    cov = d.rolling(window).cov(d.shift(1))
    spread = 2 * np.sqrt(np.abs(cov))
    return spread

windows = range(2, 101)

results = []

for w in windows:
    s = roll_spread_rolling(p, w)
    mean_spread = s.dropna().mean()

    results.append({
        "window": w,
        "roll_spread_mean": mean_spread,
        "abs_error": abs(mean_spread - goal)
    })

roll_summary = pd.DataFrame(results).sort_values("abs_error")
roll_summary.head(10)

Unnamed: 0,window,roll_spread_mean,abs_error
61,63,1.33048,1.307503
63,65,1.331897,1.30892
64,66,1.334763,1.311786
62,64,1.335961,1.312984
55,57,1.335996,1.313019
67,69,1.341241,1.318264
68,70,1.342765,1.319787
56,58,1.343939,1.320961
66,68,1.345869,1.322892
59,61,1.3459,1.322923
