In [36]:
import yfinance as yf
import numpy as np
import pandas as pd

#### **1) Download 6 months historical data for AAPL**

In [37]:
# Daily AAPL closing prices for the six-month window requested below.
# auto_adjust is passed explicitly instead of relying on the library default:
# omitting it makes yfinance print a FutureWarning about the changed default,
# and spelling it out makes clear that 'Close' is the adjusted series.
prices = yf.download("AAPL", start="2023-07-23", end="2024-01-23", auto_adjust=True)['Close']
prices

  prices = yf.download("AAPL", start="2023-07-23", end="2024-01-23")['Close']
[*********************100%***********************]  1 of 1 completed




Ticker,AAPL
Date,Unnamed: 1_level_1
2023-07-24,190.438309
2023-07-25,191.297882
2023-07-26,192.167313
2023-07-27,190.902679
2023-07-28,193.481369
...,...
2024-01-16,181.912048
2024-01-17,180.970901
2024-01-18,186.865250
2024-01-19,189.767838


#### **2) Obtain the spread percentage during that day**

In [38]:
# Tick-level quote data read from a local CSV; the preview shows the columns
# used below ('Ask', 'Bid') alongside 'Local time' and the two volume columns.
df = pd.read_csv(filepath_or_buffer="aapl_tick_ba.csv")
df.head(n=5)

Unnamed: 0,Local time,Ask,Bid,AskVolume,BidVolume
0,22.01.2024 08:30:00.227 GMT-0600,192.303,192.257,0.012,0.012
1,22.01.2024 08:30:00.332 GMT-0600,192.463,192.277,0.012,0.012
2,22.01.2024 08:30:00.483 GMT-0600,192.343,192.277,0.012,0.012
3,22.01.2024 08:30:00.584 GMT-0600,192.344,192.306,0.012,0.012
4,22.01.2024 08:30:00.744 GMT-0600,192.333,192.297,0.012,0.012


In [39]:
# Midquote: simple average of the ask and bid on each tick.
df['midquote'] = df['Ask'].add(df['Bid']).div(2)
# Quoted spread as a percentage of the midquote, one value per tick.
df['spread_pct'] = df['Ask'].sub(df['Bid']).div(df['midquote']).mul(100)

In [40]:
# Unweighted mean of the per-tick percentage spread over every row of `df`.
daily_spread_pct = df.loc[:, 'spread_pct'].mean()
daily_spread_pct

np.float64(0.011842496618716095)

**Interpretación:** El spread porcentual indica la diferencia relativa entre el precio de compra (bid) y el precio de venta (ask) en relación con el precio medio (midquote). Un spread más bajo sugiere un mercado más líquido y eficiente, mientras que un spread más alto puede indicar menor liquidez o mayor incertidumbre en el mercado. En este caso, el costo de liquidez implícito en el mercado de AAPL durante ese día (el promedio del spread porcentual de todos los ticks) fue de aproximadamente 0.012% del precio del activo. Es un valor bastante bajo, lo que sugiere que AAPL es un activo con alta liquidez y eficiencia en el mercado.

#### **3) Implement Richard Roll's Spread model**

In [41]:
# Full-sample Roll-style spread estimate from the daily closes.
p = prices.loc[:, 'AAPL'].dropna()
returns = p.pct_change().dropna()

# Covariance between each return and the return one row earlier.
cov = returns.cov(returns.shift(periods=1))
# 2 * sqrt(|cov|) scaled by 100 to express it in percent; the absolute value
# keeps the square root defined whatever the sign of the covariance.
roll_spread_pct = np.sqrt(np.abs(cov)) * 200

roll_spread_pct

np.float64(1.039508099127076)

#### **4) Find the window that fits best considering the spread percentage**

In [None]:
# Target value for the window search: the mean percentage spread computed
# from the tick data in the earlier cell.
goal = daily_spread_pct

def roll_spread_rolling_pct(prices, window):
    """Rolling Roll-style spread estimate, in percent.

    Parameters
    ----------
    prices : object exposing ``pct_change()`` (the notebook passes a pandas
        Series of closing prices).
    window : rolling window size forwarded to ``rolling()``.

    Returns
    -------
    ``200 * sqrt(|cov|)`` where ``cov`` is the rolling covariance between the
    simple returns and the same returns shifted by one row. Positions where
    the rolling covariance is NaN stay NaN.
    """
    simple_returns = prices.pct_change()
    lagged_returns = simple_returns.shift(1)
    serial_cov = simple_returns.rolling(window).cov(lagged_returns)
    # The absolute value keeps the square root defined for either sign of cov.
    return 200 * np.sqrt(np.abs(serial_cov))

# Candidate window lengths (in rows of `p`) for the rolling estimator.
windows = [5, 10, 15, 20, 30, 40, 60]

# One rolling spread series per window length, keyed by that length.
roll_by_window = {w: roll_spread_rolling_pct(p, w) for w in windows}

roll_df = pd.DataFrame(roll_by_window)

# Keep only the last row and transpose it so each window length is a row.
roll_on_day = roll_df.tail(1).T
roll_on_day.columns = ['roll_spread_pct_est']

In [None]:
# Absolute distance between each window's estimate and the target spread.
roll_on_day['abs_error'] = roll_on_day['roll_spread_pct_est'].sub(goal).abs()

# Smallest error first, so the best-fitting window is the top row.
roll_on_day.sort_values(by='abs_error')

Unnamed: 0,roll_spread_pct_est,abs_error
10,0.267921,0.256078
60,0.670428,0.658585
40,0.680707,0.668864
30,1.106976,1.095134
20,1.450848,1.439005
15,1.670882,1.65904
5,1.706032,1.69419
