# Tarea : Effective Spread

- David Campos Ambriz / 744435


Using Roll's model for effective spreads, do the following using the `aapl_5m_train.csv` dataset:

- Determine the effective spread using the block method `cov([dt1, dt2, dt3], [dt4, dt5, dt6])`
- Determine the effective spread using the serial covariance `cov([dt1, dt2, dt3], [dt2, dt3, dt4])`
- Infer the last operation type
- Calculate the Bid & Ask price using a window size of [5, 10, 20, 50, 100]
- Find the average spread of AAPL and analyze the spreads of all methods to find which one fits best for this time series
- Upload a jupyter notebook with the results

---

### Paqueterías

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Load the 5-minute AAPL bars and keep only the timestamp and OHLC columns.
unused_columns = ['Unnamed: 0', 'Gmtoffset', 'Datetime', 'Volume']
df = pd.read_csv('aapl_5m_train.csv').drop(columns=unused_columns)
df

Unnamed: 0,Timestamp,Open,High,Low,Close
0,1609770600,133.570007,133.611602,132.389999,132.809997
1,1609770900,132.750000,132.750000,131.809997,131.889999
2,1609771200,131.500000,132.339996,131.500000,132.059997
3,1609771500,132.000000,132.250000,131.899993,132.250000
4,1609771800,132.000000,132.018096,131.520004,131.589996
...,...,...,...,...,...
39574,1672432800,129.029998,129.130004,128.919998,129.054992
39575,1672433100,129.059997,129.399993,129.020004,129.380004
39576,1672433400,129.375000,129.929992,129.330001,129.764404
39577,1672433700,129.764999,129.949996,129.619995,129.949996


In [3]:
# Price change between consecutive closes; the first row has no predecessor,
# so its NaN change is dropped and the index is renumbered from 0.
df = (
    df
    .assign(diff=df['Close'].diff())
    .dropna()
    .reset_index(drop=True)
)
df

Unnamed: 0,Timestamp,Open,High,Low,Close,diff
0,1609770900,132.750000,132.750000,131.809997,131.889999,-0.919998
1,1609771200,131.500000,132.339996,131.500000,132.059997,0.169998
2,1609771500,132.000000,132.250000,131.899993,132.250000,0.190003
3,1609771800,132.000000,132.018096,131.520004,131.589996,-0.660004
4,1609772100,132.000000,132.089996,131.300003,131.448898,-0.141098
...,...,...,...,...,...,...
39567,1672432800,129.029998,129.130004,128.919998,129.054992,0.014999
39568,1672433100,129.059997,129.399993,129.020004,129.380004,0.325012
39569,1672433400,129.375000,129.929992,129.330001,129.764404,0.384400
39570,1672433700,129.764999,129.949996,129.619995,129.949996,0.185592


## Block method 

`cov([dt1, dt2, dt3], [dt4, dt5, dt6])`

In [4]:
# Window sizes (number of consecutive price changes per block) used for the covariance estimates.
blocks = [5, 10, 20, 50, 100]

In [5]:
# Block method: for every position, compare two adjacent, non-overlapping
# windows of price changes and store |cov| on the row where the second
# window ends. Rows without two full windows behind them stay NaN.
diffs = df['diff'].to_numpy()
n_obs = len(df)

for n_block in blocks:
    cov_by_row = np.full(n_obs, np.nan)

    for start in range(n_obs - 2 * n_block + 1):
        mid = start + n_block
        end = mid + n_block

        first_window = diffs[start:mid]
        second_window = diffs[mid:end]

        # Off-diagonal entry of the 2x2 covariance matrix; the absolute value
        # keeps the square root below defined.
        cov_by_row[end - 1] = np.abs(np.cov(first_window, second_window)[0, 1])

    df[f'Cov_{n_block}'] = cov_by_row
    df[f'Spread_{n_block}'] = 2 * np.sqrt(df[f'Cov_{n_block}'])

df

Unnamed: 0,Timestamp,Open,High,Low,Close,diff,Cov_5,Spread_5,Cov_10,Spread_10,Cov_20,Spread_20,Cov_50,Spread_50,Cov_100,Spread_100
0,1609770900,132.750000,132.750000,131.809997,131.889999,-0.919998,,,,,,,,,,
1,1609771200,131.500000,132.339996,131.500000,132.059997,0.169998,,,,,,,,,,
2,1609771500,132.000000,132.250000,131.899993,132.250000,0.190003,,,,,,,,,,
3,1609771800,132.000000,132.018096,131.520004,131.589996,-0.660004,,,,,,,,,,
4,1609772100,132.000000,132.089996,131.300003,131.448898,-0.141098,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39567,1672432800,129.029998,129.130004,128.919998,129.054992,0.014999,0.005183,0.143989,0.001034,0.064309,0.001851,0.086049,0.000548,0.046804,0.010052,0.200519
39568,1672433100,129.059997,129.399993,129.020004,129.380004,0.325012,0.010577,0.205689,0.000199,0.028237,0.001259,0.070957,0.000557,0.047191,0.010005,0.200046
39569,1672433400,129.375000,129.929992,129.330001,129.764404,0.384400,0.007774,0.176345,0.009258,0.192435,0.002945,0.108540,0.000333,0.036521,0.010244,0.202426
39570,1672433700,129.764999,129.949996,129.619995,129.949996,0.185592,0.007761,0.176193,0.008288,0.182080,0.001540,0.078493,0.000748,0.054715,0.010351,0.203482


In [None]:

# Infer the side of the last trade from the sign of the price change and
# derive Bid/Ask around the close for every window size:
#   - price went up   -> Close is taken as the Ask, Bid = Close - spread
#   - otherwise       -> Close is taken as the Bid, Ask = Close + spread
# Rows where the spread is NaN (not enough history for the window) yield NaN
# on the derived side.
bid_ask_df = pd.DataFrame(index=df.index)

# The price-change column is named 'diff' (lower case); 'Diff' does not exist.
last_was_up = df['diff'] > 0

for n_block in blocks:
    spread = df[f'Spread_{n_block}']
    bid_ask_df[f'Bid_{n_block}'] = np.where(last_was_up, df['Close'] - spread, df['Close'])
    bid_ask_df[f'Ask_{n_block}'] = np.where(last_was_up, df['Close'], df['Close'] + spread)

bid_ask_df

## Serial covariance

`cov([dt1, dt2, dt3], [dt2, dt3, dt4])`



In [None]:
# Start the serial-covariance section from a fresh copy of the raw bars,
# keeping only the timestamp and OHLC columns.
unused_columns = ['Unnamed: 0', 'Gmtoffset', 'Datetime', 'Volume']
df = pd.read_csv('aapl_5m_train.csv').drop(columns=unused_columns)

In [None]:
# The preceding cell reloads the CSV, so the frame no longer has a 'diff'
# column; rebuild the close-to-close price change before lagging it.
df['diff'] = df['Close'].diff()
# Lag the price changes by one step so each row pairs a change with the one
# immediately before it (the two series compared by the serial covariance).
df['diff_1'] = df['diff'].shift(1)
# Rows lacking either value (at least the first two) are dropped and the
# index is renumbered from 0.
df = df.dropna().reset_index(drop=True)