In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive so the project
# files stored on Drive can be reached through ordinary file paths.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
pip install dask[dataframe] --upgrade

Collecting fsspec>=0.6.0
  Downloading fsspec-2021.11.1-py3-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 5.4 MB/s 
Collecting partd>=0.3.10
  Downloading partd-1.2.0-py3-none-any.whl (19 kB)
Collecting locket
  Downloading locket-0.2.1-py2.py3-none-any.whl (4.1 kB)
Installing collected packages: locket, partd, fsspec
Successfully installed fsspec-2021.11.1 locket-0.2.1 partd-1.2.0


In [None]:
import pandas as pd
import glob
import datetime as dt
import multiprocessing as mp
from datetime import datetime
import numpy as np
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from pandas import Series
from tqdm import tqdm, tqdm_notebook
import time
import sys
import matplotlib.pyplot as plt



In [None]:
cd gdrive/My Drive/TFM/

/content/gdrive/My Drive/TFM


# Method definitions

In [None]:
#SNIPPET 3.1 DAILY VOLATILITY ESTIMATES

def get_daily_vol(close, lookback=100):
    """
    Estimate volatility as the exponentially weighted standard deviation of
    the returns measured against the price observed roughly one day earlier.

    :param close: (series) Closing prices indexed by timestamp
    :param lookback: (int) span of the exponentially weighted window
    :return: (series) of daily volatility values; observations from the first
             day have no earlier reference price and are not part of the result
    """
    print('Calculating daily volatility for dynamic thresholds')

    # Position at which "timestamp minus one day" would be inserted in the index.
    day_earlier = close.index - pd.Timedelta(days=1)
    positions = close.index.searchsorted(day_earlier)
    # Position 0 means nothing precedes that point, so no reference price exists.
    positions = positions[positions > 0]

    # Map every remaining (trailing) timestamp to the timestamp of its reference bar.
    n_matched = positions.shape[0]
    current_stamps = close.index[close.shape[0] - n_matched:]
    reference_stamps = pd.Series(close.index[positions - 1], index=current_stamps)

    current_prices = close.loc[reference_stamps.index]
    reference_prices = close.loc[reference_stamps.values].values
    daily_returns = current_prices / reference_prices - 1  # daily returns

    return daily_returns.ewm(span=lookback).std()

In [None]:
def get_t_events(raw_price, threshold):
    """
    Symmetric CUSUM filter: accumulate log returns separately on the up side
    and the down side, and record an event (then reset that side) whenever an
    accumulator moves further than `threshold` away from zero.

    :param raw_price: (series) of close prices.
    :param threshold: (float) when the abs(change) is larger than the threshold, the
    function captures it as an event.
    :return: (datetime index vector) vector of datetimes when the events occurred. This is used later to sample.
    """
    print('Applying Symmetric CUSUM filter.')

    t_events = []
    s_pos = 0
    s_neg = 0

    # log returns; dropna() already removes the leading NaN produced by diff(),
    # so every remaining entry is a real return and none of them may be skipped.
    diff = np.log(raw_price).diff().dropna()

    # Walk the returns positionally instead of through diff.loc[timestamp]:
    # a label lookup yields a Series when a timestamp is repeated, which cannot
    # be converted with float().
    for i, ret in tqdm(zip(diff.index, diff.to_numpy()), total=len(diff)):
        s_pos = max(0.0, float(s_pos + ret))
        s_neg = min(0.0, float(s_neg + ret))

        if s_neg < -threshold:
            s_neg = 0
            t_events.append(i)

        elif s_pos > threshold:
            s_pos = 0
            t_events.append(i)

    event_timestamps = pd.DatetimeIndex(t_events)
    return event_timestamps

In [None]:
def bar(xs, y):
    """
    Snap `xs` down (truncating toward zero) to a multiple of the bucket size `y`.

    :param xs: scalar or array-like of values to bucket
    :param y: bucket size
    :return: `xs` replaced by the multiple of `y` at the start of its bucket
    """
    bucket_number = np.int64(xs / y)
    return bucket_number * y

# Loading the already-sampled dollar bars

In [None]:
# raw trade data from https://public.bitmex.com/?prefix=data/trade/
# Resolve the file list once instead of re-globbing the directory several times
# per iteration; sorted() makes the row order reproducible because glob returns
# files in arbitrary, filesystem-dependent order.
bar_files = sorted(glob.glob("data/bars/new_features/*.csv"))
n_files = len(bar_files)

frames = []
for done, file in enumerate(bar_files, start=1):
    frames.append(pd.read_csv(file))
    # '\r' moves back to the start of the line so each update overwrites the last one.
    label = 'Percentge of files already Loaded:' if done == 1 else '\r Percentge of files already Loaded:'
    # `done` counts files that are fully loaded, so the final update reads 100 % / 0 left.
    print(label, round((done / n_files) * 100, 1), '%. There are', n_files - done, "files left", end='', flush=True)

# Concatenate once at the end: DataFrame.append copied the whole frame on every
# call (quadratic) and no longer exists in pandas >= 2.0.
Dollar_bars = pd.concat(frames) if frames else pd.DataFrame()

Percentge of files already Loaded: 0.0 %. There are 1 files left

In [None]:
# Remove the two columns that are not needed downstream
# (presumably a saved CSV index and a duplicated timestamp column - confirm).
Dollar_bars = Dollar_bars.drop(columns=['Unnamed: 0', 'timestamp.1'])
# timestamp parsing: strip the last three characters of each string, then parse
# the whole column in one vectorized call instead of one strptime call per row.
Dollar_bars['timestamp'] = pd.to_datetime(Dollar_bars['timestamp'].str[:-3], format="%Y-%m-%d %H:%M:%S.%f")

# Index by timestamp while keeping `timestamp` available as a regular column.
Dollar_bars.index = Dollar_bars['timestamp']
Dollar_bars

Unnamed: 0_level_0,timestamp,open,high,low,close,grossValue,homeNotional,foreignNotional,tweet_count,tweet_count2,Google_trend1,Google_trend2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2019-12-22 00:00:02.857,2019-12-22 00:00:02.857,126.95,127.60,126.90,127.00,133995088605,75212.579042,9.563766e+06,379.0,172.0,42.0,55.0
2019-12-22 06:51:01.172,2019-12-22 06:51:01.172,127.00,128.80,126.95,128.45,133739653760,74811.562495,9.556766e+06,376.0,204.0,51.0,70.0
2019-12-22 07:42:27.825,2019-12-22 07:42:27.825,128.45,129.50,128.25,129.50,133638036400,74270.069115,9.573891e+06,609.0,359.0,56.0,58.0
2019-12-22 07:57:26.330,2019-12-22 07:57:26.330,129.50,131.45,129.40,130.45,132643814650,73244.938417,9.539001e+06,609.0,359.0,56.0,58.0
2019-12-22 08:02:58.674,2019-12-22 08:02:58.674,130.45,130.55,128.85,129.00,133612879225,73900.574533,9.580523e+06,737.0,211.0,55.0,60.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-23 22:32:07.348,2021-05-23 22:32:07.348,2091.35,2115.05,2065.90,2065.95,27670542430,4552.292856,9.452077e+06,3903.0,2226.0,74.0,41.0
2021-05-23 22:50:54.588,2021-05-23 22:50:54.588,2065.95,2075.10,2060.00,2065.00,28421512785,4657.210306,9.631046e+06,3903.0,2226.0,74.0,41.0
2021-05-23 22:53:52.767,2021-05-23 22:53:52.767,2065.00,2104.25,2065.00,2090.00,28385242445,4636.918646,9.626955e+06,3903.0,2226.0,74.0,41.0
2021-05-23 23:13:20.888,2021-05-23 23:13:20.888,2090.00,2133.50,2085.20,2121.20,27608181650,4538.231752,9.560626e+06,3941.0,2382.0,70.0,37.0


In [None]:
# NOTE(review): `data1` is not assigned anywhere in the visible part of this
# notebook, so this cell fails on a fresh kernel unless it is defined elsewhere.
# NOTE(review): this cell rebinds `Dollar_bars`, replacing the frame loaded from CSV above.

# Daily time bars: OHLC of the trade price plus the traded size and dollar volume per day.
Time_bars_1d = data1.groupby(pd.Grouper(key="timestamp", freq="1d")).agg({'price': 'ohlc', 'size': 'sum', 'timestamp': 'first', 'foreignNotional':'sum' })
Time_bars_1d.columns = ['open','high','low','close','size','timestamp','foreignNotional']
avg_dly_dollar = np.mean(Time_bars_1d['foreignNotional'])

# Dollar-volume thresholds: average daily dollar volume divided by the number of
# periods of each length in a day (96 x 15m, 48 x 30m, 24 x 1h, 6 x 4h, 2 x 12h).
avg_15m_dollar = np.round(avg_dly_dollar/96,decimals=-3)

avg_30m_dollar = np.round(avg_dly_dollar/48, decimals = -3)

avg_1h_dollar = np.round(avg_dly_dollar/24,decimals=-2)

# A day holds six 4-hour periods (the divisor used to be 5).
avg_4h_dollar = np.round(avg_dly_dollar/6,decimals=-2)

avg_12h_dollar = np.round(avg_dly_dollar/2,decimals=-4)


# Dollar bars: start a new bar every time the cumulative dollar volume crosses
# another multiple of the 30-minute average.
Dollar_bars = data1.groupby(bar(np.cumsum(data1['foreignNotional']), avg_30m_dollar)).agg({'price': 'ohlc', 'size': 'sum', 'timestamp': 'first'})


Dollar_bars.columns = ['open','high','low','close','size','timestamp']


# Index each bar by the timestamp of its first trade.
Dollar_bars = Dollar_bars.set_index('timestamp')

# CUSUM filter based on daily volatility with a 50-day lookback period, and plotted results

In [None]:
# Daily volatility estimate on the dollar-bar closes. `lookback=50` is passed to
# `ewm(span=...)` inside get_daily_vol, so it is a span of 50 return
# observations (one per bar), not 50 calendar days.
daily_vol = get_daily_vol(close=Dollar_bars['close'], lookback=50)

Calculating daily volatility for dynamic thresholds


In [None]:
# Both filters use a constant threshold derived from the average daily
# volatility: once at 1x and once at 2x that average.
mean_daily_vol = daily_vol.mean()
CUSUM = get_t_events(Dollar_bars['close'], threshold=mean_daily_vol)
CUSUM1 = get_t_events(Dollar_bars['close'], threshold=mean_daily_vol*2)

Applying Symmetric CUSUM filter.


100%|██████████| 24912/24912 [00:01<00:00, 17900.43it/s]


Applying Symmetric CUSUM filter.


100%|██████████| 24912/24912 [00:01<00:00, 18938.85it/s]


In [None]:
# Close price of every bar whose timestamp was flagged by each CUSUM filter,
# collected in bar order so the values line up with the event timestamps.
close_prices = Dollar_bars['close']

price = [close_prices[stamp] for stamp in Dollar_bars.index if stamp in CUSUM]

price1 = [close_prices[stamp] for stamp in Dollar_bars.index if stamp in CUSUM1]
    

In [None]:
# Single-panel figure: close price line with the 1x-threshold CUSUM events on top.
fig = make_subplots(rows=1, cols=1)

close_trace = go.Scatter(
    x=Dollar_bars.index,
    y=Dollar_bars['close'],
    name="ETHUSD closing price",
    mode='lines',
    textfont_family="Arial_Black",
)

event_trace = go.Scatter(
    x=CUSUM,
    y=price,
    mode='markers',
    name="1x Thrshld CUSUM events",
    textfont_family="Arial_Black",
)

fig.add_trace(close_trace, row=1, col=1)
# add_trace returns the figure, so keeping this call last renders it as the cell output.
fig.add_trace(event_trace, row=1, col=1)

In [None]:
# Single-panel figure: close price line with the events from CUSUM1 (the filter
# run at twice the average daily volatility) on top.
fig = make_subplots(rows=1, cols=1)

close_trace = go.Scatter(
    x=Dollar_bars.index,
    y=Dollar_bars['close'],
    name="ETHUSD closing price",
    mode='lines',
    textfont_family="Arial_Black",
)

# NOTE(review): this legend label says "Seed", while the same CUSUM1 series is
# labelled "2x Thrshld CUSUM events" in the two-row figure - confirm which is intended.
event_trace = go.Scatter(
    x=CUSUM1,
    y=price1,
    mode='markers',
    name="Seed CUSUM events",
    textfont_family="Arial_Black",
)

fig.add_trace(close_trace, row=1, col=1)
# add_trace returns the figure, so keeping this call last renders it as the cell output.
fig.add_trace(event_trace, row=1, col=1)

In [None]:
# Write whatever figure is currently bound to `fig` to a 1720x540 PNG in the
# working directory.
# NOTE(review): static export needs an image engine; the recorded output below
# mentions plotly.io._orca - confirm orca (or kaleido) is installed.
# NOTE(review): this import sits mid-notebook; consider moving it to the imports cell.
import plotly.io as pio
pio.write_image(fig, 'CUSUM_events.png', width=1720, height=540)

[autoreload of plotly.basedatatypes failed: Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
ImportError: cannot import name '_get_int_type' from '_plotly_utils.utils' (/usr/local/lib/python3.7/dist-packages/_plotly_utils/utils.py)
]
[autoreload of plotly.io._orca failed: Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
ValueError: request_image_with_retrying() requires a code object with 3 free vars, not 2
]
[autoreload of plotly.io._renderers failed: Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/IPython/extensions/autoreload.py", line 247, in check
    superreload(m, reload, self.old_objects)
ImportError: cannot import name 'SphinxGalleryHtmlRenderer' from 'plotly.io._base_renderers' (/usr

In [None]:
# Two stacked panels comparing both filters: each row shows the close price
# line with one set of CUSUM events overlaid (row 1: 1x threshold, row 2: 2x).
fig = make_subplots(rows=2, cols=1)

panels = [
    (1, CUSUM, price, "1x Thrshld CUSUM events"),
    (2, CUSUM1, price1, "2x Thrshld CUSUM events"),
]

for panel_row, event_times, event_prices, event_label in panels:
    # Price line first, then the event markers, so the markers are drawn on top.
    fig.add_trace(
        go.Scatter(
            x=Dollar_bars.index,
            y=Dollar_bars['close'],
            name="ETHUSD closing price",
            mode='lines',
            textfont_family="Arial_Black",
        ),
        row=panel_row,
        col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=event_times,
            y=event_prices,
            mode='markers',
            name=event_label,
            textfont_family="Arial_Black",
        ),
        row=panel_row,
        col=1,
    )

# Bare expression so the finished figure is rendered as the cell output.
fig

Output hidden; open in https://colab.research.google.com to view.