### Importing Packages and API Key

In [1]:
# Import packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
from dotenv import load_dotenv
from scipy import stats

# Load environment variables from the .env file
# NOTE(review): the section heading mentions an API key, but no cell visible in this
# notebook reads an environment variable afterwards — confirm this call is still needed.
load_dotenv('../config.env')


True

In [2]:
# Import MlFinLab package
import mlfinlab
from mlfinlab.data_structures.standard_data_structures import (get_dollar_bars,
                                                               get_tick_bars, get_volume_bars)

from mlfinlab.multi_product.etf_trick import get_futures_roll_series
from mlfinlab.data_structures.imbalance_data_structures import get_ema_dollar_imbalance_bars




### Importing Data and Creating Dollar, Volume and Tick Bars

In [3]:
# Read the raw ES trade ticks from disk into a DataFrame
path = '../Data/ES_Trades.csv'
data = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows
data.head(5)


Unnamed: 0,Symbol,Date,Time,Price,Volume,Market Flag,Sales Condition,Exclude Record Flag,Unfiltered Price
0,ESU13,09/01/2013,17:00:00.083,1640.25,8,E,0,,1640.25
1,ESU13,09/01/2013,17:00:00.083,1640.25,1,E,0,,1640.25
2,ESU13,09/01/2013,17:00:00.083,1640.25,2,E,0,,1640.25
3,ESU13,09/01/2013,17:00:00.083,1640.25,1,E,0,,1640.25
4,ESU13,09/01/2013,17:00:00.083,1640.25,1,E,0,,1640.25


In [4]:
# List the distinct contract symbols present in the 'Symbol' column

data.loc[:, 'Symbol'].unique()

array(['ESU13', 'ESZ13'], dtype=object)

In [5]:
# Reduce the raw ticks to the three columns handed to the bar constructors below.
# The timestamp is deliberately left as a string: converting every tick to datetime here is very slow.
date_time = data['Date'] + ' ' + data['Time']
new_data = pd.concat(
    [date_time, data['Price'], data['Volume']],
    axis=1,
    keys=['date', 'price', 'volume'],  # names the resulting columns directly
)
print(new_data.head())
print('\n')
print('Rows:', len(new_data))


                      date    price  volume
0  09/01/2013 17:00:00.083  1640.25       8
1  09/01/2013 17:00:00.083  1640.25       1
2  09/01/2013 17:00:00.083  1640.25       2
3  09/01/2013 17:00:00.083  1640.25       1
4  09/01/2013 17:00:00.083  1640.25       1


Rows: 5454950


In [6]:
# Build dollar bars from the in-memory tick frame.
# If memory is an issue, 'new_data' can be replaced with the path of the file the raw tick data was saved to.
print('Creating Dollar Bars')
dollar = get_dollar_bars(
    new_data,
    threshold=70_000_000,   # dollar value per bar; the bars shown below each carry just over 70 million
    batch_size=1_000_000,   # ticks are consumed in batches of this size
    verbose=True,
)


Creating Dollar Bars
Reading data in batches:
Batch number: 0
Batch number: 1
Batch number: 2
Batch number: 3
Batch number: 4
Batch number: 5
Returning bars 



In [7]:
# Build EMA dollar imbalance bars from the same tick frame
print('Creating Dollar Imbalance Bars')
dollar_imbalance = get_ema_dollar_imbalance_bars(
    new_data,
    num_prev_bars=3,
    expected_imbalance_window=100,
    exp_num_ticks_init=1_000,
    exp_num_ticks_constraints=[1_000, 10_000],
    batch_size=1_000_000,
    verbose=True,
    to_csv=False,
    analyse_thresholds=False,
    output_path=None,
)

Creating Dollar Imbalance Bars
Reading data in batches:
Batch number: 0
Batch number: 1
Batch number: 2
Batch number: 3
Batch number: 4
Batch number: 5
Returning bars 



### Confirming Sampling

In [8]:
# Sanity-check the dollar sampling: close * volume per bar should sit near the threshold.
# This is only an approximation of 'cum_dollar_value', which is why the two columns differ slightly.
dollar['value'] = dollar['volume'] * dollar['close']
dollar.head(5)


Unnamed: 0_level_0,tick_num,open,high,low,close,volume,cum_buy_volume,cum_ticks,cum_dollar_value,value
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
09/01/2013 21:34:39.298,11207,1640.25,1643.5,1639.0,1640.75,42862,21896,11207,70347610.0,70325826.5
09/02/2013 02:56:24.209,26547,1640.75,1646.0,1640.25,1644.5,42585,24320,15340,70000546.5,70031032.5
09/02/2013 06:37:33.128,40473,1644.5,1647.5,1644.25,1647.5,42580,23167,13926,70095794.25,70150550.0
09/02/2013 09:34:46.141,51328,1647.5,1648.5,1645.25,1647.0,42535,23904,10855,70053015.75,70055145.0
09/02/2013 22:55:20.297,64261,1647.0,1648.5,1645.25,1648.0,42512,23884,12933,70024910.5,70059776.0


In [9]:
# Same check for the imbalance bars: approximate each bar's traded value as close * volume
dollar_imbalance['value'] = dollar_imbalance['volume'] * dollar_imbalance['close']
dollar_imbalance.head(5)

Unnamed: 0_level_0,tick_num,open,high,low,close,volume,cum_buy_volume,cum_ticks,cum_dollar_value,value
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
09/01/2013 17:01:24.420,1000,1640.25,1641.0,1639.0,1640.0,4366,1886,1000,7160071.0,7160240.0
09/01/2013 17:08:58.970,1785,1640.0,1641.0,1639.5,1640.5,2696,1715,785,4422162.0,4422788.0
09/01/2013 19:48:22.032,7606,1640.5,1642.75,1639.5,1642.75,22920,13511,5821,37613320.0,37651830.0
09/03/2013 12:05:03.228,411483,1642.75,1650.0,1632.0,1632.25,1492401,732887,403877,2452809000.0,2435972000.0
09/03/2013 14:16:04.280,531014,1632.25,1638.75,1630.75,1638.75,452350,247094,119531,739253600.0,741288600.0


### Creating a Continuous Futures Contract using the ETF Trick

In [10]:
def _prepare_roll_frame(bars, ticker='ES'):
    """Return a copy of ``bars`` laid out for ``get_futures_roll_series``.

    The input frame is not modified. The returned frame has a datetime index,
    the columns 'date', 'close', 'open', 'high', 'low', 'volume', and two
    constant columns, 'ticker' and 'nearest_contract', both set to ``ticker``.

    Parameters
    ----------
    bars : pd.DataFrame
        Bar frame whose index can be parsed by ``pd.to_datetime`` and which
        contains 'close', 'open', 'high', 'low' and 'volume' columns.
    ticker : str, default 'ES'
        Value written to both the 'ticker' and 'nearest_contract' columns.

    Returns
    -------
    pd.DataFrame
        A new frame; ``bars`` itself is left untouched.
    """
    roll_frame = bars.copy()

    # Ensure the index is a datetime index so that `.date` is available
    roll_frame.index = pd.to_datetime(roll_frame.index)
    roll_frame['date'] = roll_frame.index.date

    # Keep only the columns needed for the ETF trick. The explicit .copy() makes the
    # selection an independent frame, so the column assignments below do not raise
    # SettingWithCopyWarning.
    roll_frame = roll_frame[['date', 'close', 'open', 'high', 'low', 'volume']].copy()

    # NOTE(review): both columns hold the same constant on every row, so no row marks a
    # contract change, although the raw data contains two symbols (ESU13 and ESZ13).
    # Confirm whether the actual contract should be carried through to the bars instead.
    roll_frame['ticker'] = ticker
    roll_frame['nearest_contract'] = ticker
    return roll_frame


# Build the roll-ready frames from copies of the dollar and dollar imbalance bars
dollar_roll = _prepare_roll_frame(dollar)
dollar_imbalance_roll = _prepare_roll_frame(dollar_imbalance)



In [11]:
# Compute the relative roll gaps for both bar types.
# The column mapping is identical for the two frames, so it is defined once and shared.
roll_series_kwargs = dict(
    open_col='open',
    close_col='close',
    sec_col='ticker',
    current_sec_col='nearest_contract',
    method='relative',
)

# dollar bars
roll_gaps_relative_dollar = get_futures_roll_series(dollar_roll, **roll_series_kwargs)
# dollar imbalance bars
roll_gaps_relative_dollar_imbalance = get_futures_roll_series(dollar_imbalance_roll, **roll_series_kwargs)

In [12]:
# Divide the close prices by the relative roll gaps to obtain the continuous series.
# Only 'close' is adjusted; 'open', 'high' and 'low' are carried over unchanged.
continuous_dollar_relative_method = dollar_roll.assign(
    close=dollar_roll['close'] / roll_gaps_relative_dollar
)

continuous_dollar_imbalance_relative_method = dollar_imbalance_roll.assign(
    close=dollar_imbalance_roll['close'] / roll_gaps_relative_dollar_imbalance
)




In [13]:
# Preview the first rows of the roll-adjusted dollar bars
continuous_dollar_relative_method.head(n=5)


Unnamed: 0_level_0,date,close,open,high,low,volume,ticker,nearest_contract
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-09-01 21:34:39.298,2013-09-01,1640.75,1640.25,1643.5,1639.0,42862,ES,ES
2013-09-02 02:56:24.209,2013-09-02,1644.5,1640.75,1646.0,1640.25,42585,ES,ES
2013-09-02 06:37:33.128,2013-09-02,1647.5,1644.5,1647.5,1644.25,42580,ES,ES
2013-09-02 09:34:46.141,2013-09-02,1647.0,1647.5,1648.5,1645.25,42535,ES,ES
2013-09-02 22:55:20.297,2013-09-02,1648.0,1647.0,1648.5,1645.25,42512,ES,ES


In [15]:
# Preview the first rows of the roll-adjusted dollar imbalance bars
continuous_dollar_imbalance_relative_method.head(n=5)

Unnamed: 0_level_0,date,close,open,high,low,volume,ticker,nearest_contract
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-09-01 17:01:24.420,2013-09-01,1640.0,1640.25,1641.0,1639.0,4366,ES,ES
2013-09-01 17:08:58.970,2013-09-01,1640.5,1640.0,1641.0,1639.5,2696,ES,ES
2013-09-01 19:48:22.032,2013-09-01,1642.75,1640.5,1642.75,1639.5,22920,ES,ES
2013-09-03 12:05:03.228,2013-09-03,1632.25,1642.75,1650.0,1632.0,1492401,ES,ES
2013-09-03 14:16:04.280,2013-09-03,1638.75,1632.25,1638.75,1630.75,452350,ES,ES


### Computing the Serial Correlation of Dollar and Dollar Imbalance Bar Returns

In [16]:
# Bar-to-bar simple returns of the adjusted close; the leading NaN produced by pct_change is dropped
dollar_returns = (
    continuous_dollar_relative_method['close']
    .pct_change(periods=1)
    .dropna()
)
dollar_imbalance_returns = (
    continuous_dollar_imbalance_relative_method['close']
    .pct_change(periods=1)
    .dropna()
)

In [19]:
# Lag-1 autocorrelation of the returns for each bar type
dollar_serial_corr = dollar_returns.autocorr(lag=1)
dollar_imbalance_serial_corr = dollar_imbalance_returns.autocorr(lag=1)




In [20]:
# Report the lag-1 serial correlation of each bar type, one line per bar type
for label, serial_corr in (
    ('Serial Correlation for Dollar Bars:', dollar_serial_corr),
    ('Serial Correlation for Dollar Imbalance Bars:', dollar_imbalance_serial_corr),
):
    print(label, serial_corr)

Serial Correlation for Dollar Bars: 0.00526983961302618
Serial Correlation for Dollar Imbalance Bars: -0.4125859709318456
