In [13]:
import pandas as pd

## Schema

publisher_id - The publisher ID assigned by Databento, which denotes the dataset and venue.

instrument_id - The numeric instrument ID.

ts_event - The matching-engine-received timestamp expressed as the number of nanoseconds since the UNIX epoch.

price - The order price where every 1 unit corresponds to 1e-9, i.e. 1/1,000,000,000 or 0.000000001.

size - The order quantity.

action - The event action. Always Trade in the TBBO schema. See Action.

side - The side that initiates the event. Can be Ask for a sell aggressor, Bid for a buy aggressor, or None where no side is specified by the original trade.

flags - A bit field indicating event end, message characteristics, and data quality. See Flags.

depth - The book level where the update event occurred.

ts_recv - The capture-server-received timestamp expressed as the number of nanoseconds since the UNIX epoch.

ts_in_delta - The matching-engine-sending timestamp expressed as the number of nanoseconds before ts_recv.

sequence - The message sequence number assigned at the venue.

bid_px_00 - The bid price at the top level. (highest price a buyer is willing to pay)

ask_px_00 - The ask price at the top level. (lowest price a seller is willing to accept)

bid_sz_00 - The bid size at the top level. (number of shares investors are trying to buy)

ask_sz_00 - The ask size at the top level. (number of shares investors are trying to sell)

bid_ct_00 - The number of bid orders at the top level. 

ask_ct_00 - The number of ask orders at the top level.

In [14]:
# Load the trade records from the local CSV. The file's first column is an
# unnamed saved row index, which appears as "Unnamed: 0" in the preview.
df = pd.read_csv('data.csv')
df.head()

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol
0,1688371200660869841,1688371200660704717,1,2,32,T,B,0,194120000000,1,130,165124,303634,193630000000,194120000000,27,27,1,1,AAPL
1,1688371201201402566,1688371201201237816,1,2,32,T,B,0,194110000000,2,130,164750,304724,193900000000,194110000000,5,400,1,1,AAPL
2,1688371201233688992,1688371201233524761,1,2,32,T,B,0,194110000000,8,130,164231,304850,193900000000,194110000000,5,398,1,1,AAPL
3,1688371201317556361,1688371201317392163,1,2,32,T,B,0,194110000000,2,130,164198,305101,193900000000,194110000000,5,390,1,1,AAPL
4,1688371201478520666,1688371201478356044,1,2,32,T,B,0,194000000000,7,130,164622,306430,193900000000,194000000000,5,200,1,1,AAPL


In [15]:
# (rows, columns) of the loaded frame
df.shape

(59271, 20)

Technical Indicators used for the project include:

- Trend Indicators:
    1) Moving Averages (MA)
    2) Exponential Moving Averages (EMA)
    3) Ichimoku Cloud (ICH)
    4) Average Directional Index (ADX)
    5) Directional Index (DI)

- Momentum Indicators:
    1) Momentum (MOM)
    2) Rate of Change (ROC)
    3) Commodity Channel Index (CCI)
    4) Moving Average Convergence/Divergence (MACD)
    5) Relative Strength Index (RSI)
    6) Stochastic Oscillator (Stoch)
    
- Volume Indicators:
    1) On Balance Volume (OBV)
    2) Accumulation/Distribution Line (ADL)
    3) OBV Mean

- Volatility Indicators:
    1) Bollinger Bands (BB)
    2) Average True Range (ATR)

- Price Indicators:
    1) Time-Weighted Average Price (TWAP)
    2) Volume-Weighted Average Price (VWAP)

In [16]:
import talib as ta
import numpy as np

class TechnicalIndicators:
    """Append technical-indicator columns to a price DataFrame.

    The frame passed in is stored by reference (no copy is made), so every
    ``add_*`` method adds its columns directly to the caller's DataFrame.

    Columns read by the methods:
        Close, High, Low  -- price series fed to the TA-Lib functions
        Volume            -- used by OBV, ADL and VWAP
        ts_event          -- event timestamp, used to build the TWAP time weights

    All calculations use ``self.df`` only; nothing is read from notebook
    globals, so the class works with any frame that has the columns above.
    """

    def __init__(self, df):
        """Keep a reference to ``df``; indicator columns are added to it in place."""
        self.df = df

    def add_trend_indicators(self):
        """Add Ichimoku-style cloud columns, a 20-period EMA, and ADX / +DI / -DI."""
        data = self.df
        close = data['Close']

        # Ichimoku Cloud components (original note: complete from index 101).
        # The conversion/base lines here are TEMA(9) and TEMA(26) of Close.
        conversion = ta.TEMA(close, timeperiod=9)
        base = ta.TEMA(close, timeperiod=26)
        data['conversion_line'] = conversion
        data['base_line'] = base
        # Both leading spans are pushed 26 rows forward.
        data['span_a'] = ((data['conversion_line'] + data['base_line']) / 2).shift(26)
        data['span_b'] = ta.SMA(close, timeperiod=52).shift(26)
        # Row i receives the Close of row i + 26, so the last 26 rows are NaN.
        data['lagging_span'] = close.shift(-26)

        # Exponential Moving Average (EMA): 20-period, first value at index 19.
        # Reads self.df rather than a global `df`.
        data['EMA'] = ta.EMA(close, timeperiod=20)

        # Average Directional Index (ADX).
        # ADX is derived from +DI and -DI; TA-Lib computes them internally, but
        # they are stored as well for later visualisation or analysis.
        high, low = data['High'], data['Low']
        data['ADX'] = ta.ADX(high, low, close, timeperiod=14)
        data['+DI'] = ta.PLUS_DI(high, low, close, timeperiod=14)   # from index 14
        data['-DI'] = ta.MINUS_DI(high, low, close, timeperiod=14)  # from index 14

    def add_momentum_indicators(self):
        """Add Momentum, ROC, CCI, MACD (line/signal/histogram), RSI and Stochastic %K/%D."""
        data = self.df
        high, low, close = data['High'], data['Low'], data['Close']

        # Momentum: 10-period, first value at index 10
        data['Momentum'] = ta.MOM(close, timeperiod=10)

        # Rate Of Change (ROC)
        data['ROC'] = ta.ROC(close, timeperiod=10)

        # Commodity Channel Index (CCI)
        data['CCI'] = ta.CCI(high, low, close, timeperiod=14)

        # MACD: TA-Lib returns (macd, signal, histogram) in that order
        macd, macd_signal, macd_hist = ta.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9)
        data['MACD'] = macd
        data['MACD_signal'] = macd_signal
        data['MACD_hist'] = macd_hist

        # RSI
        data['RSI'] = ta.RSI(close, timeperiod=14)

        # Stochastic Oscillator: TA-Lib returns (slow %K, slow %D)
        stoch_k, stoch_d = ta.STOCH(high, low, close, fastk_period=14, slowk_period=3, slowd_period=3)
        data['Stoch_k'] = stoch_k
        data['Stoch_d'] = stoch_d

    def add_volume_indicators(self):
        """Add On Balance Volume and the Accumulation/Distribution Line."""
        data = self.df

        # On Balance Volume
        data['OBV'] = ta.OBV(data['Close'], data['Volume'])

        # Accumulation/Distribution Line (ADL)
        data['ADL'] = ta.AD(data['High'], data['Low'], data['Close'], data['Volume'])

    def add_volatility_indicators(self):
        """Add 20-period, 2-standard-deviation Bollinger Bands and a 14-period ATR."""
        data = self.df

        # Bollinger Bands (BB): TA-Lib returns (upper, middle, lower); matype=0 is SMA
        upper, middle, lower = ta.BBANDS(data['Close'], timeperiod=20, nbdevup=2, nbdevdn=2, matype=0)
        data['Upper_BB'] = upper
        data['Middle_BB'] = middle
        data['Lower_BB'] = lower

        # Average True Range (ATR)
        data['ATR'] = ta.ATR(data['High'], data['Low'], data['Close'], timeperiod=14)

    def add_price_indicators(self):
        """Add cumulative TWAP and VWAP columns.

        TA-Lib has no TWAP or VWAP function, so both are computed by hand.
        Helper columns ``Timestamp``, ``Time`` and ``Weighted_Close`` are left
        in the frame alongside the results.
        """
        data = self.df

        # Time-Weighted Average Price (TWAP) - SUM(Price * Time) / SUM(Time)
        data['Timestamp'] = pd.to_datetime(data['ts_event'])
        # Seconds elapsed since the previous row; the first row has no
        # predecessor and is therefore NaN.
        data['Time'] = data['Timestamp'].diff().dt.total_seconds()
        # Close weighted by elapsed time. Reads self.df rather than a global `df`.
        data['Weighted_Close'] = data['Close'] * data['Time']
        # Running TWAP; cumsum skips the leading NaN, so TWAP starts at row 1.
        data['TWAP'] = data['Weighted_Close'].cumsum() / data['Time'].cumsum()

        # Volume-Weighted Average Price (VWAP) - SUM(Price * Volume) / SUM(Volume)
        data['VWAP'] = (data['Close'] * data['Volume']).cumsum() / data['Volume'].cumsum()

    def add_all_indicators(self):
        """Run every ``add_*`` method in a fixed order and return the augmented frame."""
        self.add_trend_indicators()
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_price_indicators()
        return self.df


In [17]:
# Build the columns the technical-indicator class reads (Close, Open, Volume,
# High, Low) from the raw trade and top-of-book fields.
# NOTE: the price columns are overwritten in place, so run this cell once per
# load of `df`; re-running it would apply the 1e-9 factor a second time.
for px_col in ('price', 'bid_px_00', 'ask_px_00'):
    # raw prices are stored so that 1 unit corresponds to 1e-9
    df[px_col] = df[px_col] * 1e-9

# Close is the trade price; Open is the previous row's Close (the first row,
# which has no predecessor, falls back to its own Close).
df['Close'] = df['price']
prev_close = df['Close'].shift(1)
df['Open'] = prev_close.fillna(df['Close'])
df['Volume'] = df['size']

# High / Low are the larger / smaller of the top-level bid and ask prices.
top_of_book = df[['bid_px_00','ask_px_00']]
df['High'] = top_of_book.max(axis=1)
df['Low'] = top_of_book.min(axis=1)

# Indicator columns are added to `df` itself; df_with_indicators is that same frame.
ti = TechnicalIndicators(df)
df_with_indicators = ti.add_all_indicators()


In [18]:
# Show every column when a frame is displayed (this is a global pandas option).
pd.set_option('display.max_columns', None)

# Start at row 101, the index from which add_trend_indicators notes the
# Ichimoku cloud columns begin.
df_with_indicators[101:]

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,Close,Open,Volume,High,Low,conversion_line,base_line,span_a,span_b,lagging_span,EMA,ADX,+DI,-DI,Momentum,ROC,CCI,MACD,MACD_signal,MACD_hist,RSI,Stoch_k,Stoch_d,OBV,ADL,Upper_BB,Middle_BB,Lower_BB,ATR,Timestamp,Time,Weighted_Close,TWAP,VWAP
101,1688371400664279786,1688371400664115447,1,2,32,T,A,0,194.19,70,130,164339,475955,194.19,194.20,70,297,1,3,AAPL,194.19,194.20,70,194.20,194.19,194.194363,194.182948,194.255568,194.110385,194.18,194.197540,33.829051,5.599560,8.502856,0.01,0.005150,9.523810,-0.003042,-0.001514,-0.001528,48.267068,42.424242,40.404040,-4470.0,-4.340057e+03,194.234310,194.1975,194.160690,0.056411,2023-07-03 08:03:20.664115447,6.316081,1226.519748,194.173886,194.064270
102,1688371401113889215,1688371401113724707,1,2,32,T,B,0,194.20,1,130,164508,476216,194.15,194.20,145,297,5,3,AAPL,194.20,194.19,1,194.20,194.15,194.197947,194.184131,194.268455,194.115769,194.13,194.197774,34.465454,5.242082,13.067252,0.01,0.005150,-48.837209,-0.002355,-0.001682,-0.000673,50.679430,42.424242,43.434343,-4469.0,-4.339057e+03,194.226789,194.1955,194.164211,0.055953,2023-07-03 08:03:21.113724707,0.449609,87.314118,194.173945,194.064288
103,1688371401985448075,1688371401985283743,1,2,32,T,B,0,194.20,64,130,164332,476909,194.15,194.20,145,296,5,3,AAPL,194.20,194.20,64,194.20,194.15,194.200140,194.185219,194.277233,194.121154,194.13,194.197986,35.056399,4.904867,12.226656,0.00,0.000000,-41.176471,-0.001790,-0.001704,-0.000086,50.679430,42.424242,42.424242,-4469.0,-4.275057e+03,194.226789,194.1955,194.164211,0.055528,2023-07-03 08:03:21.985283743,0.871559,169.256765,194.174057,194.065420
104,1688371404714220324,1688371404714056048,1,2,32,T,B,0,194.20,1,130,164276,478991,194.15,194.20,145,232,5,3,AAPL,194.20,194.20,1,194.20,194.15,194.201392,194.186221,194.286588,194.126731,194.13,194.198178,35.605135,4.587089,11.434511,0.01,0.005150,-29.577465,-0.001326,-0.001628,0.000302,50.679430,63.636364,49.494949,-4469.0,-4.274057e+03,194.212079,194.1930,194.173921,0.055133,2023-07-03 08:03:24.714056048,2.728772,529.927582,194.174404,194.065437
105,1688371410071601892,1688371410071432879,1,2,32,T,A,0,194.15,30,0,169013,481477,194.15,194.20,145,231,5,3,AAPL,194.15,194.20,30,194.20,194.15,194.177621,194.176836,194.289392,194.132115,194.15,194.193590,36.114674,4.287912,10.688735,-0.04,-0.020598,-203.992016,-0.004937,-0.002290,-0.002647,39.249703,48.484848,51.515152,-4499.0,-4.304057e+03,194.216939,194.1905,194.164061,0.054766,2023-07-03 08:03:30.071432879,5.357377,1040.134712,194.173780,194.065766
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,1,2,32,T,B,0,192.44,6,130,164895,252532002,192.40,192.44,40,7,1,1,AAPL,192.44,192.40,6,192.44,192.40,192.417554,192.424443,192.394268,192.387692,,192.415696,15.474528,6.612534,3.765166,0.01,0.005197,143.307087,0.005002,0.006368,-0.001366,63.061829,33.333333,22.222222,913234.0,-2.071561e+06,192.444977,192.4175,192.390023,0.027784,2023-07-03 20:59:14.514320323,45.103716,8679.759065,192.926611,192.721647
59267,1688417961020718430,1688417961020553920,1,2,32,T,B,0,192.44,1,130,164510,252532102,192.40,192.44,40,1,1,1,AAPL,192.44,192.44,1,192.44,192.40,192.428054,192.428471,192.396485,192.387885,,192.418011,16.329018,5.953254,3.389772,0.01,0.005197,129.629630,0.006355,0.006365,-0.000011,63.061829,66.666667,33.333333,913234.0,-2.071560e+06,192.447411,192.4195,192.391589,0.028657,2023-07-03 20:59:21.020553920,6.506234,1252.059593,192.926543,192.721647
59268,1688417973297905504,1688417973297741235,1,2,32,T,A,0,192.40,5,130,164269,252532347,192.40,192.46,40,6,1,1,AAPL,192.40,192.44,5,192.46,192.40,192.415190,192.423616,192.398003,192.388462,,192.416295,19.013869,9.751295,2.919558,-0.03,-0.015590,21.021021,0.004151,0.005922,-0.001771,44.499081,66.666667,55.555556,913229.0,-2.071565e+06,192.447411,192.4195,192.391589,0.030896,2023-07-03 20:59:33.297741235,12.277187,2362.130839,192.926405,192.721647
59269,1688417996889779362,1688417996889614660,1,2,32,T,B,0,192.45,3,130,164702,252532944,192.40,192.45,35,16,1,1,AAPL,192.45,192.40,3,192.45,192.40,192.431341,192.429705,192.399032,192.388654,,192.419505,21.506945,8.671762,2.596344,0.02,0.010393,216.260163,0.006366,0.006011,0.000355,60.250043,61.111111,64.814815,913232.0,-2.071562e+06,192.451394,192.4220,192.392606,0.032260,2023-07-03 20:59:56.889614660,23.591873,4540.256041,192.926165,192.721647
