Use this notebook to read in your data and determine whether any pre-processing needs to be done.

In [1]:
import os
import pyarrow.parquet as pq
import plotly.graph_objects as go
from lightning_ib.metrics import factors

In [2]:
# The cells below open files via paths relative to the working directory
# ("data/markets/..."), so the kernel has to sit in the directory that holds "data".
# A bare os.chdir("..") climbs one more level every time this cell is re-run;
# only step up when "data" is not already visible from the current directory.
# NOTE(review): assumes the notebook's own folder has no "data" subdirectory - confirm.
if not os.path.isdir("data"):
    os.chdir("..")
os.getcwd()

'/Users/justin/Developer/quant/lightning-ib'

In [3]:
# Path to the raw SPY parquet file, relative to the current working directory.
datapath = os.path.join(
    "data", "markets", "raw", "equities", "SPY.pq"
)

# Read only the date and OHLC columns, then convert the Arrow table to pandas.
data = pq.read_table(
    datapath,
    columns=["Date", "Open", "High", "Low", "Close"],
).to_pandas()

## Normalized Average True Range

In [4]:
# NATR factor computed from the SPY frame loaded above, with period=20.
natr = factors.normalized_average_true_range(
    data,
    period=20,
)

In [5]:
# Plot NATR as an expanding percentile rank (pct=True): each point is ranked
# against the observations up to and including it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=natr.index,
            y=natr.expanding().rank(pct=True),
        )
    ],
)
fig.update_layout(title="NATR Expanding Rank")
fig.show()

## Log Returns

In [6]:
# Log returns of the SPY "Close" column, via the project's factors helper.
returns = factors.log_returns(
    data["Close"],
)

In [7]:
# Plot the log-return series against its own index.
fig = go.Figure(
    data=[
        go.Scatter(
            x=returns.index,
            y=returns,
        )
    ],
)
fig.update_layout(title="Log Returns")
fig.show()

In [8]:
# Plot log returns as an expanding percentile rank (pct=True): each point is
# ranked against the observations up to and including it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=returns.index,
            y=returns.expanding().rank(pct=True),
        )
    ],
)
fig.update_layout(title="Returns Expanding Rank")
fig.show()

## Relative Strength Index

In [9]:
# RSI of the SPY "Close" column; no period is passed, so the helper's own
# default lookback applies.
rsi = factors.relative_strength_index(
    data["Close"],
)

In [10]:
# Plot the RSI series against its own index.
fig = go.Figure(
    data=[
        go.Scatter(
            x=rsi.index,
            y=rsi,
        )
    ],
)
fig.update_layout(title="RSI")
fig.show()

In [11]:
# Plot RSI as an expanding percentile rank (pct=True): each point is ranked
# against the observations up to and including it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=rsi.index,
            y=rsi.expanding().rank(pct=True),
        )
    ],
)
fig.update_layout(title="RSI Expanding Rank")
fig.show()

## Rate of Change

In [12]:
# Rate of change of the SPY "Close" column, with period=20.
roc = factors.rate_of_change(
    data["Close"],
    period=20,
)

In [13]:
# Plot the rate-of-change series against its own index.
fig = go.Figure(
    data=[
        go.Scatter(
            x=roc.index,
            y=roc,
        )
    ],
)
fig.update_layout(title="Rate of Change")
fig.show()

In [14]:
# Plot rate of change as an expanding percentile rank (pct=True): each point
# is ranked against the observations up to and including it.
fig = go.Figure(
    data=[
        go.Scatter(
            x=roc.index,
            y=roc.expanding().rank(pct=True),
        )
    ],
)
fig.update_layout(title="Rate of Change Expanding Rank")
fig.show()

## Training Dataset

In [15]:
# Path to the training parquet file, relative to the current working directory.
training_data_path = os.path.join(
    "data", "markets", "training", "training.pq"
)

In [16]:
# Read the training parquet file as an Arrow table, then convert it to pandas.
training_data = pq.read_table(
    training_data_path,
).to_pandas()

In [17]:
# Print a concise summary of the training frame: index range, column count,
# dtypes and memory usage.
training_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3886 entries, 2007-06-27 to 2022-11-30
Columns: 161 entries, SPY_NATR_RANK to USO_RTNS_RANK
dtypes: float64(161)
memory usage: 4.8+ MB


In [18]:
# Preview the first five rows of the training frame (5 is also the pandas default).
training_data.head(n=5)

Unnamed: 0_level_0,SPY_NATR_RANK,SPY_AROON_RANK,SPY_RSI_RANK,SPY_ROC_RANK,SPY_RTNS_RANK,EWJ_NATR_RANK,EWJ_AROON_RANK,EWJ_RSI_RANK,EWJ_ROC_RANK,EWJ_RTNS_RANK,...,GLD_NATR_RANK,GLD_AROON_RANK,GLD_RSI_RANK,GLD_ROC_RANK,GLD_RTNS_RANK,USO_NATR_RANK,USO_AROON_RANK,USO_RSI_RANK,USO_ROC_RANK,USO_RTNS_RANK
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2007-06-27,0.411357,0.03615,0.301385,0.212188,0.930008,0.024477,0.109081,0.342675,0.482086,0.565187,...,0.379528,0.064567,0.075591,0.310236,0.496942,0.199301,0.909091,0.905594,0.93007,0.816393
2007-06-28,0.413736,0.036278,0.301302,0.217114,0.456749,0.021277,0.10922,0.303546,0.388652,0.433251,...,0.360063,0.065252,0.177673,0.279874,0.81374,0.160279,0.994774,0.958188,0.923345,0.761438
2007-06-29,0.429402,0.036406,0.305371,0.187431,0.48499,0.023041,0.109358,0.520737,0.420418,0.769718,...,0.342229,0.065934,0.183673,0.150706,0.45122,0.111111,0.993056,0.979167,0.920139,0.736156
2007-07-02,0.419042,0.05494,0.451979,0.24827,0.844438,0.03331,0.500354,0.801914,0.528703,0.892995,...,0.344828,0.066614,0.360502,0.242947,0.869102,0.100346,0.946367,0.982699,0.906574,0.733766
2007-07-03,0.403708,0.076785,0.509685,0.303542,0.655106,0.0209,0.500354,0.77152,0.524973,0.435257,...,0.327074,0.087637,0.286385,0.225352,0.291793,0.041379,0.944828,0.982759,0.917241,0.508091
