Use this notebook to read in your data and determine whether any pre-processing needs to be done.

In [None]:
import os
import pyarrow.parquet as pq
import plotly.graph_objects as go
from lightning_ib.metrics import factors

In [None]:
# Step out of the notebook's directory so the relative "data/..." paths used
# later resolve (presumably the notebook sits one level below the project
# root -- TODO confirm).
# The sentinel keeps the move to once per kernel session: without it, every
# re-run of this cell would climb one more directory and break those paths.
if "_CWD_MOVED_TO_PARENT" not in globals():
    os.chdir("..")
    _CWD_MOVED_TO_PARENT = True
os.getcwd()

In [None]:
# Read the SPY parquet file, keeping only the date and OHLC columns.
datapath = os.path.join("data", "markets", "raw", "equities", "SPY.pq")
ohlc_columns = ["Date", "Open", "High", "Low", "Close"]
spy_table = pq.read_table(datapath, columns=ohlc_columns)
data = spy_table.to_pandas()

## Normalized Average True Range

In [None]:
# Normalized average true range of the loaded price frame, 20-period window.
natr = factors.normalized_average_true_range(
    data,
    period=20,
)

In [None]:
# Percentile rank of each NATR value among all values seen up to that point.
natr_rank = natr.expanding().rank(pct=True)
fig = go.Figure(data=[go.Scatter(x=natr.index, y=natr_rank)])
fig.update_layout(title="NATR Expanding Rank")
fig.show()

## Log Returns

In [None]:
# Log returns computed from the closing-price column.
close_prices = data["Close"]
returns = factors.log_returns(close_prices)

In [None]:
# Raw log-return series plotted against its own index.
returns_trace = go.Scatter(x=returns.index, y=returns)
fig = go.Figure(data=[returns_trace])
fig.update_layout(title="Log Returns")
fig.show()

In [None]:
# Percentile rank of each return among all returns seen up to that point.
returns_rank = returns.expanding().rank(pct=True)
fig = go.Figure(data=[go.Scatter(x=returns.index, y=returns_rank)])
fig.update_layout(title="Returns Expanding Rank")
fig.show()

## Relative Strength Index

In [None]:
# Relative strength index of the closing prices, using the helper's defaults.
close_prices = data["Close"]
rsi = factors.relative_strength_index(close_prices)

In [None]:
# Raw RSI series plotted against its own index.
rsi_trace = go.Scatter(x=rsi.index, y=rsi)
fig = go.Figure(data=[rsi_trace])
fig.update_layout(title="RSI")
fig.show()

In [None]:
# Percentile rank of each RSI value among all values seen up to that point.
rsi_rank = rsi.expanding().rank(pct=True)
fig = go.Figure(data=[go.Scatter(x=rsi.index, y=rsi_rank)])
fig.update_layout(title="RSI Expanding Rank")
fig.show()

## Rate of Change

In [None]:
# Rate of change of the closing prices with a 20-period setting.
close_prices = data["Close"]
roc = factors.rate_of_change(close_prices, period=20)

In [None]:
# Raw rate-of-change series plotted against its own index.
roc_trace = go.Scatter(x=roc.index, y=roc)
fig = go.Figure(data=[roc_trace])
fig.update_layout(title="Rate of Change")
fig.show()

In [None]:
# Percentile rank of each rate-of-change value among all values seen so far.
roc_rank = roc.expanding().rank(pct=True)
fig = go.Figure(data=[go.Scatter(x=roc.index, y=roc_rank)])
fig.update_layout(title="Rate of Change Expanding Rank")
fig.show()

## Training Dataset

In [None]:
# Location of the training parquet file, relative to the working directory.
training_path_parts = ("data", "markets", "training", "training.pq")
training_data_path = os.path.join(*training_path_parts)

In [None]:
# Read the whole training parquet file (all columns) into a pandas object.
training_table = pq.read_table(training_data_path)
training_data = training_table.to_pandas()

In [None]:
# Print a summary of the training frame: columns, dtypes, non-null counts
# and memory usage.
training_data.info()

In [None]:
# Preview the first five rows (the default row count, spelled out).
training_data.head(n=5)