# Comparison of Polygon and IB data

In [None]:
import pandas as pd
import plotly.graph_objects as go

from nautilus_trader.persistence.catalog import ParquetDataCatalog
from plotly.subplots import make_subplots

from src.utils.time import convert_utc_to_ny

In [None]:
# Open the on-disk Parquet catalog that both bar queries below read from.
catalog = ParquetDataCatalog(
    path="/home/pjpr/projects/wee_hedgy_thing/quiescence/catalog",
)

# MSFT 1-minute bars stored under the IB bar type.
ib_bar_type = "MSFT.IB-1-MINUTE-LAST-EXTERNAL"
bars_ib = catalog.bars(bar_types=[ib_bar_type])

# MSFT 1-minute bars stored under the Polygon bar type.
polygon_bar_type = "MSFT.POLYGON-1-MINUTE-LAST-EXTERNAL"
bars_polygon = catalog.bars(bar_types=[polygon_bar_type])

In [None]:
def unwrap_price(x):
    """Convert a price-like value to a plain ``float``.

    ``None`` is passed through unchanged. Objects exposing ``as_decimal()``
    are converted via that method; anything else (plain numbers, NaN,
    numeric strings) is handed directly to ``float()``.
    """
    if x is None:
        return None
    try:
        return float(x.as_decimal())
    except AttributeError:
        # No usable as_decimal(): treat x as an ordinary numeric value.
        return float(x)
    
def unwrap_quantity(x):
    """Convert a quantity-like value to a plain ``float``.

    ``None`` is passed through unchanged. Objects exposing ``as_decimal()``
    are converted via that method; anything else is handed directly to
    ``float()``.
    """
    if x is None:
        return None
    try:
        return float(x.as_decimal())
    except AttributeError:
        # No usable as_decimal(): treat x as an ordinary numeric value.
        return float(x)

In [None]:
# Flatten each IB bar into a plain dict of floats plus its two timestamps.
# Per the original note: ts_init is the time the bar closes, which is how
# Nautilus Trader expects it.
bar_data = [
    {
        "open": bar.open.as_double(),
        "high": bar.high.as_double(),
        "low": bar.low.as_double(),
        "close": bar.close.as_double(),
        "volume": bar.volume.as_double(),
        "ts_event": bar.ts_event,
        "ts_init": bar.ts_init,
    }
    for bar in bars_ib
]

# One row per bar.
df_bars_ib = pd.DataFrame(bar_data)

# ts_init is scaled down by 10**9 before conversion
# (presumably nanoseconds -> seconds; confirm against convert_utc_to_ny).
df_bars_ib["timestamp"] = df_bars_ib["ts_init"].apply(
    lambda ts_init: convert_utc_to_ny(ts_init / 10**9)
)
df_bars_ib = df_bars_ib.set_index("timestamp")
df_bars_ib

In [None]:
# Flatten each Polygon bar into a plain dict of floats plus its two timestamps.
# Per the original note: ts_init is the time the bar closes, which is how
# Nautilus Trader expects it.
bar_data = [
    {
        "open": bar.open.as_double(),
        "high": bar.high.as_double(),
        "low": bar.low.as_double(),
        "close": bar.close.as_double(),
        "volume": bar.volume.as_double(),
        "ts_event": bar.ts_event,
        "ts_init": bar.ts_init,
    }
    for bar in bars_polygon
]

# One row per bar.
df_bars_polygon = pd.DataFrame(bar_data)

# ts_init is scaled down by 10**9 before conversion
# (presumably nanoseconds -> seconds; confirm against convert_utc_to_ny).
df_bars_polygon["timestamp"] = df_bars_polygon["ts_init"].apply(
    lambda ts_init: convert_utc_to_ny(ts_init / 10**9)
)
df_bars_polygon = df_bars_polygon.set_index("timestamp")
df_bars_polygon

In [None]:
# Uncomment to restrict both frames to the 09:30-16:00 window before plotting.
# df_bars_ib = df_bars_ib.between_time("09:30", "16:00")
# df_bars_polygon = df_bars_polygon.between_time("09:30", "16:00")

# Three stacked panels sharing one x-axis: IBKR opens, Polygon opens, and
# their difference. One title is supplied per row so every panel is labelled.
diff_label = "Difference (IBKR - Polygon)"
fig = make_subplots(
    rows=3,
    cols=1,
    shared_xaxes=True,
    subplot_titles=("IBKR Data", "Polygon Data", diff_label),
)

fig.add_trace(
    go.Scatter(
        x=df_bars_ib.index,
        y=df_bars_ib["open"],
        name="IBKR Open",
        line=dict(color="blue"),
    ),
    row=1,
    col=1,
)
fig.add_trace(
    go.Scatter(
        x=df_bars_polygon.index,
        y=df_bars_polygon["open"],
        name="Polygon Open",
        line=dict(color="red"),
    ),
    row=2,
    col=1,
)

# Third row: difference between the two open prices. The subtraction aligns
# the two series on their timestamp index, so a timestamp present in only one
# source yields NaN (a gap in the line).
df_diff = df_bars_ib["open"] - df_bars_polygon["open"]
fig.add_trace(
    go.Scatter(
        x=df_diff.index,
        y=df_diff,
        name=diff_label,
        line=dict(color="green"),
    ),
    row=3,
    col=1,
)

fig.update_layout(height=600, width=800, title_text="IBKR vs Polygon Open Prices")
fig.show()