# Exploratory analysis of limit order book (LOB) data

In this notebook I explore the datasets and plot some of the data.

In [None]:
# TODO:
# Plot the order book heatmap
# Auto-correlation analysis?

In [None]:
import os
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import polars as pl

from data.utils import get_list_of_dates_between, set_plot_style, ensure_dir_exists

In [None]:
# Enable polars' global string cache and apply the project's plot style.
# NOTE(review): newer polars releases deprecate passing an argument to
# enable_string_cache — confirm against the installed polars version.
pl.enable_string_cache(True)
# Project helper from data.utils; presumably configures matplotlib defaults — TODO confirm
set_plot_style()

In [None]:
# Set to True to write the figures produced below to FIGURES_PATH as PDF files
save_fig = False

# Output directory for saved figures. The original machine-specific location
# stays the default so existing runs are unaffected; set the
# THESIS_FIGURES_PATH environment variable to redirect the output elsewhere
# (e.g. when running the notebook on another machine).
FIGURES_PATH = os.environ.get(
    "THESIS_FIGURES_PATH",
    "/home/juraj/Projects/thesis-market-making/thesis/images",
)
# Project helper from data.utils; presumably creates the directory when it is
# missing — TODO confirm
ensure_dir_exists(FIGURES_PATH)

In [None]:
# Define custom colors
color_green = "#13961a"  # used for the bid-side volume plots below
color_red = "#eb5c14"  # used for the ask-side volume plots below

In [None]:
# Select the venue and instrument to analyse. `exchange` and `symbol` are used
# to build the parquet file names and the names of the saved figures.
# Keep exactly one `exchange` line uncommented.

# BTC
# exchange = "BINANCE"
# symbol = "BTC-USDT"

# SOL
# exchange = "BINANCE"
# exchange = "OKX"
# exchange = "GATEIO"
exchange = "BIT.COM"
symbol = "SOL-USDT"

### Load all the data

In [None]:
# Set parameters
# Date range handed to get_list_of_dates_between (whether end_date is
# inclusive depends on that helper — TODO confirm)
start_date = datetime.datetime(2023, 9, 1)
end_date = datetime.datetime(2023, 9, 13)
# Parquet files are read from ./datasets relative to the current working directory
path = os.path.join(os.getcwd(), "datasets")
# Selects the "order_book_second" file prefix in the single-day loader further down
second = False

In [None]:
# Get the list of dates
dates = get_list_of_dates_between(start_date, end_date)

In [None]:
# Load the data: one parquet file per day, combined into a single frame.
prefix = "order_book"
daily_frames = []
for date in dates:
    file_name = f"{exchange}_{symbol}_{prefix}_{date.strftime('%Y_%m_%d')}.parquet"
    df_single = pd.read_parquet(os.path.join(path, file_name))
    daily_frames.append(df_single)

# Concatenate once instead of growing the frame inside the loop (which copies
# the accumulated data on every iteration). This also no longer relies on the
# first element of `dates` comparing equal to `start_date` to initialise `df`.
df = pd.concat(daily_frames).sort_index()

In [None]:
df

### Analysis

In [None]:
# Mid-price: average of the level-0 bid and ask prices
df["mid_price"] = df["bid_0_price"].add(df["ask_0_price"]).div(2)

# Spread (ask minus bid) on each of the first three levels
for level in range(3):
    ask_col = f"ask_{level}_price"
    bid_col = f"bid_{level}_price"
    df[f"spread_{level}"] = df[ask_col].sub(df[bid_col])

### Mid-price and returns analysis

In [None]:
# Mid-price over the whole sample, with one major x tick per day
fig, ax = plt.subplots(figsize=(12, 4.5))
ax.plot(df["mid_price"])
ax.set_xlabel("Time")
ax.set_ylabel("Price (USDT)")
ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))
fig.tight_layout()
# plt.show()
if save_fig:
    fig.savefig(os.path.join(FIGURES_PATH, f"{symbol}_mid_price.pdf"))

In [None]:
df["mid_price"].describe()

In [None]:
# plt.figure(figsize=(12, 4))
# plt.plot(df["mid_price"].pct_change())
# plt.xlabel("Time")
# plt.ylabel("Returns")
# plt.tight_layout()
# plt.show()

In [None]:
# # Merge the above two plots into one figure with two subplots
# fig, axs = plt.subplots(2, 1, figsize=(12, 8), sharex=False)
# axs[0].plot(df["mid_price"])
# axs[0].set_ylabel("Price")
# axs[1].plot(df["mid_price"].diff())
# axs[1].set_ylabel("Returns")
# plt.xlabel("Time")
# plt.tight_layout()
# plt.show()

# # Save the figure
# if save_fig:
#     fig.savefig(f"{FIGURES_PATH}/mid_price.pdf")

In [None]:
# # Plot the histogram of the mid-price returns
# fig = plt.figure(figsize=(12, 4))
# plt.hist(df["mid_price"].diff(), bins=100, edgecolor="black", log=False)
# plt.xlabel("Returns")
# plt.ylabel("Frequency (log scale)")
# plt.tight_layout()
# plt.show()

# # Save the figure
# if save_fig:
#     fig.savefig(f"{FIGURES_PATH}/mid_price_returns_hist.pdf")

In [None]:
# # Print the summary statistics of the mid-price returns
# print(df["mid_price"].diff().describe())

### Spread analysis

In [None]:
# Three stacked panels, one per book level, each with its own y-scale
fig, axs = plt.subplots(3, 1, figsize=(12, 12), sharey=False)

# Spread time series for each level
for level, ax in enumerate(axs):
    ax.plot(df[f"spread_{level}"])
    ax.set(xlabel="Time", ylabel=f"Spread on level {level + 1}")

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_spreads.pdf")

In [None]:
# Summary statistics of the spread, level by level (followed by a blank line)
for level in range(3):
    spread_stats = df[f"spread_{level}"].describe()
    print(f"Spread on level {level + 1}", spread_stats, "", sep="\n")

### Prices on different levels

In [None]:
# # Visualize bid price for each level
# for level in range(20):
#     fig = plt.figure(figsize=(12, 4))
#     plt.plot(df[f"bid_{level}_price"])
#     plt.xlabel("Time")
#     plt.ylabel(f"Bid price for level {level}")
#     plt.tight_layout()
#     plt.show()

In [None]:
# # Visualize ask price for each level
# for level in range(20):
#     fig = plt.figure(figsize=(12, 4))
#     plt.plot(df[f"ask_{level}_price"])
#     plt.xlabel("Time")
#     plt.ylabel(f"Ask price for level {level}")
#     plt.tight_layout()
#     plt.show()

### Best bid and ask volume analysis

In [None]:
# # Plot the best bid volumes
# plt.figure(figsize=(12, 4))
# plt.plot(df["bid_0_size"],  color=color_green)
# plt.xlabel("Time")
# plt.ylabel("Volume")
# plt.tight_layout()
# plt.show()

In [None]:
# df["bid_0_size"].describe()

In [None]:
# # Plot the best ask volumes
# plt.figure(figsize=(12, 4))
# plt.plot(df["ask_0_size"], color=color_red)
# plt.xlabel("Time")
# plt.ylabel("Volume")
# plt.tight_layout()
# plt.show()

In [None]:
# df["ask_0_size"].describe()

In [None]:
# Three stacked panels (shared y-axis) with the bid volume on the first three levels
fig, axs = plt.subplots(3, 1, figsize=(12, 12), sharey=True)
for level, ax in enumerate(axs):
    ax.plot(df[f"bid_{level}_size"], color=color_green)
    ax.set(ylabel=f"Level {level + 1} volume", xlabel="Time")

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_bid_volumes.pdf")

In [None]:
# Show descriptive statistics in non-scientific notation. This is a global
# pandas display option, so it is set once here instead of on every loop
# iteration; it stays in effect for the cells that follow.
pd.options.display.float_format = '{:.3f}'.format

for i in range(3):
    # Levels are reported 1-based, matching the axis labels of the volume plots
    print(f"Level {i + 1} volume statistics")
    print(df[f"bid_{i}_size"].describe())
    print()

In [None]:
# Log-scale histograms of the bid volume on the first three levels, one panel each
fig, axs = plt.subplots(3, 1, figsize=(12, 12))
for level, ax in enumerate(axs):
    ax.hist(
        df[f"bid_{level}_size"],
        bins=100,
        edgecolor="black",
        log=True,
        color=color_green,
        linewidth=0.3,
    )
    ax.set(ylabel=f"Level {level + 1} volume", xlabel="Volume")

# Largest bid volume observed on any of the three levels
max_volumes = [df[f"bid_{level}_size"].max() for level in range(3)]
max_volume = max(max_volumes)

# Give every panel the same x-range (each panel still computes its own 100 bins)
for ax in axs:
    ax.set_xlim(0, max_volume)

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_bid_volumes_hist.pdf")

In [None]:
# Three stacked panels with the ask volume on the first three levels
fig, axs = plt.subplots(3, 1, figsize=(12, 12))
for level, ax in enumerate(axs):
    ax.plot(df[f"ask_{level}_size"], color=color_red)
    ax.set(ylabel=f"Level {level + 1} volume", xlabel="Time")

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_ask_volumes.pdf")

In [None]:
# Summary statistics of the ask volume on the first three levels
for i in range(3):
    # Levels are reported 1-based, matching the axis labels of the volume plots
    print(f"Level {i + 1} volume statistics")
    print(df[f"ask_{i}_size"].describe())
    print()

In [None]:
# Log-scale histograms (shared y-axis) of the ask volume on the first three levels
fig, axs = plt.subplots(3, 1, figsize=(12, 12), sharey=True)
for level, ax in enumerate(axs):
    ax.hist(
        df[f"ask_{level}_size"],
        bins=100,
        edgecolor="black",
        log=True,
        color=color_red,
    )
    ax.set(ylabel=f"Level {level + 1} volume", xlabel="Volume")

# Largest ask volume observed on any of the three levels
max_volumes = [df[f"ask_{level}_size"].max() for level in range(3)]
max_volume = max(max_volumes)

# Give every panel the same x-range (each panel still computes its own 100 bins)
for ax in axs:
    ax.set_xlim(0, max_volume)

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_ask_volumes_hist.pdf")

### Order book imbalance analysis

In [None]:
# Total volume on each side of the book, summed over the 20 levels.
# DataFrame.sum skips NaN by default, so a missing level contributes zero,
# just as the previous explicit fillna(0) accumulation did.
bid_size_cols = [f"bid_{i}_size" for i in range(20)]
ask_size_cols = [f"ask_{i}_size" for i in range(20)]
df["bid_total_volume"] = df[bid_size_cols].sum(axis=1)
df["ask_total_volume"] = df[ask_size_cols].sum(axis=1)

# Volume imbalance: (bid - ask) / (bid + ask).
# Rows where both totals are zero evaluate to 0 / 0 and therefore become NaN.
df["imbalance"] = (df["bid_total_volume"] - df["ask_total_volume"]) / (df["bid_total_volume"] + df["ask_total_volume"])

In [None]:
df["imbalance"]

In [None]:
# Time window shown in the imbalance plot
ts_start = datetime.datetime(2023, 9, 1, 9, 0, 0)
ts_end = datetime.datetime(2023, 9, 1, 12, 0, 0)

fig, ax = plt.subplots(figsize=(12, 4))
imbalance_window = df["imbalance"][ts_start:ts_end]
ax.plot(imbalance_window, color="black")
# Tick labels carry only hours and minutes
ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
ax.set_xlabel("Time")
ax.set_ylabel("Volume imbalance")
plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_volume_imbalance.pdf")

In [None]:
# Per-level imbalance: (bid size - ask size) / (bid size + ask size) for each of the 20 levels
for level in range(20):
    bid_size = df[f"bid_{level}_size"]
    ask_size = df[f"ask_{level}_size"]
    df[f"imbalance_{level}"] = (bid_size - ask_size) / (bid_size + ask_size)

In [None]:
# Imbalance signal on the first three levels, restricted to a time window
ts_start = datetime.datetime(2023, 9, 1, 9, 0, 0)
ts_end = datetime.datetime(2023, 9, 1, 12, 0, 0)

fig, axs = plt.subplots(3, 1, figsize=(12, 12), sharey=True)

for level, ax in enumerate(axs):
    level_window = df[f"imbalance_{level}"][ts_start:ts_end]
    ax.plot(level_window, color="black")
    ax.set_ylabel(f"Level {level + 1} imbalance")
    # Tick labels carry only hours and minutes
    ax.xaxis.set_major_formatter(mdates.DateFormatter("%H:%M"))
    ax.set_xlabel("Time")

plt.tight_layout()
plt.show()

# Optionally write the figure to disk
if save_fig:
    fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_level_imbalance.pdf")

### Orderbook snapshots

In [None]:
# Get the timestamps
all_timestamps = df.index

In [None]:
# Find the first index that is larger than the given timestamp
def find_first_index_larger_than(timestamp, timestamps=None):
    """Return the position of the first entry strictly greater than ``timestamp``.

    Args:
        timestamp: Value to compare against (e.g. a ``datetime.datetime``).
        timestamps: Optional iterable that is scanned in order. When omitted,
            the notebook-level ``all_timestamps`` is used, which keeps existing
            single-argument calls working unchanged.

    Returns:
        The zero-based position of the first element ``ts`` for which
        ``ts > timestamp`` holds, or ``None`` when no element is larger.
        Callers must handle ``None`` before using the result as an index.
    """
    if timestamps is None:
        timestamps = all_timestamps
    # Linear scan in iteration order; stops at the first match
    return next((i for i, ts in enumerate(timestamps) if ts > timestamp), None)

In [None]:
index = find_first_index_larger_than(datetime.datetime(2023, 9, 9, 12, 4, 46))

In [None]:
# Number of price levels drawn on each side of the book
depth = 15 if exchange == "BINANCE" else 6

# `index` is None when no snapshot lies after the requested time; stop with a
# clear message instead of failing on `None + 1` below.
if index is None:
    raise ValueError("No order book snapshot found after the requested timestamp")

# index_start = 12450
# index_end = index_start + 1
index_start = index
index_end = index_start + 1

# Column names per side; they do not depend on the snapshot, so build them once
bid_prices_labels = [f"bid_{level}_price" for level in range(depth)]
ask_prices_labels = [f"ask_{level}_price" for level in range(depth)]
bid_sizes_labels = [f"bid_{level}_size" for level in range(depth)]
ask_sizes_labels = [f"ask_{level}_size" for level in range(depth)]

for i in range(index_start, index_end):
    ts = all_timestamps[i]

    # Process for one timestamp.
    # Select the snapshot by position: df.loc[ts] returns several rows when a
    # timestamp occurs more than once, which breaks the bar plot below.
    row = df.iloc[i]
    # Cast to float explicitly, since a row spanning columns of different
    # dtypes may come back object-typed.
    bid_prices = row[bid_prices_labels].to_numpy(dtype=float)
    ask_prices = row[ask_prices_labels].to_numpy(dtype=float)
    # Cumulative volume moving away from the best price on each side
    bid_volumes = row[bid_sizes_labels].to_numpy(dtype=float).cumsum()
    ask_volumes = row[ask_sizes_labels].to_numpy(dtype=float).cumsum()

    # Visualization for trading rules
    # bid_prices = np.insert(bid_prices, 1, 19.54)
    # bid_volumes = np.insert(bid_volumes, 1, 0)
    # bid_volumes[0] = 50
    # ask_volumes[0] = 50
    # print(bid_prices)
    # print(bid_volumes)

    # X-axis: `depth` bid bars, `spread_space` empty slots, then `depth` ask bars
    spread_space = 1
    x_axis = np.arange(0, 2 * depth + spread_space, 1)
    # Visualization for trading rules
    # spread_space = 2 # Number of ticks to leave in the middle
    # x_axis = np.arange(0, 2 * depth + spread_space + 1, 1)

    fig = plt.figure(figsize=(12, 5))

    plt.bar(
        # x_axis[:depth + 1], # Visualization for trading rules
        x_axis[:depth],
        bid_volumes[::-1],  # reversed so the best bid sits next to the spread
        label="Bid",
        color="#9ED166",
        width=1,
        edgecolor="black",
        linewidth=1.3,
    )
    plt.bar(
        # x_axis[depth + 1 + spread_space:],  # Visualization for trading rules
        x_axis[depth + spread_space:],
        ask_volumes,
        label="Ask",
        color="#EB735F",
        width=1,
        edgecolor="black",
        linewidth=1.3,
    )

    # Tick labels: bid prices (reversed), blank label(s) for the spread gap,
    # then ask prices. They are built as strings so the blank entry can be
    # inserted regardless of the dtype of the price arrays.
    x_ticks = (
        [str(price) for price in bid_prices[::-1]]
        + [""] * spread_space
        + [str(price) for price in ask_prices]
    )

    # Visualization for trading rules
    # x_ticks = np.insert(x_ticks, depth + 1, "19.56")
    # x_ticks = np.insert(x_ticks, depth + 2, "19.57")
    # print(x_ticks)

    plt.xticks(x_axis, x_ticks, rotation=45, size=12)

    # plt.title(f"Order book at {ts.strftime('%Y-%m-%d %H:%M:%S')}")
    plt.xlabel("Price")
    plt.ylabel("Volume")
    plt.tight_layout()
    plt.show()

    # Save the figure
    if save_fig:
        ts_str = ts.strftime("%Y_%m_%d_%H_%M_%S")
        fig.savefig(f"{FIGURES_PATH}/{exchange}_{symbol}_lob_{ts_str}.pdf")

### Orderbook heatmap

In [None]:
df.head()

In [None]:
# Price and size column names for all 20 levels: bid side first, then ask side
col_prices = [f"{side}_{i}_price" for side in ("bid", "ask") for i in range(20)]
col_volumes = [f"{side}_{i}_size" for side in ("bid", "ask") for i in range(20)]

In [None]:
row = df.iloc[0]

In [None]:
# Plot the limit order book heatmap
# First step: draw every quoted price of the first snapshot at its timestamp.
plt.figure(figsize=(10, 4))
row = df.iloc[0]
ts = row["received_time"]
# Cast to float explicitly, since a row spanning columns of different dtypes
# may come back object-typed.
prices = row[col_prices].to_numpy(dtype=float)
# Not drawn yet; kept for colouring the points by volume later on
volumes = row[col_volumes].to_numpy(dtype=float)

# scatter requires x and y of equal length, so repeat the snapshot time once
# per price instead of passing a single scalar x.
plt.scatter([ts] * len(prices), prices, c="black")


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Toy example: scatter plot where marker size and colour both encode volume.

# Sample data
data = {
    'Timestamp': pd.to_datetime(['2023-01-01 10:00:00', '2023-01-01 10:01:00', '2023-01-01 10:02:00']),
    'Price': [100, 101, 99],
    'Volume': [50, 30, 20],
}

# Stored under its own name so the order book frame `df` loaded earlier is not
# overwritten; later cells still read columns such as df["received_time"].
sample_df = pd.DataFrame(data)

# Create a scatter plot
plt.figure(figsize=(10, 6))

# One vectorised call. The colour is mapped from the volume (as the original
# comment intended) instead of being drawn at random, which also avoids
# passing an ambiguous length-3 array as `c` for a single point.
plt.scatter(
    sample_df['Timestamp'],
    sample_df['Price'],
    s=sample_df['Volume'],
    c=sample_df['Volume'],
    cmap='viridis',
)

# Set axis labels and title
plt.xlabel('Timestamp')
plt.ylabel('Price')
plt.title('Limit Order Book')

# Show the plot
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Toy example: four snapshots of a ten-level book, marker size encodes volume.

# Sample data
timestamps = ['10:00', '10:01', '10:02', '10:03']
bid_prices = [[100, 99, 98, 97, 96, 95, 94, 93, 92, 91],
              [101, 100, 99, 98, 97, 96, 95, 94, 93, 92],
              [102, 101, 100, 99, 98, 97, 96, 95, 94, 93],
              [103, 102, 101, 100, 99, 98, 97, 96, 95, 94]]
ask_prices = [[105, 106, 107, 108, 109, 110, 111, 112, 113, 114],
              [104, 105, 106, 107, 108, 109, 110, 111, 112, 113],
              [103, 104, 105, 106, 107, 108, 109, 110, 111, 112],
              [102, 103, 104, 105, 106, 107, 108, 109, 110, 111]]
bid_volumes = [[10, 15, 8, 5, 12, 7, 10, 6, 8, 14],
               [8, 10, 12, 15, 7, 9, 11, 13, 6, 10],
               [14, 7, 10, 12, 8, 15, 9, 11, 13, 6],
               [9, 12, 8, 14, 10, 11, 7, 13, 6, 15]]
ask_volumes = [[5, 10, 7, 12, 9, 14, 8, 11, 6, 13],
               [12, 8, 15, 7, 11, 10, 9, 13, 6, 14],
               [10, 13, 6, 11, 14, 8, 9, 7, 12, 15],
               [11, 7, 13, 10, 9, 12, 8, 14, 6, 15]]

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))

for i in range(len(timestamps)):
    # Only the first snapshot carries legend labels; labels that start with an
    # underscore are ignored by the automatic legend, so each side is listed
    # once instead of once per snapshot.
    bid_label = 'Bid' if i == 0 else '_nolegend_'
    ask_label = 'Ask' if i == 0 else '_nolegend_'

    # Plotting bid prices and volumes
    ax.scatter([i]*len(bid_prices[i]), bid_prices[i], s=bid_volumes[i], c='b', label=bid_label, alpha=0.5)

    # Plotting ask prices and volumes
    ax.scatter([i]*len(ask_prices[i]), ask_prices[i], s=ask_volumes[i], c='r', label=ask_label, alpha=0.5)

ax.set_xticks(range(len(timestamps)))
ax.set_xticklabels(timestamps)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Price')
ax.legend()
plt.title('Limit Order Book Visualization')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Toy example: four snapshots of a ten-level book, marker colour encodes volume.

# Sample data
timestamps = ['10:00', '10:01', '10:02', '10:03']
bid_prices = [[100, 99, 98, 97, 96, 95, 94, 93, 92, 91],
              [101, 100, 99, 98, 97, 96, 95, 94, 93, 92],
              [102, 101, 100, 99, 98, 97, 96, 95, 94, 93],
              [103, 102, 101, 100, 99, 98, 97, 96, 95, 94]]
ask_prices = [[105, 106, 107, 108, 109, 110, 111, 112, 113, 114],
              [104, 105, 106, 107, 108, 109, 110, 111, 112, 113],
              [103, 104, 105, 106, 107, 108, 109, 110, 111, 112],
              [102, 103, 104, 105, 106, 107, 108, 109, 110, 111]]
bid_volumes = [[10, 15, 8, 5, 12, 7, 10, 6, 8, 14],
               [8, 10, 12, 15, 7, 9, 11, 13, 6, 10],
               [14, 7, 10, 12, 8, 15, 9, 11, 13, 6],
               [9, 12, 8, 14, 10, 11, 7, 13, 6, 15]]
ask_volumes = [[5, 10, 7, 12, 9, 14, 8, 11, 6, 13],
               [12, 8, 15, 7, 11, 10, 9, 13, 6, 14],
               [10, 13, 6, 11, 14, 8, 9, 7, 12, 15],
               [11, 7, 13, 10, 9, 12, 8, 14, 6, 15]]

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))

for i in range(len(timestamps)):
    # Set color based on volume using the viridis colormap; each snapshot is
    # normalised by its own largest volume on the respective side.
    bid_colors = plt.cm.viridis(np.array(bid_volumes[i]) / max(bid_volumes[i]))
    ask_colors = plt.cm.viridis(np.array(ask_volumes[i]) / max(ask_volumes[i]))

    # Only the first snapshot carries legend labels; labels that start with an
    # underscore are ignored by the automatic legend, so each side is listed
    # once instead of once per snapshot.
    bid_label = 'Bid' if i == 0 else '_nolegend_'
    ask_label = 'Ask' if i == 0 else '_nolegend_'

    # Plotting bid prices and volumes with color
    ax.scatter([i]*len(bid_prices[i]), bid_prices[i], c=bid_colors, label=bid_label, alpha=0.8)

    # Plotting ask prices and volumes with color
    ax.scatter([i]*len(ask_prices[i]), ask_prices[i], c=ask_colors, label=ask_label, alpha=0.8)

ax.set_xticks(range(len(timestamps)))
ax.set_xticklabels(timestamps)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Price')
ax.legend()
plt.title('Limit Order Book Visualization with Volume-based Color')
plt.show()


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Order book heatmap on the loaded data: one column of points per snapshot,
# coloured by the volume quoted at each price level.
# (The toy sample lists that used to open this cell were overwritten before
# use and have been dropped.)
n_levels = 20     # levels per side of the book
n_snapshots = 10  # number of leading snapshots to draw

# Separate column lists per side, so bids and asks are no longer both read
# from the combined bid+ask column lists.
bid_price_cols = [f"bid_{level}_price" for level in range(n_levels)]
ask_price_cols = [f"ask_{level}_price" for level in range(n_levels)]
bid_size_cols = [f"bid_{level}_size" for level in range(n_levels)]
ask_size_cols = [f"ask_{level}_size" for level in range(n_levels)]

# Plotting
fig, ax = plt.subplots(figsize=(10, 6))

timestamps = df["received_time"][:n_snapshots]

# Normalise colours by the largest volume on any plotted level. Using level 0
# only would let deeper levels exceed the colour scale.
max_volume = df[bid_size_cols + ask_size_cols].max().max()

for i in range(len(timestamps)):
    row = df.iloc[i]
    # Cast to float explicitly, since a row spanning columns of different
    # dtypes may come back object-typed and the colormap expects floats.
    bid_prices = row[bid_price_cols].to_numpy(dtype=float)
    bid_volumes = row[bid_size_cols].to_numpy(dtype=float)
    ask_prices = row[ask_price_cols].to_numpy(dtype=float)
    ask_volumes = row[ask_size_cols].to_numpy(dtype=float)

    # Set color based on volume using the viridis colormap
    bid_colors = plt.cm.viridis(bid_volumes / max_volume)
    ask_colors = plt.cm.viridis(ask_volumes / max_volume)

    # Plotting bid prices and volumes with color
    ax.scatter(np.full(len(bid_prices), i), bid_prices, c=bid_colors, label='Bid', alpha=0.8)

    # Plotting ask prices and volumes with color
    ax.scatter(np.full(len(ask_prices), i), ask_prices, c=ask_colors, label='Ask', alpha=0.8)

ax.set_xticks(range(len(timestamps)))
# ax.set_xticklabels(timestamps)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Price')
# ax.legend()
plt.title('Limit Order Book Visualization with Volume-based Color')
plt.show()


In [None]:
bid_colors

### Generate random sequence number for each snapshot

In [None]:
# Re-select the venue and instrument for this section. This rebinds the
# `exchange` / `symbol` values chosen near the top of the notebook.
# Keep exactly one `exchange` line uncommented.

# BTC
# exchange = "BINANCE"
# symbol = "BTC-USDT"

# SOL
# exchange = "BINANCE"
exchange = "OKX"
# exchange = "GATEIO"
# exchange = "BIT.COM"
symbol = "SOL-USDT"

In [None]:
# Set parameters
start_date = datetime.datetime(2023, 9, 1)
end_date = datetime.datetime(2023, 9, 13)
path = os.path.join(os.getcwd(), "datasets")
second = False

In [None]:
# Get the list of dates
dates = get_list_of_dates_between(start_date, end_date)

In [None]:
# # Load the data
# prefix = "order_book"
# for date in dates:
#     file_name = f"{exchange}_{symbol}_{prefix}_{date.strftime('%Y_%m_%d')}.parquet"
#     df = pd.read_parquet(os.path.join(path, file_name))
    
#     # Generate random sequence numbers
#     df["sequence_number"] = np.random.randint(10000000, 100000000, df.shape[0])
#     df.to_parquet(os.path.join(path, file_name))
    

### Load a single day of data

In [None]:
# Set parameters
date = datetime.datetime(2023, 9, 1)
path = os.path.join(os.getcwd(), "datasets")
second = False

In [None]:
# Choose the file prefix according to the `second` flag, then read that day's
# parquet file with polars
if second:
    prefix = "order_book_second"
else:
    prefix = "order_book"
date_tag = date.strftime('%Y_%m_%d')
file_name = f"{exchange}_{symbol}_{prefix}_{date_tag}.parquet"
df = pl.read_parquet(os.path.join(path, file_name))

In [None]:
# Check df length
print(f"Number of rows: {len(df)}")

In [None]:
df

In [None]:
# df.head(10)