This is a sample notebook, showing how you can create charts from Data Lake queries.

It creates a chart of the min and max prices of AAPL trades for each hour on a given day.

In [None]:
import datetime

import pandas as pd
import maystreet_data as md


def fetch_min_max(trade_date="2022-01-19", product="AAPL"):
    """
    Query the Data Lake for min/max trade prices grouped by hour of the day.

    Args:
        trade_date: Day to query, as an ISO ``YYYY-MM-DD`` string; it is
            matched against the ``dt`` column. Defaults to ``"2022-01-19"``.
        product: Symbol to query; it is matched against the ``product``
            column. Defaults to ``"AAPL"``.

    Returns:
        A Pandas dataframe with hour_ts, min_price and max_price as returned
        by the query, plus timestamp: hour_ts converted to a naive Python
        datetime in the local timezone of the machine running the notebook.
        The dataframe is empty (but has these four columns) when the query
        returns no rows.

    Raises:
        ValueError: If trade_date is not a valid ISO date, or if product is
            empty or contains a quote, backslash or semicolon.
    """

    # Both values are interpolated into the SQL text below, so validate them
    # first. Round-tripping the date through datetime.date guarantees that
    # only a normalised YYYY-MM-DD string reaches the query.
    day = datetime.date.fromisoformat(trade_date).isoformat()

    if not product or any(ch in product for ch in "'\"\\;"):
        raise ValueError(f"invalid product symbol: {product!r}")

    query = f"""
    SELECT
        DATE_TRUNC('hour', TO_TIMESTAMP(ExchangeTimestamp / 1000000000)) AS hour_ts,
        MIN(price) as min_price,
        MAX(price) as max_price
    FROM
        "prod_lake"."p_mst_data_lake".mt_trade
    WHERE
        dt = '{day}'
        AND product = '{product}'
    GROUP BY 1
    ORDER BY 1
    """

    data = pd.DataFrame(md.query(md.DataSource.DATA_LAKE, query))

    # With no rows the dataframe has no columns at all, and the lookup of
    # "hour_ts" below would raise KeyError; return a well-formed empty frame.
    if data.empty:
        return pd.DataFrame(
            columns=["hour_ts", "min_price", "max_price", "timestamp"]
        )

    # hour_ts is treated as epoch milliseconds; fromtimestamp() expects seconds.
    data["timestamp"] = [
        datetime.datetime.fromtimestamp(millis / 1000) for millis in data["hour_ts"]
    ]

    return data


# Run the query once at cell level; the plotting cell reads min_max_data.
min_max_data = fetch_min_max()


In [None]:
import datetime

import matplotlib.pyplot as plt
import matplotlib.dates as mdates


# Draw the hourly price ranges fetched in the previous cell as floating bars.
# Nothing below is specific to the Data Lake: it is plain matplotlib.

plt.rcParams["figure.figsize"] = [10, 5]

fig, ax = plt.subplots()

# Force a white figure background so that the dark text stays readable
# when the notebook is displayed with a Dark theme.
fig.patch.set_facecolor((1, 1, 1))

ax.grid(True)

ax.set(
    title="AAPL min/max price per hour, 2022/01/19",
    ylabel="Price",
    xlabel="Hour",
)

# Leave one price unit of headroom below the lowest and above the highest price.
ax.set_ylim(
    bottom=min_max_data["min_price"].min() - 1,
    top=min_max_data["max_price"].max() + 1,
)

# Label the ticks on the x axis with the hour of the day only.
ax.xaxis.set_major_formatter(mdates.DateFormatter("%H"))

# One bar per hour, starting at the minimum price and reaching up to the
# maximum price (height = max - min).
ax.bar(
    min_max_data["timestamp"],
    min_max_data["max_price"] - min_max_data["min_price"],
    width=0.01,
    bottom=min_max_data["min_price"],
    zorder=2,  # presumably so the bars are drawn above the grid lines
)

plt.show()
