In [1]:
import os 
import sys 

# Put the directory two levels above the working directory on the import path
# (only once, so re-running the cell does not append duplicates).
# NOTE(review): presumably this is what makes the `utils_notebook` package
# imported further down resolvable - confirm against the repository layout.
cur_path = os.path.abspath("../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

from functools import cache 
import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.pagination import ShallowStrategy

from dotenv import load_dotenv


# Point this kernel at the beanstalk-dev subgraph endpoint. The assignment
# unconditionally overwrites any SUBGRAPH_URL already present in the environment.
# NOTE(review): presumably `load_subgraph` (imported below) reads this variable -
# confirm in utils_notebook.utils. `load_dotenv` is imported above but is not
# called anywhere in the visible cells.
os.environ['SUBGRAPH_URL'] = 'https://graph.node.bean.money/subgraphs/name/beanstalk-dev'


from utils_notebook.utils import ddf, load_subgraph, remove_prefix
from utils_notebook.vega import output_chart

In [2]:
# Shared handles used by every query cell below:
#   sg - the Subgrounds client (used for `sg.query_df`)
#   bs - the Subgraph object queries are built from (used as `bs.Query...`)
# The bare annotations only declare the expected types; `load_subgraph`
# (from utils_notebook.utils) returns the pair that is unpacked here.
sg: Subgrounds
bs: Subgraph
sg, bs = load_subgraph()

In [3]:
@cache
def query_marketplace_fills(*args):
    """Fetch the history of farmer's market order and listing fills.

    The function is memoised with ``functools.cache``. ``*args`` is not used
    in the body; it only forms the cache key, so calling with a different
    argument triggers a fresh query while repeating an argument returns the
    cached frame. The cached object is returned as-is (not copied), so
    callers that modify the result should work on a ``.copy()``.

    Returns:
        The DataFrame produced by ``sg.query_df`` for the ``podFills`` query
        after being passed through ``remove_prefix(..., "podFills_")``.
    """
    fills = bs.Query.podFills(first=10000)
    # `listing.status` and `order.status` were considered but are not requested.
    requested_fields = [
        fills.amount,
        fills.index,
        fills.start,
        fills.listing.pricePerPod,
        fills.order.pricePerPod,
        fills.transaction.timestamp,
    ]
    raw_fills = sg.query_df(requested_fields, pagination_strategy=ShallowStrategy)
    return remove_prefix(raw_fills, "podFills_")
    

In [74]:
# Tidy history of every marketplace listing and order fill.
# Built in a single chain from a copy of the cached query result, so the cell
# can be re-run without compounding the scaling.
df_fills = (
    query_marketplace_fills(1)
    .copy()
    .rename(columns={
        "listing_pricePerPod": "listing_price_per_pod",
        "order_pricePerPod": "order_price_per_pod",
    })
    .assign(
        # Raw quantities are scaled down by 1e6.
        # NOTE(review): presumably 6-decimal fixed-point values - confirm.
        amount=lambda d: d["amount"] / 10**6,
        index=lambda d: d["index"] / 10**6,
        start=lambda d: d["start"] / 10**6,
        place_in_line=lambda d: d["index"] + d["start"],
        # A missing listing/order price is treated as 0 before the two are
        # added, so the sum is whichever price is present.
        listing_price_per_pod=lambda d: d["listing_price_per_pod"].fillna(0) / 1e6,
        order_price_per_pod=lambda d: d["order_price_per_pod"].fillna(0) / 1e6,
        price_per_pod=lambda d: d["listing_price_per_pod"] + d["order_price_per_pod"],
        datetime=lambda d: pd.to_datetime(d["transaction_timestamp"], unit="s"),
        # Same timestamp truncated to the calendar day.
        date=lambda d: pd.to_datetime(d["datetime"].dt.date),
    )
    .loc[:, [
        "date", "datetime", "amount", "index",
        "start", "place_in_line", "price_per_pod",
    ]]
    .sort_values("datetime")
    .reset_index(drop=True)
)
df_fills.head()

Unnamed: 0,date,datetime,amount,index,start,place_in_line,price_per_pod
0,2022-02-05,2022-02-05 22:37:02,20000.0,222197200.0,0.0,222197200.0,0.2
1,2022-02-05,2022-02-05 23:06:54,35453.723781,387265600.0,0.0,387265600.0,0.05
2,2022-02-05,2022-02-05 23:12:59,10157.18984,250516000.0,0.0,250516000.0,0.1
3,2022-02-06,2022-02-06 00:03:45,20044.08096,387301100.0,0.0,387301100.0,0.05
4,2022-02-06,2022-02-06 01:01:35,995.305321,50199920.0,0.0,50199920.0,0.6


In [78]:
# https://observablehq.com/@mdeagen/vega-lite-table-using-text-marks
#
# Scrollable, sortable table of fills drawn with Vega-Lite text marks
# (technique from the link above). Each row gets one zero-based rank per
# sortable column; the radio buttons choose which rank is active and the
# slider chooses the first rank of the visible window of `nrows` rows.

from IPython.display import display, HTML

cols = ['date', 'amount', 'place_in_line', 'price_per_pod']
df = df_fills[cols].copy()
sort_cols = []
for c in cols: 
    scol = f"sort_{c}"
    df_sort = df.sort_values(c)
    # Zero-based position of each row when the table is ordered by column `c`.
    df.loc[df_sort.index, scol] = np.arange(len(df))
    sort_cols.append(scol) 

# Format for display only after the ranks are computed, so sorting used the
# real datetime / float values rather than their formatted versions.
df['date'] = df['date'].dt.strftime("%Y-%m-%d")
df['amount'] = df.amount.astype(int)
df['place_in_line'] = df.place_in_line.astype(int)

nrows = 10
radio = alt.binding_radio(name="Sort By", options=cols)
# Ranks run 0..len(df)-1, so the offset must start at 0; starting at 1 hid the
# first row of every sort order. The upper bound is clamped so the slider stays
# valid when there are fewer than `nrows` rows.
slider = alt.binding_range(min=0, max=max(len(df) - nrows, 0), step=1, name='Scroll Offset:')

select_scroll = alt.selection_single(
    name="scroller", fields=['offset'],
    bind=slider, init={'offset': 0}
)
select_radio = alt.selection_single(
    name="sortby", fields=["sort_col"], 
    bind=radio, init={"sort_col": "date"}
)

alt.Chart(df).add_selection(
    select_scroll, select_radio 
).transform_calculate(
    # Rank of the row under the currently selected sort column.
    sort_num=f"datum['sort_' + {select_radio.name}.sort_col]"
).transform_fold(
    # One (key, value) record per table cell.
    cols
).transform_filter(
    # Keep only the rows inside the visible window; `&&` is the logical AND
    # of the Vega expression language (the original used bitwise `&`).
    filter=(
        f"datum.sort_num >= parseInt({select_scroll.name}.offset)"
        f" && datum.sort_num < (parseInt({select_scroll.name}.offset) + {nrows})"
    )
).mark_text().encode(
    x=alt.X(
        "key:N", 
        axis=alt.Axis(orient="top", labelAngle=0, title=None, domain=False, ticks=False), 
        scale=alt.Scale(padding=15)
    ),
    text="value:N",
    y=alt.Y("sort_num:O", axis=None), 
    # NOTE(review): no field named "order" is created by the transforms above,
    # so this encoding looks like a no-op - confirm before removing.
    order=alt.Order("order:N", sort='descending'),
).properties(width=500)

In [7]:
"""
Some notes: 
    ser why is vega-lite so hard ʕっ•ᴥ•ʔっ

    Cross filtering by temporal domain through an interval selection: 
        In order to have the separate time axis perform cross filtering to the histogram and heatmap plots, it is necessary 
        that the encoding (brush uses encoding x, and in time axis, the encoding spec for x is date:T) that is used in the 
        plot housing the selection also exists and is of the same type within the cross filtered chart. This is why the detail 
        encoding of the heatmap uses this same value. See the following Stack Overflow link for reference. 
        https://stackoverflow.com/questions/71249346/use-interval-selection-from-one-dataset-chart-to-filter-data-in-another-dataset
        Also it's a bit strange but if this encoding is made ordinal (i.e. date:O in the time axis x encoding) then the inclusion 
        of the detail encoding in the heatmap is not necessary. Tbh not sure why this is true but documenting nonetheless. 
        
    Filtering by selection when the selection's mapped encoding is binned:
        In this case, make sure not to use conditions for encodings as they don't accurately reflect binning. This could be rectified 
        with an explicit bin transform. See this issue: 
        https://stackoverflow.com/questions/60994128/how-to-click-on-square-of-heatmap-to-filter-linked-bar-chart-altair
"""

# Cross-filtered view of marketplace fills:
#   top    - histogram of pod volume per place-in-line bin
#   middle - heatmap of volume by place in line x price, overlaid with the
#            volume-weighted-size "yield curve" (mean price per bin)
#   bottom - daily volume bars carrying the brush that filters the charts above
width = 750
# Hover selection that snaps to the nearest x position and clears on mouse-out;
# with empty="none" nothing is highlighted until a point is hovered.
sel_point = alt.selection_single(encodings=['x'], nearest=True, on="mouseover", clear="mouseout", empty="none")
# Interval brush along x; it is attached to the time axis chart below.
sel_brush = alt.selection_interval(encodings=['x'])

# Daily pod volume for the brushable time axis. It is derived here from
# `df_fills` so the cell does not depend on a variable left behind by another
# (possibly deleted) cell.
# NOTE(review): assumes `pod_volume` is the per-day sum of `amount` - confirm.
df_vol_daily = (
    df_fills.groupby("date", as_index=False)["amount"]
    .sum()
    .rename(columns={"amount": "pod_volume"})
)

base = alt.Chart(df_fills[['amount', 'place_in_line', 'price_per_pod', 'date']]
).transform_filter(sel_brush)

# One shared binning so the histogram, heatmap and yield curve line up on x.
xbin = alt.Bin(extent=[df_fills.place_in_line.min(), df_fills.place_in_line.max()], maxbins=50)

histogram_place_in_line = base.mark_rect(
).encode(
    x=alt.X("place_in_line:Q", bin=xbin, axis=None), 
    y=alt.Y("sum(amount):Q", axis=alt.Axis(title="Bin Volume (Pods)")), 
    tooltip=alt.Tooltip("sum(amount)", format=",d"),
).properties(width=width, height=100)

heatmap_base = base.properties(width=width, height=250)
heatmap = heatmap_base.mark_rect(
).encode(
    x=alt.X("place_in_line:Q", bin=xbin), 
    y=alt.Y(
        "price_per_pod:Q", 
        scale=alt.Scale(domain=(0,1)), 
        bin=alt.Bin(extent=[0,1], step=.1), 
        axis=alt.Axis(title="Price Per Pod ($)")
    ), 
    color=alt.Color("sum(amount)", scale=alt.Scale(type="log", scheme="plasma")), 
    # Needed so the date brush can filter this chart (see the notes above).
    detail='date:T',
    tooltip=alt.Tooltip("sum(amount)", format=",d",)
)

yield_curve_base = heatmap_base.encode(
    x=alt.X("place_in_line:Q", bin=xbin), 
    y=alt.Y("mean(price_per_pod)"), 
    color=alt.value("#03dbfc"),
)
yield_curve_point_size = alt.Size("sum(amount)", scale=alt.Scale(range=[5, 125]))
# scatter plot of yield curve 
yield_curve_points = yield_curve_base.mark_point(
).encode(
    size=yield_curve_point_size,
    tooltip=alt.Tooltip("mean(price_per_pod)")
).add_selection(sel_point)
# single mark for currently selected point (to visually highlight selection) 
yield_curve_point_selected = yield_curve_base.mark_point(filled=True
).encode(size=yield_curve_point_size
).transform_filter(sel_point)
# single mark showing value of currently selected point
yield_curve_text = yield_curve_base.mark_text(dy=-15, fontSize=15, stroke="black", strokeWidth=.4
).encode(
    text=alt.Text('mean(price_per_pod):Q', format='.2f')
).transform_filter(sel_point)

time_axis = alt.Chart(df_vol_daily
).mark_bar(
).encode(
    x=alt.X('date:T', axis=alt.Axis(title="Date", format="%b %Y", tickCount=8)), 
    y=alt.Y('pod_volume:Q', axis=alt.Axis(title="Farmer's Market Total Volume (Pods)")), 
).properties(width=width, height=100
).add_selection(sel_brush)

c = alt.vconcat(
    histogram_place_in_line, 
    alt.layer(heatmap, yield_curve_points, yield_curve_point_selected, yield_curve_text), 
    bounds="flush"
) & time_axis
#c.save("../schemas/farmers_market_history.json")
c

In [7]:
# Hand the combined chart `c` to the `output_chart` helper from
# utils_notebook.vega; the recorded output of this cell is an IPython JSON
# display object.
# NOTE(review): presumably this emits the chart's Vega-Lite spec - confirm in
# utils_notebook.vega.
output_chart(c)

<IPython.core.display.JSON object>