In [1]:
import os 
import sys 

cur_path = os.path.abspath(".")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

from functools import cache 
import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.pagination import ShallowStrategy

from utils.utils import ddf, load_subgraph, remove_prefix

In [2]:
# Notebook-wide handles returned by load_subgraph(): `sg` is what the query
# cells call query_df on, and `bs` is the subgraph object whose Query and
# entity attributes are used to build those queries.
sg: Subgrounds
bs: Subgraph
sg, bs = load_subgraph()

In [3]:
@cache
def query_marketplace_fills(*args):
    """Fetch historical Farmer's Market order and listing fills.

    The result is memoized with ``functools.cache``. ``*args`` is never read
    by the body; it only forms the cache key, so calling with a different
    argument triggers a fresh query.

    Returns:
        The DataFrame produced by ``sg.query_df`` for the selected ``podFills``
        fields, passed through ``remove_prefix(..., "podFills_")``. The same
        object is handed back on every cache hit, so copy it before mutating.
    """
    fills = bs.Query.podFills(first=10000)
    selected_fields = [
        fills.amount,
        fills.index,
        fills.start,
        # fills.listing.status,
        # fills.order.status,
        fills.listing.pricePerPod,
        fills.order.pricePerPod,
        fills.transaction.timestamp,
    ]
    raw_fills = sg.query_df(selected_fields, pagination_strategy=ShallowStrategy)
    return remove_prefix(raw_fills, "podFills_")
    

In [4]:
# History of all marketplace listing and order fills.
# The cached query result is copied first so the memoized frame is never mutated.
df_fills = (
    query_marketplace_fills(1)
    .copy()
    .rename(columns={
        "listing_pricePerPod": "listing_price_per_pod",
        "order_pricePerPod": "order_price_per_pod",
    })
    .assign(
        # Raw amounts / positions / prices are all divided by one million.
        amount=lambda d: d["amount"] / 10**6,
        # The column is literally named "index", so it must always be addressed
        # by string key to avoid clashing with the dataframe's own .index.
        index=lambda d: d["index"] / 10**6,
        start=lambda d: d["start"] / 10**6,
        place_in_line=lambda d: d["index"] + d["start"],
        # Missing prices are treated as 0 so the two sides can simply be summed.
        listing_price_per_pod=lambda d: d["listing_price_per_pod"].fillna(0) / 1e6,
        order_price_per_pod=lambda d: d["order_price_per_pod"].fillna(0) / 1e6,
        price_per_pod=lambda d: d["listing_price_per_pod"] + d["order_price_per_pod"],
        # transaction_timestamp is interpreted as seconds since the epoch.
        datetime=lambda d: pd.to_datetime(d["transaction_timestamp"], unit="s"),
        # Calendar day of the fill, kept as a datetime column.
        date=lambda d: pd.to_datetime(d["datetime"].dt.date),
    )
)
# Keep only the columns used downstream, in display order.
df_fills = df_fills[[
    "date", "datetime", "amount", "index",
    "start", "place_in_line", "price_per_pod",
]]
df_fills.tail()

Unnamed: 0,date,datetime,amount,index,start,place_in_line,price_per_pod
1395,2022-03-30,2022-03-30 14:58:00,4520.835534,99858010.0,0.0,99858010.0,0.29
1396,2022-03-30,2022-03-30 20:58:45,36237.351724,99862540.0,0.0,99862540.0,0.29
1397,2022-03-30,2022-03-30 22:24:18,17865.626157,99898770.0,0.0,99898770.0,0.29
1398,2022-04-13,2022-04-13 23:22:23,2331.168306,99898770.0,0.0,99898770.0,0.66
1399,2022-04-14,2022-04-14 06:33:49,15534.45785,99901100.0,0.0,99901100.0,0.66


In [5]:
# Marketplace volume aggregated daily.
# Start from a gap-free daily calendar (first fill date through last fill date)
# so that days without any fills still get a row.
df_vol_daily = pd.DataFrame(
    {"date": pd.date_range(df_fills["date"].min(), df_fills["date"].max())}
)
num_dates = len(df_vol_daily)

# Left-join the fills onto the calendar and total the pod amount per day.
df_vol_daily = (
    df_vol_daily
    .merge(df_fills, how="left", on="date")
    .groupby("date", as_index=False)["amount"]
    .sum()
)
# Sanity check: exactly one output row per calendar day.
assert len(df_vol_daily) == num_dates

df_vol_daily = (
    df_vol_daily
    .rename(columns={"amount": "pod_volume"})
    .fillna({"pod_volume": 0})
)
df_vol_daily.tail()

Unnamed: 0,date,pod_volume
186,2022-08-10,102211.654185
187,2022-08-11,31455.947163
188,2022-08-12,20363.788045
189,2022-08-13,150837.592466
190,2022-08-14,60666.788367


In [6]:
"""
Some notes: 
    ser why is vega-lite so hard ʕっ•ᴥ•ʔっ

    Cross filtering by temporal domain through an interval selection: 
        In order to have the separate time axis perform cross filtering to the histogram and heatmap plots, it is necessary 
        that the encoding (brush uses encoding x, and in time axis, the encoding spec for x is date:T) that is used in the 
        plot housing the selection also exists and is of the same type within the cross filtered chart. This is why the detail 
        encoding of the heatmap uses this same value. See the following Stack Overflow question for reference. 
        https://stackoverflow.com/questions/71249346/use-interval-selection-from-one-dataset-chart-to-filter-data-in-another-dataset
        Also it's a bit strange but if this encoding is made ordinal (i.e. date:O in the time axis x encoding) then the inclusion 
        of the detail encoding in the heatmap is not necessary. Tbh not sure why this is true but documenting nonetheless. 
        
    Filtering by selection when the selection's mapped encoding is binned:
        In this case, make sure not to use conditions for encodings as they don't accurately reflect binning. This could be rectified 
        with an explicit bin transform. See this issue: 
        https://stackoverflow.com/questions/60994128/how-to-click-on-square-of-heatmap-to-filter-linked-bar-chart-altair
"""

# Pixel width shared by every panel so the stacked charts line up.
width = 750
# Hover selection on the x encoding: picks the nearest mark on mouseover, is
# cleared on mouseout, and selects nothing while empty (empty="none").
sel_point = alt.selection_single(encodings=['x'], nearest=True, on="mouseover", clear="mouseout", empty="none")
# Interval (brush) selection on the x encoding; it is attached to `time_axis`
# below and used here to filter the fills data.
sel_brush = alt.selection_interval(encodings=['x'])

# Base chart over the fill columns needed for plotting, filtered by the brush.
base = alt.Chart(df_fills[['amount', 'place_in_line', 'price_per_pod', 'date']]
).transform_filter(sel_brush)

# One bin definition for place_in_line (data min..max, at most 50 bins), reused
# by the histogram, heatmap and yield curve so their x bins coincide.
xbin = alt.Bin(extent=[df_fills.place_in_line.min(), df_fills.place_in_line.max()], maxbins=50)

# Top panel: summed pod amount per place_in_line bin; its x axis is hidden.
histogram_place_in_line = base.mark_rect(
).encode(
    x=alt.X("place_in_line:Q", bin=xbin, axis=None), 
    y=alt.Y("sum(amount):Q", axis=alt.Axis(title="Bin Volume (Pods)")), 
    tooltip=alt.Tooltip("sum(amount)", format=",d"),
).properties(width=width, height=100)

# Middle panel: summed amount per (place_in_line bin, price bin), colored on a
# log scale. The detail='date:T' encoding is what lets the brush on the time
# axis cross-filter this chart (see the notes string at the top of this cell).
heatmap_base = base.properties(width=width, height=250)
heatmap = heatmap_base.mark_rect(
).encode(
    x=alt.X("place_in_line:Q", bin=xbin), 
    y=alt.Y(
        "price_per_pod:Q", 
        scale=alt.Scale(domain=(0,1)), 
        bin=alt.Bin(extent=[0,1], step=.1), 
        axis=alt.Axis(title="Price Per Pod ($)")
    ), 
    color=alt.Color("sum(amount)", scale=alt.Scale(type="log", scheme="plasma")), 
    detail='date:T',
    tooltip=alt.Tooltip("sum(amount)", format=",d",)
)

# Yield curve layered over the heatmap: mean price per place_in_line bin,
# drawn in a fixed color.
yield_curve_base = heatmap_base.encode(
    x=alt.X("place_in_line:Q", bin=xbin), 
    y=alt.Y("mean(price_per_pod)"), 
    color=alt.value("#03dbfc"),
)
# Point size scales with the summed amount in the bin.
yield_curve_point_size = alt.Size("sum(amount)", scale=alt.Scale(range=[5, 125]))
# scatter plot of yield curve 
yield_curve_points = yield_curve_base.mark_point(
).encode(
    size=yield_curve_point_size,
    tooltip=alt.Tooltip("mean(price_per_pod)")
).add_selection(sel_point)
# single mark for currently selected point (to visually highlight selection) 
yield_curve_point_selected = yield_curve_base.mark_point(filled=True
).encode(size=yield_curve_point_size
).transform_filter(sel_point)
# single mark showing value of currently selected point
yield_curve_text = yield_curve_base.mark_text(dy=-15, fontSize=15, stroke="black", strokeWidth=.4
).encode(
    text=alt.Text('mean(price_per_pod):Q', format='.2f')
).transform_filter(sel_point)

# Bottom panel: daily pod volume bars. This chart hosts the brush selection
# that filters the panels above.
time_axis = alt.Chart(df_vol_daily
).mark_bar(
).encode(
    x=alt.X('date:T', axis=alt.Axis(title="Date", format="%b %Y", tickCount=8)), 
    y=alt.Y('pod_volume:Q', axis=alt.Axis(title="Farmer's Market Total Volume (Pods)")), 
).properties(width=width, height=100
).add_selection(sel_brush)

# Stack the histogram over the layered heatmap/yield-curve (flush bounds),
# then concatenate the time axis underneath with `&`.
c = alt.vconcat(
    histogram_place_in_line, 
    alt.layer(heatmap, yield_curve_points, yield_curve_point_selected, yield_curve_text), 
    bounds="flush"
) & time_axis
# Persist the chart as JSON (path is relative to the working directory), then
# display it as the cell output.
c.save("../schemas/farmers_market_history.json")
c

In [19]:

# Daily pod marketplace snapshots, ordered by ascending season.
snaps = bs.Query.podMarketplaceDailySnapshots(first=10000, orderBy="season", orderDirection='asc')

# Derived field name -> raw snapshot field it is computed from (raw / 1e6).
scaled_snapshot_fields = {
    "total_bean_vol": "totalBeanVolume",         # total bean volume for filled orders / listings
    "total_pod_vol": "totalPodVolume",           # total pod volume for filled orders / listings
    "total_pod_listing_vol": "totalPodsFilled",  # total pod volume for filled listings
    "total_pod_order_vol": "totalOrdersFilled",  # total pod volume for filled orders
}
# Register each derived field on the snapshot entity before it is queried.
for synthetic_name, raw_name in scaled_snapshot_fields.items():
    setattr(
        bs.PodMarketplaceDailySnapshot,
        synthetic_name,
        getattr(bs.PodMarketplaceDailySnapshot, raw_name) / 1e6,
    )

snapshot_fields = [snaps.timestamp, snaps.season] + [
    getattr(snaps, name) for name in scaled_snapshot_fields
]
df_snaps_raw = remove_prefix(
    sg.query_df(snapshot_fields, pagination_strategy=ShallowStrategy),
    'podMarketplaceDailySnapshots_',
)

In [22]:
# Build the long-format ("melted") snapshot frame consumed by the chart below.

# Season whose repeated snapshot rows are collapsed to a single row per variable.
# NOTE(review): the reason this season has repeated rows is not visible here — confirm.
DEDUP_SEASON = 6074
# Earliest season kept; earlier seasons are dropped.
FIRST_SEASON = 4357

value_vars = [
    'total_bean_vol', 'total_pod_vol', 'total_pod_listing_vol', 'total_pod_order_vol'
]
label_vars = [
    'total bean vol', 'total pod listing vol', 'total pod order vol', 'total pod vol'
] 
# Space-separated copies of the value columns. They are kept as id columns in
# the melt so every long-format row still carries all four totals (the chart
# reads them as tooltip fields). assign() returns a new frame, so
# df_snaps_raw itself is left untouched.
df_snaps = df_snaps_raw.assign(
    **{label: df_snaps_raw[label.replace(' ', '_')] for label in label_vars}
)
id_vars = ['timestamp', 'season'] + label_vars
df_snaps = df_snaps.melt(id_vars=id_vars, value_vars=value_vars).sort_values("season")

# Within DEDUP_SEASON keep only the last row (in the season-sorted order) of
# each variable. A single vectorised mask replaces the per-variable scan and
# the per-row membership test against a Python list of positions.
is_earlier_duplicate = (
    (df_snaps.season == DEDUP_SEASON)
    & df_snaps.duplicated(subset=['season', 'variable'], keep='last')
)
df_snaps = df_snaps.loc[~is_earlier_duplicate]
df_snaps = df_snaps.loc[df_snaps.season >= FIRST_SEASON]

# Stacked area of every variable except total_pod_vol. The stack is computed
# explicitly per season (ordered by variable, descending) into value_1/value_2,
# which become the lower/upper y bounds of each band. Tooltips show the four
# label_vars columns.
# NOTE(review): point='transparent' presumably exists to give tooltips hover
# targets — confirm.
area = alt.Chart(df_snaps).transform_filter(
    datum.variable != 'total_pod_vol'
).transform_stack(
    stack="value", 
    as_=['value_1', 'value_2'], 
    groupby=['season'],
    sort=[alt.SortField('variable', 'descending')]
).mark_area(point='transparent').encode(
    x="season:O", 
    y="value_1:Q", 
    y2="value_2:Q", 
    color="variable:N", 
    tooltip=[alt.Tooltip(f'{e}:Q', format=",d") for e in label_vars]
).properties(width=700)

# total_pod_vol drawn as a line against the same ordinal season axis.
line = alt.Chart(df_snaps).transform_filter(
    datum.variable == 'total_pod_vol'
).mark_line().encode(
    x="season:O", 
    y="value:Q", 
    color="variable:N", 
).properties(width=700)

# Layer the line over the area, persist the chart as JSON (path is relative to
# the working directory), then display it as the cell output.
c = area + line
c.save('../schemas/field_breakdown.json')
c

In [9]:
# Preview the last five rows of the melted, season-sorted snapshot frame.
df_snaps.tail(5)

Unnamed: 0,timestamp,season,total bean vol,total pod listing vol,total pod order vol,total pod vol,variable,value
283,1662162438,6729,5961510.0,39907050.0,9305164.0,49212210.0,total_bean_vol,5961510.0
569,1662163230,6735,5961510.0,39907050.0,9305164.0,49212210.0,total_pod_vol,49212210.0
284,1662163230,6735,5961510.0,39907050.0,9305164.0,49212210.0,total_bean_vol,5961510.0
854,1662163230,6735,5961510.0,39907050.0,9305164.0,49212210.0,total_pod_listing_vol,39907050.0
1139,1662163230,6735,5961510.0,39907050.0,9305164.0,49212210.0,total_pod_order_vol,9305164.0
