In [1]:
import os 
import sys 
import json 
import logging 
from pathlib import Path 
from functools import cache
from itertools import product

# Required when developing in a jupyter-notebook environment:
# put the directory two levels above the working directory on sys.path
# (presumably so the `utils_notebook` imports further down resolve -- confirm).
cur_path = os.path.abspath("../..")
# Guard keeps repeated runs of this cell from appending the same entry again.
if cur_path not in sys.path: 
    sys.path.append(cur_path)

import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum
from dotenv import load_dotenv
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.subgraph import SyntheticField
from subgrounds.pagination import ShallowStrategy

# Required when developing in a jupyter-notebook environment.
# Load first, assert second: a call placed inside `assert` is stripped when Python
# runs with -O, which would silently skip loading the environment file.
dotenv_loaded = load_dotenv('../../../.env')
assert dotenv_loaded, "no variables were loaded from '../../../.env'"

print(os.environ['SUBGRAPH_URL'])

from utils_notebook.utils import ddf, remove_prefix, load_subgraph, remove_keys
from utils_notebook.vega import condition_union, output_chart, apply_css, stack_order_expr
from utils_notebook.testing import validate_season_series
from utils_notebook.constants import ADDR_BEANSTALK
from utils_notebook.queries import QueryManager

https://api.thegraph.com/subgraphs/name/cujowolf/beanstalk


In [2]:
# Build the subgraph handles once. Later cells use `sg` to execute queries
# (`sg.query_df`), `bs` to reference schema entities (`bs.Query`, `bs.Season`)
# and `q` for the prepared queries (`q.query_barn`, `q.query_field_daily_snapshots`).
sg, bs = load_subgraph()
q = QueryManager(sg, bs) 

In [3]:
@cache
def query_barn(**kwargs): 
    """Run the query manager's barn query, memoised per keyword set.

    The keyword arguments are not forwarded to the query; they only form the
    `functools.cache` key, so passing a value that has not been seen before
    (e.g. a different `cache=`) triggers a fresh query.
    """
    barn = q.query_barn()
    return barn

In [4]:
# Keep only the barn columns that the later merge and rename steps use.
barn_columns = ['season', 'sprouts', 'sprouts_rinsable']
df_barn = query_barn(cache=1)[barn_columns]
df_barn.head()

Unnamed: 0,season,sprouts,sprouts_rinsable
1,6074,86432680.0,0.0
2,6075,86443280.0,0.0
3,6076,86549760.0,2266.788451
4,6077,86583560.0,6824.618896
5,6078,86591880.0,13760.68547


In [7]:
# Subgraph field name -> column name used in this notebook.
# The keys are the fields requested by `query_field_daily_snapshots`; the whole
# mapping is then passed to `DataFrame.rename` on the result.
col_map = {
    'newHarvestablePods': 'pods_harvestable_daily',
    'newHarvestedPods': 'pods_harvested_daily', 
    'newPods': 'pods_issued_daily', 
    'podIndex': 'pods_issued_cumulative', 
    'totalHarvestablePods': 'pods_harvestable_cumulative', 
}

In [8]:
@cache
def query_field_daily_snapshots(**kwargs): 
    """Query the field daily snapshots for `season` plus every key of `col_map`.

    The keyword arguments are not forwarded to the query; they only form the
    `functools.cache` key, so a new value forces a fresh query.
    """
    requested_fields = ['season', *col_map]
    return q.query_field_daily_snapshots(fields=requested_fields)

In [9]:
# Copy before renaming so the memoised query result itself is left untouched.
df_field = (
    query_field_daily_snapshots(cache=1)
    .copy()
    .rename(columns=col_map)
)
df_field.tail()

Unnamed: 0,season,pods_harvestable_daily,pods_harvested_daily,pods_harvestable_cumulative,pods_issued_daily,pods_issued_cumulative
310,7425,0.0,0.0,57560230.0,0.387397,828640300.0
311,7449,0.0,0.0,57560230.0,66910.245593,828707200.0
312,7473,0.0,0.0,57560230.0,4923.484482,828712100.0
313,7497,0.0,0.0,57560230.0,43572.583335,828755700.0
314,7517,0.0,0.0,57560230.0,15527.719407,828771200.0


In [10]:
def silo_emissions_pre_replant() -> pd.DataFrame: 
    """Temporary solution to the subgraph not having silo emissions pre-replant.

    Data was downloaded from dune and is read from `data/SupplyIncrease.json`
    (resolved relative to the current working directory).

    Returns
    -------
    pd.DataFrame
        One row per record in the file, restricted to the columns
        `season` and `newSilo`.
    """
    # Pin the encoding: without it the decode depends on the platform locale.
    with Path("data/SupplyIncrease.json").open('r', encoding='utf-8') as f: 
        records = json.load(f)
    # Each record carries its payload under 'data'; `remove_keys` is asked to
    # handle the '__typename' key (presumably dropping it -- confirm in utils).
    rows = [remove_keys(record['data'], ['__typename']) for record in records]
    return pd.DataFrame(rows)[['season', 'newSilo']]

@cache 
def query_silo(refresh=None) -> pd.DataFrame: 
    """Query `season` and `dailyBeanMints` from the silo daily snapshots.

    Only snapshots whose `silo` equals `ADDR_BEANSTALK` are requested, ordered
    by ascending season. `refresh` is not used in the body; it only takes part
    in the `functools.cache` key, so a new value forces a fresh query.
    """
    snapshots = bs.Query.siloDailySnapshots(
        orderBy="season", 
        orderDirection="asc", 
        first=10000, 
        where={"silo": ADDR_BEANSTALK}
    )
    # snapshots.totalBeanMints: add back when subgraph includes historical data 
    selected_fields = [snapshots.season, snapshots.dailyBeanMints]
    return sg.query_df(selected_fields, pagination_strategy=ShallowStrategy)

In [11]:
# process post-replant silo data (subgraph)
# copy so the frame memoised by `query_silo` is not modified by the steps below
df_silo = query_silo(refresh=1).copy()
df_silo = remove_prefix(df_silo, "siloDailySnapshots_")
# NOTE(review): this keeps seasons < 6074, yet the heading above says "post-replant" and the
# barn data in this notebook starts at season 6074 -- confirm whether `>= 6074` was intended.
df_silo = df_silo.loc[df_silo.season < 6074] # TODO: fix once the subgraph contains all historical data
df_silo = df_silo.rename(columns={"dailyBeanMints": "silo_emissions_daily"})
# process pre-replant silo data (downloaded from dune)
df_silo_old = silo_emissions_pre_replant()
df_silo_old = df_silo_old.rename(columns={"newSilo": "silo_emissions_daily"})
# Combine pre and post replant data (no seasons in common so outer join)
# No `on=` is given, so pandas joins on the columns both frames share -- after the
# renames above that should be `season` and `silo_emissions_daily` (see assert below).
df_silo = df_silo.merge(df_silo_old, how="outer")
assert set(df_silo.columns) == set(['season', 'silo_emissions_daily'])
df_silo = df_silo.sort_values("season")
# collapse to one row per season by summing that season's emissions
df_silo = df_silo.groupby('season').agg({
    # handles edge case for season 6074 which occurred multiple times 
    "silo_emissions_daily": "sum", 
}).reset_index()
# same 1e6 scaling that `query_seasons` applies to `beans`
df_silo['silo_emissions_daily'] /= 10**6
# running total over the season-sorted rows
df_silo['silo_emissions_cumulative'] = df_silo.silo_emissions_daily.cumsum()
validate_season_series(df_silo, allow_missing=True)

In [12]:
@cache 
def query_seasons(refresh=None) -> pd.DataFrame: 
    """Query season number, timestamp and bean supply for every season.

    Defines the synthetic field `bs.Season.bean_supply` (`beans / 1e6`) as a
    side effect and strips the `seasons_` prefix from the returned columns.
    `refresh` is not used in the body; it only takes part in the
    `functools.cache` key, so a new value forces a fresh query.
    """
    season_rows = bs.Query.seasons(
        first=10000, orderBy="season", orderDirection="asc"
    )
    # synthetic field: raw `beans` scaled down by 1e6
    bs.Season.bean_supply = bs.Season.beans / 1e6
    selected_fields = [
        season_rows.season, 
        season_rows.timestamp, 
        season_rows.bean_supply, 
    ]
    frame = sg.query_df(selected_fields, pagination_strategy=ShallowStrategy)
    return remove_prefix(frame, 'seasons_')

In [13]:
# `query_seasons` is memoised: work on a copy so that overwriting `timestamp` below
# does not alter the cached frame handed back by later calls (matches the `.copy()`
# used for the other cached queries in this notebook).
df_szns = query_seasons(refresh=1).copy()
# timestamps arrive as epoch seconds
df_szns['timestamp'] = pd.to_datetime(df_szns.timestamp, unit='s')
validate_season_series(df_szns, allow_missing=False)
df_szns = df_szns.loc[df_szns.season >= 2] # timestamps are wrong for season 0 and 1 
df_szns.head()

Unnamed: 0,season,timestamp,bean_supply
2,2,2021-08-07 00:06:08,2078.821989
3,3,2021-08-07 01:07:38,4089.294648
4,4,2021-08-07 02:09:28,6085.876897
5,5,2021-08-07 03:07:35,8108.40849
6,6,2021-08-07 04:11:23,10087.230479


In [14]:
# data pre-processing 
# Left-join each component onto the season table; seasons without a match get NaN.
df = df_szns.merge(
    df_barn, how='left', on='season'
).merge(
    df_field, how='left', on='season'
).merge(
    df_silo, how='left', on='season'
)
# a longer result would mean one of the joined frames repeats a season
assert len(df) == len(df_szns)
df = df.rename(columns={
    # credit components 
    'sprouts_rinsable': 'fertilized beans', 
    'pods_harvestable_cumulative': 'pods harvestable', 
    'silo_emissions_cumulative': 'silo emissions', 
    # debt components 
    'sprouts': 'unfertilized beans', 
    'pods_issued_cumulative': 'pods issued', 
    # overall 
    # NOTE(review): none of the four source names below appears among the merged columns at
    # this point (the derived columns are created further down under their final names),
    # so these entries look like no-ops -- confirm before removing.
    'total_debt': 'total debt', 
    'total_credit': 'total credit', 
    'debt_credit_ratio': 'debt credit ratio', 
    'fertilizer_adjusted_pod_rate': 'fertilizer adjusted pod rate', 
})
# carry the last known value forward, then zero-fill whatever precedes the first observation
df = df.ffill().fillna(0) # Not technically correct but close enough 
df['total debt'] = (
    df['pods issued'] + df['unfertilized beans']
) 
df['total credit'] = (
    df['fertilized beans'] + df['silo emissions'] + df['pods harvestable']
)
# NOTE(review): a row with zero total credit would divide by zero here -- confirm it cannot occur
df['debt credit ratio'] = df['total debt'] / df['total credit'] 
df['fertilizer adjusted pod rate'] = df['total debt'] / df['bean_supply'] 
# metric groups reused by the chart cell for colour scales, filters and tooltips
metrics_credit = [
    'silo emissions',
    'pods harvestable',
    'fertilized beans', 
]
metrics_debt = [
    'unfertilized beans', 
    'pods issued', 
]
metrics_credit_debt_aggregate = [
    'total debt', 
    'total credit', 
]
metrics_meta = [
    'debt credit ratio', 
    'fertilizer adjusted pod rate', 
]
metrics = metrics_credit + metrics_debt + metrics_credit_debt_aggregate + metrics_meta
columns = ['timestamp'] + metrics 
df = df[columns]
# one row per weekly bucket ("W"), keeping the last row of each bucket; the `timestamp`
# column is dropped before reset_index() turns the resampled index back into `timestamp`
df = df.resample("W", on="timestamp").last().drop(columns="timestamp").reset_index()
# df_mask = df['silo emissions'].isna()
timestamp_min = df.timestamp.values[0]
# timestamp_exploit = df[df_mask].timestamp.values[0]
# timestamp_replant = df[df_mask].timestamp.values[-1]
# drop rows that still contain NaN (presumably weekly buckets without any season -- confirm)
df = df.dropna()
# long format for the chart: one row per (timestamp, variable) pair
source = df.melt(
    id_vars=['timestamp'], 
    value_vars=metrics, 
).sort_values(["timestamp", "variable"]).reset_index(drop=True)
print(len(source))
source.head(10)

423


Unnamed: 0,timestamp,variable,value
0,2021-08-08,debt credit ratio,0.495642
1,2021-08-08,fertilized beans,0.0
2,2021-08-08,fertilizer adjusted pod rate,0.155825
3,2021-08-08,pods harvestable,15863.528585
4,2021-08-08,pods issued,18366.370137
5,2021-08-08,silo emissions,21192.194431
6,2021-08-08,total credit,37055.723016
7,2021-08-08,total debt,18366.370137
8,2021-08-08,unfertilized beans,0.0
9,2021-08-15,debt credit ratio,0.230439


### Chart TODOs

- Add pod rate as a metric 


In [15]:
# alt.data_transformers.disable_max_rows()

# Dropdown that switches the time bucket: the option values 'ymd' / 'ym' are the field
# names created by `transform_timeunit` on the base chart, labelled "weekly" / "monthly".
dropdown = alt.binding_select(
    options=['ymd', 'ym'], labels=["weekly", "monthly"], name='aggregation level:')
# Named selection so the chart expression `datum[agglevel.AggLevel]` can read the choice;
# it starts on 'ymd'.
selection = alt.selection_single(
    name="agglevel", fields=['AggLevel'], bind=dropdown, init={"AggLevel": 'ymd'}
)
# Hover selection on the nearest `tstamp`: empty unless hovering, cleared on mouseout.
selection_rule = alt.selection_single(
    fields=['tstamp'], nearest=True, on='mouseover', empty='none', clear='mouseout'
)
# Hex colour per metric; used for the chart colour scales and the tooltip CSS.
colors = {
    # credit components 
    'fertilized beans': '#57cc99', # green
    'pods harvestable': '#38a3a5', # mid blue
    'silo emissions': '#22577a', # navy blue
    # debt components 
    'unfertilized beans': "#ef9b20", # orange
    'pods issued': '#fa4d56', # Red 50
    # overall 
    'total debt': '#9f1853', # Magenta 70
    'total credit': '#80ed99', # mint green
    'debt credit ratio': '#ffc300', # gold
    'fertilizer adjusted pod rate': '#5e60ce' # purple-ish
}
format_decimal = ",d"
format_percent = ".2%"
# Tooltip number format per metric: absolute amounts as integers, ratios as percentages.
tooltip_formats = {
    **dict.fromkeys(
        [
            'fertilized beans',
            'unfertilized beans',
            'pods harvestable',
            'silo emissions',
            'pods issued',
            'total debt',
            'total credit',
        ],
        format_decimal,
    ),
    **dict.fromkeys(
        ['debt credit ratio', 'fertilizer adjusted pod rate'],
        format_percent,
    ),
}
# every metric needs both a colour and a tooltip format
assert set(colors) == set(metrics)
assert set(tooltip_formats) == set(metrics)

# Shared chart skeleton: the layers defined below all derive from `base`.
base = (
    alt.Chart(source)
    .properties(height=225, width=500)
    # derive both candidate time buckets up front so the dropdown can switch between them
    .transform_timeunit(
        ymd="yearmonthdate(timestamp)", 
        ym="yearmonth(timestamp)", 
    )
    # pick the bucket field named by the dropdown-bound `agglevel` selection ('ymd' or 'ym')
    .transform_calculate(
        tstamp="datum[agglevel.AggLevel]", 
    )
    # one row per (bucket, variable), keeping the largest value seen in that bucket
    .transform_aggregate(
        groupby=["tstamp", 'variable'], rvalue='max(value)'
    )
    .transform_calculate(
        # creates numeric stack order key encoding both x position and order of stacked area labels into single value 
        stack_order=f'time(datum.tstamp) + ({stack_order_expr("variable", list(reversed(metrics)))})'
    )
    .encode(
        # ordinal x axis whose tick labels are rendered as e.g. "Aug 2021"
        x=alt.X(
            "tstamp:O", 
            axis=alt.Axis(
                formatType="time", 
                ticks=False, 
                labelExpr="timeFormat(toDate(datum.value), '%b %Y')", 
                labelOverlap=True, 
                labelSeparation=25, 
                labelPadding=5, 
                title="Date", 
                labelAngle=0, 
            ), 
        ),   
    )
)
# Metrics plotted on the BDV axis share one colour scale; the ratio metrics get their own.
metrics_bdv = metrics_credit + metrics_debt + metrics_credit_debt_aggregate
scale_bdv = alt.Scale(
    domain=metrics_bdv,
    range=[colors[m] for m in metrics_bdv],
)
scale_ratio = alt.Scale(
    domain=metrics_meta,
    range=[colors[m] for m in metrics_meta],
)
# the axis label expression swaps the 'G' in a formatted tick label for 'B'
axis_bdv = alt.Axis(title="BDV", format=".3~s", labelExpr="replace(datum.label, 'G', 'B')")
axis_ratio = alt.Axis(title="Percent", format=",%")

base_bdv = base.encode(
    y=alt.Y("rvalue:Q", axis=axis_bdv),
    color=alt.Color("variable:N", scale=scale_bdv, legend=alt.Legend(title=None)),
    order=alt.Order('stack_order:Q', sort='ascending'),
)
base_ratio = base.encode(
    y=alt.Y("rvalue:Q", axis=axis_ratio),
    color=alt.Color("variable:N", scale=scale_ratio, legend=alt.Legend(title=None)),
)
rule_exploit = (
    # Always-visible dashed vertical rule marking one fixed date (per the name, the exploit).
    # Pivoting leaves one row per tstamp, so the date filter yields at most one rule.
    # NOTE(review): Vega's month() is zero-based, so `month === 3` presumably selects April,
    # i.e. 17 April 2022 -- confirm. With the monthly ('ym') aggregation tstamp is presumably
    # the first of the month, so this filter would match nothing there -- confirm that is intended.
    base
    .transform_pivot('variable', value='rvalue', groupby=['tstamp'])
    .transform_filter("year(datum.tstamp) === 2022 && month(datum.tstamp) === 3 && date(datum.tstamp) === 17")
    .mark_rule(opacity=1, color='#474440', strokeDash=[2.5,1])
)
# Tooltip rows: the date first, then one row per metric in `metrics` order.
tooltip_date = alt.Tooltip('tstamp:O', timeUnit="yearmonthdate", title="date")
tooltip_metrics = [
    alt.Tooltip(
        f'{m}:Q',
        format=tooltip_formats[m],
        title=m.replace("_", " ").replace(" cumulative", ""),
    )
    for m in metrics
]
# Invisible rule per timestamp: the hover selection snaps to the nearest one and its
# tooltip shows the pivoted row, i.e. every metric for that timestamp.
rule = (
    base
    .transform_pivot('variable', value='rvalue', groupby=['tstamp'])
    .mark_rule(opacity=0)
    .encode(tooltip=[tooltip_date, *tooltip_metrics])
    .add_selection(selection_rule)
)
def _variable_filter(variables):
    """Build the `condition_union("==", "|", ...)` filter used by every mark layer."""
    return condition_union("==", "|", variables)


# stacked bars for the credit and debt components, lines for their totals
credit = base_bdv.mark_bar().transform_filter(_variable_filter(metrics_credit))
debt = base_bdv.mark_bar().transform_filter(_variable_filter(metrics_debt))
lines_debt_credit = base_bdv.mark_line().transform_filter(
    _variable_filter(metrics_credit_debt_aggregate)
)
# ratio metrics drawn as a line with small point markers on top
line_ratio = base_ratio.mark_line().transform_filter(_variable_filter(metrics_meta))
point_ratio = base_ratio.mark_point(size=7).transform_filter(
    _variable_filter(metrics_meta)
)

# Upper panel: debt/credit bars with the total lines; lower panel: the ratio metrics.
# Both panels carry the hover rule and the fixed-date rule.
profile_panel = alt.layer(
    debt, credit, lines_debt_credit, rule, rule_exploit
).properties(title="Beanstalk Credit Profile")
metrics_panel = alt.layer(
    line_ratio, point_ratio, rule, rule_exploit
).properties(title="Beanstalk Credit Metrics")

# Independent legends, axes and scales per panel; the aggregation dropdown is attached
# to the combined chart.
c = alt.vconcat(profile_panel, metrics_panel)
c = c.resolve_legend(color="independent")
c = c.resolve_axis(y="independent")
c = c.resolve_scale(y="independent", color="independent")
c = c.add_selection(selection)


# Static rules: put the dropdown form above the canvas, style the binding widgets,
# and render the first tooltip row (the date) bold with its label cell hidden.
css_lines = [
    "div.chart-wrapper { display: flex; flex-direction: column; }", 
    "form.vega-bindings { display: block; order: -1; }", 
    "canvas { order: 1 }", 
    """
    div.vega-bind { 
        display: inline-block; 
        padding: 5px; 
    }
    span.vega-bind-name { 
        font-weight: 500 !important; 
        padding-right: 5px !important; 
    }
    span.vega-bind-name span { 
        font-weight: 600 !important; 
        padding-right: 5px !important; 
    }
    div.vega-bind select { 
        border: .5px solid #000000;
        border-radius: 3px;
    }
    """,
    "#vg-tooltip-element tr:nth-child(1) { font-weight: bold }", 
    "#vg-tooltip-element tr:nth-child(1) td:first-child { opacity: 0 }\n", 
]
# One colour rule per metric. Rows start at 2 because nth-child is one-based and the
# first tooltip row is the timestamp, which is not coloured.
css_lines.extend(
    "#vg-tooltip-element tr:nth-child(%d) td:first-child { color: %s }\n" % (row, colors[name])
    for row, name in enumerate(metrics, start=2)
)
css = "\n".join(css_lines)

apply_css("")
# apply_css(css) 
c

In [16]:
# Hand the chart and the assembled CSS string to the project's `output_chart` helper
# (the inline preview in the previous cell was rendered with `apply_css("")` instead).
output_chart(c, css=css)

<IPython.core.display.JSON object>