In [1]:
import os 
import sys 
from typing import List 

cur_path = os.path.abspath("../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

from functools import cache 
import numpy as np 
import pandas as pd 
import altair as alt 
from IPython.display import clear_output
from altair import datum
from dotenv import load_dotenv
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.pagination import ShallowStrategy

# Required when developing in a jupyter-notebook environment 
load_dotenv('../../../../.env')

from utils_notebook.utils import ddf, load_subgraph, remove_prefix
from utils_notebook.vega import (
    output_chart, 
    apply_css, 
    stack_order_expr, 
    wide_to_longwide, 
    chart_stack_area_overlay_line_timeseries,
)
from utils_notebook.queries import adjust_precision, QueryManager
from utils_notebook.testing import validate_season_series
from utils_notebook.css import css_tooltip_timeseries_multi_colored
from utils_notebook.vega import condition_union, XAXIS_DEFAULTS

In [2]:
# Obtain the Subgrounds client and the subgraph handle from the notebook helper;
# the annotations document the types the pair is expected to have.
sg: Subgrounds
bs: Subgraph
sg, bs = load_subgraph()

In [3]:
# Single query helper bound to the client/subgraph pair; the cells below fetch data through it.
q = QueryManager(sg, bs) 

In [4]:
# Fetch the silo snapshots; the preview shows one `dailyBeanMints` value per `season`.
df = q.query_silo_daily_snapshots()
df.head()

Unnamed: 0,season,dailyBeanMints
0,3,31.65067
1,4,17.76026
2,5,43.709604
3,18,0.342173
4,21,676.195254


In [5]:
# Fetch the season table; the preview shows a `timestamp` for each `season` number.
df_szns = q.query_seasons()
df_szns.head()

Unnamed: 0,season,timestamp
0,0,2021-08-06 22:06:08
1,1,2021-08-06 23:06:08
2,2,2021-08-07 00:06:08
3,3,2021-08-07 01:07:38
4,4,2021-08-07 02:09:28


In [6]:
# Attach each season's timestamp to the mint rows, then discard the `season`
# key, which is only needed as the join column.
df = (
    df
    .merge(df_szns, how='left', on='season')
    .drop(columns=['season'])
)
df.head()

Unnamed: 0,dailyBeanMints,timestamp
0,31.65067,2021-08-07 01:07:38
1,17.76026,2021-08-07 02:09:28
2,43.709604,2021-08-07 03:07:35
3,0.342173,2021-08-07 16:16:35
4,676.195254,2021-08-07 19:12:47


In [7]:
# Sum the mints into weekly ("W") buckets keyed on `timestamp`. After this step
# `dailyBeanMints` holds one summed value per week despite its name.
df = (
    df
    .resample("W", on="timestamp")
    .sum()
    .reset_index()
)
# Running total across the weekly buckets.
df['totalBeanMints'] = df['dailyBeanMints'].cumsum()

In [19]:
# Synthetic placeholder series, used only to exercise multi-metric charts.
# A seeded generator keeps these columns (and every chart built from them)
# identical across Restart & Run All; the unseeded global RNG did not.
rng = np.random.default_rng(42)
df['one'] = df.dailyBeanMints * rng.random(len(df))
df['two'] = df.totalBeanMints * rng.random(len(df))
df.head()

Unnamed: 0,timestamp,dailyBeanMints,totalBeanMints,one,two
0,2021-08-08,21192.194431,21192.19,2586.106834,6024.416
1,2021-08-15,238628.922359,259821.1,162755.669001,209792.5
2,2021-08-22,278865.602534,538686.7,118691.348614,317548.3
3,2021-08-29,312848.021805,851534.7,37483.993248,562303.4
4,2021-09-05,329166.748664,1180701.0,245913.924186,1120886.0


In [20]:
def possibly_override(data = None, defaults = None, override = False):
    """Combine caller-supplied options with a set of defaults.

    Args:
        data: Caller-supplied options; a falsy value is treated as an empty dict.
        defaults: Fallback options; a falsy value is treated as an empty dict.
        override: When true, ``defaults`` are ignored and only ``data`` is used.

    Returns:
        With ``override`` set, the supplied options as given (or a new empty
        dict when none were given). Otherwise a new dict holding the defaults
        with the supplied options layered on top, so supplied keys win.
    """
    supplied = data if data else {}
    if override:
        # Replace mode: the defaults play no part in the result.
        return supplied
    fallback = defaults if defaults else {}
    # Mix mode: defaults first so that supplied values take precedence.
    return {**fallback, **supplied}


def chart(
    df: pd.DataFrame, 
    timestamp_col: str, 
    lmetrics: List[str], 
    rmetrics: List[str] = None, 
    lstrategy: str = 'line', 
    rstrategy: str = 'line', 
    title: str = '', 
    xaxis_kwargs = None, 
    xaxis_kwargs_override: bool = False, 
    yaxis_left_kwargs: dict = None, 
    yaxis_left_kwargs_override: bool = False, 
    yaxis_right_kwargs: dict = None, 
    yaxis_right_kwargs_override: bool = False, 
    color_map = None,      
    tooltip_formats = None, 
    dual_axes: bool = False, 
    show_exploit_rule: bool = True, 
    exploit_day: int = 17, # must be either 16 or 17
    width: int = 700, 
): 
    """Build a layered time-series chart with one or two groups of metrics.

    The frame is expected in long-wide form (i.e. already processed with
    ``wide_to_longwide``): the layers read a ``variable`` column naming the
    metric and a ``value`` column holding its number, next to ``timestamp_col``.

    Args:
        df: Long-wide data to plot.
        timestamp_col: Name of the column used for the shared x axis.
        lmetrics: Metrics drawn by the left-hand layer.
        rmetrics: Metrics drawn by the optional right-hand layer.
        lstrategy: Mark used for ``lmetrics``: 'line', 'stack_area' or 'stack_bar'.
        rstrategy: Mark used for ``rmetrics``; only looked up when ``rmetrics`` is given.
        title: Chart title.
        xaxis_kwargs: Options for the x ``alt.Axis``, mixed into ``XAXIS_DEFAULTS``.
        xaxis_kwargs_override: Use ``xaxis_kwargs`` alone, ignoring the defaults.
        yaxis_left_kwargs: Options for the left y ``alt.Axis``.
        yaxis_left_kwargs_override: Passed through to ``possibly_override``.
        yaxis_right_kwargs: Options for the right y ``alt.Axis``.
        yaxis_right_kwargs_override: Passed through to ``possibly_override``.
        color_map: Optional metric -> color mapping; must cover every plotted metric.
        tooltip_formats: Optional metric -> format string; metrics not listed use ",d".
        dual_axes: Give the left and right layers independent y scales and axes.
        show_exploit_rule: Include the dashed vertical rule marking the exploit date.
        exploit_day: Day of month matched by the exploit rule; must be 16 or 17.
        width: Chart width.

    Returns:
        The composed Altair layered chart.

    Raises:
        AssertionError: A metric appears on both sides, ``exploit_day`` is not
            16 or 17, or ``dual_axes`` is set without ``rmetrics``.
        KeyError: Unknown strategy name, or ``color_map`` lacks a plotted metric.
    """
    right_metrics = rmetrics or []
    assert set(lmetrics).isdisjoint(right_metrics), "Same metric on two axes"
    all_metrics = lmetrics + right_metrics
    formats = tooltip_formats or {}
    x_axis_opts = possibly_override(xaxis_kwargs, XAXIS_DEFAULTS, override=xaxis_kwargs_override)
    left_axis_opts = possibly_override(yaxis_left_kwargs, None, override=yaxis_left_kwargs_override)
    right_axis_opts = possibly_override(yaxis_right_kwargs, None, override=yaxis_right_kwargs_override)

    # Hover selection that snaps to the nearest timestamp under the pointer.
    hover = alt.selection_single(
        fields=[timestamp_col], nearest=True, on='mouseover', empty='none', clear='mouseout'
    )

    # Explicit colors only when a mapping was supplied; otherwise the range is left unset.
    scale_kwargs = {"domain": all_metrics}
    if color_map:
        scale_kwargs["range"] = [color_map[m] for m in all_metrics]
    palette = alt.Scale(**scale_kwargs)

    # Shared foundation: every layer starts from the same data, x encoding, title and width.
    time_base = alt.Chart(df).encode(
        x=alt.X(f"{timestamp_col}:O", axis=alt.Axis(**x_axis_opts))
    )
    time_base = time_base.properties(title=title, width=width)

    # Metric layers additionally carry color and a stack order (the order matters for area marks).
    ordered = time_base.transform_calculate(stack_order=stack_order_expr("variable", all_metrics))
    colored = ordered.encode(
        color=alt.Color("variable:N", scale=palette, legend=alt.Legend(title=None)),
        order=alt.Order('stack_order:Q', sort='ascending'),
    )

    def _y_value(axis):
        return alt.Y("value:Q", axis=axis)

    def _with_sort_col(layer):
        return layer.transform_calculate(sort_col=stack_order_expr("variable", all_metrics))

    def _draw_line(layer, axis):
        return layer.mark_line().encode(y=_y_value(axis))

    def _draw_stack_area(layer, axis):
        return _with_sort_col(layer).mark_area(point='transparent').encode(y=_y_value(axis))

    def _draw_stack_bar(layer, axis):
        return _with_sort_col(layer).mark_bar().encode(y=_y_value(axis))

    renderers = {
        "line": _draw_line,
        "stack_area": _draw_stack_area,
        "stack_bar": _draw_stack_bar,
    }

    left = renderers[lstrategy](
        colored.transform_filter(condition_union("==", "|", lmetrics)),
        alt.Axis(**left_axis_opts),
    )

    right = None
    if right_metrics:
        right = renderers[rstrategy](
            colored.transform_filter(condition_union("==", "|", right_metrics)),
            alt.Axis(**right_axis_opts),
        )

    # Invisible rule carrying the hover selection. The data is pivoted so that a
    # single row per timestamp holds every metric for the tooltip.
    nearest = (
        time_base
        .transform_pivot('variable', value='value', groupby=[timestamp_col])
        .mark_rule(opacity=0, color="black")
        .encode(tooltip=(
            [alt.Tooltip(f'{timestamp_col}:O', timeUnit="yearmonthdate", title="date")] + 
            [alt.Tooltip(f'{m}:Q', format=formats.get(m, ",d")) for m in all_metrics]
        ))
        .add_selection(hover)
    )

    assert exploit_day in (16, 17)
    # Dashed vertical rule on the exploit date. Vega's month() is zero-based,
    # so `=== 3` selects April. (Appending `&& warn(datetime(...))` to the
    # filter is a handy way to debug which rows match.)
    rule_exploit = (
        time_base
        .transform_pivot('variable', value='value', groupby=[timestamp_col])
        .transform_filter(f"""
            year(datum['{timestamp_col}']) === 2022 && 
            month(datum['{timestamp_col}']) === 3 && 
            date(datum['{timestamp_col}']) === {exploit_day} 
        """)
        .mark_rule(opacity=1, color='#474440', strokeDash=[2.5,1])
    )

    # Compose the layers. With a right-hand layer, the overlays are grouped with
    # it first; that grouping matters when dual_axes is True.
    overlays = [rule_exploit, nearest] if show_exploit_rule else [nearest]
    if right_metrics:
        secondary = right
        for overlay in overlays:
            secondary = secondary + overlay
        composed = left + secondary
    else:
        composed = left
        for overlay in overlays:
            composed = composed + overlay

    if dual_axes:
        assert right_metrics, "Can't have two axes if you didn't specify rmetrics"
        composed = composed.resolve_scale(y="independent").resolve_axis(y="independent")
    return composed

In [21]:
# Preview the weekly frame (including the two synthetic columns) before charting it.
df.head()

Unnamed: 0,timestamp,dailyBeanMints,totalBeanMints,one,two
0,2021-08-08,21192.194431,21192.19,2586.106834,6024.416
1,2021-08-15,238628.922359,259821.1,162755.669001,209792.5
2,2021-08-22,278865.602534,538686.7,118691.348614,317548.3
3,2021-08-29,312848.021805,851534.7,37483.993248,562303.4
4,2021-09-05,329166.748664,1180701.0,245913.924186,1120886.0


In [23]:
# `wide_to_longwide` is already imported in the notebook's first cell, so it is
# not re-imported here.
# Reshape into the long-wide layout that `chart` expects, then show the
# per-period mints (bars) next to the running total (line).
metric_cols = ['dailyBeanMints', 'totalBeanMints', 'one', 'two']
sdf = wide_to_longwide(df, 'timestamp', ['timestamp'], metric_cols)
chart(
    sdf, 
    'timestamp', 
    ['dailyBeanMints'], 
    lstrategy='stack_bar', 
    title="Daily Silo Emissions", 
    width=350, 
) | chart(
    sdf, 
    'timestamp', 
    ['totalBeanMints'], 
    lstrategy='line', 
    title="Total Silo Emissions", 
    width=350, 
)

# css_lines = css_tooltip_timeseries_multi_colored(value_vars, colors) 
# css = "\n".join(css_lines)

# apply_css("")
# # apply_css(css)

# c

  for col_name, dtype in df.dtypes.iteritems():


In [12]:
# output_chart(c, css=css)