In [4]:
import os 
import sys 
import json 
import logging 
from pathlib import Path 
from functools import cache
from itertools import product

# Required when developing in a jupyter-notebook environment 
cur_path = os.path.abspath("../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum
from palettable.scientific.sequential import Batlow_5
from palettable.tableau import Tableau_10
from dotenv import load_dotenv
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.subgraph import SyntheticField
from subgrounds.pagination import ShallowStrategy

# Required when developing in a jupyter-notebook environment 
load_dotenv('../../../../.env')

print(os.environ['SUBGRAPH_URL'])
# logging.basicConfig(level=logging.DEBUG)

from utils_notebook.utils import ddf, remove_prefix, load_subgraph, remove_keys
from utils_notebook.vega import (
    output_chart, 
    apply_css, 
    wide_to_longwide, 
    chart, 
)
from utils_notebook.testing import validate_season_series
from utils_notebook.constants import ADDR_BEANSTALK
from utils_notebook.queries import QueryManager
from utils_notebook.css import css_tooltip_timeseries_multi_colored

https://api.thegraph.com/subgraphs/name/cujowolf/beanstalk


In [5]:
sg, bs = load_subgraph()
q = QueryManager(sg, bs) 

In [6]:
plots = bs.Query.plots(first=100000)
df_plots = sg.query_df([
    plots.pods, 
    plots.farmer.id
])
df_plots = remove_prefix(df_plots, "plots_")

In [7]:
df_plots.pods /= 1e6 
df_plots = df_plots.rename(columns={"farmer_id": "address"})
df_plots.head()

Unnamed: 0,pods,address
0,4828.325653,0x61e413de4a40b8d03ca2f18026980e885ae2b345
1,1007.244059,0xacc53f19851ce116d52b730aee8772f7bd568821
2,29922.479411,0xb66924a7a23e22a87ac555c950019385a3438951
3,9549.919578,0xd3c1e750b5664170f4af828145295b678bafd460
4,77102.185528,0xa33be425a086db8899c33a357d5ad53ca3a6046e


In [44]:
from functools import partial 

breakpoints = [
    1e4, 5e4, 1e5, 2.5e5, 5e5, 1e6, 5e6, 1e7, 2e7
]

def classify(order, pods):
    for i in range(len(breakpoints)):
        if i == 0: 
            b0 = 1
        else: 
            b0 = breakpoints[i-1]
        if i < len(breakpoints) - 1:
            b1 = breakpoints[i]
        else: 
            b1 = float('inf')
        if b0 < pods <= b1: 
            if not order: 
                if b1 == float('inf'): 
                    return f"{int(b0):,}+ pods"
                else: 
                    return f"{int(b0):,} - {int(b1):,} pods"
            else: 
                return i
            
df_class_order = pd.DataFrame(data=[
    {'class': classify(False, breakpoints[i]), 'order': i}   
    for i in range(len(breakpoints))
]) 
df = df_plots.groupby(by="address").sum().reset_index()
df['class'] = df.pods.apply(partial(classify, False))
df['order'] = df.pods.apply(partial(classify, True))
df = df.sort_values('order').reset_index(drop=True)
df = df.dropna(subset="class")
df.tail()

Unnamed: 0,address,pods,class,order
1735,0x9a00beffa3fc064104b71f6b7ea93babdc44d9da,36798500.0,"10,000,000+ pods",8.0
1736,0x8d06ffb1500343975571cc0240152c413d803778,11576440.0,"10,000,000+ pods",8.0
1737,0x4a24e54a090b0fa060f7faaf561510775d314e84,10517320.0,"10,000,000+ pods",8.0
1738,0x9f791ae2160f4ec6d9c8986c40da9b99c3a5f2fe,20734650.0,"10,000,000+ pods",8.0
1739,0xd79e92124a020410c238b23fb93c95b2922d0b9e,18790580.0,"10,000,000+ pods",8.0


In [45]:
# Get the count of pod holders by classification 
df_count_class = (
    df[['class', 'pods']]
    .groupby('class').count()
    .reset_index()
    .merge(df_class_order, how="left", on="class")
    .rename(columns={"pods": "count"})
)
# Sum value held by each class of holders 
df_class_value = (
    df[['class', 'pods']]
    .groupby('class').sum()
    .reset_index()
    .merge(df_class_order, how="left", on="class")
)
color_domain = list(sorted(df_count_class['class'].unique()))
color_range = [Tableau_10.hex_colors[i] for i in range(len(color_domain))]
df_count_class.head()

Unnamed: 0,class,count,order
0,"1 - 10,000 pods",455,0
1,"1,000,000 - 5,000,000 pods",112,6
2,"10,000 - 50,000 pods",458,1
3,"10,000,000+ pods",15,8
4,"100,000 - 250,000 pods",237,3


In [48]:
width = 500 
x = alt.X("class:O", sort=alt.SortField("order"))
color = alt.Color(
    "class:O", 
    legend=None, 
    scale=alt.Scale(domain=color_domain, range=color_range)
)

# Chart count class 
base_count_class = (
    alt.Chart(df_count_class, width=width)
    .mark_bar()
    .encode(x=x, y=alt.Y("count:Q"))
)
chart_count_class_histogram = (
    base_count_class
    .encode(color=color)
    .mark_bar()
)
chart_count_class_text = (
    base_count_class
    .encode(text=alt.Text("count:Q", format=",d"))
    .mark_text(color='black', dy=-10)
)

# Chart class value 
base_class_value = (
    alt.Chart(df_class_value, width=width)
    .mark_bar()
    .encode(x=x, y=alt.Y("pods:Q"),)
)
chart_class_value_histogram = (
    base_class_value
    .encode(color=color)
    .mark_bar()
)
chart_class_value_text = (
    base_class_value
    .encode(text=alt.Text("pods:Q", format=".3s"))
    .mark_text(color='black', dy=-10)
)

c = (
    (chart_count_class_histogram + chart_count_class_text) | 
    (chart_class_value_histogram + chart_class_value_text)
)
c

In [15]:
# TODO: update css for this chart 
# output_chart(c, css=css)

<IPython.core.display.JSON object>