In [1]:
import os 
import sys 
import json 
import logging 
from pathlib import Path 
from functools import cache
from itertools import product

# Required when developing in a jupyter-notebook environment 
cur_path = os.path.abspath("../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum
from palettable.scientific.sequential import Batlow_5
from palettable.tableau import Tableau_10
from dotenv import load_dotenv
from subgrounds.subgrounds import Subgrounds, Subgraph
from subgrounds.subgraph import SyntheticField
from subgrounds.pagination import ShallowStrategy

# Required when developing in a jupyter-notebook environment 
load_dotenv('../../../../.env')

print(os.environ['SUBGRAPH_URL'])
# logging.basicConfig(level=logging.DEBUG)

from utils_notebook.utils import ddf, remove_prefix, load_subgraph, remove_keys
from utils_notebook.vega import (
    output_chart, 
    apply_css, 
    wide_to_longwide, 
    chart, 
)
from utils_notebook.testing import validate_season_series
from utils_notebook.constants import ADDR_BEANSTALK
from utils_notebook.queries import QueryManager
from utils_notebook.css import css_tooltip_timeseries_multi_colored

https://api.thegraph.com/subgraphs/name/cujowolf/beanstalk


In [2]:
sg, bs = load_subgraph()
q = QueryManager(sg, bs) 

In [3]:
plots = bs.Query.plots(first=100000)
df_plots = sg.query_df([
    plots.pods, 
    plots.farmer.id
])
df_plots = remove_prefix(df_plots, "plots_")

In [4]:
df_plots.pods /= 1e6 
df_plots = df_plots.rename(columns={"farmer_id": "address"})
df_plots.head()

Unnamed: 0,pods,address
0,4828.325653,0x61e413de4a40b8d03ca2f18026980e885ae2b345
1,1007.244059,0xacc53f19851ce116d52b730aee8772f7bd568821
2,29922.479411,0xb66924a7a23e22a87ac555c950019385a3438951
3,9549.919578,0xd3c1e750b5664170f4af828145295b678bafd460
4,77102.185528,0xa33be425a086db8899c33a357d5ad53ca3a6046e


In [18]:
from functools import partial 

breakpoints = [
    1, 1e4, 5e4, 1e5, 2.5e5, 5e5, 1e6, 5e6, 1e7, 2e7, float("inf") 
]

def classify(order, pods):
    for i in range(1, len(breakpoints)):
        b0 = breakpoints[i-1] 
        b1 = breakpoints[i]
        if b0 <= pods < b1: 
            if order: 
                return i 
            else: 
                if b1 == float('inf'): 
                    return f"{int(b0):,}+ pods"
                else: 
                    return f"{int(b0):,} - {int(b1):,} pods"
            
df_class_order = pd.DataFrame(data=[
    {'class': classify(False, breakpoints[i]), 'order': i}   
    for i in range(len(breakpoints))
]) 
df = df_plots.groupby(by="address").sum().reset_index()
df['class'] = df.pods.apply(partial(classify, False))
df['order'] = df.pods.apply(partial(classify, True))
df = df.sort_values('pods').reset_index(drop=True)
df = df.dropna(subset="class")
df.tail()

Unnamed: 0,address,pods,class,order
1764,0x735cab9b02fd153174763958ffb4e0a971dd7f29,18301950.0,"10,000,000 - 20,000,000 pods",9.0
1765,0xd79e92124a020410c238b23fb93c95b2922d0b9e,18790580.0,"10,000,000 - 20,000,000 pods",9.0
1766,0x9f791ae2160f4ec6d9c8986c40da9b99c3a5f2fe,20734650.0,"20,000,000+ pods",10.0
1767,0x87c9e571ae1657b19030eee27506c5d7e66ac29e,23227070.0,"20,000,000+ pods",10.0
1768,0x9a00beffa3fc064104b71f6b7ea93babdc44d9da,36798500.0,"20,000,000+ pods",10.0


In [19]:
# Get the count of pod holders by classification 
df_count_class = (
    df[['class', 'pods']]
    .groupby('class').count()
    .reset_index()
    .merge(df_class_order, how="left", on="class")
    .rename(columns={"pods": "count"})
)
# Sum value held by each class of holders 
df_class_value = (
    df[['class', 'pods']]
    .groupby('class').sum()
    .reset_index()
    .merge(df_class_order, how="left", on="class")
)
color_domain = list(sorted(df_count_class['class'].unique()))
color_range = [Tableau_10.hex_colors[i] for i in range(len(color_domain))]
df_count_class.head()

Unnamed: 0,class,count,order
0,"1 - 10,000 pods",452,0
1,"1,000,000 - 5,000,000 pods",112,6
2,"10,000 - 50,000 pods",460,1
3,"10,000,000 - 20,000,000 pods",12,8
4,"100,000 - 250,000 pods",238,3


In [35]:
width = 500 
x = alt.X("class:O", sort=alt.SortField("order"), axis=alt.Axis(title="Classification"))
color = alt.Color(
    "class:O", 
    legend=None, 
    scale=alt.Scale(domain=color_domain, range=color_range)
)
selection = alt.selection_single(
    encodings=['x'], nearest=True, on='mouseover', empty='none', clear='mouseout'
)

# Chart count class 
base_count_class = (
    alt.Chart(df_count_class, width=width, title="Count of Holders by Pods Held Classification")
    .mark_bar()
    .encode(x=x, y=alt.Y("count:Q", axis=alt.Axis(title="Unique Addresses")))
)
chart_count_class_histogram = (
    base_count_class
    .encode(
        color=color, 
        stroke=alt.condition(selection, alt.value("black"), alt.value("white"))
    )
    .mark_bar()
)
chart_count_class_text = (
    base_count_class
    .encode(
        text=alt.Text("count:Q", format=",d"),
        stroke=alt.value("black"), 
        strokeWidth=alt.condition(selection, alt.value(.6), alt.value(0)),
    )
    .mark_text(color='black', dy=-10)
)

# Chart class value 
base_class_value = (
    alt.Chart(df_class_value, width=width, title="Cumulative Pods by Pods Held Classification")
    .mark_bar()
    .encode(x=x, y=alt.Y("pods:Q", axis=alt.Axis(title="Pods")),)
)
chart_class_value_histogram = (
    base_class_value
    .encode(
        color=color,
        stroke=alt.condition(selection, alt.value("black"), alt.value("white")),
    )
    .mark_bar()
)
chart_class_value_text = (
    base_class_value
    .encode(
        text=alt.Text("pods:Q", format=".3s"),
        stroke=alt.value("black"), 
        strokeWidth=alt.condition(selection, alt.value(.6), alt.value(0)),
    )
    .mark_text(color='black', dy=-10)
)

c = (
    alt.hconcat(
        alt.layer(chart_count_class_histogram, chart_count_class_text).add_selection(selection), 
        alt.layer(chart_class_value_histogram, chart_class_value_text).add_selection(selection), 
    )
)
c

In [36]:
output_chart(c)

<IPython.core.display.JSON object>