In [1]:
import os 
import sys 
import re 
import logging 
import json 

logging.basicConfig(level=logging.INFO)

cur_path = os.path.abspath("../../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

import numpy as np 
import pandas as pd 
import altair as alt 
from deepdiff import DeepSearch
from altair import datum
from IPython.display import JSON

os.environ['SUBGRAPH_URL'] = 'https://api.thegraph.com/subgraphs/name/cujowolf/beanstalk'

from utils_notebook.utils import remove_prefix, ddf
from utils_notebook.vega import output_chart

In [2]:
from deepdiff import grep

from pathlib import Path 

paths = {p for p in Path("schemas").iterdir() if str(p).endswith(".json")}
schemas = {}
for p in paths: 
    with p.open("r") as f: 
        schemas[p.stem] = json.loads(f.read())

In [4]:
from utils_notebook.vega import compute_width_paths

schema = schemas['silo']
compute_width_paths(schema)
#     m_radius = m_radius.union(set(m.keys()))

[{'path': ['root', 'config', 'view', 'continuousWidth'], 'factor': 1},
 {'path': ['root', 'mark', 'innerRadius'], 'factor': 0.5},
 {'path': ['root', 'mark', 'outerRadius'], 'factor': 0.5}]

In [102]:
# # Postprocessing pipeline to run on all production schemas to ensure they adhere to our constraints 

# # 1. Disallow faceting 
# # ---------------------------------------------------------------------

# # 1.1. Disallow facet operator 
# # https://vega.github.io/vega-lite/docs/facet.html#facet-operator
# """
# Disallow all objects of the form 
# {
#   "facet": any,
#   "spec": any 
# }
# """
# def has_facet_operator(spec): 
#     if isinstance(spec, dict): 
#         if "spec" in spec and "facet" in spec: 
#             return True 
#         return any(has_facet_operator(v) for v in spec.values()) 
#     return False 
   
# facet_op_spec = {
#   "data": {"url": "data/cars.json"},
#   "facet": {"row": {"field": "Origin"}},
#   "spec": {
#     "mark": "bar",
#     "encoding": {
#       "x": {
#         "bin": {"maxbins": 15},
#         "field": "Horsepower",
#         "type": "quantitative"
#       },
#       "y": {"aggregate": "count", "type": "quantitative"}
#     }
#   }
# }

# assert has_facet_operator(facet_op_spec)

# # 1.2. Disallow facet encoding channels 
# # https://vega.github.io/vega-lite/docs/facet.html#facet-row-and-column-encoding-channels
# """
# Disallow all objects of the form 
# {
#   "encoding": {
#     // contains key "row" OR "col" OR "facet" 
#   }
# }
# """
# def has_facet_encoding(spec): 
#     if isinstance(spec, dict): 
#         for k, v in spec.items(): 
#             if k == "encoding" and isinstance(v, dict):
#                 if {"row", "column", "facet"}.intersection(set(v.keys())):
#                     return True 
#         return any(has_facet_encoding(v) for v in spec.values())                       
#     return False 
   
# facet_encoding_spec = {
#   "name": "trellis_barley",
#   "data": {"url": "data/barley.json"},
#   "mark": "point",
#   "height": {"step": 12},
#   "encoding": {
#     "facet": {
#       "field": "site",
#       "type": "ordinal",
#       "columns": 2,
#       "sort": {"op": "median", "field": "yield"}
#     },
#     "x": {
#       "aggregate": "median",
#       "field": "yield",
#       "type": "quantitative",
#       "scale": {"zero": False}
#     },
#     "y": {
#       "field": "variety",
#       "type": "ordinal",
#       "sort": "-x"
#     },
#     "color": {"field": "year", "type": "nominal"}
#   }
# }

# assert has_facet_encoding(facet_encoding_spec)

# # 2. Disallow repeat  
# # ---------------------------------------------------------------------

# # 2.1. Disallow repeat operator 
# # https://vega.github.io/vega-lite/docs/repeat.html#repeat-mapping
# """
# Disallow all objects of the form 
# {
#   "repeat": any,
#   "spec": any 
# }
# """
# def has_repeat_operator(spec): 
#     if isinstance(spec, dict): 
#         if "spec" in spec and "repeat" in spec: 
#             return True 
#         return any(has_repeat_operator(v) for v in spec.values()) 
#     return False 

# repeat_spec = {
#   "data": {
#     "url": "data/weather.csv"
#   },
#   "repeat": [
#     "temp_max",
#     "precipitation",
#     "wind"
#   ],
#   "spec": {
#     "mark": "line",
#     "encoding": {
#       "x": {"field": "date", "timeUnit": "month"},
#       "y": {
#         "field": {"repeat": "repeat"},
#         "aggregate": "mean"
#       },
#       "color": {"field": "location"}
#     }
#   }
# }

# assert has_repeat_operator(repeat_spec)

In [103]:
JSON(schemas['fertilizerbreakdown'])

<IPython.core.display.JSON object>

In [176]:
# def validate_schema(schema):
#     assert not has_facet_operator(schema)
#     assert not has_facet_encoding(schema)
#     assert not has_repeat_operator(schema)
    
    
"""
Schema Validation and Sizing Update Path Determination. 
--------------------------------------------------------------------------------

For each schema in our production notebook set, we want to do the following 

1. Ensure that schema does not use facet / repeat (operators + encodings).  
- This is done because the sizing for these kinds of chart layouts is quite 
  complex and it would be very difficult to support dynamic resizing of 
  charts when using these layout modes. 
  
2. Ensure that all sub-views within the schema have a defined width.
- This is not a hard requirement of vega-lite but a requirement we impose 
  to support dynamic resizing of charts on the frontend. 


2.a. Ensure that 
2.b. Idenfity the paths the client must modify in order to change the chart size 
     dynamically on the frontend. This is a subset of all width defining paths. 
"""

# def try_match_width(spec, raise_error=False): 
#     match spec: 
#         case {"width": {"step": value}}: 
#             try: 
#                 float(value) 
#             except: 
#                 raise ValueError("Width step must be a number") 
#             return ['width', 'step']
#         case {"width": value}: 
#             try: 
#                 float(value) 
#             except: 
#                 raise ValueError("Width must be a number") 
#             return ['width']
#     if raise_error: 
#         raise ValueError("Did not find width") 
#     return None 


# def get_width_paths(spec, cur_path=[]):
#     """For a given schema, returns all paths to internal 
#     widths that must be updated on the frontend to support 
#     dynamic resizing. 
    
#     In the process of gathering these paths, we validate that 
#     all sub-views within spec define a width.
    
#     If current spec level specifies hconcat or vconcat:
#     - All children must define a width. 
#     If current spec level specifies layer:
#     - One of the children must define a width. 
#         - If multiple do, only first one is relevant. 
        
#     Returns: 
#         width: width of the current spec (if non-layout) 
#     """
#     width_paths = []
#     if "layer" in spec: 
#         # This is a layer chart 
#         # All children of layer must be single view charts (vega-lite constraint). 
#         # Since this is true, there is no need to recurse on children.
        
#         # Option 1: Set width directly on the layer chart 
#         local_wpath = try_match_width(spec) 
#         if local_wpath: 
#             width_paths.append(cur_path + local_wpath)
#         else: 
#             success = False 
#             for i, layer in enumerate(spec['layer']): 
#                 local_wpath = try_match_width(spec) 
#                 if local_wpath: 
#                     success = True
#                     width_paths.append(cur_path + ['layer', i] + local_wpath)
#             if not success: 
#                 raise ValueError(
#                     f"Spec at path {cur_path} was layer chart but didn't "
#                     "set top level width or have at least one child set width." 
#                 )
#     if "hconcat" in spec:
#         # All children must define width 
#         for i, spec_col in enumerate(spec['hconcat']): 
#             width_paths = (
#                 width_paths + 
#                 get_width_paths(spec_col, cur_path + ['hconcat', i])
#             )
#     elif "vconcat" in spec: 
#         # All children must define width 
#         for i, spec_col in enumerate(spec['vconcat']): 
#             width_paths = (
#                 width_paths + 
#                 get_width_paths(spec_col, cur_path + ['vconcat', i])
#             )
#     else: 
#         # This is a singular view 
#         try_match_width(spec, raise_error=True) 
#     return width_paths 

from deepdiff import DeepSearch
from deepdiff.path import _path_to_elements


def is_number(v): 
    return type(v) == int or type(v) == float 
    
    
for sn, schema in schemas.items(): 
    validate_schema(schema)
    # determine all edit paths 
    # ----------------------------------------------------------------------------
    # 1. Top level config 
    # https://vega.github.io/vega-lite/docs/spec.html#config
    
    # 1.1. Match top level config "continuousWidth". Value is always a number.
    m_view_cw = DeepSearch(schema, 'continuousWidth', verbose_level=2).get('matched_paths', {})
    m_view_cw = set(m_view_cw).intersection(set([
        "root['config']['view']['continuousWidth']"
    ]))
    
    # 1.2. Match top level config "discreteWidth". Value is either number or object 
    m_view_dw = DeepSearch(schema, 'discreteWidth', verbose_level=2).get('matched_paths', {})
    m_view_dw = set(m_view_dw).intersection(set([
        "root['config']['view']['discreteWidth']", 
        "root['config']['view']['discreteWidth']['step']",
    ]))
    if len(m_view_cw) == 2: 
        # One of the paths is a prefix of the other
        m_view_dw.remove("root['config']['view']['discreteWidth']")
        
    # 1.3 Match top level config "step"  
    m_view_step = DeepSearch(schema, 'step', verbose_level=2).get('matched_paths', {})
    m_view_step = set(m_view_step).intersection(set([
        "root['config']['view']['step']"
    ]))
    # ----------------------------------------------------------------------------
    # 2. View level config 
    
    # 2.1. Match view level "width" mapping to a number  
    m_w = DeepSearch(schema, 'width', verbose_level=2).get('matched_paths', {})
    keep_keys = set()
    for p, v in m_w.items(): 
        if p.endswith("['width']"): 
            assert v != "container" 
            assert is_number(v) 
            keep_keys.add(p) 
    m_w = {p for p, v in m_w.items() if p in keep_keys} 
    
    # 2.2. Match view level "width" mapping to an object  
    m_w_step = DeepSearch(schema, 'step', verbose_level=2).get('matched_paths', {})
    keep_keys = set()
    for p, v in m_w_step.items(): 
        if p.endswith("['width']['step']"): 
            assert is_number(v) 
            keep_keys.add(p) 
    m_w_step = {p for p, v in m_w_step.items() if p in keep_keys} 
    
    # ----------------------------------------------------------------------------
    # 3. Disallow certain types of autosize 
    if schema.get("autosize", {}).get("type", None) in ['fit', 'fit-x', 'fit-y']: 
        raise ValueError("autosize fit not allowed")
    
    # ----------------------------------------------------------------------------
    # 4. Combine all different path types together. Convert from string form 
    #    to list of attribute accesses. 
    wpaths = [
        [list(e)[0] for e in _path_to_elements(wps)]
        for wps in (
            m_view_cw
            .union(m_view_dw)
            .union(m_view_step)
            .union(m_w) 
            .union(m_w_step)
        )
    ]
    print(sn)
    print(wpaths)

fertilizerbreakdown
[['root', 'config', 'view', 'continuousWidth']]
silo
[['root', 'config', 'view', 'continuousWidth']]
beanstalkcreditworthiness
[['root', 'config', 'view', 'continuousWidth'], ['root', 'vconcat', 0, 'layer', 1, 'width'], ['root', 'vconcat', 0, 'layer', 0, 'width'], ['root', 'vconcat', 0, 'layer', 3, 'width'], ['root', 'vconcat', 0, 'layer', 2, 'width'], ['root', 'vconcat', 1, 'width'], ['root', 'vconcat', 2, 'width']]
farmersmarkethistory
[['root', 'vconcat', 1, 'layer', 2, 'width'], ['root', 'config', 'view', 'continuousWidth'], ['root', 'vconcat', 1, 'layer', 3, 'width'], ['root', 'vconcat', 0, 'width'], ['root', 'vconcat', 1, 'layer', 1, 'width'], ['root', 'vconcat', 1, 'layer', 0, 'width'], ['root', 'vconcat', 2, 'width']]
fieldoverview
[['root', 'config', 'view', 'continuousWidth'], ['root', 'layer', 0, 'width'], ['root', 'layer', 1, 'width']]


In [167]:
s = {
    "data": {"url": "data/population.json"},
    "transform": [{
        "filter": "datum.year == 2000"
    }],
    "mark": "bar",
    "encoding": {
        "x": {
          "aggregate": "sum", "field": "people", "title": "population"
        }
    },
    "config": {
        "view": {
            "step": 10, 
            "discreteWidth": {"step": 5}
        }
    }
}

mc1 = s | grep('continuousWidth') 
mcw = s | grep('discreteWidth') 
print(mc1) 
print(mcw)

{}
{'matched_paths': ["root['config']['view']['discreteWidth']", "root['config']['view']['discreteWidth']['step']"]}


In [114]:
import altair as alt
from altair.expr import datum

# from vega_datasets import data
stocks = data.stocks.url


base = alt.Chart(stocks).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N'
).transform_filter(
    datum.symbol == 'GOOG'
)

# base.mark_line() + base.mark_point()

c = alt.hconcat(
    alt.vconcat(
        base.mark_line(),
        base.mark_point().properties(width=600),
    ), 
    base.mark_area().properties()
) 


# alt.layer(
#     base.mark_line(), 
#     base.mark_point(),
#     base.mark_point().properties(width=200),
# ).properties(width=500)
c