In [1]:
import os 
import sys 
import logging 
import json 

logging.basicConfig(level=logging.INFO)

cur_path = os.path.abspath("../../..")
if cur_path not in sys.path: 
    sys.path.append(cur_path)

import numpy as np 
import pandas as pd 
import altair as alt 
from altair import datum

os.environ['SUBGRAPH_URL'] = 'https://graph.node.bean.money/subgraphs/name/beanstalk-dev'

from utils_notebook.utils import remove_prefix, ddf
from utils_notebook.vega import output_chart

In [2]:
from deepdiff import grep

from pathlib import Path 

paths = {p for p in Path("schemas").iterdir()}
schemas = {}
for p in paths: 
    with p.open("r") as f: 
        schemas[p.stem] = json.loads(f.read())

In [15]:
# Postprocessing pipeline to run on all production schemas to ensure they adhere to our constraints 

# 1. Disallow faceting 
# ---------------------------------------------------------------------

# 1.1. Disallow facet operator 
# https://vega.github.io/vega-lite/docs/facet.html#facet-operator
"""
Disallow all objects of the form 
{
  "facet": any,
  "spec": any 
}
"""
def has_facet_operator(spec): 
    if isinstance(spec, dict): 
        if "spec" in spec and "facet" in spec: 
            return True 
        return any(has_facet_operator(v) for v in spec.values()) 
    return False 
   
facet_op_spec = {
  "data": {"url": "data/cars.json"},
  "facet": {"row": {"field": "Origin"}},
  "spec": {
    "mark": "bar",
    "encoding": {
      "x": {
        "bin": {"maxbins": 15},
        "field": "Horsepower",
        "type": "quantitative"
      },
      "y": {"aggregate": "count", "type": "quantitative"}
    }
  }
}

assert has_facet_operator(facet_op_spec)

# 1.2. Disallow facet encoding channels 
# https://vega.github.io/vega-lite/docs/facet.html#facet-row-and-column-encoding-channels
"""
Disallow all objects of the form 
{
  "encoding": {
    // contains key "row" OR "col" OR "facet" 
  }
}
"""
def has_facet_encoding(spec): 
    if isinstance(spec, dict): 
        for k, v in spec.items(): 
            if k == "encoding" and isinstance(v, dict):
                if {"row", "column", "facet"}.intersection(set(v.keys())):
                    return True 
        return any(has_facet_encoding(v) for v in spec.values())                       
    return False 
   
facet_encoding_spec = {
  "name": "trellis_barley",
  "data": {"url": "data/barley.json"},
  "mark": "point",
  "height": {"step": 12},
  "encoding": {
    "facet": {
      "field": "site",
      "type": "ordinal",
      "columns": 2,
      "sort": {"op": "median", "field": "yield"}
    },
    "x": {
      "aggregate": "median",
      "field": "yield",
      "type": "quantitative",
      "scale": {"zero": False}
    },
    "y": {
      "field": "variety",
      "type": "ordinal",
      "sort": "-x"
    },
    "color": {"field": "year", "type": "nominal"}
  }
}

assert has_facet_encoding(facet_encoding_spec)

# 2. Disallow repeat  
# ---------------------------------------------------------------------

# 2.1. Disallow repeat operator 
# https://vega.github.io/vega-lite/docs/repeat.html#repeat-mapping
"""
Disallow all objects of the form 
{
  "repeat": any,
  "spec": any 
}
"""
def has_repeat_operator(spec): 
    if isinstance(spec, dict): 
        if "spec" in spec and "repeat" in spec: 
            return True 
        return any(has_repeat_operator(v) for v in spec.values()) 
    return False 

repeat_spec = {
  "data": {
    "url": "data/weather.csv"
  },
  "repeat": [
    "temp_max",
    "precipitation",
    "wind"
  ],
  "spec": {
    "mark": "line",
    "encoding": {
      "x": {"field": "date", "timeUnit": "month"},
      "y": {
        "field": {"repeat": "repeat"},
        "aggregate": "mean"
      },
      "color": {"field": "location"}
    }
  }
}

assert has_repeat_operator(repeat_spec)

In [37]:
def validate_schema(schema):
    assert not has_facet_operator(schema)
    assert not has_facet_encoding(schema)
    assert not has_repeat_operator(schema)
    
for sn, schema in schemas.items(): 
    validate_schema(schema)
    is_concat = "hconcat" in schema['schema'] or "vconcat" in schema['schema']
    if not is_concat: 
        # is simple chart. Either single plot or layer plot. 
        is_simple_layer = "layer" in schema['schema']
        if is_simple_layer: 
            # Is a layer chart 
            
            pass 
        else: 
            # Is single chart 
            pass 
        print(f"{sn} / concat {is_concat} / simple_layer {is_simple_layer}") 
    else: 
        # grid layout chart using hconcat and vconcat. 
        pass 
        print(f"{sn} / concat {is_concat}")
    match = schema | grep("width")
    matches = [
        m for m in match['matched_paths'] 
        if any(re.match(p, s) for p in [
            r"root\['schema'\]\['config'\]\['view'\]\['continuousWidth'\]",
            r"root\['schema'\]\['width'\]", 
            r".*\['layer'\]\[[0-9]+\]\['width'\]",
            r".*\['hconcat'\]\[[0-9]+\]\['width'\]",
            r".*\['vconcat'\]\[[0-9]+\]\['width'\]",
        ])
    ]
    for m in matches: 
        print(" " * 4 + m) 

fieldoverview / concat False / simple_layer True
    root['schema']['config']['view']['continuousWidth']
    root['schema']['layer'][0]['width']
    root['schema']['layer'][1]['width']
fertilizerbreakdown / concat True
    root['schema']['config']['view']['continuousWidth']
farmersmarkethistory / concat True
    root['schema']['config']['view']['continuousWidth']
    root['schema']['vconcat'][0]['width']
    root['schema']['vconcat'][1]['layer'][0]['width']
    root['schema']['vconcat'][1]['layer'][1]['width']
    root['schema']['vconcat'][1]['layer'][2]['width']
    root['schema']['vconcat'][1]['layer'][3]['width']
    root['schema']['vconcat'][2]['width']
beanstalkcreditworthiness / concat True
    root['schema']['config']['view']['continuousWidth']
    root['schema']['vconcat'][0]['layer'][0]['width']
    root['schema']['vconcat'][0]['layer'][1]['width']
    root['schema']['vconcat'][0]['layer'][2]['width']
    root['schema']['vconcat'][0]['layer'][3]['width']
    root['schema']['vc

In [29]:
import altair as alt
from altair.expr import datum

from vega_datasets import data
stocks = data.stocks.url

base = alt.Chart(stocks).encode(
    x='date:T',
    y='price:Q',
    color='symbol:N'
).transform_filter(
    datum.symbol == 'GOOG'
)

base.mark_line() + base.mark_point()

c = alt.layer(
  base.mark_line().properties(width=300),
  base.mark_point().properties(width=600),
  base.mark_rule()
).interactive()
c

In [27]:
output_chart(c)

<IPython.core.display.JSON object>