# Text and graphical YAML representations

SPDX-License-Identifier: 0BSD

In [1]:
from collections.abc import Mapping, Sequence
import html
import itertools
import json
import sys

from graphviz import Digraph
import requests
from typeguard import typechecked
import yaml

In [2]:
@typechecked
def is_stringy(obj: object) -> bool:
    """Tell whether obj is a str, bytes, or bytearray instance."""
    stringy_types = (str, bytes, bytearray)
    return isinstance(obj, stringy_types)

TypeError: <module '__main__'> is a built-in module

In [None]:
@typechecked
def is_scalar(obj: object) -> bool:
    """
    Check if obj is an instance of a scalar type.

    String-like objects (as decided by is_stringy) count as scalars even
    though they are sequences. Any other object is a scalar unless it is a
    Mapping or a Sequence.
    """
    # Reuse is_stringy rather than repeating its list of string-like types.
    return is_stringy(obj) or not isinstance(obj, Mapping | Sequence)

In [None]:
@typechecked
def make_label(obj: object) -> str:
    """Return the repr of obj, HTML-escaped, for a vertex or edge label."""
    representation = repr(obj)
    return html.escape(representation)

In [None]:
@typechecked
def build_graph(root: object) -> Digraph:
    """
    Build a digraph of the references reachable from root.

    The root may be deserialized YAML. Each mapping or non-scalar sequence
    becomes one point-shaped vertex per object, keyed by identity, so shared
    and cyclic references show up as several edges into the same vertex. Each
    scalar occurrence becomes its own labeled leaf. Edges leaving a mapping
    are labeled with the key; edges leaving a sequence are unlabeled.
    """
    # Maps id -> object. Holding the objects keeps their ids from being reused.
    visited = dict[int, object]()
    graph = Digraph()
    leaf_numbers = itertools.count()
    no_key = object()  # Sentinel: the edge has no key and gets no label.

    @typechecked
    def connect(parent_name: str | None,
                child_name: str,
                key: object) -> None:
        """Add the edge from parent to child, unless there is no parent."""
        if parent_name is None:
            return

        label_args = () if key is no_key else (make_label(key),)
        graph.edge(parent_name, child_name, *label_args)

    @typechecked
    def visit(parent_name: str | None, key: object, value: object) -> None:
        """Add value and everything reachable from it, depth first."""
        if is_scalar(value):
            # Scalars are never merged: every occurrence is a fresh leaf.
            leaf_name = f'leaf-{next(leaf_numbers)}'
            graph.node(leaf_name, label=make_label(value))
            connect(parent_name, leaf_name, key)
            return

        value_id = id(value)
        node_name = str(value_id)

        if value_id in visited:
            # Already drawn: just add the incoming edge and stop, which is
            # also what terminates the traversal of cyclic structures.
            connect(parent_name, node_name, key)
            return

        visited[value_id] = value
        graph.node(node_name, shape='point')
        connect(parent_name, node_name, key)

        if isinstance(value, Mapping):
            for child_key, child_value in value.items():
                visit(node_name, child_key, child_value)
        elif isinstance(value, Sequence):
            for child_value in value:
                visit(node_name, no_key, child_value)

    visit(None, no_key, root)
    return graph

## Example 1: A simple `environment.yml`

In [None]:
# Read the whole environment.yml file as UTF-8 text.
with open('environment.yml', encoding='utf-8') as file:
    conda_env_def_raw = file.read()

# Show the raw YAML text.
print(conda_env_def_raw)

In [None]:
# Parse the raw YAML text into Python objects.
conda_env_def = yaml.safe_load(conda_env_def_raw)
conda_env_def  # Trailing expression: the cell's value is the parsed data.

In [None]:
# Write conda_env_def to standard output as JSON, indented by 4 spaces.
json.dump(conda_env_def, sys.stdout, indent=4)

In [None]:
# Build the reference graph of conda_env_def; the cell's value is the Digraph.
build_graph(conda_env_def)

## Example 2: A `codeql.yml` file

*A file defining a CodeQL CI workflow for GitHub Actions.*

In [None]:
# Download the workflow file. Without a timeout, requests waits indefinitely
# on an unresponsive server, which would hang this cell.
response = requests.get(
    'https://raw.githubusercontent.com/EliahKagan/palgoviz/main/.github/workflows/codeql.yml',
    timeout=30,
)
response.raise_for_status()  # Fail loudly on an HTTP error status.
codeql_raw = response.text
print(codeql_raw)

In [None]:
# Parse the downloaded YAML text into Python objects.
codeql = yaml.safe_load(codeql_raw)
codeql  # Trailing expression: the cell's value is the parsed data.

In [None]:
# Write codeql to standard output as JSON, indented by 4 spaces.
json.dump(codeql, sys.stdout, indent=4)

In [None]:
# Build the reference graph of codeql; the cell's value is the Digraph.
build_graph(codeql)

## Example 3: Data from a REST API

A REST API returns (and accepts) data as JSON, not YAML...

In [None]:
# Fetch one record from the API. Without a timeout, requests waits
# indefinitely on an unresponsive server, which would hang this cell.
response = requests.get(
    'https://jsonplaceholder.typicode.com/todos/1',
    timeout=30,
)
response.raise_for_status()  # Fail loudly on an HTTP error status.
parsed_json = response.json()
parsed_json  # Trailing expression: the cell's value is the parsed data.

...except that YAML also accepts the JSON syntax, so any valid JSON is also valid YAML:

In [None]:
# Serialize the data parsed from JSON as YAML text, and show that text.
yaml_from_json = yaml.safe_dump(parsed_json)
print(yaml_from_json)

In [None]:
# Parse the YAML text back into Python objects.
parsed_yaml = yaml.safe_load(yaml_from_json)

In [None]:
# Compare the data parsed from JSON with the data after the trip through YAML.
parsed_json == parsed_yaml

In [None]:
# Build the reference graph of parsed_json; the cell's value is the Digraph.
build_graph(parsed_json)

## Example 4: A cyclic structure

JSON prohibits this, but YAML permits it.

In [None]:
# Start with a list that holds a single empty dict.
cycle = [{}]
# Make that dict refer back, through nested lists, to the outer list itself.
cycle[0]['x'] = [[[cycle], []]]
cycle  # Trailing expression: the cell's value is the cyclic structure.

In [None]:
# Serialize the cyclic structure as YAML text, and show that text.
raw_cycle = yaml.safe_dump(cycle)
print(raw_cycle)

In [None]:
# Parse the dumped YAML text back into Python objects.
yaml.safe_load(raw_cycle)

In [None]:
# Build the reference graph of cycle; the cell's value is the Digraph.
build_graph(cycle)