In [3]:
import yaml
import re
import enum
from pprint import pprint, pformat
import itertools
import math
import time

from dataclasses import dataclass
from typing import List, Tuple, Set
from copy import copy, deepcopy

# def listify_spec(spec):
#   listable = set({'affects', 'covered-by', 'mapto', 'actions'})
#   recursable = set({'structures'})
#   top-level = set({'structures', 'object-types', 'structure-types', 'representation-types', 'behavior-types'})

#   for key, content in spec.items():

  

def parse_spec(file_path):
  """Read the YAML document stored at `file_path` and return it parsed.

  The file is opened in text mode and handed to `yaml.safe_load`; whatever
  that call produces is returned unchanged.
  """
  with open(file_path, 'r') as spec_file:
    return yaml.safe_load(spec_file)

In [4]:
# Parse the example spec once; the object-registry cell below reads `ex_spec`.
ex_spec = parse_spec('video-editor.yaml')

## Comparing Objects

In [5]:
def process_listable(target, func):
  """Call `func` on every item of a "listable" value.

  `target` may be None (nothing happens), a list (each element is passed to
  `func` in order), or anything else (passed to `func` as a single item).
  Only exact `list` instances are unpacked. Always returns None.
  """
  if target is None:
    return

  # Normalise the single-item case so one loop handles both shapes.
  items = target if type(target) is list else [target]
  for item in items:
    func(item)

def strip_type(obj):
  """Return `obj` with every `(type) ` annotation removed.

  An annotation is a parenthesised run of word characters and hyphens that is
  followed by exactly one space, e.g. '(text) name' becomes 'name'.
  """
  type_annotation = re.compile(r'\((\w|-)*\) ')
  return type_annotation.sub('', obj)


In [6]:
class ObjectRegistry(object):
  """Registry of "maps to" relations between the objects named in a spec.

  `registry` has the shape {object name: set of object names it maps to}.
  Entries are created by parsing object strings (see `register_object`) and
  by walking the `objects`, `structures` and `representations` of a spec.
  """
  # obj mapto obj registry
  # {obj name: set(obj names)}
  registry: dict[str, Set[str]] = {}

  # # registry for structures between structures, repr, etc.
  # # {obj name: set((entity-name, connection-type), etc...) }
  # connections: dict[str, Set[Tuple[str, str]]] = {}

  def __init__(self, spec=None):
    """Start with an empty registry; register `spec` right away if given."""
    # Rebind per instance so instances never share the class-level dict.
    self.registry = {}
    if spec is None:
      return
    self.register_spec_objects(spec)

  def __str__(self):
    """Pretty-formatted view of the registry dict."""
    return pformat(self.registry)

  def register_object(self, obj):
    """Parse one object string and record the relations it spells out.

    -> means mapto
    .  means subset
    /  means component  # TODO: actually do this!!

    Any `(type) ` annotation is stripped first. Every dotted prefix of each
    arrow term gets an entry and maps to the prefix one level shorter
    (a.b.c => {a.b.c: {a.b}, a.b: {a}}). Each arrow term additionally maps to
    the term on its right (lhs->rhs => {lhs: {rhs}}).
    """
    assert(type(obj) is str)
    arrow_terms = strip_type(obj).split('->')

    left_term = None
    for term in arrow_terms:
      pieces = term.split('.')
      shorter = None
      for depth in range(1, len(pieces) + 1):
        path = '.'.join(pieces[:depth])
        # (Re)create the entry when it is missing or still empty.
        if not self.registry.get(path):
          self.registry[path] = set()
        if shorter is not None:
          self.registry[path].add(shorter)
        shorter = path

      if left_term is not None:
        # The full left-hand term was registered on the previous iteration.
        self.registry[left_term].add(term)
      left_term = term

  def register_struct_objects(self, struct, process_func):
    """Feed the objects a structure touches to `process_func`, recursively.

    Structures of type 'group' are themselves registered as objects under
    their name. The values under 'affects' and 'covered-by' are handed to
    `process_func`, and nested entries under 'structures' get the same
    treatment.
    """
    is_group = struct.get('type') == 'group'
    if is_group:
      # Groups also behave as objects, so register them
      name = struct.get('name')
      if name is None:
        print('Warning! No name provided for group. Using `NO NAME PROVIDED` instead. TODO: generate ID.')
        name = 'NO NAME PROVIDED'

      if self.registry.get(name) is None:
        self.registry[name] = set()

      # TODO: do groups map to their elements? Really, does the transitive property apply? My hunch is no, but need to think more

    for key in ('affects', 'covered-by'):
      process_listable(struct.get(key), process_func)
    # TODO: relate all of the objects affected/covered by a structure.

    for derivative in struct.get('structures', []):
      self.register_struct_objects(derivative, process_func=process_func)

  def _map_to_repr(self, target, repr_obj):
    """Record that `target` maps to the representational object `repr_obj`."""
    # note: this is a bit backwards compared to the syntax!
    if self.registry.get(target) is None:
      self.registry[target] = set()
    self.registry[target].add(repr_obj)
    # objects[target].add(repr_type + '/' + repr_obj)  # when we prefix the core stuff

  def register_repr_objects(self, repr, process_func):
    """Register the objects listed under a representation's 'objects' key.

    Each entry must be a single-item mapping {repr object: target(s)}. The
    targets are first passed to `process_func`, then every target is mapped
    to the representational object,
    eg. {message: {textbox}, author: {textbox}}.
    The target string is used as the registry key exactly as written.
    """
    repr_type = repr.get('type', '')
    # print(repr_type)
    for repr_item in repr.get('objects', []):
      assert(len(repr_item.values()) == 1)
      repr_obj, target_objs = list(repr_item.items())[0]

      process_listable(target_objs, process_func)
      process_listable(
        target_objs,
        lambda target, repr_obj=repr_obj: self._map_to_repr(target, repr_obj),
      )

  def register_spec_objects(self, spec): # {object: [mapto-targets]}
    """Register everything object-related in `spec`.

    Covers the top-level 'objects' listable, every entry of 'structures' and
    every entry of 'representations'.
    """
    register = self.register_object

    process_listable(spec.get('objects'), register)
    # TODO: deal with `objects` block

    for struct in spec.get('structures', []):
      self.register_struct_objects(struct, register)

    for representation in spec.get('representations', []):
      self.register_repr_objects(representation, register)


In [7]:
# Test in Object Registry
# Each case is (object string to register, registry contents expected after).
registry_cases = [
  ('a', {'a': set()}),
  ('a.b.c', {'a': set(), 'a.b': {'a'}, 'a.b.c': {'a.b'}}),
  ('a->b->c', {'a': {'b'}, 'b': {'c'}, 'c': set()}),
  ('a.b->x->z.w', {'a': set(), 'a.b': {'x', 'a'}, 'x': {'z.w'}, 'z': set(), 'z.w': {'z'}}),
]

for case_idx, (obj_string, expected) in enumerate(registry_cases):
  if case_idx > 0:
    print()

  print('testing: ' + obj_string)
  print('expected:')
  # pprint for every case so set elements print in a stable, sorted order and
  # line up with the pformat-based "got" output.
  pprint(expected)
  print('got:')
  test = ObjectRegistry()
  test.register_object(obj_string)
  print(test)

  # Fail loudly instead of relying on eyeballing the two printed dicts.
  assert test.registry == expected, f'{obj_string}: got {test.registry}'

testing: a
expected:
{'a': set()}
got:
{'a': set()}

testing: a.b.c
expected:
{'a': set(), 'a.b': {'a'}, 'a.b.c': {'a.b'}}
got:
{'a': set(), 'a.b': {'a'}, 'a.b.c': {'a.b'}}

testing: a->b->c
expected:
{'a': {'b'}, 'b': {'c'}, 'c': set()}
got:
{'a': {'b'}, 'b': {'c'}, 'c': set()}

testing: a.b->x->z.w
expected:
{'a': set(), 'a.b': {'x', 'a'}, 'x': {'z.w'}, 'z': set(), 'z.w': {'z'}}
got:
{'a': set(), 'a.b': {'x', 'a'}, 'x': {'z.w'}, 'z': set(), 'z.w': {'z'}}


In [8]:
# Build the registry for the example spec (construct empty, then register the
# spec's objects) and print the resulting {object: mapto targets} table.
ex_obj_registry = ObjectRegistry()
ex_obj_registry.register_spec_objects(ex_spec)
print(ex_obj_registry)

{'playhead': {'videos.in-editor/images', 'vlines'},
 'playhead->videos.in-editor/images': {'rects'},
 'timestamps': {'vlines'},
 'tracks': {'regions'},
 'videos': set(),
 'videos.in-editor': {'videos', 'rects'},
 'videos.in-editor/images': {'videos'},
 'videos/first-frame': {'regions'}}


In [9]:
# Load the core spec and collect the objects of every representation type into
# a single registry. `core_spec` and `core_objs` are module-level names that
# later cells read.
core_spec = parse_spec('core.yaml')
core_objs = ObjectRegistry()

# TODO: probably need to consider prefixing gui on these
for repr_type in core_spec.get('representation-types', []):
  # print(repr_type['name'])
  # Each representation type is registered as if it were a small spec.
  core_objs.register_spec_objects(repr_type)

# register_spec_objects(core_spec, core_objs)
print(core_objs)

{'hlines': {'lines'},
 'icons': {'regions'},
 'lines': {'regions'},
 'points': set(),
 'rects': {'regions'},
 'regions': set(),
 'vlines': {'lines'}}


In [10]:
# NOTE: This is general! Assuming your relations/registry is shaped properly
# TODO: count depth
def get_node_join(left, right, relations, visited=set(), depth=0) -> str | None:
  assert(type(left) is str)
  assert(type(right) is str)
  
  if relations.get(left) is None or relations.get(right) is None:
    return None

  # Trace print
  # print(f"{left:<8} {right:<}")

  if left in visited or right in visited:
    # To avoid cycles, abort if we've already visited a node
    # NOTE: Not like 100% sure this is clean
    print("WARNING! Cycle detected in the relations.")
    return None

  if left == right:
    return left
  
  for next_left in relations[left]:
    left_res = get_node_join(next_left, right, relations, visited=visited)
    if left_res is None:
      # When you reach the leaf of a spanning tree, pop back up to recurse
      for next_right in relations[right]:
        return get_node_join(left, next_right, relations, visited=visited)
    else:
      return left_res

  if len(relations[right]) == 0 and len(relations[left]) == 0:
    return None

# Demo on the core object registry: two joins that exist and one
# ('points', 'hlines') that does not.
print()
print('vlines, hlines => ' + str(get_node_join('vlines', 'hlines', core_objs.registry)))
print('rects, hlines => ' + str(get_node_join('rects', 'hlines', core_objs.registry)))
print('points, hlines => ' + str(get_node_join('points', 'hlines', core_objs.registry)))


vlines, hlines => lines
rects, hlines => regions
points, hlines => None


## Comparing Structures
- need to show that two structures are "the same" based on their types
- do structures map to other structures

In [11]:
class StructRegistry(object):
  """Tracks the type and the mapto targets of every structure in a spec.

  Both registries have the shape {name: set of names}, which is the shape
  `get_node_join()` expects for its relation table.
  """
  # Keep track of the type of each structure
  # TODO: I guess you only ever have the one type huh? keeping it a set for now
  # so that I can use get_node_join() without changing it.
  typeRegistry: dict[str, Set[str]] = {}

  # Keep track of which structure maps to which
  mapRegistry: dict[str, Set[str]] = {}

  def __init__(self, spec=None):
    """Start with empty registries; register `spec` right away if given."""
    # Rebind per instance so instances never share the class-level dicts.
    self.typeRegistry = {}
    self.mapRegistry = {}
    if spec is not None:
      self.register_spec(spec)

  @staticmethod
  def _prefix(parents: List[str]):
    """Join parent names into a 'a/b/' style prefix.

    Declared static because it takes no `self`; called as `self._prefix(...)`
    it would otherwise receive the instance as `parents`.
    """
    return '/'.join(parents) + '/'

  def register_struct(self, struct, parents=None):
    """Record the type and mapto targets of one structure.

    The structure must have a 'name'. Its 'type' (if any) is added to
    `typeRegistry[name]` and each 'mapto' target (single value or list) is
    added to `mapRegistry[name]`. Nested structures are not visited.
    """
    name = struct.get('name')
    assert(name is not None)

    # TODO: do prefixes for structure components
    # (computed but not applied to `name` yet)
    prefix = '' if parents is None else self._prefix(parents)

    # Register type
    if self.typeRegistry.get(name) is None:
      self.typeRegistry[name] = set()

    if struct_type := struct.get('type'):
      self.typeRegistry[name].add(struct_type)

    # Register map
    if self.mapRegistry.get(name) is None:
      self.mapRegistry[name] = set()

    if struct_maps := struct.get('mapto'):
      def process_struct_map(target):
        self.mapRegistry[name].add(target)

      process_listable(struct_maps, process_struct_map)

    # Register connections for objects
    # TODO

  def register_spec(self, spec, parents=None):
    """Register the structure types and structures declared by `spec`.

    Both are read from `spec` itself ('structure-types' and 'structures'), so
    the result never depends on module-level notebook state. Pass the core
    spec to register the core structure types.
    """
    for struct_type in spec.get('structure-types', []):
      self.register_struct_type(struct_type)

    for struct in spec.get('structures', []):
      self.register_struct(struct, parents)

  def register_struct_type(self, struct_type):
    """Record a structure type and what it maps to.

    The type must have a 'name'. Its 'mapto' value may be a single name or a
    list of names; each one is added to `typeRegistry[name]`.
    """
    # TODO: not 100% sure about how the semantics are being used (ie. type mapto is like a type for structure)
    name = struct_type.get('name')
    assert(name is not None)

    # Register type
    if self.typeRegistry.get(name) is None:
      self.typeRegistry[name] = set()

    # HACK: for the sake of comparing the type of structures from two different specs, it's easiest to think of
    # component-mapto as a type relation
    if target := struct_type.get('mapto'):
      # mapto is listable: a bare list cannot be added to a set as one item.
      targets = target if type(target) is list else [target]
      self.typeRegistry[name].update(targets)
      
      

## Construct Graph for Pattern Matching

How does this thing even work?  

Nodes are structures.  
A directed edge between nodes if S1 maps to S2.  
An undirected edge between nodes if they refer to the same object.


In [12]:
# Module-level counter backing new_id(); the next call hands out this value.
__next_id = 0

def new_id():
  """Return the next unused integer id (0, 1, 2, ...) and advance the counter."""
  global __next_id
  __next_id += 1
  return __next_id - 1

def reset_id_gen():
  """Restart id generation at 0, after printing a warning about stale ids."""
  global __next_id
  print("WARNING: Resetting ID Gen, any previously generated ids are no longer meaningful.")
  __next_id = 0

In [13]:
class NodeKind(enum.Enum):
  """Kinds of node a `Graph` can hold."""
  Structure = enum.auto()

class EdgeKind(enum.Enum):
  """Kinds of edge a `Graph` can hold."""
  # Directed
  StructMap = enum.auto()

  # Undirected
  ObjRef = enum.auto()


@dataclass
class Node:
  """A graph node: an integer id plus the kind of thing it stands for."""
  id: int
  kind: NodeKind

  def __str__(self):
    return f'{self.kind}: {self.id}'

@dataclass
class Edge:
  """A labelled graph edge between the node ids `source` and `target`."""
  id: int
  kind: EdgeKind
  label: str
  source: int
  target: int

  def __str__(self):
    return f'{self.kind}: ({self.source}-{self.target})'

@dataclass
class Graph:
  """A graph stored as a flat node list and a flat edge list."""
  nodes: List[Node]
  edges: List[Edge]

  def __str__(self):
    # Nodes share one comma-separated line; edges get one line each.
    node_line = ', '.join(map(str, self.nodes))
    edge_lines = '\n'.join(map(str, self.edges))
    return 'Nodes:\n' + node_line + '\n' + 'Edges: \n' + edge_lines

In [14]:
def register_id(name: str, registry: dict[str, int]) -> int:
  """Return the id registered for `name`, allocating a fresh one if needed.

  `registry` maps names to ids and is updated in place when a new id is
  handed out by `new_id()`.
  """
  existing_id = registry.get(name)
  # Compare against None explicitly: 0 is a valid id but is falsy.
  if existing_id is not None:
    return existing_id

  # Use a distinct local name so the module-level new_id() is not shadowed.
  fresh_id = new_id()
  registry[name] = fresh_id
  return fresh_id


def make_graph(spec_file: str) -> Graph:
  """Build a `Graph` with one Structure node per top-level structure.

  The spec is parsed from `spec_file`. Every entry under 'structures' must
  have a 'name'; a repeated name reuses its id. No edges are created yet.
  """
  graph = Graph([],[])
  spec = parse_spec(spec_file)

  # Built here but not read below yet.
  obj_reg = ObjectRegistry()
  obj_reg.register_spec_objects(spec)

  struct_reg = StructRegistry()
  struct_reg.register_spec(spec)

  ids: dict[str, int] = {} # { spec_name: id }
  for struct in spec.get('structures', []):
    struct_name = struct.get('name')
    assert(struct_name is not None)

    struct_node = Node(register_id(struct_name, ids), NodeKind.Structure)
    graph.nodes.append(struct_node)

  return graph


## Error Tolerant Graph Matching

In [15]:
# Returns
# [
#   [first pairing],
#   [second pairing],
#   ...
# ]
def list_pairings(s1: Set, s2: Set):
  """List every way to pair all of `s1` with distinct elements of `s2`.

  Requires len(s1) <= len(s2). For each len(s1)-sized selection from `s2`
  and each ordering of that selection, the elements of `s1` (in a fixed
  order) are zipped with it. Returns a list of pairings, each pairing being
  a list of (s1 element, s2 element) tuples.
  """
  assert(len(s1) <= len(s2))
  anchors = list(s1)

  return [
    list(zip(anchors, ordering))
    for chosen in itertools.combinations(s2, len(s1))  # which s2 elements take part
    for ordering in itertools.permutations(chosen)      # and in which order
  ]

# def count_list_pairings(s1: Set, s2: Set):
#   assert(len(s1) <= len(s2))
#   return math.comb(len(s2), len(s1)) * math.perm(len(s1))

In [16]:
def subset_pairings(s1: Set, s2: Set, size):
  """List every pairing of `size` elements of `s1` with `size` elements of `s2`.

  For each `size`-element subset of `s1` and each `size`-element subset of
  `s2`, all pairings between the two subsets (see `list_pairings`) are
  appended to the result.

  `size` must not exceed the smaller collection. The collections themselves
  may be given in either order of length, matching `all_subset_pairings`,
  which bounds `size` by `min(len(s1), len(s2))`.
  """
  l1 = list(s1)
  l2 = list(s2)

  assert(size <= min(len(l1), len(l2)))

  res = []
  for subset1 in itertools.combinations(l1, size):
    for subset2 in itertools.combinations(l2, size):
      # Pick out a subset for s1 and s2, and list their pairings
      # (both subsets have `size` elements, so list_pairings' own length
      # requirement always holds here)
      res += list_pairings(subset1, subset2)

  return res

# def count_subset_pairings(s1, s2, size: int):
#   assert(size <= len(s1) <= len(s2))
#   new_s1_size = len(s1) - size
#   new_s2_size = len(s2) - size
#   print('new size', new_s1_size)
#   n1 = list(range(new_s1_size))
#   n2 = list(range(new_s2_size))
#   print(math.comb(len(s1), size))
#   print(math.comb(len(s2), size))
#   print(count_list_pairings(n1, n2))
#   print('TODO: I think this is wrong')
#   # return math.comb(len(s1), size) * math.comb(len(s2), size) * count_list_pairings(n1, n2)


In [17]:
# Small example inputs; `s1` and `s2` are reused by the cell that calls
# all_subset_pairings below.
s1 = set({1,2,3})
s2 = set({'a','b', 'c'})

# Bare last expression, so the notebook displays the size-2 pairings.
subset_pairings(s1, s2, 2)

[[(1, 'c'), (2, 'a')],
 [(1, 'a'), (2, 'c')],
 [(1, 'c'), (2, 'b')],
 [(1, 'b'), (2, 'c')],
 [(1, 'a'), (2, 'b')],
 [(1, 'b'), (2, 'a')],
 [(1, 'c'), (3, 'a')],
 [(1, 'a'), (3, 'c')],
 [(1, 'c'), (3, 'b')],
 [(1, 'b'), (3, 'c')],
 [(1, 'a'), (3, 'b')],
 [(1, 'b'), (3, 'a')],
 [(2, 'c'), (3, 'a')],
 [(2, 'a'), (3, 'c')],
 [(2, 'c'), (3, 'b')],
 [(2, 'b'), (3, 'c')],
 [(2, 'a'), (3, 'b')],
 [(2, 'b'), (3, 'a')]]

In [18]:
def all_subset_pairings(s1: Set, s2: Set, verbose=False):
  """Collect the subset pairings of every size from 1 up to the smaller input.

  Results for each size come from `subset_pairings` and are concatenated in
  increasing size order. With `verbose=True` the size, the pairings and
  their count are printed for each size.
  """
  collected = []
  largest = min(len(s1), len(s2))
  for size in range(1, largest + 1):
    batch = subset_pairings(s1, s2, size)
    collected.extend(batch)

    if not verbose:
      continue
    print(f'size = {size}')
    print(batch)
    print(f'len = {len(batch)}')
    print()
  return collected

# def count_all_subset_pairings(s1, s2, verbose=False):
#   count = 0
#   max_size = min(len(s1), len(s2))
#   if verbose:
#     print(f'Going up to size {max_size}')
#   for i in range(max_size):
#     size = i + 1
#     inter_res = len(subset_pairings(s1, s2, size))
#     count += inter_res

#     if verbose:
#       print(f'size = {size}')
#       print(f'len = {inter_res}')
#       print()
#   return count

# count_all_subset_pairings(g1.nodes, g2.nodes, True)

# Run on the small example sets with per-size output, then print the overall count.
print('total: ' + str(len(all_subset_pairings(s1, s2, verbose=True))))

size = 1
[[(1, 'c')], [(1, 'a')], [(1, 'b')], [(2, 'c')], [(2, 'a')], [(2, 'b')], [(3, 'c')], [(3, 'a')], [(3, 'b')]]
len = 9

size = 2
[[(1, 'c'), (2, 'a')], [(1, 'a'), (2, 'c')], [(1, 'c'), (2, 'b')], [(1, 'b'), (2, 'c')], [(1, 'a'), (2, 'b')], [(1, 'b'), (2, 'a')], [(1, 'c'), (3, 'a')], [(1, 'a'), (3, 'c')], [(1, 'c'), (3, 'b')], [(1, 'b'), (3, 'c')], [(1, 'a'), (3, 'b')], [(1, 'b'), (3, 'a')], [(2, 'c'), (3, 'a')], [(2, 'a'), (3, 'c')], [(2, 'c'), (3, 'b')], [(2, 'b'), (3, 'c')], [(2, 'a'), (3, 'b')], [(2, 'b'), (3, 'a')]]
len = 18

size = 3
[[(1, 'c'), (2, 'a'), (3, 'b')], [(1, 'c'), (2, 'b'), (3, 'a')], [(1, 'a'), (2, 'c'), (3, 'b')], [(1, 'a'), (2, 'b'), (3, 'c')], [(1, 'b'), (2, 'c'), (3, 'a')], [(1, 'b'), (2, 'a'), (3, 'c')]]
len = 6

total: 33


In [19]:
# Time all_subset_pairings on a set paired with itself, for sizes
# 0 .. num_iterations - 1. Results of an earlier, larger run are kept in the
# comment block below this loop.
last_time = time.time()
# num_iterations = 11
num_iterations = 3
for v in range(num_iterations):
  # NOTE: rebinds the module-level `s1` used by the earlier example cells.
  s1 = set(range(v))
  print(f'each graph has {v} nodes')
  print(f'number of node pairings: {len(all_subset_pairings(s1, s1)):.2e}')
  new_time = time.time()
  # `:4f` is a minimum width of 4 with the default 6 decimal places.
  print(f"time taken: {new_time - last_time:4f} seconds\n")
  last_time = new_time

# each graph has 0 nodes
# number of node pairings: 0.00e+00
# time taken: 0.000597 seconds

# each graph has 1 nodes
# number of node pairings: 1.00e+00
# time taken: 0.000037 seconds

# each graph has 2 nodes
# number of node pairings: 6.00e+00
# time taken: 0.000034 seconds

# each graph has 3 nodes
# number of node pairings: 3.30e+01
# time taken: 0.000073 seconds

# each graph has 4 nodes
# number of node pairings: 2.08e+02
# time taken: 0.000251 seconds

# each graph has 5 nodes
# number of node pairings: 1.54e+03
# time taken: 0.001818 seconds

# each graph has 6 nodes
# number of node pairings: 1.33e+04
# time taken: 0.020417 seconds

# each graph has 7 nodes
# number of node pairings: 1.31e+05
# time taken: 0.265961 seconds

# each graph has 8 nodes
# number of node pairings: 1.44e+06
# time taken: 9.128887 seconds

# each graph has 9 nodes
# number of node pairings: 1.76e+07
# time taken: 278.424717 seconds

# each graph has 10 nodes
# (ran out of application memory)


each graph has 0 nodes
number of node pairings: 0.00e+00
time taken: 0.000389 seconds

each graph has 1 nodes
number of node pairings: 1.00e+00
time taken: 0.000702 seconds

each graph has 2 nodes
number of node pairings: 6.00e+00
time taken: 0.000041 seconds



# Greedy Approach
- cost is based on matching edges, rather than nodes
- start with structures that have matching structure
- for each pair, look at each option, greedily maximizing edges
- keep score and sort

TODO: I should really preprocess specs so that every listable is a list...

In [89]:
class ConnectionType(enum.Enum):
  """How two structures are connected through a shared object."""
  # Derived from the structures' 'affects' entries.
  Affects = enum.auto()
  # Derived from the structures' 'covered-by' entries.
  Cover = enum.auto()
  # TODO: more

def init_registry_entry(registry, item):
  """Make sure `registry[item]` exists, creating an empty set when it is absent.

  An existing non-None value is left untouched.
  """
  if registry.get(item) is not None:
    return
  registry[item] = set()
# Return a list even if it's a single thing, so you can always loop through it
def listable(content):
  """Normalise a "listable" spec value to a list.

  None becomes [], a string becomes a one-element list, and a list is
  returned as-is (the same object). Any other type is reported and fails an
  assertion.
  """
  if content is None:
    return []

  kind = type(content)
  if kind is list:
    return content
  if kind is str:
    return [content]

  print('Listable item is of type:', kind)
  assert(False)

def find_struct(spec: dict, struct_name: str) -> dict | None:
  for struct in spec.get('structures', []):
    if struct.get('name') == struct_name:
      return struct
  return None

def add_connection(struct, target_name, connector, connection_type):
  """Append a connection record to `struct` (modified in place).

  The record {'target-structure': target_name, 'connector-object': connector}
  is appended to the list stored under 'affected-connections' for
  `ConnectionType.Affects`, or 'cover-connections' for `ConnectionType.Cover`.
  The list is created when missing.

  Raises:
    ValueError: for any other `connection_type`. Nothing is written in that
      case, so `struct` never gains a bogus `None` key.
  """
  if connection_type == ConnectionType.Affects:
    key = 'affected-connections'
  elif connection_type == ConnectionType.Cover:
    key = 'cover-connections'
  else:
    raise ValueError(f'Invalid connection type: {connection_type}')

  value = {
    'target-structure': target_name,
    'connector-object': connector
  }

  if struct.get(key) is None:
    struct[key] = []
  struct[key].append(value)

def add_span_edits(edits, obj_registry, connection_type, source_struct, target_struct):
  """Collect the connections implied between two structures for one connection type.

  Every object listed by `source_struct` is compared with every object listed
  by `target_struct` under the key for `connection_type` ('affects' or
  'covered-by'). Whenever `get_node_join` finds a join in
  `obj_registry.registry`, the tuple
  (source name, target name, join, connection_type) is added to the `edits`
  set. An unknown connection type is reported and produces no edits.
  """
  if connection_type == ConnectionType.Affects:
    key = 'affects'
  elif connection_type == ConnectionType.Cover:
    key = 'covered-by'
  else:
    key = None
    print('Invalid connection type:', connection_type)

  source_struct_name = source_struct.get('name')
  target_struct_name = target_struct.get('name')

  for source_obj in listable(source_struct.get(key)):
    for target_obj in listable(target_struct.get(key)):
      # TODO: could cache the joins pretty easily if it's slow
      join = get_node_join(source_obj, target_obj, obj_registry.registry)
      if join is None:
        continue
      edits.add((source_struct_name, target_struct_name, join, connection_type))

def add_span_edges(spec):
  """Return a deep copy of `spec` annotated with structure-to-structure connections.

  Every ordered pair of differently named top-level structures is checked
  for 'affects' and 'covered-by' objects that share a join in the spec's
  object registry. Each hit is recorded on the source structure of the copy
  via `add_connection`. `spec` itself is not modified. A spec without
  structures is returned as an unchanged copy.
  """
  obj_registry = ObjectRegistry(spec)

  # Default to no structures, like the other spec walkers in this notebook,
  # instead of failing on a spec without a 'structures' key.
  structs = spec.get('structures') or []

  edits = set() # { (source, target, obj, connection_type) }

  # 1. For every pair of structs and every pair of their target objects, see if it makes sense to add a link
  # double black diamond slope over here
  for source_struct in structs:
    source_struct_name = source_struct.get('name')
    for target_struct in structs:
      target_struct_name = target_struct.get('name')
      if source_struct_name == target_struct_name:
        continue

      add_span_edits(edits, obj_registry, ConnectionType.Affects, source_struct, target_struct)
      add_span_edits(edits, obj_registry, ConnectionType.Cover, source_struct, target_struct)

  # 2. Make the edits
  new_spec = deepcopy(spec)

  for edit in edits:
    source_struct_name, target_struct_name, connecting_object, connection_type = edit
    struct = find_struct(new_spec, source_struct_name)
    add_connection(struct, target_struct_name, connecting_object, connection_type)

  return new_spec

# Show the video-editor spec after connection annotations have been added.
pprint(add_span_edges(parse_spec('video-editor.yaml')))


{'behavior': [{'input': 'mouse/drag',
               'moves': {'along': 'editor-timeline',
                         'object': 'videos.in-editor.selected'},
               'name': ['add-video', 'move-video']},
              {'moves': {'along': 'editor-timeline', 'object': 'playhead'},
               'name': 'play'}],
 'object-types': [{'behavior': [{'moves': {'along': 'timeline',
                                           'object': 'playhead'},
                                 'name': 'play'}],
                   'name': 'video',
                   'objects': ['(text) name', '(image) images'],
                   'representations': [{'name': 'current-frame',
                                        'objects': {'regions': 'playhead->images'},
                                        'type': 'gui'},
                                       {'name': 'first-frame',
                                        'objects': {'regions': 'playhead->first'},
                                        'type': '

In [None]:
# Failed attempts at doing span

# # An object connection is a bidirectional edge between two structures that 
# # indicates that they both point to the same object (including mapto).
# # connection registry: {obj-name: set(structures, reprs, behaviors)}
# def object_connections(spec):
#   obj_registry = ObjectRegistry(spec)
#   # print('obj_registry:')
#   # pprint(obj_registry.registry)
#   # print()
  
#   connection_registry: dict[str, list[tuple[str, ConnectionType]]] = {}
  
#   # 1. register object uses
#   # Structures
#   for struct in spec.get('structures', []):
#     struct_name = struct.get('name')

#     def register_connection(obj, connection):
#       init_registry_entry(connection_registry, obj)
#       connection_registry[obj].add((struct_name, connection))
#       # print(f'adding: {obj} -> ({struct_name}, {connection})')

#       for mapto_target in obj_registry.registry.get(obj, []):
#         register_connection(mapto_target, connection)
      
    
#     # Process affected objects
#     for target in listable(struct.get('affects')):
#       connection = ConnectionType.Affects
#       # print(target, connection)
#       register_connection(target, connection)

#       # print()

#     # Process covers
#     for target in listable(struct.get('covered-by')):
#       connection = ConnectionType.Cover
#       # print(target, connection)
#       register_connection(target, connection)
#       # print()
  
#   # TODO: do representations etc.
  
#   print('\nconnection registry:')
#   # pprint(connection_registry)
#   print()
#   pprint({key: [(first, second) for first, second in val if second == ConnectionType.Affects] for key, val in connection_registry.items()})
#   print()

#   # # 2. loop through again and add a connection based on registry
#   # new_spec = deepcopy(spec)
#   # for struct in new_spec.get('structures', []):
#   #   struct_name = struct.get('name')

#   #   # Process affected objects
#   #   struct['affected-connections'] = []
#   #   for target in listable(struct.get('affects')):
#   #     for connection in connection_registry.get(target):
#   #       entity, relation = connection
#   #       # print(struct_name, entity, relation)
#   #       if entity != struct_name and relation == ConnectionType.Affects:
#   #         struct['affected-connections'].append(entity)

#   #   # Process covers
#   #   struct['cover-connections'] = []
#   #   for target in listable(struct.get('covered-by')):
#   #     for connection in connection_registry.get(target):
#   #       entity, relation = connection
#   #       if entity != struct_name and relation == ConnectionType.Cover:
#   #         # struct['cover-connections'].append(entity)
#   #         add_connection(struct, entity, target, connection)

#   # # 2. loop through connections and add if appropriate there are more than one edges
#   # new_spec = deepcopy(spec)
#   # for connecting_object, values in connection_registry.items():
#   #   if len(values) < 2:
#   #     # gotta have two ends for a connection
#   #     continue

#   #   values = list(values)

#   #   for struct_name, connection_type in values:
#   #     struct = find_struct(new_spec, struct_name)
#   #     if struct is None:
#   #       print('Struct not found??:', struct_name)
#   #       continue


#   #     for target_struct_name, inner_connection_type in values:
#   #       if struct_name == target_struct_name:
#   #         # don't connect with yourself
#   #         continue
          
#   #       print(f'add_connection({struct_name}, {target_struct_name}, {connecting_object}, {inner_connection_type})')
#   #       add_connection(struct, target_struct_name, connecting_object, inner_connection_type)

#   # 2. Loop through potential connections and list edit operations.
#   # NOTE: this is a dumb, overcounting way to do it. Basically iterating through edges of a complete graph, which can 
#   # be done without duplicating edges, but that's annoying so here I just throw stuff in a set to avoid dupes. 
#   # If it's slow, it can definitely be sped up. But I suspect we're not looking at a ton of these connections in practice.
#   edits = set() # { (source, target, obj, connection_type) }
#   for connecting_object, values in connection_registry.items():
#     if len(values) < 2:
#       continue

#     # double loop through possible connections and dump them into a set
#     for val in values:
#       source_struct_name, _ = val
#       for val2 in values:
#         target_struct_name, connection_type = val2
#         if source_struct_name != target_struct_name: # don't connect with yourself
#           edits.add((source_struct_name, target_struct_name, connecting_object, connection_type))

#   print('edits')
#   pprint(edits)
#   print()

#   # 3. Loop through edits and add them.
#   new_spec = deepcopy(spec)
#   for edit in edits:
#     source_struct_name, target_struct_name, connecting_object, connection_type = edit
#     struct = find_struct(new_spec, source_struct_name)
#     add_connection(struct, target_struct_name, connecting_object, connection_type)
  
#   print('\nresult:')
#   pprint([{'_name': struct.get('name'), 'affected-connection': struct.get('affected-connections'), 'cover-connection': struct.get('cover-connections')} for struct in new_spec.get('structures')])
#   # NOTE: this isn't correct yet because connections aren't symmetric (eg. alphabet and timeline)]
#   # TODO: add why the connection is the way it is (joining node)
# object_connections(parse_spec('video-editor.yaml'))
# object_connections(parse_spec('calendar.yaml'))

In [None]:
# More points is better (as opposed to cost where more is worse)
# Doing points because it's harder to iterate all of the missed opportunity than
# to count what we can match.
# Only "type" and "affects" are read by greedy_overlaps below.
greedy_points = {
  "type": 5,
  "affects": 1,
  "covered-by": 1,
  "struct-mapto": 0, # not sure yet
  "same-target": 1,
  "obj-mapto": 0,
  "repr-factor": 1,  # multiply by the struct/obj mapto for representation stuff
  "indirect-factor": 1.1, # penalize indirect connections (?)
}

def stringify_pairing(pairing):
  """Render a pairing as text, one 'left -> right' line per pair.

  Each line ends with a newline. The pairs are emitted in the order given.
  """
  # NOTE: there's no guarantee that the order is the same, so it's not ideal
  return ''.join(f'{left} -> {right}\n' for left, right in pairing)

def get_struct_dict(spec):
  """Index the top-level structures of `spec` by name.

  Returns {struct name: struct spec}. When names repeat, the later structure
  wins.
  """
  return {struct.get('name'): struct for struct in spec.get('structures', [])}

def greedy_overlaps(sinister_file, dextera_file, core_file, verbose=False):
  """Score every pairing of structures between two specs and print the results.

  Both specs are parsed and annotated with `add_span_edges`. Every pairing
  produced by `all_subset_pairings` over their structure names is scored:

  - `greedy_points['type']` for each pair whose structures have a type join
    in the struct registry (core types plus both specs);
  - `greedy_points['affects']` for each pair of affected-connections, one
    from each side, whose target structures are themselves paired in the
    same pairing.

  Scores are only printed (per-pairing details when `verbose` is true);
  nothing is returned.
  """
  def vprint(*args):
    if verbose:
      print(*args)

  core = parse_spec(core_file)
  sinister = add_span_edges(parse_spec(sinister_file))
  dextera = add_span_edges(parse_spec(dextera_file))

  # Setup struct registry
  struct_registry = StructRegistry(core)
  struct_registry.register_spec(sinister)
  struct_registry.register_spec(dextera)

  sinister_structs = get_struct_dict(sinister)
  dextera_structs = get_struct_dict(dextera)

  vprint('sinister structs')
  pprint(sinister_structs)
  vprint()

  # vprint('-- structure type registry:')
  # for key, val in struct_registry.typeRegistry.items():
  #   skey = str(key)
  #   sval = str(val)
  #   vprint(f'{skey}: {sval}')
  # vprint()

  # pairing = [[(v1, u1), (v1, u2)], [(v2, u1)], etc...]
  pairings = all_subset_pairings(sinister_structs.keys(), dextera_structs.keys())
  print('-- Pairings:')
  pprint(pairings)
  print()

  score_card = {} # { stringified pair: score }
  # Pair off structures in every way
  for pairing in pairings:
    vprint()
    pairing_str = stringify_pairing(pairing)
    score_card[pairing_str] = 0
    for pair in pairing:
      left, right = pair
      vprint('pair:', pair)
      left_struct = sinister_structs.get(left)
      right_struct = dextera_structs.get(right)

      # Check type
      type_join = get_node_join(left, right, struct_registry.typeRegistry)
      vprint(f'type join: {left}, {right} -> {type_join}')
      if type_join is not None:
        score_card[pairing_str] += greedy_points.get('type')

      # Check affected
      # Each entry is a connection record written by add_connection:
      # {'target-structure': name, 'connector-object': object}
      all_left_affected = left_struct.get('affected-connections', [])
      all_right_affected = right_struct.get('affected-connections', [])

      for left_connection in all_left_affected:
        for right_connection in all_right_affected:
          vprint(f'comparing {left}--{left_connection} to {right}--{right_connection}')
          # A pairing is a list of (left name, right name) tuples, so look
          # for the pair formed by the two connected structures.
          connected_pair = (
            left_connection.get('target-structure'),
            right_connection.get('target-structure'),
          )
          if connected_pair in pairing:
            # NOTE: this doesn't take into account that structures can map to other structures?
            # This also mostly doesn't matter without considering presentations...
            score_card[pairing_str] += greedy_points.get('affects')

  for pairing in pairings:
    vprint()
    pairing_str = stringify_pairing(pairing)
    vprint('- score:', score_card[pairing_str])
    vprint('for pairing:')
    vprint(pairing)

      


# Compare the calendar spec (left side) with the video-editor spec (right
# side), using core.yaml for the core structure types.
greedy_overlaps('calendar.yaml', 'video-editor.yaml', 'core.yaml', verbose=True)

sinister structs
{'time': {'affects': 'timestamps',
          'covered-by': ['days', 'weeks', 'events'],
          'name': 'time',
          'structures': [{'affects': 'weeks',
                          'name': 'week-order',
                          'type': 'order'}],
          'type': 'linear'}}

-- Pairings:
[[('time', 'editor-timeline')],
 [('time', 'tracks')],
 [('time', 'alphabetical')]]


pair: ('time', 'editor-timeline')
type join: time, editor-timeline -> linear

pair: ('time', 'tracks')
type join: time, tracks -> None

pair: ('time', 'alphabetical')
type join: time, alphabetical -> linear

- score: 5
for pairing:
[('time', 'editor-timeline')]

- score: 0
for pairing:
[('time', 'tracks')]

- score: 5
for pairing:
[('time', 'alphabetical')]
