In [None]:
# 1. Demonstrate some hand-written work graphs of varying complexity.
# 2. Demonstrate a simple Context that can hold a valid work graph, but not execute it.
# 3. Implement one or two Operation factories that establish a work graph.
# 4. Serialize and deserialize a work graph.

In [None]:
import json
import collections
import hashlib
import networkx
import numpy

In [None]:
# Prototypical records as native Python dictionaries.

def graph_prototype():
    """Return a fresh, empty work graph record.

    The record carries the schema version string and an empty 'elements'
    mapping. New dictionaries are built on every call, so the result can be
    mutated freely without affecting other callers.
    """
    prototype = dict(version='gmxapi_graph_0_2')
    prototype['elements'] = dict()
    return prototype

def element_prototype():
    """Return a fresh, blank graph element record.

    'label' and 'operation' start out as None; 'input', 'output' and
    'interface' are empty dicts and 'depends' is an empty list. Every call
    creates new container objects, so records never share mutable state.
    """
    record = dict.fromkeys(('label', 'operation'))
    record.update(input={}, output={}, depends=[], interface={})
    return record

In [None]:
# This record packages the information for a 2x2 integer array source terminal.

# Hand-written graph with a single element: a 2x2 Integer64 array source,
# stored under a 64-character hexadecimal key.
sample1 = dict(
    version='gmxapi_graph_0_2',
    elements={
        '69e69fedbdbab6dcda556db6d5835375cefb4e801fb8279d0d7ef3995154bc15': dict(
            label='my_array',
            operation=['gmxapi', 'Integer64'],
            input=dict(data=[[0, 1], [2, 3]]),
            output=dict(
                meta=dict(type='gmxapi.Integer64', shape=[2, 2]),
            ),
        ),
    },
)

In [None]:
# Hand-written graph with a single placeholder 'cli' element. Its input and
# output sections are still commented out below.
sample2 = dict(
    version='gmxapi_graph_0_2',
    elements=dict(
        cli_XXX=dict(
            namespace='gmxapi',
            operation='cli',
            # 'input': [[0, 1], [2, 3]],
            # 'output': {
            #     'meta': {
            #         'type': 'gmxapi.Integer',
            #         'shape': [2,2]
            #     }
            # }
        ),
    ),
)

In [None]:
class WorkGraph(collections.UserDict):
    """Dictionary-like container for a deserialized work graph.

    This helper exists for validation and visualization only; it is not meant
    to be a required part of the object model. Client code is expected to
    reach a work graph through a Context, which may keep whatever internal
    data structure suits it.
    """

    def __init__(self):
        # Every instance starts from its own freshly built prototype record.
        initial_record = graph_prototype()
        super(WorkGraph, self).__init__(initial_record)

In [None]:
# Modify one graph, then print the contents of a brand new one.
# NOTE(review): presumably this shows that instances do not share prototype
# state -- confirm that printing a new WorkGraph (not `graph`) is intended.
graph = WorkGraph()
graph['elements'].update(foo='bar')
print(WorkGraph().data)

In [None]:
# Key types

class ObjectName(collections.UserString):
    """String-like key type for object names.

    No constraints are enforced yet; the value is stored as given.
    """

    def __init__(self, name: str):
        # TODO: validation
        super(ObjectName, self).__init__(name)

class Label(collections.UserString):
    """String-like key type for labels.

    No constraints are enforced yet; the value is stored as given.
    """

    def __init__(self, name: str):
        # TODO: validation
        super(Label, self).__init__(name)

# Value types

class Data:
    """Literal data for work graph input values.

    Literal data objects are dense arrays of uniform element type.

    NOTE(review): this class is currently a documentation-only placeholder;
    it defines no storage, validation, or behavior.
    """

class Collection:
    """Value that is a nested mapping of keys to value objects.

    Keys are strings subject to the constraints of Label.

    Values are Data, Collections, or References.

    NOTE(review): this class is currently a documentation-only placeholder;
    none of the constraints described above are enforced by code.
    """

class Reference:
    """Work record value object that is a reference to another graph entity.

    NOTE(review): this class is currently a documentation-only placeholder;
    how a reference is represented or resolved is not defined here.
    """

In [None]:
# Manage the structure and rules of a work record.
import copy
class WorkRecord(collections.UserDict):
    """Dictionary-like holder that manages the structure of a work record.

    A new instance starts from a fresh graph prototype. Item reads return
    deep copies, so modifying a retrieved value never alters the stored
    record.
    """

    def __init__(self):
        super().__init__(graph_prototype())

    def __getitem__(self, item):
        """Return a deep copy of the value stored under *item*.

        Raises:
            KeyError: if *item* is not present in the record.
        """
        return copy.deepcopy(super().__getitem__(item))

    def as_json(self, **kwargs):
        """Serialize the record to a JSON string.

        Arguments:
            **kwargs: forwarded unchanged to ``json.dumps`` (for example
                ``sort_keys`` or ``separators``).

        Returns:
            The JSON text for the underlying record data.
        """
        import json
        # The serialized text was previously computed and discarded, so the
        # method always returned None.
        return json.dumps(self.data, **kwargs)
        

In [None]:
# Show the str() form, the repr() form, and the default print() form of a
# new record, one per line.
record = WorkRecord()
print(str(record), repr(record), record, sep='\n')

In [None]:
import collections
import json    
import typing

# The behavior of `bytes` is sufficient that a UID class is probably not necessary,
# though we might want to guarantee that a UID is exactly 32 bytes. TBD...

class Fingerprint(object):
    """Identifying details of a graph node, reducible to a unique ID.

    A fingerprint is built from the operation name components, the input
    (a mapping, or a string reference), and the dependencies of a node. The
    compact JSON form of those three fields is hashed with SHA-256 to give a
    32-byte identifier.
    """
    import hashlib as _hashlib

    def __init__(self, *,
                 operation: typing.Sequence,
                 input: typing.Union[str, typing.Mapping],
                 depends: typing.Sequence = ()):
        """Validate and store the identifying fields.

        Arguments:
            operation: list or tuple of operation name components.
            input: dict of input values, or a string reference.
            depends: list or tuple of dependencies (default: empty).

        Raises:
            ValueError: if any argument has an unsupported type.
        """
        # TODO: replace (list, tuple) with abstraction for valid operation values
        if not isinstance(operation, (list, tuple)):
            raise ValueError('Fingerprint requires a sequence of operation name components.')
        self.operation = tuple(operation)

        # TODO: replace (dict, str) with abstraction for valid input values.
        if isinstance(input, str):
            # TODO: chase reference
            self.input = str(input)
        elif isinstance(input, dict):
            # Shallow copy, so rebinding keys on the caller's dict later does
            # not change this fingerprint.
            self.input = dict(input)
        else:
            raise ValueError('Fingerprint requires a valid input representation.')

        # TODO: replace (list, tuple) with abstraction for valid depends values.
        if not isinstance(depends, (list, tuple)):
            # This exception was previously constructed but never raised,
            # which left the instance without a `depends` attribute.
            raise ValueError('Fingerprint requires a sequence for dependency specification.')
        self.depends = tuple(depends)

    def compact_json(self):
        """Return the canonical JSON text that is hashed to form the UID.

        Keys are sorted, separators carry no whitespace, and non-ASCII
        characters are escaped, so equal fingerprints give identical text.
        """
        identifiers = collections.OrderedDict([
            ('depends', self.depends),
            ('input', self.input),
            ('operation', self.operation)
        ])
        id_string = json.dumps(identifiers, separators=(',', ':'), sort_keys=True, ensure_ascii=True)
        return id_string

    def uid(self) -> bytes:
        """Return the 32-byte SHA-256 digest of the compact JSON form.

        Raises:
            ValueError: if the hash does not report a 32-byte digest size.
        """
        id_string = self.compact_json()
        id_bytes = id_string.encode('utf-8')
        id_hash = Fingerprint._hashlib.sha256(id_bytes)
        size = id_hash.digest_size
        if not size == 32:
            # The message now states the size that is actually checked.
            raise ValueError('Expected digest_size 32, but got {}'.format(size))
        digest = id_hash.digest()
        assert isinstance(digest, bytes)
        assert len(digest) == size
        return digest


class OperationIdentifier(tuple):
    """Sequence of name components identifying an operation.

    The final component is the operation name; all preceding components form
    the namespace. ``str()`` joins the components with dots.
    """

    def namespace(self):
        """Return every component except the last, as a plain tuple."""
        # Drop only the trailing operation name. The earlier slice [0:-2]
        # also dropped the innermost namespace component.
        return tuple(self[:-1])

    def operation_name(self):
        """Return the final component, i.e. the operation name."""
        return self[-1]

    def __str__(self):
        return '.'.join(self)

    
class Integer64(object):
    """Source node holding an array of 64-bit integers.

    The data is kept as a numpy int64 array. Instances can be serialized to
    and rebuilt from a JSON node record, and can produce a Fingerprint.
    """
    import json as _json
    # TODO: Replace numpy dependency with memoryview manager or core gmxapi
    #  buffer protocol provider.
    # Note that the built-in Python array module only provides 1-dimensional arrays.
    from numpy import array as _array

    operation = OperationIdentifier(['gmxapi', 'Integer64'])

    def __init__(self, data):
        """Store *data* converted to an int64 array."""
        # Note: numpy may be too forgiving regarding source data and we may want extra sanitization.
        self.data = Integer64._array(data, dtype='int64')

    def to_json(self, **json_args) -> str:
        """Return this node as a JSON record.

        The record has 'operation', 'input' (with the nested-list data) and
        an empty 'depends'. Keyword arguments are passed on to the JSON
        encoder.
        """
        return Integer64._json.dumps(
            {
                'operation': Integer64.operation,
                'input': {'data': self.data.tolist()},
                'depends': (),
            },
            **json_args
        )

    @classmethod
    def from_json(cls, serialized: str):
        """Build an instance from a JSON node record.

        Raises:
            ValueError: if 'operation' or 'input' is missing, the operation
                does not match this class, 'data' is absent from the input,
                or the data cannot be converted to an int64 array.
        """
        record = cls._json.loads(serialized)
        if any(key not in record for key in ('operation', 'input')):
            raise ValueError('Invalid record received.')
        if tuple(record['operation']) != cls.operation:
            raise ValueError('Not a {} record.'.format(cls.operation))
        payload = record['input']
        if 'data' not in payload:
            raise ValueError('Expected "data" input field.')
        try:
            data = cls._array(payload['data'], dtype='int64')
        # TODO: Handle exceptions as we figure out what can go wrong.
        except ValueError as e:
            raise ValueError('Could not create {} from data.'.format(cls.operation)) from e
        return cls(data)

    def fingerprint(self):
        """Return the Fingerprint for this operation and its current data."""
        values = self.data.tolist()
        return Fingerprint(operation=['gmxapi', 'Integer64'], input={'data': values})

# TODO: Probably want a generic SerializedOperation named type or abstract handling
#  for the various ways const nodes could be passed.

In [None]:
my_array = Integer64([[1, 2], [3, 4]])

# Compact, key-sorted JSON encoder settings.
json_args = dict(separators=(',', ':'), sort_keys=True)

fingerprint = my_array.fingerprint()
print(fingerprint.compact_json())

# Compute the UID by hand from the compact JSON text...
uid = hashlib.sha256(fingerprint.compact_json().encode('utf-8')).digest()
print(uid.hex(), int.from_bytes(uid, byteorder='big'), sep='\n')

# ...and through Fingerprint.uid(), which hashes the same text.
uid = fingerprint.uid()
print(uid.hex(), int.from_bytes(uid, byteorder='big'), sep='\n')
print(my_array.to_json(**json_args))

In [None]:
# Test fingerprinting for Integer64
import hashlib

# Reference serialization: sorted keys, no whitespace.
expected_json = '{"depends":[],"input":{"data":[[1,2],[3,4]]},"operation":["gmxapi","Integer64"]}'
actual_json = fingerprint.compact_json()
estimated_json = my_array.to_json(**json_args)

# The node record and the fingerprint text must both equal the reference.
assert actual_json == estimated_json
assert actual_json == expected_json

# The UID must be the SHA-256 digest of the reference text.
actual_hash = my_array.fingerprint().uid()
expected_hash = hashlib.sha256(bytes(expected_json, 'utf-8')).digest()
assert actual_hash == expected_hash

print(expected_hash.hex())

In [None]:
# Test deserialization of Integer64

# Rebuild the array from its compact JSON node record.
my_array = Integer64.from_json(
    '{"depends":[],'
    '"input":{"data":[[1,2],[3,4]]},'
    '"operation":["gmxapi","Integer64"]}'
)

In [None]:
# Use the Python `xdrlib` module (XDR, "network" byte order; removed from the
# standard library in Python 3.13) to establish a baseline byte sequence for
# fingerprinting binary data.
#
# Let native data be established through transformations of the base line.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.

In [None]:
# Use the Python `xdrlib` module (XDR, "network" byte order; removed from the
# standard library in Python 3.13) to establish a baseline byte sequence for
# fingerprinting binary data.
#
# Let native data be established through transformations of the base line.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.

In [None]:
# Test fingerprinting for Integer64
import hashlib

# Reference serialization of the 2x2 Integer64 node and its SHA-256 digest.
expected_json = (
    '{"depends":[],'
    '"input":{"data":[[1,2],[3,4]]},'
    '"operation":["gmxapi","Integer64"]}'
)
expected_hash = hashlib.sha256(bytes(expected_json, 'utf-8')).digest()
print(expected_hash.hex())

In [None]:
# Use the Python `xdrlib` module (XDR, "network" byte order; removed from the
# standard library in Python 3.13) to establish a baseline byte sequence for
# fingerprinting binary data.
#
# Let native data be established through transformations of the base line.
# 
# Automatically generate reverse transformation and fingerprint so that later
# transformations can be performed at minimal cost.

In [None]:
# Demonstrate a simple Context that can hold a work graph (but which cannot execute anything).

class SimpleGraphContext(object):
    """Minimal Context that can hold a work graph but cannot execute it.

    Most methods are still stubs: `serialize` produces nothing, `add_node`
    only checks that its argument is parseable JSON, and `nodes` is always
    empty.
    """

    def __init__(self):
        """Initialize an empty Context."""
        # This method was previously misspelled `__index__`, so it never ran
        # on construction and `_work` was never created.
        self._work = WorkRecord()

    def serialize(self) -> str:
        """Create a serialized representation of the managed work.

        Not implemented yet: currently returns None.
        """

    @classmethod
    def deserialize(cls, record: str) -> 'SimpleGraphContext':
        """Create a new instance from a serialized record.

        The record is not read yet; a new, empty context is returned.
        """
        # Construct through `cls` so subclasses get an instance of their own type.
        context = cls()
        return context

    def add_node(self, record: str):
        """Validate and add a node to the managed work graph.

        Initially requires a valid and fully formed JSON serialized record of
        the new node.

        Arguments:
            record (str): JSON serialized node to add.

        Returns:
            Reference to the new node. (Not implemented yet: the record is
            only parsed, nothing is stored, and None is returned.)

        Raises:
            ValueError: if `record` cannot be parsed as JSON, including when
                it is not a string at all.
        """
        try:
            node_record = json.loads(record)
            # assert hasattr(node_record, 'operation')
            # assert hasattr(node_record, 'input')
            # assert hasattr(node_record, 'output')
        except (TypeError, ValueError) as e:
            # json.loads raises TypeError for non-text input and
            # JSONDecodeError (a ValueError) for malformed text. A bare
            # `except` would also trap KeyboardInterrupt and SystemExit.
            message = 'Could not deserialize a node from argument to add_node.'
            raise ValueError(message) from e
        #
        #return

    @property
    def nodes(self):
        """Access the node proxy for the managed work.

        To do...

        Reference the networkx.DiGraph `nodes` attribute.
        """
        return {}

In [None]:
# 1. Add a source node and a transformation operation to the graph.
# 2. Serialize and deserialize the graph.
# 3. Retrieve data from the source node and the operation result and check.

my_array = Integer64([[1,2], [3,4]])
context = SimpleGraphContext()
array_ref = context.add_node(my_array.to_json())

transpose = element_prototype()
transpose['operation'] = ['scalems_test', 'transpose']
# TODO: enable the following.
#transpose['input'] = array_ref
# Until add_node returns a usable reference, refer to the source node by the
# hexadecimal form of its fingerprint UID.
reference_string = my_array.fingerprint().uid().hex()
transpose['input'] = reference_string
transpose['output'] = {}

# add_node requires a JSON serialized record; passing the dict itself always
# raised ValueError.
context.add_node(json.dumps(transpose))

In [None]:
# 1. Add a source node and a transformation operation to the graph.
# 2. Serialize and deserialize the graph.
# 3. Retrieve data from the source node and the operation result and check.

my_array = Integer64([[1,2], [3,4]])
context = SimpleGraphContext()
array_ref = context.add_node(my_array.to_json())

transpose = element_prototype()
transpose['operation'] = ['scalems_test', 'transpose']
# TODO: enable the following.
#transpose['input'] = array_ref
# Until add_node returns a usable reference, refer to the source node by the
# hexadecimal form of its fingerprint UID.
reference_string = my_array.fingerprint().uid().hex()
transpose['input'] = reference_string
transpose['output'] = {}

# add_node requires a JSON serialized record; passing the dict itself always
# raised ValueError.
context.add_node(json.dumps(transpose))