rework xarray initialization

still need to work out stock inits.
SDXorg · Jun 27, 2016 · 6498655 · 6498655
1 parent c757692
commit 6498655
Show file tree

Hide file tree

Showing 14 changed files with 3,427 additions and 899 deletions.
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
diff --git a/Xarray value assignment.ipynb b/Xarray value assignment.ipynb
diff --git a/pysd/builder.py b/pysd/builder.py
@@ -48,6 +48,7 @@ def build(elements, subscript_dict, namespace, outfile_name):
     """
     from __future__ import division
     import numpy as np
+    from pysd import utils
     %(imports)s
     from pysd.functions import cache
     from pysd import functions
@@ -71,7 +72,7 @@ def time():
            'version': __version__}
 
     text = autopep8.fix_code(textwrap.dedent(text),
-                             options={'aggressive': 10,
+                             options={'aggressive': 100,
                                       'max_line_length': 99,
                                       'experimental': True})
 
@@ -96,7 +97,7 @@ def build_element(element, subscript_dict):
             An expression that has been converted already into python syntax
         - subs: list of lists
             Each sublist contains coordinates for initialization of a particular
-            part of a subscripted function
+            part of a subscripted function: the list of subscripts vensim attaches to an equation
 
     subscript_dict: dictionary
 
@@ -105,7 +106,7 @@ def build_element(element, subscript_dict):
 
     """
     # Todo: I don't like how we identify the types of initializations here, using tokens from
-    #  stings. It isn't explicit, or robust. These should be identified explicitly somewhere else.
+    #  strings. It isn't explicit, or robust. These should be identified explicitly somewhere else.
 
     if element['kind'] == 'constant':
         cache_type = "@cache('run')"
@@ -119,30 +120,10 @@ def build_element(element, subscript_dict):
         raise AttributeError("Bad value for 'kind'")
 
     if len(element['py_expr']) > 1:
-        contents = "ret = %s\n" % create_base_array(element['subs'], subscript_dict)
-
-        for sub, expr in zip(element['subs'], element['py_expr']):
-            contents += 'ret.loc[%(coord_dict)s] = %(expr)s\n' % {
-                'coord_dict': repr(make_coord_dict(sub, subscript_dict)),
-                'expr': expr}
-
-        contents += "return ret"
-
-    elif element['kind'] in ['constant', 'setup'] and len(element['subs'][0]) > 0:
-        if ',' in element['py_expr'][0]:  # array type initialization
-            contents = 'return xr.DataArray(data=%(expr)s, coords=%(coords)s, dims=%(dims)s )' % {
-                'expr': '[' + element['py_expr'][0] + ']',
-                'coords': {dim: subscript_dict[dim] for dim in element['subs'][0]},
-                'dims': element['subs'][0]}
-        elif '(' in element['py_expr'][0]:  # reference type initialization
-            contents = "return " + element['py_expr'][0]
-        else:  # float type initialization
-            contents = "return " + create_base_array(element['subs'],
-                                                     subscript_dict,
-                                                     initial_val=element['py_expr'][0])
-
+        contents = 'return utils.xrmerge([%(das)s])' % {'das': ',\n'.join(element['py_expr'])}
     else:
-        contents = "return %s" % element['py_expr'][0]
+        contents = 'return %(py_expr)s' % {'py_expr': element['py_expr'][0]}
+
 
     indent = 8
     element.update({'cache': cache_type,
@@ -166,54 +147,77 @@ def %(py_name)s():
         ''' % element
     return func
 
-
-def create_base_array(subs_list, subscript_dict, initial_val='np.NaN'):
-    """
-    Given a list of subscript references,
-    returns a base array that can be populated by these references
-
-    Parameters
-    ----------
-    subs_list
-
-    subscript_dict: dictionary
-
-    Returns
-    -------
-    base_array: string
-        A string that
-
-    >>> create_base_array([['Dim1', 'D'], ['Dim1', 'E'], ['Dim1', 'F']],
-    ...                    {'Dim1': ['A', 'B', 'C'],
-    ...                     'Dim2': ['D', 'E', 'F', 'G']})
-    "xr.DataArray(data=np.empty([3, 3])*NaN, coords={'Dim2': ['D', 'E', 'F'], 'Dim1': ['A', 'B', 'C']})"
-
-    # >>> create_base_array([['Dim1', 'A'], ['Dim1', 'B'], ['Dim1', 'C']],
-    # ...                    {'Dim1': ['A', 'B', 'C']})
-
-    """
-    sub_names_list = subscript_dict.keys()
-    sub_elems_list = [y for x in subscript_dict.values() for y in x]
-
-    coords = dict()
-    for subset in subs_list:
-        for sub in subset:
-            if sub in sub_names_list:
-                if sub not in coords:
-                    coords[sub] = subscript_dict[sub]
-            elif sub in sub_elems_list:
-                name = find_subscript_name(subscript_dict, sub)
-                if name not in coords:
-                    coords[name] = [sub]
-                else:
-                    if sub not in coords[name]:
-                        coords[name] += [sub]
-
-    return "xr.DataArray(data=np.ones(%(shape)s)*%(init)s, coords=%(coords)s)" % {
-        'shape': repr(map(len, coords.values())),
-        'coords': repr(coords),
-        'init': initial_val
-    }
+# not needed?
+#
+# def make_xarray(subs, expr, subscript_dict):
+#     # Todo: this function should take a single line from a vensim model file
+#     # (abstracted as elements in an array passed to the build element function)
+#     # and make an xarray from them. Either just build up the syntax to construct one on the fly
+#     # during model load, or actually build one, deconstruct it, and use the array presentation
+#     # to make an xarray using simple syntax.
+#
+#     xarray_string = """\
+#         xr.DataArray(data=%(expr)s,
+#                      coords=%(coords_dict)s)
+#     """ % {'expr': expr,
+#            'coords_dict': make_coord_dict(subs, subscript_dict, terse=False)}
+#
+#     return xarray_string
+#
+#
+# def create_base_array(subs_list, subscript_dict, initial_val='np.NaN'):
+#     """
+#     Given a list of subscript references,
+#     returns a base array that can be populated by these references
+#
+#     Parameters
+#     ----------
+#     subs_list
+#
+#     subscript_dict: dictionary
+#
+#     Returns
+#     -------
+#     base_array: string
+#         A string of python source that constructs the base xr.DataArray
+#
+#     >>> create_base_array([['Dim1', 'D'], ['Dim1', 'E'], ['Dim1', 'F']],
+#     ...                    {'Dim1': ['A', 'B', 'C'],
+#     ...                     'Dim2': ['D', 'E', 'F', 'G']})
+#     "xr.DataArray(data=np.empty([3, 3])*NaN, coords={'Dim2': ['D', 'E', 'F'], 'Dim1': ['A', 'B', 'C']})"
+#
+#     # >>> create_base_array([['Dim1', 'A'], ['Dim1', 'B'], ['Dim1', 'C']],
+#     # ...                    {'Dim1': ['A', 'B', 'C']})
+#
+#     """
+#     sub_names_list = subscript_dict.keys()
+#     sub_elems_list = [y for x in subscript_dict.values() for y in x]
+#     coords = dict()
+#     for subset in subs_list:
+#         for sub in subset:
+#             if sub in sub_names_list:
+#                 if sub not in coords:
+#                     coords[sub] = subscript_dict[sub]
+#             elif sub in sub_elems_list:
+#                 name = find_subscript_name(subscript_dict, sub)
+#                 if name not in coords:
+#                     coords[name] = [sub]
+#                 else:
+#                     if sub not in coords[name]:
+#                         coords[name] += [sub]
+#
+#     dims = [find_subscript_name(subscript_dict, element) if element in sub_elems_list else element
+#             for element in subs_list[0]]
+#
+#     return textwrap.dedent("""\
+#         xr.DataArray(data=np.ones(%(shape)s)*%(init)s,
+#                      coords=%(coords)s,
+#                      dims=%(dims)s )""" % {
+#         'shape': [len(coords[dim]) for dim in dims],
+#         'coords': repr(coords),
+#         'init': initial_val,
+#         'dims': dims
+#     })
 
 # def identify_subranges(subscript_dict):
 #     """

diff --git a/pysd/utils.py b/pysd/utils.py
@@ -2,6 +2,7 @@
 import keyword
 import numpy as np
 import pandas as pd
+import xarray as xr
 
 
 def dict_find(in_dict, value):
@@ -23,24 +24,53 @@ def dict_find(in_dict, value):
 
     Examples
     --------
-    >>> dict_find({'Key1': 'A', 'Key2': 'B'})
+    >>> dict_find({'Key1': 'A', 'Key2': 'B'}, 'B')
+    'Key2'
 
     """
     # Todo: make this robust to repeated values
     # Todo: make this robust to missing values
     return in_dict.keys()[in_dict.values().index(value)]
 
+def xrmerge(das, accept_new=True):
+    """
+    Merges xarrays with different dimension sets
+    Parameters
+    ----------
+    das : list of data_arrays
+
+    accept_new : bool; if True, values from later arrays win, otherwise earlier values are kept
+
+    Returns
+    -------
+    da : an xarray that is the merge of das
+
+    References
+    ----------
+    Thanks to @jcmgray https://github.com/pydata/xarray/issues/742#issue-130753818
+
+    In the future, we may not need this as xarray may provide the merge for us.
+    """
+    da = das[0]
+    for new_da in das[1:]:
+        # Expand both to have same dimensions, padding with NaN
+        da, new_da = xr.align(da, new_da, join='outer')
+        # Fill NaNs one way or the other re. accept_new
+        da = new_da.fillna(da) if accept_new else da.fillna(new_da)
+    return da
 
 def find_subscript_name(subscript_dict, element):
     """
     Given a subscript dictionary, and a member of a subscript family,
-    return the first key of which the member is within the value list
+    return the first key of which the member is within the value list.
+    If element is already a subscript name, return that name.
 
     Parameters
     ----------
     subscript_dict: dictionary
         Follows the {'subscript name':['list','of','subscript','elements']} format
-    element: sting
+
+    element: string
 
     Returns
     -------
@@ -52,12 +82,15 @@ def find_subscript_name(subscript_dict, element):
     ...                      'D')
     'Dim2'
     """
+    if element in subscript_dict.keys():
+        return element
+
     for name, elements in subscript_dict.iteritems():
         if element in elements:
             return name
 
 
-def make_coord_dict(subs, subscript_dict):
+def make_coord_dict(subs, subscript_dict, terse=True):
     """
     This is for assisting with the lookup of a particular element, such that the output
     of this function would take the place of %s in this expression
@@ -76,14 +109,17 @@ def make_coord_dict(subs, subscript_dict):
     --------
     >>> make_coord_dict(['Dim1', 'D'], {'Dim1':['A','B','C'], 'Dim2':['D', 'E', 'F']})
     {'Dim2': ['D']}
-
+    >>> make_coord_dict(['Dim1', 'D'], {'Dim1':['A','B','C'], 'Dim2':['D', 'E', 'F']}, terse=False)
+    {'Dim2': ['D'], 'Dim1': ['A', 'B', 'C']}
     """
     sub_elems_list = [y for x in subscript_dict.values() for y in x]
     coordinates = {}
     for sub in subs:
         if sub in sub_elems_list:
             name = find_subscript_name(subscript_dict, sub)
             coordinates[name] = [sub]
+        elif not terse:
+            coordinates[sub] = subscript_dict[sub]
     return coordinates
 
 

diff --git a/pysd/vensim2py.py b/pysd/vensim2py.py
@@ -12,6 +12,8 @@
 import parsimonious
 import builder
 import utils
+import textwrap
+import numpy as np
 
 
 def get_file_sections(file_str):
@@ -146,7 +148,7 @@ def get_model_elements(model_str):
 
     model_structure_grammar = r"""
     model = (entry / section)+ sketch?
-    entry = element "~" element "~" element "|"
+    entry = element "~" element "~" element ("~" element)? "|"
     section = element "~" element "|"
     sketch = ~r".*"  #anything
 
@@ -164,7 +166,7 @@ def __init__(self, ast):
             self.entries = []
             self.visit(ast)
 
-        def visit_entry(self, n, (eqn, _1, unit, _2, doc, _3)):
+        def visit_entry(self, n, (eqn, _1, unit, _2, doc, _3, annotation)):
             self.entries.append({'eqn': eqn.strip(),
                                  'unit': unit.strip(),
                                  'doc': doc.strip(),
@@ -382,7 +384,7 @@ def parse_general_expression(element, namespace=None, subscript_dict=None):
                                    'unit': None,
                                    'py_expr': '_t'}])
                 }
-
+    # Todo: integ needs to process the init as its own element
     builders = {
         "integ": lambda expr, init: builder.add_stock(element['py_name'], element['subs'],
                                                       expr, init),
@@ -494,11 +496,26 @@ def visit_builtin(self, n, vc):
             return name + '()'
 
         def visit_array(self, n, vc):
-            text = n.text.strip(';').replace(' ', '')  # remove trailing semi if exists
-            if ';' in text:
-                return '[' + text.replace(';', '],[') + ']'
+            if element['subs']:
+                coords = utils.make_coord_dict(element['subs'], subscript_dict, terse=False)
+                dims = [utils.find_subscript_name(subscript_dict, sub) for sub in element['subs']]
+                shape = [len(coords[dim]) for dim in dims]
+                if ';' in n.text or ',' in n.text:
+                    text = n.text.strip(';').replace(' ', '').replace(';', ',')
+                    data = np.array([float(s) for s in text.split(',')]).reshape(shape)
+                else:
+                    data = np.tile(float(n.text), shape)
+                datastr = np.array2string(data, separator=',').replace('\n', '').replace(' ', '')
+                return textwrap.dedent("""\
+                    xr.DataArray(data=%(datastr)s,
+                                 coords=%(coords)s,
+                                 dims=%(dims)s )""" % {
+                    'datastr': datastr,
+                    'coords': repr(coords),
+                    'dims': repr(dims)})
+
             else:
-                return text
+                return n.text.replace(' ', '')
 
         def visit_subscript_list(self, n, (lb, _1, refs, rb)):
             subs = [x.strip() for x in refs.split(',')]
@@ -542,7 +559,7 @@ def translate_vensim(mdl_file):
 
     Examples
     --------
-    #>>> translate_vensim('../../tests/test-models/tests/subscript_3d_arrays/test_subscript_3d_arrays.mdl')
+    >>> translate_vensim('../tests/test-models/tests/subscript_3d_arrays/test_subscript_3d_arrays.mdl')
 
     #>>> translate_vensim('../../tests/test-models/tests/abs/test_abs.mdl')