Skip to content

Commit

Permalink
rework xarray initialization
Browse files Browse the repository at this point in the history
still need to work out stock inits.
  • Loading branch information
JamesPHoughton committed Jun 27, 2016
1 parent c757692 commit 6498655
Show file tree
Hide file tree
Showing 14 changed files with 3,427 additions and 899 deletions.
909 changes: 515 additions & 394 deletions .idea/workspace.xml

Large diffs are not rendered by default.

1,927 changes: 1,927 additions & 0 deletions Xarray value assignment.ipynb

Large diffs are not rendered by default.

152 changes: 78 additions & 74 deletions pysd/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def build(elements, subscript_dict, namespace, outfile_name):
"""
from __future__ import division
import numpy as np
from pysd import utils
%(imports)s
from pysd.functions import cache
from pysd import functions
Expand All @@ -71,7 +72,7 @@ def time():
'version': __version__}

text = autopep8.fix_code(textwrap.dedent(text),
options={'aggressive': 10,
options={'aggressive': 100,
'max_line_length': 99,
'experimental': True})

Expand All @@ -96,7 +97,7 @@ def build_element(element, subscript_dict):
An expression that has been converted already into python syntax
- subs: list of lists
Each sublist contains coordinates for initialization of a particular
part of a subscripted function
part of a subscripted function, the list of subscripts vensim attaches to an equation
subscript_dict: dictionary
Expand All @@ -105,7 +106,7 @@ def build_element(element, subscript_dict):
"""
# Todo: I don't like how we identify the types of initializations here, using tokens from
# stings. It isn't explicit, or robust. These should be identified explicitly somewhere else.
# strings. It isn't explicit, or robust. These should be identified explicitly somewhere else.

if element['kind'] == 'constant':
cache_type = "@cache('run')"
Expand All @@ -119,30 +120,10 @@ def build_element(element, subscript_dict):
raise AttributeError("Bad value for 'kind'")

if len(element['py_expr']) > 1:
contents = "ret = %s\n" % create_base_array(element['subs'], subscript_dict)

for sub, expr in zip(element['subs'], element['py_expr']):
contents += 'ret.loc[%(coord_dict)s] = %(expr)s\n' % {
'coord_dict': repr(make_coord_dict(sub, subscript_dict)),
'expr': expr}

contents += "return ret"

elif element['kind'] in ['constant', 'setup'] and len(element['subs'][0]) > 0:
if ',' in element['py_expr'][0]: # array type initialization
contents = 'return xr.DataArray(data=%(expr)s, coords=%(coords)s, dims=%(dims)s )' % {
'expr': '[' + element['py_expr'][0] + ']',
'coords': {dim: subscript_dict[dim] for dim in element['subs'][0]},
'dims': element['subs'][0]}
elif '(' in element['py_expr'][0]: # reference type initialization
contents = "return " + element['py_expr'][0]
else: # float type initialization
contents = "return " + create_base_array(element['subs'],
subscript_dict,
initial_val=element['py_expr'][0])

contents = 'return utils.xrmerge([%(das)s])' % {'das': ',\n'.join(element['py_expr'])}
else:
contents = "return %s" % element['py_expr'][0]
contents = 'return %(py_expr)s' % {'py_expr': element['py_expr'][0]}


indent = 8
element.update({'cache': cache_type,
Expand All @@ -166,54 +147,77 @@ def %(py_name)s():
''' % element
return func


def create_base_array(subs_list, subscript_dict, initial_val='np.NaN'):
    """
    Given a list of subscript references, return the source text of an
    ``xr.DataArray`` constructor call for a base array that can be populated
    by these references.

    Parameters
    ----------
    subs_list: list of lists
        Each sublist holds subscript names (keys of `subscript_dict`) and/or
        subscript elements (members of its value lists). Entries that are
        neither are ignored.

    subscript_dict: dictionary
        Follows the {'subscript name':['list','of','subscript','elements']} format

    initial_val: string
        Source text of the value every cell is initialized to.

    Returns
    -------
    base_array: string
        A string of the form
        ``xr.DataArray(data=np.ones(<shape>)*<initial_val>, coords=<coords>)``

    Examples
    --------
    >>> create_base_array([['Dim1']], {'Dim1': ['A', 'B', 'C']})
    "xr.DataArray(data=np.ones([3])*np.NaN, coords={'Dim1': ['A', 'B', 'C']})"
    """
    sub_elems = set(elem for elems in subscript_dict.values() for elem in elems)

    coords = dict()
    for subset in subs_list:
        for sub in subset:
            if sub in subscript_dict:
                # A whole subscript family is referenced by name
                if sub not in coords:
                    coords[sub] = subscript_dict[sub]
            elif sub in sub_elems:
                # A single element is referenced: collect it under its family name
                name = find_subscript_name(subscript_dict, sub)
                if name not in coords:
                    coords[name] = [sub]
                elif sub not in coords[name]:
                    coords[name] += [sub]

    # Build the shape as a real list: repr(map(...)) only prints a list on
    # Python 2, on Python 3 it would print the map object instead.
    shape = [len(values) for values in coords.values()]

    return "xr.DataArray(data=np.ones(%(shape)s)*%(init)s, coords=%(coords)s)" % {
        'shape': repr(shape),
        'coords': repr(coords),
        'init': initial_val
    }
# not needed?
#
# def make_xarray(subs, expr, subscript_dict):
# # Todo: this function should take a single line from a vensim model file
# # (abstracted as elements in an array passed to the build_element function)
# # and make an xarray from them. Either just build up the syntax to construct one on the fly
# # during model load, or actually build one, deconstruct it, and use the array presentation
# # to make an xarray using simple syntax.
#
# xarray_string = """\
# xr.DataArray(data=%(expr)s,
# coords=%(coords_dict)s)
# """ % {'expr': expr,
# 'coords_dict': make_coord_dict(subs, subscript_dict, terse=False)}
#
# return xarray_string
#
#
# def create_base_array(subs_list, subscript_dict, initial_val='np.NaN'):
# """
# Given a list of subscript references,
# returns a base array that can be populated by these references
#
# Parameters
# ----------
# subs_list
#
# subscript_dict: dictionary
#
# Returns
# -------
# base_array: string
# A string holding the source text of an xr.DataArray(...) constructor call
#
# >>> create_base_array([['Dim1', 'D'], ['Dim1', 'E'], ['Dim1', 'F']],
# ... {'Dim1': ['A', 'B', 'C'],
# ... 'Dim2': ['D', 'E', 'F', 'G']})
# "xr.DataArray(data=np.empty([3, 3])*NaN, coords={'Dim2': ['D', 'E', 'F'], 'Dim1': ['A', 'B', 'C']})"
#
# # >>> create_base_array([['Dim1', 'A'], ['Dim1', 'B'], ['Dim1', 'C']],
# # ... {'Dim1': ['A', 'B', 'C']})
#
# """
# sub_names_list = subscript_dict.keys()
# sub_elems_list = [y for x in subscript_dict.values() for y in x]
# coords = dict()
# for subset in subs_list:
# for sub in subset:
# if sub in sub_names_list:
# if sub not in coords:
# coords[sub] = subscript_dict[sub]
# elif sub in sub_elems_list:
# name = find_subscript_name(subscript_dict, sub)
# if name not in coords:
# coords[name] = [sub]
# else:
# if sub not in coords[name]:
# coords[name] += [sub]
#
# dims = [find_subscript_name(subscript_dict, element) if element in sub_elems_list else element
# for element in subs_list[0]]
#
# return textwrap.dedent("""\
# xr.DataArray(data=np.ones(%(shape)s)*%(init)s,
# coords=%(coords)s,
# dims=%(dims)s )""" % {
# 'shape': [len(coords[dim]) for dim in dims],
# 'coords': repr(coords),
# 'init': initial_val,
# 'dims': dims
# })

# def identify_subranges(subscript_dict):
# """
Expand Down
46 changes: 41 additions & 5 deletions pysd/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import keyword
import numpy as np
import pandas as pd
import xarray as xr


def dict_find(in_dict, value):
Expand All @@ -23,24 +24,53 @@ def dict_find(in_dict, value):
Examples
--------
>>> dict_find({'Key1': 'A', 'Key2': 'B'})
>>> dict_find({'Key1': 'A', 'Key2': 'B'}, 'B')
'Key2'
"""
# Todo: make this robust to repeated values
# Todo: make this robust to missing values
return in_dict.keys()[in_dict.values().index(value)]

def xrmerge(das, accept_new=True):
    """
    Merges xarrays with different dimension sets

    Parameters
    ----------
    das : iterable of data_arrays
        The arrays to merge, in order. Must contain at least one item.

    accept_new : bool
        Where two arrays both hold a non-NaN value, True keeps the value from
        the later array, False keeps the value already merged.

    Returns
    -------
    da : an xarray that is the merge of das
        When `das` holds a single array, that array is returned as is.

    Raises
    ------
    IndexError
        If `das` is empty.

    References
    ----------
    Thanks to @jcmgray https://github.com/pydata/xarray/issues/742#issue-130753818

    In the future, we may not need this as xarray may provide the merge for us.
    """
    remaining = iter(das)
    try:
        da = next(remaining)
    except StopIteration:
        # Same exception type as indexing an empty list, but with a usable message
        raise IndexError("xrmerge() needs at least one DataArray to merge")

    for new_da in remaining:
        # Expand both to have same dimensions, padding with NaN
        da, new_da = xr.align(da, new_da, join='outer')
        # Fill NaNs one way or the other re. accept_new
        da = new_da.fillna(da) if accept_new else da.fillna(new_da)
    return da

def find_subscript_name(subscript_dict, element):
"""
Given a subscript dictionary, and a member of a subscript family,
return the first key of which the member is within the value list
return the first key of which the member is within the value list.
If element is already a subscript name, return that
Parameters
----------
subscript_dict: dictionary
Follows the {'subscript name':['list','of','subscript','elements']} format
element: sting
element: string
Returns
-------
Expand All @@ -52,12 +82,15 @@ def find_subscript_name(subscript_dict, element):
... 'D')
'Dim2'
"""
if element in subscript_dict.keys():
return element

for name, elements in subscript_dict.iteritems():
if element in elements:
return name


def make_coord_dict(subs, subscript_dict):
def make_coord_dict(subs, subscript_dict, terse=True):
"""
This is for assisting with the lookup of a particular element, such that the output
of this function would take the place of %s in this expression
Expand All @@ -76,14 +109,17 @@ def make_coord_dict(subs, subscript_dict):
--------
>>> make_coord_dict(['Dim1', 'D'], {'Dim1':['A','B','C'], 'Dim2':['D', 'E', 'F']})
{'Dim2': ['D']}
>>> make_coord_dict(['Dim1', 'D'], {'Dim1':['A','B','C'], 'Dim2':['D', 'E', 'F']}, terse=False)
{'Dim2': ['D'], 'Dim1': ['A', 'B', 'C']}
"""
sub_elems_list = [y for x in subscript_dict.values() for y in x]
coordinates = {}
for sub in subs:
if sub in sub_elems_list:
name = find_subscript_name(subscript_dict, sub)
coordinates[name] = [sub]
elif not terse:
coordinates[sub] = subscript_dict[sub]
return coordinates


Expand Down
33 changes: 25 additions & 8 deletions pysd/vensim2py.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import parsimonious
import builder
import utils
import textwrap
import numpy as np


def get_file_sections(file_str):
Expand Down Expand Up @@ -146,7 +148,7 @@ def get_model_elements(model_str):

model_structure_grammar = r"""
model = (entry / section)+ sketch?
entry = element "~" element "~" element "|"
entry = element "~" element "~" element ("~" element)? "|"
section = element "~" element "|"
sketch = ~r".*" #anything
Expand All @@ -164,7 +166,7 @@ def __init__(self, ast):
self.entries = []
self.visit(ast)

def visit_entry(self, n, (eqn, _1, unit, _2, doc, _3)):
def visit_entry(self, n, (eqn, _1, unit, _2, doc, _3, annotation)):
self.entries.append({'eqn': eqn.strip(),
'unit': unit.strip(),
'doc': doc.strip(),
Expand Down Expand Up @@ -382,7 +384,7 @@ def parse_general_expression(element, namespace=None, subscript_dict=None):
'unit': None,
'py_expr': '_t'}])
}

# Todo: integ needs to process the init as its own element
builders = {
"integ": lambda expr, init: builder.add_stock(element['py_name'], element['subs'],
expr, init),
Expand Down Expand Up @@ -494,11 +496,26 @@ def visit_builtin(self, n, vc):
return name + '()'

def visit_array(self, n, vc):
text = n.text.strip(';').replace(' ', '') # remove trailing semi if exists
if ';' in text:
return '[' + text.replace(';', '],[') + ']'
if element['subs']:
coords = utils.make_coord_dict(element['subs'], subscript_dict, terse=False)
dims = [utils.find_subscript_name(subscript_dict, sub) for sub in element['subs']]
shape = [len(coords[dim]) for dim in dims]
if ';' in n.text or ',' in n.text:
text = n.text.strip(';').replace(' ', '').replace(';', ',')
data = np.array([float(s) for s in text.split(',')]).reshape(shape)
else:
data = np.tile(float(n.text), shape)
datastr = np.array2string(data, separator=',').replace('\n', '').replace(' ', '')
return textwrap.dedent("""\
xr.DataArray(data=%(datastr)s,
coords=%(coords)s,
dims=%(dims)s )""" % {
'datastr': datastr,
'coords': repr(coords),
'dims': repr(dims)})

else:
return text
return n.text.replace(' ', '')

def visit_subscript_list(self, n, (lb, _1, refs, rb)):
subs = [x.strip() for x in refs.split(',')]
Expand Down Expand Up @@ -542,7 +559,7 @@ def translate_vensim(mdl_file):
Examples
--------
#>>> translate_vensim('../../tests/test-models/tests/subscript_3d_arrays/test_subscript_3d_arrays.mdl')
>>> translate_vensim('../tests/test-models/tests/subscript_3d_arrays/test_subscript_3d_arrays.mdl')
#>>> translate_vensim('../../tests/test-models/tests/abs/test_abs.mdl')
Expand Down

0 comments on commit 6498655

Please sign in to comment.