-
Notifications
You must be signed in to change notification settings - Fork 2
/
expression.py
1897 lines (1585 loc) · 84.3 KB
/
expression.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
""" Utilities for processing mathematical expressions used by obj_tables models
:Author: Arthur Goldberg <Arthur.Goldberg@mssm.edu>
:Author: Jonathan Karr <karr@mssm.edu>
:Date: 2018-12-19
:Copyright: 2016-2019, Karr Lab
:License: MIT
"""
from enum import Enum
from io import BytesIO
import ast
import astor
import collections
import copy
import keyword
import math
import pint # noqa: F401
import re
import token
import tokenize
import types # noqa: F401
from obj_tables.core import (Model, RelatedAttribute, OneToOneAttribute, ManyToOneAttribute,
InvalidObject, InvalidAttribute)
from wc_utils.util.misc import DFSMAcceptor
# Public API of this module: the names that `from <this module> import *` exports
__all__ = [
    'OneToOneExpressionAttribute',
    'ManyToOneExpressionAttribute',
    'ObjTablesTokenCodes',
    'IdMatch',
    'ObjTablesToken',
    'LexMatch',
    'ExpressionTermMeta',
    'ExpressionStaticTermMeta',
    'ExpressionDynamicTermMeta',
    'ExpressionExpressionTermMeta',
    'Expression',
    'ParsedExpressionError',
    'ParsedExpression',
    'LinearParsedExpressionValidator',
]
class ObjTablesTokenCodes(int, Enum):
    """ ObjTablesToken codes used in parsed expressions

    The members are also :obj:`int` instances because the class derives from both :obj:`int`
    and :obj:`Enum`. One of these codes is stored in the :obj:`code` field of each
    :obj:`ObjTablesToken`.
    """
    obj_id = 1  # the token identifies an obj_tables object
    math_func_id = 2  # presumably the name of a mathematical function -- TODO confirm against the tokenizer
    number = 3
    op = 4
    other = 5
# result of matching a token pattern against the Python tokens of an expression
IdMatch = collections.namedtuple('IdMatch', ['model_type', 'token_pattern', 'match_string'])
IdMatch.__doc__ = IdMatch.__doc__ + ': Matched token pattern used by tokenize'
IdMatch.model_type.__doc__ = 'The type of Model matched'
IdMatch.token_pattern.__doc__ = 'The token pattern used by the match'
IdMatch.match_string.__doc__ = 'The matched string'

# one token of a parsed expression; tokenize returns a list of these
ObjTablesToken = collections.namedtuple(
    'ObjTablesToken',
    ['code', 'token_string', 'model_type', 'model_id', 'model'])
# the three trailing fields (model_type, model_id, and model) default to None so that they are
# optional: see https://stackoverflow.com/a/18348004
ObjTablesToken.__new__.__defaults__ = (None,) * 3
ObjTablesToken.__doc__ = ObjTablesToken.__doc__ + ': ObjTablesToken in a parsed obj_tables expression'
ObjTablesToken.code.__doc__ = 'ObjTablesTokenCodes encoding'
ObjTablesToken.token_string.__doc__ = "The token's string"
ObjTablesToken.model_type.__doc__ = "When code is obj_id, the obj_tables obj's type"
ObjTablesToken.model_id.__doc__ = "When code is obj_id, the obj_tables obj's id"
ObjTablesToken.model.__doc__ = "When code is obj_id, the obj_tables obj"

# the ObjTablesTokens produced for an unambiguous Model id, and how many Python tokens they used
LexMatch = collections.namedtuple('LexMatch', ['obj_tables_tokens', 'num_py_tokens'])
LexMatch.__doc__ = LexMatch.__doc__ + ': container for an unambiguous Model id'
LexMatch.obj_tables_tokens.__doc__ = "List of ObjTablesToken's created"
LexMatch.num_py_tokens.__doc__ = 'Number of Python tokens consumed'
def _build_expression_xlsx_messages(related_class):
    """ Build the XLSX input and error messages that describe an expression class

    Args:
        related_class (:obj:`type`): expression class; its :obj:`Meta` must provide
            :obj:`expression_is_linear`, :obj:`expression_term_models` and :obj:`attributes`

    Returns:
        :obj:`tuple` of :obj:`str`, :obj:`str`: input message and error message
    """
    kind = 'linear ' if related_class.Meta.expression_is_linear else ''

    # verbose names of the models which can be used as terms of the expression
    terms = [
        attr.related_class.Meta.verbose_name_plural
        for attr in related_class.Meta.attributes.values()
        if isinstance(attr, RelatedAttribute)
        and attr.related_class.__name__ in related_class.Meta.expression_term_models
    ]

    if not terms:
        return ('Enter a {}expression.'.format(kind),
                'Value must be a {}expression.'.format(kind))

    if len(terms) == 1:
        term_list = terms[0]
    else:
        term_list = '{} and {}'.format(', '.join(terms[0:-1]), terms[-1])
    return ('Enter a {}expression of {}.'.format(kind, term_list),
            'Value must be a {}expression of {}.'.format(kind, term_list))


def _append_expression_xlsx_messages(validation, related_class):
    """ Append the messages that describe an expression class to an XLSX validation

    A blank line separates the appended message from any message that :obj:`validation`
    already contains.

    Args:
        validation (:obj:`wc_utils.workbook.io.FieldValidation`): validation to extend, in place
        related_class (:obj:`type`): expression class

    Returns:
        :obj:`wc_utils.workbook.io.FieldValidation`: :obj:`validation`
    """
    input_message, error_message = _build_expression_xlsx_messages(related_class)

    if validation.input_message:
        validation.input_message += '\n\n'
    validation.input_message += input_message

    if validation.error_message:
        validation.error_message += '\n\n'
    validation.error_message += error_message

    return validation


class OneToOneExpressionAttribute(OneToOneAttribute):
    """ Expression one-to-one attribute """

    def serialize(self, expression, encoded=None):
        """ Serialize related object

        Args:
            expression (:obj:`obj_tables.Model`): the referenced :obj:`Expression`
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: simple Python representation; the empty string if there is no expression
        """
        if expression:
            return expression.serialize()
        return ''

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of :obj:`object`, :obj:`InvalidAttribute` or :obj:`None`: tuple of cleaned value
            and cleaning error; :obj:`(None, None)` if :obj:`value` is empty
        """
        if value:
            return self.related_class.deserialize(value, objects)
        return (None, None)

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        # `super(OneToOneAttribute, self)` starts the method lookup *after* `OneToOneAttribute`, so
        # that class' own validation is bypassed.
        # NOTE(review): presumably because an expression is free text rather than a choice among
        # existing objects -- confirm
        validation = super(OneToOneAttribute, self).get_xlsx_validation(
            sheet_models=sheet_models, doc_metadata_model=doc_metadata_model)
        return _append_expression_xlsx_messages(validation, self.related_class)


class ManyToOneExpressionAttribute(ManyToOneAttribute):
    """ Expression many-to-one attribute """

    def serialize(self, expression, encoded=None):
        """ Serialize related object

        Args:
            expression (:obj:`Expression`): the related :obj:`Expression`
            encoded (:obj:`dict`, optional): dictionary of objects that have already been encoded

        Returns:
            :obj:`str`: simple Python representation of the expression; the empty string if there
            is no expression
        """
        if expression:
            return expression.serialize()
        return ''

    def deserialize(self, value, objects, decoded=None):
        """ Deserialize value

        Args:
            value (:obj:`str`): String representation
            objects (:obj:`dict`): dictionary of objects, grouped by model
            decoded (:obj:`dict`, optional): dictionary of objects that have already been decoded

        Returns:
            :obj:`tuple` of :obj:`object`, :obj:`InvalidAttribute` or :obj:`None`: tuple of cleaned value
            and cleaning error; :obj:`(None, None)` if :obj:`value` is empty
        """
        if value:
            return self.related_class.deserialize(value, objects)
        return (None, None)

    def get_xlsx_validation(self, sheet_models=None, doc_metadata_model=None):
        """ Get XLSX validation

        Args:
            sheet_models (:obj:`list` of :obj:`Model`, optional): models encoded as separate sheets
            doc_metadata_model (:obj:`type`): model whose worksheet contains the document metadata

        Returns:
            :obj:`wc_utils.workbook.io.FieldValidation`: validation
        """
        # `super(ManyToOneAttribute, self)` starts the method lookup *after* `ManyToOneAttribute`, so
        # that class' own validation is bypassed.
        # NOTE(review): presumably because an expression is free text rather than a choice among
        # existing objects -- confirm
        validation = super(ManyToOneAttribute, self).get_xlsx_validation(
            sheet_models=sheet_models, doc_metadata_model=doc_metadata_model)
        return _append_expression_xlsx_messages(validation, self.related_class)
class ExpressionTermMeta:
    """ Base metadata for model classes whose instances can be used as terms in expressions

    Attributes:
        expression_term_token_pattern (:obj:`tuple`): sequence of Python token codes (from the
            :obj:`token` module) that the name of a term has in an expression; by default a
            single :obj:`token.NAME`
        expression_term_units (:obj:`str`): name of the attribute which describes the units
            of the expression term
    """
    expression_term_token_pattern = (token.NAME,)
    expression_term_units = 'units'
class ExpressionStaticTermMeta(ExpressionTermMeta):
    """ Metadata for subclasses with static values that can appear in expressions

    Attributes:
        expression_term_value (:obj:`str`): name of the attribute which encodes the value of
            the term; :obj:`'value'` by default
    """
    expression_term_value = 'value'
class ExpressionDynamicTermMeta(ExpressionTermMeta):
    """ Metadata for subclasses with dynamic values that can appear in expressions

    Adds nothing to :obj:`ExpressionTermMeta`; it only marks a term as dynamic.
    """
class ExpressionExpressionTermMeta(ExpressionTermMeta):
    """ Metadata for subclasses with expressions that can appear in expressions

    Attributes:
        expression_term_model (:obj:`type`): expression helper class of the term, :obj:`None` by
            default. :obj:`Expression.make_expression_obj` reads this attribute and calls
            its :obj:`deserialize(expression, objs)`, so it is expected to be a class (or another
            object with such a method), not the name of an attribute.
    """
    # must be overridden: `Expression.make_expression_obj` calls `.deserialize` on this value
    expression_term_model = None
class Expression(object):
    """ Generic methods for mathematical expressions

    The methods assume that they are used with model classes which have an attribute called
    :obj:`expression` that holds the text of the expression.

    Attributes:
        _parsed_expression (:obj:`ParsedExpression`): parsed expression
    """

    class Meta(object):
        """ Metadata for subclasses of :obj:`Expression`

        Attributes:
            expression_term_models (:obj:`tuple` of :obj:`str`): names of classes
                which can appear as terms in the expression
            expression_valid_functions (:obj:`tuple` of :obj:`types.FunctionType`): Python
                functions which can appear in the expression
            expression_is_linear (:obj:`bool`): if :obj:`True`, validate that the expression is linear
            expression_type (:obj:`type`): type of the expression
            expression_unit_registry (:obj:`pint.UnitRegistry`): unit registry
        """
        expression_term_models = ()
        expression_valid_functions = (
            float,
            math.fabs,
            math.ceil,
            math.floor,
            round,
            math.exp,
            math.expm1,
            math.pow,
            math.sqrt,
            math.log,
            math.log1p,
            math.log10,
            math.log2,
            math.factorial,
            math.sin,
            math.cos,
            math.tan,
            math.acos,
            math.asin,
            math.atan,
            math.atan2,
            math.hypot,
            math.degrees,
            math.radians,
            min,
            max)
        expression_is_linear = False
        expression_type = None
        expression_unit_registry = None

    def serialize(self):
        """ Generate string representation

        Returns:
            :obj:`str`: value of primary attribute
        """
        return self.expression

    @classmethod
    def deserialize(cls, model_cls, value, objects):
        """ Deserialize :obj:`value` into an :obj:`Expression`

        Args:
            model_cls (:obj:`type`): :obj:`Expression` class or subclass
            value (:obj:`str`): string representation of the mathematical expression, in a
                Python expression
            objects (:obj:`dict`): dictionary of objects which can be used in :obj:`Expression`, grouped by
                model; the deserialized expression is added to it, under :obj:`model_cls` and keyed by
                :obj:`value`

        Returns:
            :obj:`tuple`: on error return (:obj:`None`, :obj:`InvalidAttribute`),
            otherwise return (object in this class with instantiated :obj:`_parsed_expression`, :obj:`None`)
        """
        value = value or ''

        try:
            parsed_expression = ParsedExpression(model_cls, 'expression', value, objects)
        except ParsedExpressionError as e:
            return (None, InvalidAttribute(model_cls.Meta.attributes['expression'], [str(e)]))

        _, used_objects, errors = parsed_expression.tokenize()
        if errors:
            return (None, InvalidAttribute(model_cls.Meta.attributes['expression'], errors))

        # reuse the object that was previously created for the same expression text, if any
        objs_of_model_cls = objects.setdefault(model_cls, {})
        if value in objs_of_model_cls:
            obj = objs_of_model_cls[value]
        else:
            obj = model_cls(expression=value)
            objs_of_model_cls[value] = obj

        # relate the expression to the objects which its tokens reference
        for attr_name, attr in model_cls.Meta.attributes.items():
            if isinstance(attr, RelatedAttribute) and \
                    attr.related_class.__name__ in model_cls.Meta.expression_term_models:
                attr_value = list(used_objects.get(attr.related_class, {}).values())
                setattr(obj, attr_name, attr_value)
        obj._parsed_expression = parsed_expression

        # check expression is linear and, if so, compute linear coefficients for the related objects
        parsed_expression.is_linear, _ = LinearParsedExpressionValidator().validate(parsed_expression)

        return (obj, None)

    @classmethod
    def validate(cls, model_obj, parent_obj):
        """ Determine whether an expression model is valid

        One check eval's its deserialized expression

        Args:
            model_obj (:obj:`Expression`): expression object
            parent_obj (:obj:`Model`): parent of expression object; not used by this method

        Returns:
            :obj:`InvalidObject` or None: :obj:`None` if the object is valid,
            otherwise return a list of errors in an :obj:`InvalidObject` instance
        """
        model_cls = model_obj.__class__

        def invalid(messages):
            # wrap error messages about the `expression` attribute into an `InvalidObject`
            attr = model_cls.Meta.attributes['expression']
            return InvalidObject(model_obj, [InvalidAttribute(attr, messages)])

        related_attrs = [(name, attr) for name, attr in model_cls.Meta.attributes.items()
                         if isinstance(attr, RelatedAttribute)]

        # generate _parsed_expression
        objs = {}
        for related_attr_name, related_attr in related_attrs:
            objs[related_attr.related_class] = {
                m.get_primary_attribute(): m for m in getattr(model_obj, related_attr_name)
            }
        try:
            model_obj._parsed_expression = ParsedExpression(model_cls, 'expression',
                                                            model_obj.expression, objs)
        except ParsedExpressionError as e:
            return invalid([str(e)])

        is_valid, _, errors = model_obj._parsed_expression.tokenize()
        if is_valid is None:
            return invalid(errors)
        model_obj._parsed_expression.is_linear, _ = LinearParsedExpressionValidator().validate(
            model_obj._parsed_expression)

        # check that related objects match the tokens of the _parsed_expression
        related_objs = {}
        for related_attr_name, related_attr in related_attrs:
            related_model_objs = getattr(model_obj, related_attr_name)
            if related_model_objs:
                related_objs[related_attr.related_class] = set(related_model_objs)

        token_objs = {}
        for obj_table_token in model_obj._parsed_expression._obj_tables_tokens:
            if obj_table_token.model_type is not None:
                token_objs.setdefault(obj_table_token.model_type, set()).add(obj_table_token.model)

        if related_objs != token_objs:
            return invalid(['Related objects must match the tokens of the analyzed expression'])

        # check that expression is valid
        try:
            rv = model_obj._parsed_expression.test_eval()
            if model_obj.Meta.expression_type:
                if not isinstance(rv, model_obj.Meta.expression_type):
                    return invalid(
                        ["Evaluating '{}', a {} expression, should return a {} but it returns a {}".format(
                            model_obj.expression, model_obj.__class__.__name__,
                            model_obj.Meta.expression_type.__name__, type(rv).__name__)])
        except ParsedExpressionError as e:
            return invalid([str(e)])

        # check expression is linear
        if model_obj.Meta.expression_is_linear and not model_obj._parsed_expression.is_linear:
            return invalid(['Expression must be linear in species counts'])

        # return :obj:`None` to indicate valid object
        return None

    @staticmethod
    def make_expression_obj(model_type, expression, objs):
        """ Make an expression object

        Args:
            model_type (:obj:`type`): an :obj:`Model` that uses a mathematical expression, like
                :obj:`Function` and :obj:`Observable`; its :obj:`Meta.expression_term_model` must be
                its expression helper class
            expression (:obj:`str`): the expression used by the :obj:`model_type` being created
            objs (:obj:`dict` of :obj:`dict`): all objects that are referenced in :obj:`expression`

        Returns:
            :obj:`tuple`: if successful, (:obj:`Model`, :obj:`None`) containing a new instance of
            :obj:`model_type`'s expression helper class; otherwise, (:obj:`None`, :obj:`InvalidAttribute`)
            reporting the error
        """
        expr_model_type = model_type.Meta.expression_term_model
        return expr_model_type.deserialize(expression, objs)

    @classmethod
    def make_obj(cls, model, model_type, primary_attr, expression, objs, allow_invalid_objects=False):
        """ Make a model that contains an expression by using its expression helper class

        For example, this uses :obj:`FunctionExpression` to make a :obj:`Function`.

        Args:
            model (:obj:`Model`): an instance of :obj:`Model` which is the root model
            model_type (:obj:`type`): a subclass of :obj:`Model` that uses a mathematical expression, like
                :obj:`Function` and :obj:`Observable`
            primary_attr (:obj:`object`): the primary attribute of the :obj:`model_type` being created
            expression (:obj:`str`): the expression used by the :obj:`model_type` being created
            objs (:obj:`dict` of :obj:`dict`): all objects that are referenced in :obj:`expression`
            allow_invalid_objects (:obj:`bool`, optional): if set, return object - not error - if
                the expression object does not validate

        Returns:
            :obj:`Model` or :obj:`InvalidAttribute`: a new instance of :obj:`model_type`, or,
            if an error occurs, an object reporting the error: the :obj:`InvalidAttribute` from
            deserialization, or whatever the expression object's :obj:`validate` returned
        """
        expr_model_obj, error = cls.make_expression_obj(model_type, expression, objs)
        if error:
            return error

        error_or_none = expr_model_obj.validate()
        if error_or_none is not None and not allow_invalid_objects:
            return error_or_none

        # create the new object through the root model's relationship to `model_type`
        related_name = model_type.Meta.attributes['model'].related_name
        related_in_model = getattr(model, related_name)
        new_obj = related_in_model.create(expression=expr_model_obj)
        setattr(new_obj, model_type.Meta.primary_attribute.name, primary_attr)
        return new_obj

    def merge_attrs(self, other, other_objs_in_self, self_objs_in_other):
        """ Merge attributes of two objects

        Copies the related objects of :obj:`other`'s parsed expression into this object's parsed
        expression, substituting each object which has a counterpart in :obj:`other_objs_in_self`.

        Args:
            other (:obj:`Model`): other model
            other_objs_in_self (:obj:`dict`): dictionary that maps instances of objects in another model to objects
                in a model
            self_objs_in_other (:obj:`dict`): dictionary that maps instances of objects in a model to objects
                in another model; not used by this method
        """
        self_related_objects = self._parsed_expression.related_objects
        for model_type, other_related_objs in other._parsed_expression.related_objects.items():
            for obj_id, other_obj in other_related_objs.items():
                self_related_objects[model_type][obj_id] = other_objs_in_self.get(other_obj, other_obj)
class ParsedExpressionError(ValueError):
    """ Exception raised for errors in :obj:`ParsedExpression`

    Attributes:
        message (:obj:`str`): the exception's message
    """

    def __init__(self, message=None):
        """
        Args:
            message (:obj:`str`, optional): the exception's message
        """
        super().__init__(message)
        # store the message so that the documented `message` attribute exists; `ValueError`
        # itself only keeps it in `args`
        self.message = message
class ParsedExpression(object):
""" An expression in an :obj:`ObjTables` :obj:`Model`
These expressions are limited Python expressions with specific semantics:
* They must be syntactically correct Python, except that an identifier can begin with numerical digits.
* No Python keywords, strings, or tokens that do not belong in expressions are allowed.
* All Python identifiers must be the primary attribute of an :obj:`ObjTables` object or the name of a
function in the :obj:`math` package. Objects in the model
are provided in :obj:`_objs`, and the allowed subset of functions in :obj:`math` must be provided in an
iterator in the :obj:`expression_valid_functions` attribute of the :obj:`Meta` class of a model whose expression
is being processed.
* Currently (July, 2018), an identifier may refer to a :obj:`Species`, :obj:`Parameter`,
:obj:`Reaction`, :obj:`Observable` or :obj:`DfbaObjReaction`.
* Cycles of references are illegal.
* An identifier must unambiguously refer to exactly one related :obj:`Model` in a model.
* Each :obj:`Model` that can be used in an expression must have an ID that is an identifier,
or define :obj:`expression_term_token_pattern` as an attribute that describes the :obj:`Model`\ 's
syntactic Python structure. See :obj:`Species` for an example.
* Every expression must be computable at any time during a simulation. The evaluation of an expression
always occurs at a precise simulation time, which is implied by the expression but not explicitly
represented. E.g., a reference to a :obj:`Species` means its concentration at the time the expression is
:obj:`eval`\ ed. These are the meanings of references:
* :obj:`Species`: its current concentration
* :obj:`Parameter`: its value, which is static
* :obj:`Observable`: its current value, whose units depend on its definition
* :obj:`Reaction`: its current flux
* :obj:`DfbaObjReaction`: its current flux
The modeller is responsible for ensuring that units in expressions are internally consistent and appropriate
for the expression's use.
Attributes:
model_cls (:obj:`type`): the :obj:`Model` which has an expression
attr (:obj:`str`): the attribute name of the expression in :obj:`model_cls`
expression (:obj:`str`): the expression defined in the obj_tables :obj:`Model`
_py_tokens (:obj:`list` of :obj:`collections.namedtuple`): a list of Python tokens generated by :obj:`tokenize.tokenize()`
_objs (:obj:`dict`): dict of obj_tables Models that might be referenced in :obj:`expression`;
maps model type to a dict mapping ids to Model instances
valid_functions (:obj:`set`): the union of all :obj:`valid_functions` attributes for :obj:`_objs`
unit_registry (:obj:`pint.UnitRegistry`): unit registry
related_objects (:obj:`dict`): models that are referenced in :obj:`expression`; maps model type to
dict that maps model id to model instance
lin_coeffs (:obj:`dict`): linear coefficients of models that are referenced in :obj:`expression`;
maps model type to dict that maps models to coefficients
errors (:obj:`list` of :obj:`str`): errors found when parsing an :obj:`expression` fails
_obj_tables_tokens (:obj:`list` of :obj:`ObjTablesToken`): tokens obtained when an :obj:`expression`
is successfully :obj:`tokenize`\ d; if empty, then this :obj:`ParsedExpression` cannot use :obj:`eval`
_compiled_expression (:obj:`str`): compiled expression that can be evaluated by :obj:`eval`
_compiled_expression_with_units (:obj:`str`): compiled expression with units that can be evaluated by :obj:`eval`
_compiled_namespace (:obj:`dict`): compiled namespace for evaluation by :obj:`eval`
_compiled_namespace_with_units (:obj:`dict`): compiled namespace with units for evaluation by :obj:`eval`
"""
# token pattern of a disambiguated id, which is written ModelType.model_id
MODEL_TYPE_DISAMBIG_PATTERN = (token.NAME, token.DOT, token.NAME)
# token pattern of a name followed by an opening parenthesis, i.e. the start of a function call
FUNC_PATTERN = (token.NAME, token.LPAR)
# enumerate and detect Python tokens that are legal in obj_tables expressions
LEGAL_TOKENS_NAMES = (
    'NUMBER',  # number
    'NAME',  # variable names
    'LSQB', 'RSQB',  # for compartment names
    'DOT',  # for disambiguating variable types
    'COMMA',  # for function arguments
    'DOUBLESTAR', 'MINUS', 'PLUS', 'SLASH', 'STAR',  # mathematical operators
    'LPAR', 'RPAR',  # for mathematical grouping and functions
    'EQEQUAL', 'GREATER', 'GREATEREQUAL', 'LESS', 'LESSEQUAL', 'NOTEQUAL',  # comparison operators
)
# numeric codes, looked up in the `token` module, of the legal tokens named above
LEGAL_TOKENS = set()
# NOTE: this loop runs in the class body, so its variables (`legal_token_name` and `legal_token`)
# remain defined as class attributes after it finishes
for legal_token_name in LEGAL_TOKENS_NAMES:
    legal_token = getattr(token, legal_token_name)
    LEGAL_TOKENS.add(legal_token)
def __init__(self, model_cls, attr, expression, objs):
    """ Create an instance of ParsedExpression

    Args:
        model_cls (:obj:`type`): the :obj:`Model` which has an expression
        attr (:obj:`str`): the attribute name of the expression in :obj:`model_cls`
        expression (:obj:`obj`): the expression defined in the obj_tables :obj:`Model`; a string,
            or an integer or float, which is converted to a string
        objs (:obj:`dict`): dictionary of model objects (instances of :obj:`Model`) organized
            by their type

    Raises:
        :obj:`ParsedExpressionError`: if :obj:`model_cls` is not a subclass of :obj:`Model`,
            or :obj:`model_cls.Meta` does not have an :obj:`expression_term_models` attribute,
            or a model named in :obj:`expression_term_models` is not the related class of any
            related attribute of :obj:`model_cls`,
            or :obj:`expression` is not a string, float or integer,
            or lexical analysis of :obj:`expression` raises an exception
    """
    if not issubclass(model_cls, Model):
        raise ParsedExpressionError("model_cls '{}' is not a subclass of Model".format(
            model_cls.__name__))
    if not hasattr(model_cls.Meta, 'expression_term_models'):
        raise ParsedExpressionError("model_cls '{}' doesn't have a 'Meta.expression_term_models' attribute".format(
            model_cls.__name__))

    # map the names in `expression_term_models` to the classes which `model_cls` is related to
    self.term_models = set()
    for expression_term_model_type_name in model_cls.Meta.expression_term_models:
        related_class = None
        # the loop variable must not be called `attr`: that would rebind the `attr` argument,
        # and `self.attr` below would store an attribute object instead of the attribute's name
        for model_attr in model_cls.Meta.attributes.values():
            if isinstance(model_attr, RelatedAttribute) \
                    and model_attr.related_class.__name__ == expression_term_model_type_name:
                related_class = model_attr.related_class
                break
        if not related_class:
            raise ParsedExpressionError('Expression term {} must have a relationship to {}'.format(
                expression_term_model_type_name, model_cls.__name__))
        self.term_models.add(related_class)

    self.valid_functions = set()
    if hasattr(model_cls.Meta, 'expression_valid_functions'):
        self.valid_functions.update(model_cls.Meta.expression_valid_functions)

    self.unit_registry = model_cls.Meta.expression_unit_registry

    self._objs = objs
    self.model_cls = model_cls
    self.attr = attr

    if isinstance(expression, (int, float)):
        expression = str(expression)
    if not isinstance(expression, str):
        raise ParsedExpressionError(f"Expression '{expression}' in {model_cls.__name__} must be "
                                    "string, float or integer")

    # strip leading and trailing whitespace from expression, which would create a bad token error
    self.expression = expression.strip()

    # allow identifiers that start with a number
    expr = self.__prep_expr_for_tokenization(self.expression)

    try:
        g = tokenize.tokenize(BytesIO(expr.encode('utf-8')).readline)
        # strip the leading ENCODING token and trailing NEWLINE and ENDMARKER tokens
        self._py_tokens = list(g)[1:-1]
        if self._py_tokens and self._py_tokens[-1].type == token.NEWLINE:
            self._py_tokens = self._py_tokens[:-1]
    except tokenize.TokenError as e:
        raise ParsedExpressionError("parsing '{}', a {}.{}, creates a Python syntax error: '{}'".format(
            self.expression, self.model_cls.__name__, self.attr, str(e))) from e

    self.__reset_tokenization()
@staticmethod
def __prep_expr_for_tokenization(expr):
    """ Make an expression digestible by the Python tokenizer

    * Names (identifiers of obj_tables objects) that begin with a digit are not legal Python
      names, so each of them is given the prefix "__digit__"

    Args:
        expr (:obj:`str`): expression

    Returns:
        :obj:`str`: prepared expression
    """
    digit_led_name = (
        r'(^|\b)'
        # a negative lookahead skips hexadecimal numbers and numbers in exponential notation
        r'(?!((0[x][0-9a-f]+(\b|$))|([0-9]+e[\-\+]?[0-9]+(\b|$))))'
        # group 7: one or more digits followed by the rest of a name
        r'([0-9]+[a-z_][0-9a-z_]*)'
        r'(\b|$)'
    )
    return re.sub(digit_led_name, r'__digit__\7', expr, flags=re.IGNORECASE)
def __reset_tokenization(self):
    """ Discard the results of any previous tokenization

    Every term model gets fresh, empty dictionaries of related objects and linear coefficients,
    and the errors, tokens, compiled expressions and compiled namespaces are emptied.
    """
    self.related_objects = {model_type: {} for model_type in self.term_models}
    self.lin_coeffs = {model_type: {} for model_type in self.term_models}

    self.errors = []
    self._obj_tables_tokens = []

    self._compiled_expression = ''
    self._compiled_expression_with_units = ''
    self._compiled_namespace = {}
    self._compiled_namespace_with_units = {}
def _get_trailing_whitespace(self, idx):
    """ Count the columns between a Python token and the token that follows it

    Args:
        idx (:obj:`int`): index of the token in :obj:`self._py_tokens`

    Returns:
        :obj:`int`: start column of the next token minus the end column of the token at :obj:`idx`;
        0 if no token follows the token at :obj:`idx`
    """
    next_idx = idx + 1
    if len(self._py_tokens) <= next_idx:
        return 0

    # only the columns are compared, which assumes that the whole expression is on one line
    following_tok = self._py_tokens[next_idx]
    this_tok = self._py_tokens[idx]
    return following_tok.start[1] - this_tok.end[1]
def recreate_whitespace(self, expr):
    """ Copy the whitespace of this object's :obj:`expression` into an expression with the same token count

    Used to migrate an expression to a different set of model type names.

    Args:
        expr (:obj:`str`): a syntactically correct Python expression

    Returns:
        :obj:`str`: the Python tokens of :obj:`expr`, each followed by as many spaces as follow the
        token at the same position in this instance's :obj:`expression`

    Raises:
        :obj:`ParsedExpressionError`: if tokenizing :obj:`expr` raises an exception,
            or if :obj:`expr` doesn't have the same number of Python tokens as :obj:`self.expression`
    """
    prepped_expr = self.__prep_expr_for_tokenization(expr)
    try:
        readline = BytesIO(prepped_expr.encode('utf-8')).readline
        # drop the leading ENCODING marker and the trailing ENDMARKER token ...
        tokens = list(tokenize.tokenize(readline))[1:-1]
        # ... and the trailing NEWLINE token, if there is one
        if tokens and tokens[-1].type == token.NEWLINE:
            tokens = tokens[:-1]
    except tokenize.TokenError as e:
        raise ParsedExpressionError("parsing '{}' creates a Python syntax error: '{}'".format(
            expr, str(e)))

    num_expected = len(self._py_tokens)
    if len(tokens) != num_expected:
        raise ParsedExpressionError("can't recreate whitespace in '{}', as it has {} instead "
                                    "of {} tokens expected".format(expr, len(tokens), num_expected))

    digit_prefix = '__digit__'
    pieces = []
    for position, tok in enumerate(tokens):
        text = tok.string
        # remove the prefix that was added to names which begin with a digit
        if tok.type == token.NAME and text.startswith(digit_prefix):
            text = text[len(digit_prefix):]
        pieces.append(text)
        pieces.append(' ' * self._get_trailing_whitespace(position))
    return ''.join(pieces)
def _get_model_type(self, name):
""" Find the `ObjTables` model type corresponding to :obj:`name`
Args:
name (:obj:`str`): the name of a purported `ObjTables` model type in an expression
Returns:
:obj:`object`: :obj:`None` if no model named :obj:`name` exists in :obj:`self.term_models`,
else the type of the model with that name
"""
for model_type in self.term_models:
if name == model_type.__name__:
return model_type
return None
def _match_tokens(self, token_pattern, idx):
""" Indicate whether :obj:`tokens` begins with a pattern of tokens that match :obj:`token_pattern`
Args:
token_pattern (:obj:`tuple` of :obj:`int`): a tuple of Python token numbers, taken from the
:obj:`token` module
idx (:obj:`int`): current index into :obj:`tokens`
Returns:
:obj:`object`: :obj:`bool`, False if the initial elements of :obj:`tokens` do not match the
syntax in :obj:`token_pattern`, or :obj:`str`, the matching string
"""
if not token_pattern:
return False
if len(self._py_tokens) - idx < len(token_pattern):
return False
for tok_idx, token_pat_num in enumerate(token_pattern):
if self._py_tokens[idx + tok_idx].exact_type != token_pat_num:
return False
# because a obj_tables primary attribute shouldn't contain white space, do not allow it between the self._py_tokens
# that match token_pattern
if 0 < tok_idx and self._py_tokens[idx + tok_idx - 1].end != self._py_tokens[idx + tok_idx].start:
return False
match_val = ''
for tok in self._py_tokens[idx:idx + len(token_pattern)]:
if tok.type == token.NAME and tok.string.startswith('__digit__'):
match_val += tok.string[9:]
else:
match_val += tok.string
return match_val
def _get_disambiguated_id(self, idx, case_fold_match=False):
""" Try to parse a disambiguated `ObjTables` id from :obj:`self._py_tokens` at :obj:`idx`
Look for a disambugated id (a Model written as :obj:`ModelType.model_id`). If tokens do not match,
return :obj:`None`. If tokens match, but their values are wrong, return an error :obj:`str`.
If a disambugated id is found, return a :obj:`LexMatch` describing it.
Args:
idx (:obj:`int`): current index into :obj:`tokens`
case_fold_match (:obj:`bool`, optional): if set, :obj:`casefold()` identifiers before matching;
in a :obj:`ObjTablesToken`, :obj:`token_string` retains the original expression text, while :obj:`model_id`
contains the casefold'ed value; identifier keys in :obj:`self._objs` must already be casefold'ed;
default=False
Returns:
:obj:`object`: If tokens do not match, return :obj:`None`. If tokens match,
but their values are wrong, return an error :obj:`str`.
If a disambugated id is found, return a :obj:`LexMatch` describing it.
"""
disambig_model_match = self._match_tokens(self.MODEL_TYPE_DISAMBIG_PATTERN, idx)
if disambig_model_match:
disambig_model_type = self._py_tokens[idx].string
possible_model_id = self._py_tokens[idx + 2].string
if case_fold_match:
possible_model_id = possible_model_id.casefold()
# the disambiguation model type must be in self.term_models
model_type = self._get_model_type(disambig_model_type)
if model_type is None:
return ("'{}', a {}.{}, contains '{}', but the disambiguation model type '{}' "
"cannot be referenced by '{}' expressions".format(
self.expression, self.model_cls.__name__,
self.attr, disambig_model_match, disambig_model_type,
self.model_cls.__name__))
if possible_model_id not in self._objs.get(model_type, {}):
return "'{}', a {}.{}, contains '{}', but '{}' is not the id of a '{}'".format(
self.expression, self.model_cls.__name__, self.attr, disambig_model_match,
possible_model_id, disambig_model_type)
return LexMatch([ObjTablesToken(ObjTablesTokenCodes.obj_id, disambig_model_match, model_type,
possible_model_id, self._objs[model_type][possible_model_id])],
len(self.MODEL_TYPE_DISAMBIG_PATTERN))
# no match
return None
def _get_related_obj_id(self, idx, case_fold_match=False):
""" Try to parse a related object `ObjTables` id from :obj:`self._py_tokens` at :obj:`idx`
Different `ObjTables` objects match different Python token patterns. The default pattern
is (token.NAME, ), but an object of type :obj:`model_type` can define a custom pattern in
:obj:`model_type.Meta.expression_term_token_pattern`, as :obj:`Species` does. Some patterns may consume
multiple Python tokens.
Args:
idx (:obj:`int`): current index into :obj:`_py_tokens`
case_fold_match (:obj:`bool`, optional): if set, casefold identifiers before matching;
identifier keys in :obj:`self._objs` must already be casefold'ed; default=False
Returns:
:obj:`object`: If tokens do not match, return :obj:`None`. If tokens match,
but their values are wrong, return an error :obj:`str`.
If a related object id is found, return a :obj:`LexMatch` describing it.
"""
token_matches = set()
id_matches = set()
for model_type in self.term_models:
token_pattern = model_type.Meta.expression_term_token_pattern
match_string = self._match_tokens(token_pattern, idx)
if match_string:
token_matches.add(match_string)
# is match_string the ID of an instance in model_type?
if case_fold_match:
if match_string.casefold() in self._objs.get(model_type, {}):
id_matches.add(IdMatch(model_type, token_pattern, match_string))
else:
if match_string in self._objs.get(model_type, {}):
id_matches.add(IdMatch(model_type, token_pattern, match_string))
if not id_matches:
if token_matches:
return ("'{}', a {}.{}, contains the identifier(s) '{}', which aren't "
"the id(s) of an object".format(
self.expression, self.model_cls.__name__,
self.attr, "', '".join(token_matches)))
return None
if 1 < len(id_matches):
# as lexers always do, pick the longest match
id_matches_by_length = sorted(id_matches, key=lambda id_match: len(id_match.match_string))
longest_length = len(id_matches_by_length[-1].match_string)
longest_matches = set()
while id_matches_by_length and len(id_matches_by_length[-1].match_string) == longest_length:
longest_matches.add(id_matches_by_length.pop())
id_matches = longest_matches
if 1 < len(id_matches):
# error: multiple, maximal length matches
matches_error = ["'{}' as a {} id".format(id_val, model_type.__name__)
for model_type, _, id_val in sorted(id_matches, key=lambda id_match: id_match.model_type.__name__)]
matches_error = ', '.join(matches_error)
return "'{}', a {}.{}, contains multiple model object id matches: {}".format(
self.expression, self.model_cls.__name__, self.attr, matches_error)
else:
# return a lexical match about a related id
match = id_matches.pop()
right_case_match_string = match.match_string
if case_fold_match:
right_case_match_string = match.match_string.casefold()
return LexMatch(
[ObjTablesToken(ObjTablesTokenCodes.obj_id, match.match_string, match.model_type, right_case_match_string,
self._objs[match.model_type][right_case_match_string])],
len(match.token_pattern))
def _get_func_call_id(self, idx, case_fold_match='unused'):
""" Try to parse a Python math function call from :obj:`self._py_tokens` at :obj:`idx`
Each `ObjTables` object :obj:`model_cls` that contains an expression which can use Python math
functions must define the set of allowed functions in :obj:`Meta.expression_valid_functions` of the
model_cls Expression Model.
Args:
idx (:obj:`int`): current index into :obj:`self._py_tokens`
case_fold_match (:obj:`str`, optional): ignored keyword; makes :obj:`ParsedExpression.tokenize()` simpler
Returns:
:obj:`object`: If tokens do not match, return :obj:`None`. If tokens match,
but their values are wrong, return an error :obj:`str`.
If a function call is found, return a :obj:`LexMatch` describing it.
"""
func_match = self._match_tokens(self.FUNC_PATTERN, idx)
if func_match:
func_name = self._py_tokens[idx].string
# FUNC_PATTERN is "identifier ("
# the closing paren ")" will simply be encoded as a ObjTablesToken with code == op
# are Python math functions defined?
if not hasattr(self.model_cls.Meta, 'expression_valid_functions'):
return ("'{}', a {}.{}, contains the func name '{}', but {}.Meta doesn't "
"define 'expression_valid_functions'".format(self.expression,
self.model_cls.__name__,
self.attr, func_name,
self.model_cls.__name__))
function_ids = set([f.__name__ for f in self.model_cls.Meta.expression_valid_functions])
# is the function allowed?
if func_name not in function_ids:
return ("'{}', a {}.{}, contains the func name '{}', but it isn't in "
"{}.Meta.expression_valid_functions: {}".format(self.expression,
self.model_cls.__name__,
self.attr, func_name,
self.model_cls.__name__,
', '.join(function_ids)))
# return a lexical match about a math function
return LexMatch(
[ObjTablesToken(ObjTablesTokenCodes.math_func_id, func_name), ObjTablesToken(ObjTablesTokenCodes.op, '(')],
len(self.FUNC_PATTERN))
# no match
return None
def tokenize(self, case_fold_match=False):
""" Tokenize a Python expression in :obj:`self.expression`
Args:
case_fold_match (:obj:`bool`, optional): if set, casefold identifiers before matching;
identifier keys in :obj:`self._objs` must already be casefold'ed; default = False