-
Notifications
You must be signed in to change notification settings - Fork 237
/
data_generator_runtime_dom.py
330 lines (277 loc) · 10.8 KB
/
data_generator_runtime_dom.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
from abc import abstractmethod, ABC
from datetime import date, datetime
from .data_generator_runtime import evaluate_function, ObjectRow, RuntimeContext
from contextlib import contextmanager
from typing import Union, Dict, Sequence, Optional, cast
from numbers import Number
from fastnumbers import fast_real
import jinja2
from .data_gen_exceptions import (
DataGenError,
DataGenNameError,
DataGenSyntaxError,
DataGenValueError,
fix_exception,
)
# Objects that represent the hierarchy of a data generator.
# Roughly similar to the YAML structure, but with domain-specific objects.
# Scalar: the literal types a SimpleValue definition is annotated to hold.
Scalar = Union[str, Number, date, datetime]
# FieldValue: the annotated return type of the render()/generate_value() methods.
FieldValue = Union[None, Scalar, ObjectRow, tuple]
# Definition: the annotated type of a FieldFactory's definition; the names are
# strings because they are forward references to classes in this module.
Definition = Union["ObjectTemplate", "SimpleValue", "StructuredValue"]
class FieldDefinition(ABC):
    """Abstract base for anything that can render a field value.

    Every subclass can fill the role of X in::

        - object: abc
          fields:
            fieldname: X
    """

    def __init__(self):
        # Source-location metadata and the raw definition start out unset;
        # they are assigned in this order: definition, filename, line_num.
        self.definition = self.filename = self.line_num = None

    @abstractmethod
    def render(self, context: RuntimeContext) -> FieldValue:
        """Produce the value of this definition for the given context."""
        ...
class ObjectTemplate:
    """A factory that generates rows.

    The runtime equivalent of::

        - object: tablename
          count: count_expr   # counts can be dynamic so they are expressions
          fields: list of FieldFactories
          friends: list of other ObjectTemplates
          nickname: string
    """

    def __init__(
        self,
        tablename: str,
        filename: str,
        line_num: int,
        nickname: Optional[str] = None,
        count_expr: Optional["FieldDefinition"] = None,  # counts can be dynamic so they are expressions
        fields: Sequence = (),
        friends: Sequence = (),
    ):
        """Store the template's configuration.

        Args:
            tablename: Name of the table the generated rows belong to.
            filename: Source file of the template, used in error reports.
            line_num: Source line of the template, used in error reports.
            nickname: Optional alias passed along when rows are registered.
            count_expr: Optional renderable expression giving the row count;
                when absent a single row is generated.
            fields: Objects exposing ``name`` and ``generate_value(context)``.
            friends: Objects exposing ``generate_rows(storage, context)``,
                invoked after each row of this template is written.
        """
        self.tablename = tablename
        self.nickname = nickname
        self.count_expr = count_expr
        self.filename = filename
        self.line_num = line_num
        self.fields = fields
        self.friends = friends

    def render(self, context: "RuntimeContext") -> Optional["ObjectRow"]:
        """Generate this template's rows into ``context.output_stream``."""
        return self.generate_rows(context.output_stream, context)

    def generate_rows(
        self, storage, parent_context: "RuntimeContext"
    ) -> Optional["ObjectRow"]:
        """Generate several rows.

        A child context is created from ``parent_context`` and the count
        expression is evaluated in it.

        Returns:
            The last generated row, or ``None`` when the count is not positive.
        """
        last_row = None
        context = RuntimeContext(parent_context, self.tablename)
        count = self._evaluate_count(context)
        with self.exception_handling(f"Cannot generate {self.name}"):
            for _ in range(count):
                last_row = self._generate_row(storage, context)
        return last_row  # return last row

    @contextmanager
    def exception_handling(self, message: str):
        """Convert unexpected exceptions into located ``DataGenError``s.

        ``DataGenError`` instances pass through untouched; anything else is
        wrapped with ``message`` plus this template's filename and line
        number, keeping the original exception as the explicit cause.
        """
        try:
            yield
        except DataGenError:
            raise
        except Exception as e:
            raise DataGenError(
                f"{message} : {str(e)}", self.filename, self.line_num
            ) from e

    def _evaluate_count(self, context: "RuntimeContext") -> int:
        """Evaluate the count expression to an integer.

        Returns:
            ``1`` when no count expression is set, otherwise the rendered
            value converted through ``float`` and truncated with ``int``.

        Raises:
            DataGenValueError: If the rendered value cannot be converted to
                an integer (non-numeric, wrong type, or infinite).
        """
        if not self.count_expr:
            return 1
        try:
            return int(float(cast(str, self.count_expr.render(context))))
        except (ValueError, TypeError, OverflowError) as e:
            # OverflowError covers int(float("inf")).
            raise DataGenValueError(
                f"Cannot evaluate {self.count_expr.definition} as number",
                self.count_expr.filename,
                self.count_expr.line_num,
            ) from e

    @property
    def name(self) -> str:
        """Table name, followed by the nickname in parentheses if one is set."""
        name = self.tablename
        if self.nickname:
            name += f" ({self.nickname})"
        return name

    def _generate_row(self, storage, context: "RuntimeContext") -> "ObjectRow":
        """Generate an individual row.

        The row is registered with the context before its fields are
        generated, then written to ``storage``; afterwards every friend
        template generates its own rows in the same context.

        Raises:
            DataGenError: If registering references or writing the row fails.
        """
        context.incr()
        row = {"id": context.generate_id()}
        sobj = ObjectRow(self.tablename, row)

        context.register_object(sobj, self.nickname)
        context.obj = sobj

        self._generate_fields(context, row)

        try:
            # both of these lines loop over the fields so they could maybe
            # be combined but it kind of messes with the modularity of the
            # code.
            self.register_row_intertable_references(row, context)
            storage.write_row(self.tablename, row)
        except Exception as e:
            raise DataGenError(str(e), self.filename, self.line_num) from e

        for childobj in self.friends:
            childobj.generate_rows(storage, context)
        return sobj

    def _generate_fields(self, context: "RuntimeContext", row: Dict) -> None:
        """Generate all of the fields of a row, storing them in ``row``."""
        for field in self.fields:
            with self.exception_handling("Problem rendering value"):
                value = field.generate_value(context)
                row[field.name] = value
                self._check_type(field, value, context)
                context.register_field(field.name, value)

    def _check_type(self, field, generated_value, context: "RuntimeContext"):
        """Check the type of a field value.

        Raises:
            DataGenValueError: If the value is not an int, str, bool, date,
                float, ``None`` or ``ObjectRow``.
        """
        allowed_types = (int, str, bool, date, float, type(None), ObjectRow)
        if not isinstance(generated_value, allowed_types):
            raise DataGenValueError(
                f"Field '{field.name}' generated unexpected object: {generated_value} {type(generated_value)}",
                self.filename,
                self.line_num,
            )

    def register_row_intertable_references(
        self, row: dict, context: "RuntimeContext"
    ) -> None:
        """Before serializing we need to convert objects to flat ID integers.

        Every field whose value is an ``ObjectRow`` is reported to the
        context as a reference from this table to the referenced row's table.
        """
        for fieldname, fieldvalue in row.items():
            if isinstance(fieldvalue, ObjectRow):
                context.register_intertable_reference(
                    self.tablename, fieldvalue._tablename, fieldname
                )
class SimpleValue(FieldDefinition):
    """A value with no sub-structure (although it could hold a template).

    Examples::

        - object: abc
          fields:
            fieldname: XXXXX
            fieldname2: <<XXXXX>>
            fieldname3: 42
    """

    def __init__(self, definition: Scalar, filename: str, line_num: int):
        """Store the raw definition and where it came from.

        Args:
            definition: The literal value or template string.
            filename: Source file of the definition, used in error reports.
            line_num: Source line of the definition, used in error reports.
        """
        super().__init__()
        self.filename = filename
        self.line_num = line_num
        self.definition: Scalar = definition
        assert isinstance(filename, str)
        assert isinstance(line_num, int), line_num
        # None = not computed yet, False = definition is not a string,
        # otherwise whatever context.get_evaluator() returned.
        self._evaluator = None

    def evaluator(self, context):
        """Populate the evaluator property once.

        String definitions are handed to ``context.get_evaluator``; any other
        definition gets ``False`` so that ``render`` returns it unchanged.

        Raises:
            Whatever ``fix_exception`` produces when the definition cannot be
            parsed.
        """
        if self._evaluator is None:
            if isinstance(self.definition, str):
                try:
                    self._evaluator = context.get_evaluator(self.definition)
                except Exception as e:
                    # Same convention as FieldFactory.generate_value: raise
                    # the helper's result so a parse failure is never
                    # silently ignored.
                    raise fix_exception(
                        f"Cannot parse value {self.definition}", self, e
                    )
            else:
                self._evaluator = False
        return self._evaluator

    def render(self, context: RuntimeContext) -> FieldValue:
        """Render the value: rendering a template if necessary.

        String results are passed through ``fast_real``; other results are
        returned as-is.

        Raises:
            DataGenNameError: If the template references an undefined name.
            DataGenValueError: If evaluating the template fails otherwise.
        """
        evaluator = self.evaluator(context)
        if evaluator:
            try:
                val = evaluator(context)
            except jinja2.exceptions.UndefinedError as e:
                raise DataGenNameError(e.message, self.filename, self.line_num) from e
            except Exception as e:
                raise DataGenValueError(str(e), self.filename, self.line_num) from e
        else:
            val = self.definition
        return fast_real(val) if isinstance(val, str) else val

    def __repr__(self):
        return f"<{self.__class__.__name__ , self.definition}>"
class StructuredValue(FieldDefinition):
    """A value with substructure which will call a handler function.

    Examples::

        - object: abc
          fields:
            fieldname:
              - reference:
                  foo
            fieldname2:
              - random_number:
                  min: 10
                  max: 20
            fieldname3:
              - reference:
                  ...
    """

    def __init__(self, function_name, args, filename, line_num):
        """Store the function name and normalise its arguments.

        Args:
            function_name: Name of the handler; may contain one ``.`` to call
                a method on a named object.
            args: A list (positional arguments), a dict (named arguments) or
                a scalar (treated as a single positional argument).
            filename: Source file of the definition, used in error reports.
            line_num: Source line of the definition, used in error reports.
        """
        # Gives the instance the base attributes (notably ``definition``)
        # that code working with any FieldDefinition reads.
        super().__init__()
        self.function_name = function_name
        self.filename = filename
        self.line_num = line_num
        if isinstance(args, list):  # lists will represent your arguments
            self.args = args
            self.kwargs = {}
        elif isinstance(args, dict):  # dicts will represent named arguments
            self.args = []
            self.kwargs = args
        else:  # scalars will be turned into a one-argument list
            self.args = [args]
            self.kwargs = {}

    def render(self, context: RuntimeContext) -> FieldValue:
        """Look up the handler function and evaluate it with the stored arguments.

        Dotted names (``obj.method``) are resolved against
        ``context.field_vars()``; plain names against
        ``context.executable_blocks()``.

        Raises:
            DataGenSyntaxError: If the name contains more than one ``.``.
            DataGenNameError: If the object, method or function is not found.
        """
        if "." in self.function_name:
            objname, method, *rest = self.function_name.split(".")
            if rest:
                raise DataGenSyntaxError(
                    f"Function names should have only one '.' in them: {self.function_name}",
                    self.filename,
                    self.line_num,
                )
            obj = context.field_vars().get(objname)
            if not obj:
                raise DataGenNameError(
                    f"Cannot find definition for: {objname}",
                    self.filename,
                    self.line_num,
                )
            # A default is required here: without it a missing attribute
            # raises AttributeError and the check below can never fire.
            func = getattr(obj, method, None)
            if not func:
                raise DataGenNameError(
                    f"Cannot find definition for: {method} on {objname}",
                    self.filename,
                    self.line_num,
                )
        else:
            try:
                func = context.executable_blocks()[self.function_name]
            except KeyError as e:
                raise DataGenNameError(
                    f"Cannot find function named {self.function_name} to handle field value",
                    self.filename,
                    self.line_num,
                ) from e

        return evaluate_function(func, self.args, self.kwargs, context)

    def __repr__(self):
        return (
            f"<StructuredValue: {self.function_name} (*{self.args}, **{self.kwargs})>"
        )
class ReferenceValue(StructuredValue):
    """StructuredValue subclass for the ``reference`` form.

    - object: foo
      fields:
        - reference:
            Y
    """
class FieldFactory:
    """A single named data field whose value is rendered on demand.

    Corresponds to the marked part of::

        - object:
          fields:
            name: value   # this part
    """

    def __init__(self, name: str, definition: Definition, filename: str, line_num: int):
        """Remember the field name, its definition and its source location."""
        self.name, self.filename, self.line_num, self.definition = (
            name,
            filename,
            line_num,
            definition,
        )

    def generate_value(self, context) -> FieldValue:
        """Render the definition, reporting any failure through ``fix_exception``."""
        try:
            rendered = self.definition.render(context)
        except Exception as exc:
            message = f"Problem rendering field {self.name}:\n {str(exc)}"
            raise fix_exception(message, self, exc)
        return rendered

    def __repr__(self):
        # The f-string interpolates a tuple, so its parentheses and quotes
        # are part of the output.
        parts = (
            self.__class__.__name__,
            self.name,
            self.definition.__class__.__name__,
        )
        return f"<{parts}>"