-
Notifications
You must be signed in to change notification settings - Fork 1.5k
/
flow_runner.py
594 lines (502 loc) · 22.5 KB
/
flow_runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
from typing import (
Any,
Callable,
Dict,
Iterable,
List,
NamedTuple,
Optional,
Set,
Tuple,
Union,
)
import pendulum
import prefect
from prefect.core import Edge, Flow, Task
from prefect.engine import signals
from prefect.engine.result import Result
from prefect.engine.result_handlers import ConstantResultHandler
from prefect.engine.runner import ENDRUN, Runner, call_state_handlers
from prefect.engine.state import (
Cancelled,
Failed,
Mapped,
Pending,
Retrying,
Running,
Scheduled,
State,
Success,
)
from prefect.engine.task_runner import TaskRunner
from prefect.utilities.collections import flatten_seq
from prefect.utilities.executors import run_with_heartbeat
# Return type of `FlowRunner.initialize_run`: bundles the (possibly updated) flow
# state, the initial task states, the run context and the per-task contexts so
# callers can unpack them positionally or access them by name.
FlowRunnerInitializeResult = NamedTuple(
    "FlowRunnerInitializeResult",
    [
        # state of the flow run after initialization
        ("state", State),
        # initial state for each task, keyed by task
        ("task_states", Dict[Task, State]),
        # context dictionary used for the flow run
        ("context", Dict[str, Any]),
        # extra context supplied to each individual task, keyed by task
        ("task_contexts", Dict[Task, Dict[str, Any]]),
    ],
)
class FlowRunner(Runner):
    """
    FlowRunners handle the execution of Flows and determine the State of a Flow
    before, during and after the Flow is run.

    In particular, through the FlowRunner you can specify which tasks should be
    returned after the Flow is finished, and what states each task should be
    initialized with.

    Args:
        - flow (Flow): the `Flow` to be run
        - task_runner_cls (TaskRunner, optional): The class used for running
            individual Tasks. Defaults to [TaskRunner](task_runner.html)
        - state_handlers (Iterable[Callable], optional): A list of state change handlers
            that will be called whenever the flow changes state, providing an
            opportunity to inspect or modify the new state. The handler
            will be passed the flow runner instance, the old (prior) state, and the new
            (current) state, with the following signature:

            `state_handler(fr: FlowRunner, old_state: State, new_state: State) -> Optional[State]`

            If multiple functions are passed, then the `new_state` argument will be the
            result of the previous handler.

    Note: new FlowRunners are initialized within the call to `Flow.run()` and in general,
    this is the endpoint through which FlowRunners will be interacted with most frequently.

    Example:
    ```python
    @task
    def say_hello():
        print('hello')

    with Flow("My Flow") as f:
        say_hello()

    fr = FlowRunner(flow=f)
    flow_state = fr.run()
    ```
    """

    def __init__(
        self,
        flow: Flow,
        task_runner_cls: type = None,
        state_handlers: Iterable[Callable] = None,
    ):
        # snapshot of the context at construction time; re-entered in `run_task`
        self.context = prefect.context.to_dict()
        self.flow = flow
        if task_runner_cls is None:
            task_runner_cls = prefect.engine.get_default_task_runner_class()
        self.task_runner_cls = task_runner_cls
        super().__init__(state_handlers=state_handlers)

    def __repr__(self) -> str:
        return "<{}: {}>".format(type(self).__name__, self.flow.name)

    def call_runner_target_handlers(self, old_state: State, new_state: State) -> State:
        """
        A special state handler that the FlowRunner uses to call its flow's state handlers.
        This method is called as part of the base Runner's `handle_state_change()` method.

        Args:
            - old_state (State): the old (previous) state
            - new_state (State): the new (current) state

        Returns:
            - State: the new state
        """
        self.logger.debug(
            "Flow '{name}': Handling state change from {old} to {new}".format(
                name=self.flow.name,
                old=type(old_state).__name__,
                new=type(new_state).__name__,
            )
        )
        # handlers are chained: a handler returning a falsy value leaves the state as-is
        for handler in self.flow.state_handlers:
            new_state = handler(self.flow, old_state, new_state) or new_state

        return new_state

    def initialize_run(  # type: ignore
        self,
        state: Optional[State],
        task_states: Dict[Task, State],
        context: Dict[str, Any],
        task_contexts: Dict[Task, Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> FlowRunnerInitializeResult:
        """
        Initializes the Flow run by initializing state and context appropriately.

        Note that `context` and `task_contexts` are updated in place.

        Args:
            - state (Optional[State]): the initial state of the run
            - task_states (Dict[Task, State]): a dictionary of any initial task states
            - context (Dict[str, Any]): prefect.Context to use for execution
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be
                provided to each task
            - parameters (dict): the parameter values for the run

        Returns:
            - NamedTuple: a tuple of initialized objects:
                `(state, task_states, context, task_contexts)`
        """
        # overwrite context parameters one-by-one
        if parameters:
            context_params = context.setdefault("parameters", {})
            for param, value in parameters.items():
                context_params[param] = value

        context.update(flow_name=self.flow.name)
        context.setdefault("scheduled_start_time", pendulum.now("utc"))

        # add various formatted dates to context; values already present win
        now = pendulum.now("utc")
        dates = {
            "date": now,
            "today": now.strftime("%Y-%m-%d"),
            "yesterday": now.add(days=-1).strftime("%Y-%m-%d"),
            "tomorrow": now.add(days=1).strftime("%Y-%m-%d"),
            "today_nodash": now.strftime("%Y%m%d"),
            "yesterday_nodash": now.add(days=-1).strftime("%Y%m%d"),
            "tomorrow_nodash": now.add(days=1).strftime("%Y%m%d"),
        }
        for key, val in dates.items():
            context.setdefault(key, val)

        # every task in the flow gets its name and slug in its own context
        for task in self.flow.tasks:
            task_contexts.setdefault(task, {}).update(
                task_name=task.name, task_slug=task.slug
            )

        state, context = super().initialize_run(state=state, context=context)
        return FlowRunnerInitializeResult(
            state=state,
            task_states=task_states,
            context=context,
            task_contexts=task_contexts,
        )

    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.engine.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for execution
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        Raises:
            - Exception: any unexpected error is re-raised only when the
                `raise_on_exception` context flag is set; otherwise it is converted
                into a `Failed` state
        """
        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        task_states = dict(task_states or {})
        context = dict(context or {})
        # the per-task dictionaries are updated in place by `initialize_run`, so
        # they need their own copies as well
        task_contexts = {
            task: dict(task_ctx) for task, task_ctx in (task_contexts or {}).items()
        }
        parameters = dict(parameters or {})
        if executor is None:
            executor = prefect.engine.get_default_executor_class()()

        try:
            state, task_states, context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        # ENDRUN carries the state the run should end with
        except ENDRUN as exc:
            state = exc.state

        except KeyboardInterrupt:
            self.logger.debug("Interrupt signal raised, cancelling Flow run.")
            state = Cancelled(message="Interrupt signal raised, cancelling flow run.")

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc))
            )
            if prefect.context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(repr(exc)),
                result=exc,
            )
            # `state` may still be None if the failure happened during initialization
            state = self.handle_state_change(state or Pending(), new_state)

        return state

    @call_state_handlers
    def check_flow_reached_start_time(self, state: State) -> State:
        """
        Checks if the Flow is in a Scheduled state and, if it is, ensures that the scheduled
        time has been reached.

        Args:
            - state (State): the current state of this Flow

        Returns:
            - State: the state of the flow after performing the check

        Raises:
            - ENDRUN: if the flow is Scheduled with a future scheduled time
        """
        if isinstance(state, Scheduled):
            if state.start_time and state.start_time > pendulum.now("utc"):
                self.logger.debug(
                    "Flow '{name}': start_time has not been reached; ending run.".format(
                        name=self.flow.name
                    )
                )
                raise ENDRUN(state)
        return state

    @call_state_handlers
    def check_flow_is_pending_or_running(self, state: State) -> State:
        """
        Checks if the flow is in either a Pending state or Running state. Either are valid
        starting points (because we allow simultaneous runs of the same flow run).

        Args:
            - state (State): the current state of this flow

        Returns:
            - State: the state of the flow after running the check

        Raises:
            - ENDRUN: if the flow is not pending or running
        """
        # the flow run is already finished
        if state.is_finished() is True:
            self.logger.info("Flow run has already finished.")
            raise ENDRUN(state)

        # the flow run must be either pending or running (possibly redundant with above)
        elif not (state.is_pending() or state.is_running()):
            self.logger.info("Flow is not ready to run.")
            raise ENDRUN(state)
        return state

    @call_state_handlers
    def set_flow_to_running(self, state: State) -> State:
        """
        Puts Pending flows in a Running state; leaves Running flows Running.

        Args:
            - state (State): the current state of this flow

        Returns:
            - State: the state of the flow after running the check

        Raises:
            - ENDRUN: if the flow is not pending or running
        """
        if state.is_pending():
            self.logger.info("Starting flow run.")
            return Running(message="Running flow.")
        elif state.is_running():
            return state
        else:
            raise ENDRUN(state)

    @run_with_heartbeat
    @call_state_handlers
    def get_flow_run_state(
        self,
        state: State,
        task_states: Dict[Task, State],
        task_contexts: Dict[Task, Dict[str, Any]],
        return_tasks: Set[Task],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.base.Executor",
    ) -> State:
        """
        Runs the flow.

        Args:
            - state (State): starting state for the Flow; must be a Running state
            - task_states (dict): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be provided to
                each task
            - return_tasks ([Task], optional): Tasks to include in the final returned
                Flow state; `None` means no tasks are returned
            - task_runner_state_handlers (Iterable[Callable]): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor): executor to use when performing computation

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        Raises:
            - ENDRUN: if the provided state is not a Running state
            - ValueError: if `return_tasks` contains tasks that are not part of the flow
        """
        if not state.is_running():
            self.logger.info("Flow is not in a Running state.")
            raise ENDRUN(state)

        # `return_tasks` is traversed several times below, so materialize it once;
        # a single-pass iterable (e.g. a generator) would otherwise be exhausted
        # by the validation step and silently produce no return states
        return_tasks = set(return_tasks or [])
        if return_tasks.difference(self.flow.tasks):
            raise ValueError("Some tasks in return_tasks were not found in the flow.")

        # -- process each task in order

        with executor.start():

            for task in self.flow.sorted_tasks():

                task_state = task_states.get(task)
                # Constant tasks without a provided state succeed with their value
                if task_state is None and isinstance(
                    task, prefect.tasks.core.constants.Constant
                ):
                    task_states[task] = task_state = Success(result=task.value)

                # if the state is finished, don't run the task, just use the provided state
                if (
                    isinstance(task_state, State)
                    and task_state.is_finished()
                    and not task_state.is_cached()
                    and not task_state.is_mapped()
                ):
                    continue

                upstream_states = {}  # type: Dict[Edge, Union[State, Iterable]]

                # -- process each edge to the task
                for edge in self.flow.edges_to(task):
                    upstream_states[edge] = task_states.get(
                        edge.upstream_task, Pending(message="Task state not available.")
                    )

                # augment edges with upstream constants
                for key, val in self.flow.constants[task].items():
                    edge = Edge(
                        upstream_task=prefect.tasks.core.constants.Constant(val),
                        downstream_task=task,
                        key=key,
                    )
                    upstream_states[edge] = Success(
                        "Auto-generated constant value",
                        result=Result(val, result_handler=ConstantResultHandler(val)),
                    )

                # -- run the task

                with prefect.context(task_full_name=task.name, task_tags=task.tags):
                    task_states[task] = executor.submit(
                        self.run_task,
                        task=task,
                        state=task_state,
                        upstream_states=upstream_states,
                        context=dict(prefect.context, **task_contexts.get(task, {})),
                        task_runner_state_handlers=task_runner_state_handlers,
                        executor=executor,
                    )

            # ---------------------------------------------
            # Collect results
            # ---------------------------------------------

            # terminal tasks determine if the flow is finished
            terminal_tasks = self.flow.terminal_tasks()

            # reference tasks determine flow state
            reference_tasks = self.flow.reference_tasks()

            # wait until all terminal tasks are finished
            final_tasks = terminal_tasks.union(reference_tasks).union(return_tasks)
            final_states = executor.wait(
                {
                    t: task_states.get(t, Pending("Task not evaluated by FlowRunner."))
                    for t in final_tasks
                }
            )

            # also wait for any children of Mapped tasks to finish, and add them
            # to the dictionary to determine flow state
            all_final_states = final_states.copy()
            for t, s in list(final_states.items()):
                if s.is_mapped():
                    s.map_states = executor.wait(s.map_states)
                    s.result = [ms.result for ms in s.map_states]
                    all_final_states[t] = s.map_states

            assert isinstance(final_states, dict)  # mypy assert
            key_states = set(flatten_seq([all_final_states[t] for t in reference_tasks]))
            terminal_states = set(
                flatten_seq([all_final_states[t] for t in terminal_tasks])
            )
            return_states = {t: final_states[t] for t in return_tasks}

            state = self.determine_final_state(
                state=state,
                key_states=key_states,
                return_states=return_states,
                terminal_states=terminal_states,
            )

            return state

    def determine_final_state(
        self,
        state: State,
        key_states: Set[State],
        return_states: Dict[Task, State],
        terminal_states: Set[State],
    ) -> State:
        """
        Implements the logic for determining the final state of the flow run.

        Args:
            - state (State): the current state of the Flow
            - key_states (Set[State]): the states which will determine the success / failure
                of the flow run
            - return_states (Dict[Task, State]): states to return as results
            - terminal_states (Set[State]): the states of the terminal tasks for this flow

        Returns:
            - State: the final state of the flow run
        """
        # check that the flow is finished; if not, keep the current state and only
        # attach the return states to it
        if not all(s.is_finished() for s in terminal_states):
            self.logger.info("Flow run RUNNING: terminal tasks are incomplete.")
            state.result = return_states

        # check if any key task failed
        elif any(s.is_failed() for s in key_states):
            self.logger.info("Flow run FAILED: some reference tasks failed.")
            state = Failed(message="Some reference tasks failed.", result=return_states)

        # check if all reference tasks succeeded
        elif all(s.is_successful() for s in key_states):
            self.logger.info("Flow run SUCCESS: all reference tasks succeeded")
            state = Success(
                message="All reference tasks succeeded.", result=return_states
            )

        # check for any unanticipated state that is finished but neither success nor failed
        else:
            self.logger.info("Flow run SUCCESS: no reference tasks failed")
            state = Success(message="No reference tasks failed.", result=return_states)

        return state

    def run_task(
        self,
        task: Task,
        state: State,
        upstream_states: Dict[Edge, State],
        context: Dict[str, Any],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.Executor",
    ) -> State:
        """
        Runs a specific task. This method is intended to be called by submitting it to
        an executor.

        Args:
            - task (Task): the task to run
            - state (State): starting state for the task run
            - upstream_states (Dict[Edge, State]): dictionary of upstream states
            - context (Dict[str, Any]): a context dictionary for the task run
            - task_runner_state_handlers (Iterable[Callable]): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor): executor to use when performing computation

        Returns:
            - State: the `State` returned by the task runner for this task
        """
        # re-enter the context captured when this FlowRunner was created
        with prefect.context(self.context):
            # a task-level result handler takes precedence over the flow's
            default_handler = task.result_handler or self.flow.result_handler
            task_runner = self.task_runner_cls(
                task=task,
                state_handlers=task_runner_state_handlers,
                result_handler=default_handler,
            )

            # if this task reduces over a mapped state, make sure its children have finished
            for edge, upstream_state in upstream_states.items():

                # if the upstream state is Mapped, wait until its results are all available
                if not edge.mapped and upstream_state.is_mapped():
                    assert isinstance(upstream_state, Mapped)  # mypy assert
                    upstream_state.map_states = executor.wait(upstream_state.map_states)
                    upstream_state.result = [
                        s.result for s in upstream_state.map_states
                    ]

            return task_runner.run(
                state=state,
                upstream_states=upstream_states,
                context=context,
                executor=executor,
            )