Skip to content

Commit

Permalink
fixup! Composer core patch
Browse files Browse the repository at this point in the history
cherry-picked change from the community
apache/airflow#26161

Internal bug

Change-Id: I62478c4c1142a00f1f984e5d14d1af7754946b82
GitOrigin-RevId: c582c826563065ef2c7c37213bc2f7a4fdcb81d8
  • Loading branch information
Cloud Composer Team committed Dec 8, 2022
1 parent da4b68b commit 68ba70a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 15 deletions.
22 changes: 8 additions & 14 deletions airflow/jobs/backfill_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def _get_dag_run(self, dagrun_info: DagRunInfo, dag: DAG, session: Session = Non
return run

@provide_session
def _task_instances_for_dag_run(self, dag_run, session=None):
def _task_instances_for_dag_run(self, dag, dag_run, session=None):
"""
Returns a map of task instance key to task instance object for the tasks to
run in the given dag run.
Expand All @@ -368,18 +368,19 @@ def _task_instances_for_dag_run(self, dag_run, session=None):
dag_run.refresh_from_db()
make_transient(dag_run)

dag_run.dag = dag
info = dag_run.task_instance_scheduling_decisions(session=session)
schedulable_tis = info.schedulable_tis
try:
for ti in dag_run.get_task_instances():
# all tasks part of the backfill are scheduled to run
if ti.state == State.NONE:
ti.set_state(State.SCHEDULED, session=session)
for ti in dag_run.get_task_instances(session=session):
if ti in schedulable_tis:
ti.set_state(State.SCHEDULED)
if ti.state != State.REMOVED:
tasks_to_run[ti.key] = ti
session.commit()
except Exception:
session.rollback()
raise

return tasks_to_run

def _log_progress(self, ti_status):
Expand Down Expand Up @@ -464,13 +465,6 @@ def _per_task_process(key, ti: TaskInstance, session=None):
ti_status.running.pop(key)
return

# guard against externally modified tasks instances or
# in case max concurrency has been reached at task runtime
elif ti.state == State.NONE:
self.log.warning(
"FIXME: Task instance %s state was set to None externally. This should not happen", ti
)
ti.set_state(State.SCHEDULED, session=session)
if self.rerun_failed_tasks:
# Rerun failed tasks or upstreamed failed tasks
if ti.state in (State.FAILED, State.UPSTREAM_FAILED):
Expand Down Expand Up @@ -724,7 +718,7 @@ def _execute_dagruns(self, dagrun_infos, ti_status, executor, pickle_id, start_d
for dagrun_info in dagrun_infos:
for dag in [self.dag] + self.dag.subdags:
dag_run = self._get_dag_run(dagrun_info, dag, session=session)
tis_map = self._task_instances_for_dag_run(dag_run, session=session)
tis_map = self._task_instances_for_dag_run(dag, dag_run, session=session)
if dag_run is None:
continue

Expand Down
31 changes: 30 additions & 1 deletion tests/jobs/test_backfill_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
from airflow.operators.dummy import DummyOperator
from airflow.utils import timezone
from airflow.utils.session import create_session
from airflow.utils.state import State
from airflow.utils.state import State, TaskInstanceState
from airflow.utils.timeout import timeout
from airflow.utils.types import DagRunType
from tests.test_utils.db import clear_db_dags, clear_db_pools, clear_db_runs, set_default_pool_slots
Expand Down Expand Up @@ -1517,3 +1517,32 @@ def test_backfill_has_job_id(self):
)
job.run()
assert executor.job_id is not None

def test_task_instances_are_not_set_to_scheduled_when_dagrun_reset(self, dag_maker, session):
"""Test that when dagrun is reset, task instances are not set to scheduled"""

with dag_maker() as dag:
task1 = DummyOperator(task_id='task1')
task2 = DummyOperator(task_id='task2')
task3 = DummyOperator(task_id='task3')
task1 >> task2 >> task3

for i in range(1, 4):
dag_maker.create_dagrun(
run_id=f'test_dagrun_{i}', execution_date=DEFAULT_DATE + datetime.timedelta(days=i)
)

dag.clear()

job = BackfillJob(
dag=dag,
start_date=DEFAULT_DATE + datetime.timedelta(days=1),
end_date=DEFAULT_DATE + datetime.timedelta(days=4),
executor=MockExecutor(),
donot_pickle=True,
)
for dr in DagRun.find(dag_id=dag.dag_id, session=session):
tasks_to_run = job._task_instances_for_dag_run(dag, dr, session=session)
states = [ti.state for _, ti in tasks_to_run.items()]
assert TaskInstanceState.SCHEDULED in states
assert State.NONE in states

0 comments on commit 68ba70a

Please sign in to comment.