# Running DAGs

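Sometimes tasks need to run in an order that is more complex than a single chain of dependencies: several tasks may fan out from one step, or one task may need to wait for several others. The best way to express and run such a workflow is with a DAG, or directed acyclic graph, in which each task declares the tasks it must run after.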

In [2]:
# import CloudClient
from cfa.cloudops import CloudClient

# import Task
from cfa.cloudops.batch_helpers import Task

# initialize CloudClient; `cc` is the handle used by the later cells to
# create the job, generate the DAG file, and submit the DAG
# NOTE(review): constructed with no arguments, so it presumably picks up its
# Azure credentials/configuration from the environment — confirm before running
cc = CloudClient()

In [4]:
# Create the job that will hold the example DAG's tasks.
# The same job name is passed to run_dag when the DAG is submitted.
cc.create_job(
    job_name="example-dag",
    pool_name="rr-test-pool",
)

Next, we create our Tasks. The main component of each Task is the command that runs when the Task is executed in Azure.

In [5]:
# define tasks in the DAG
# Each Task is built from the command line it will run. No ordering is
# implied here; dependencies between tasks are declared separately.
#
# data pulls: one for dataset 1, and dataset 2 at two locations
data1 = Task("python3 pull_data1.py")
data2_nat = Task("python3 pull_data2.py --location national")
data2_state = Task("python3 pull_data2.py --location state")
# processing step
proc_data = Task("python3 process_data.py")
# two model runs that differ only in their --iterations value
model_1000 = Task("python3 model_data.py --iterations 1000")
model_5000 = Task("python3 model_data.py --iterations 5000")
# final report
gen_report = Task("python3 generate_report.py")

In [6]:
# Wire up the dependencies: `x.after(y)` declares that x runs after y.
# Resulting shape: three data pulls -> processing -> two model runs -> report.

# processing waits for every data pull
data_pulls = [data1, data2_nat, data2_state]
proc_data.after(data_pulls)

# each model run waits for processing
for model_task in (model_1000, model_5000):
    model_task.after(proc_data)

# the report waits for both model runs
model_runs = [model_1000, model_5000]
gen_report.after(model_runs)

In [11]:
# Write a file representing the DAG so its structure can be inspected.
# Every task in the DAG is passed positionally; the output path is a keyword.
dag_tasks = (
    data1,
    data2_nat,
    data2_state,
    proc_data,
    model_1000,
    model_5000,
    gen_report,
)
cc.generate_dag(*dag_tasks, file_name="example_dag.txt")

In [None]:
# Submit every task in the DAG to the job.
# job_name is the same name that was passed to create_job.
dag_tasks = (
    data1,
    data2_nat,
    data2_state,
    proc_data,
    model_1000,
    model_5000,
    gen_report,
)
cc.run_dag(*dag_tasks, job_name="example-dag")