# **DAG anatomy**

**Imports**

In [None]:
from datetime import timedelta
# The DAG object; we'll need this to instantiate a DAG
from airflow import DAG
# Operators; we need this to write tasks!
from airflow.operators.bash_operator import BashOperator
# This makes scheduling easy
from airflow.utils.dates import days_ago

**Defining DAG arguments**

In [None]:
# Default arguments for the DAG's tasks. Each of them can be overridden on a
# per-task basis during operator initialization.
# NOTE(review): days_ago(0) is a moving start date; Airflow's guidance favours
# a fixed start_date -- confirm that a moving one is intended here.
default_args = dict(
    owner='Ramesh Sannareddy',  # owner's name
    start_date=days_ago(0),  # when this DAG should run from; days_ago(0) means today
    email=['ramesh@somemail.com'],  # address(es) that receive alerts
    email_on_failure=True,  # send an email when a task fails
    email_on_retry=True,  # ... and also when a task is retried
    retries=2,  # number of retries in case of failure
    retry_delay=timedelta(minutes=5),  # time to wait between retries
)

**DAG definition**

In [None]:
# Instantiate the DAG. The first (positional) argument is the dag identifier;
# the remaining settings are passed by keyword.
dag = DAG(
    'sample-etl-dag',
    schedule_interval=timedelta(days=1),  # how frequently this DAG runs (daily)
    description='Sample ETL DAG using Bash',  # what the dag is about
    default_args=default_args,  # the default arguments defined above
)

**Task definition**

Each task definition contains:

*   task_id : string to identify the task
*   bash_command : the command it represents
*   dag : the dag it belongs to





In [None]:
# Define the three tasks -- extract, transform and load -- in a single pass.
# Each (task_id, bash_command) pair below becomes one BashOperator attached to
# the dag; the operators are created in the order listed and unpacked into
# the names extract, transform and load.
extract, transform, load = (
    BashOperator(dag=dag, task_id=name, bash_command=command)
    for name, command in (
        ('extract', 'echo "extract"'),
        ('transform', 'echo "transform"'),
        ('load', 'echo "load"'),
    )
)

**Task pipeline**

In [None]:
# Execution order: extract runs first, then transform, and finally load.
extract.set_downstream(transform)
transform.set_downstream(load)