In [None]:
'''
How to install Apache Airflow?
- https://www.youtube.com/watch?v=SYOUbiGtGiU
- https://coding-stream-of-consciousness.com/2018/11/06/apache-airflow-windows-10-install-ubuntu/

How to start Apache Airflow?
1. create airflow/dags folder on any directory
2. open Ubuntu bash:
   2.1 type 'cd airflow' to go to airflow directory
   2.2 type 'ls' to view files
   2.3 type 'nano airflow.cfg' to edit config file
       >> change all directory paths to point to the airflow directory you created: /mnt/c/<your_workspace>/airflow
       
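3. stop any airflow services that are already running (press Ctrl+C in their terminal, or kill the process).
   NOTE: the commands below do not stop anything; the '-D' flag daemonizes a service, i.e. starts it in the background:
   3.1 type 'airflow kerberos -D'
   3.2 type 'airflow scheduler -D'
   3.3 type 'airflow webserver -D'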
   
4. initialize and start airflow services:
   4.1 type 'airflow initdb'
   4.2 type 'airflow webserver -p 8080'
   4.3 type 'airflow scheduler'
   
5. open http://localhost:8080/admin in a browser to view the Airflow web UI
'''

In [None]:
import airflow
from airflow import DAG
from airflow.operators.bash_operator import BashOperator

import pytz
from datetime import timedelta, datetime

In [None]:
# Take the date Airflow reports for "0 days ago", pin its tzinfo to UTC
# (replace() swaps the tzinfo without shifting the clock reading), then
# express that same instant in Asia/Kuala_Lumpur local time.
# NOTE(review): this value is recomputed every time the cell runs — confirm
# that a moving start_date is what these DAGs are meant to use.
start_date = (
    airflow.utils.dates.days_ago(0)
    .replace(tzinfo=pytz.utc)
    .astimezone(pytz.timezone('Asia/Kuala_Lumpur'))
)

# Settings handed to each DAG through its `default_args` parameter.
default_args = dict(
    owner='airflow',
    depends_on_past=False,
    start_date=start_date,
    email=['tan.lim@opcbiz.com'],
    email_on_failure=True,
    email_on_retry=True,
    retries=1,
    retry_delay=timedelta(minutes=5),
)

In [None]:
# Create script1.py, script2.py and script3.py at 'D:\_workspace\airflow\dags\scripts' folder

t1 = BashOperator(
    task_id='run_script1',
    bash_command='python /mnt/d/_workspace/airflow/dags/scripts/script1.py',
    dag=dag
)

t2 = BashOperator(
    task_id='run_script2',
    bash_command='python /mnt/d/_workspace/airflow/dags/scripts/script2.py',
    dag=dag
)

t3 = BashOperator(
    task_id='run_script3',
    bash_command='python /mnt/d/_workspace/airflow/dags/scripts/script3.py',
    dag=dag
)

# Sequential Execution

In [None]:
dag = DAG(
    'Sequential_Airflow_Tutorial',
    default_args=default_args,
    schedule_interval='*/1 * * * *',
    catchup=False
)

In [None]:
# t1 runs first, t2 runs second, followed by t3
t1 >> t2 >> t3

# Parallel Execution

In [None]:
dag = DAG(
    'Parallel_Airflow_Tutorial',
    default_args=default_args,
    schedule_interval='*/1 * * * *',
    catchup=False
)

In [None]:
# NOTE:
# - required to change 'executor = LocalExecutor' at airflow.cfg file
# - required to change 'sql_alchemy_conn' to use actual database

# t1 runs first, and t2, t3 runs parallely
t1 >> t2
t1 >> t3