# In [None]:
from datetime import timedelta, datetime
import pytz
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from twitter_etl_hujan import run_twitter_extract, run_twitter_transform, run_twitter_load

# Default arguments applied to every task of the DAG.
#
# NOTE: a pytz zone has to be attached with ``localize()``.  Passing it as
# ``tzinfo=`` to the ``datetime`` constructor silently selects the zone's
# first historical offset (local mean time, roughly +07:07 for Jakarta)
# instead of the offset actually in force on that date (+07:00), which
# would shift the start date by several minutes.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': pytz.timezone('Asia/Jakarta').localize(
        datetime(2024, 11, 20)
    ),
    'retries': 1,  # retry a failed task once ...
    'retry_delay': timedelta(minutes=5),  # ... after waiting five minutes
}

# DAG object for the tweets ETL pipeline: scheduled with the '@daily'
# preset, and with catchup disabled so that runs missed because of a
# delay are not backfilled.
dag_twitter = DAG(
    dag_id='twitter_etl_dag',
    description='Tweets ETL DAG',
    schedule_interval='@daily',
    catchup=False,
    default_args=default_args,
)

# Keyword arguments shared by all three tasks: each one belongs to
# dag_twitter and is created with provide_context=True so that its
# callable can access XCom data.
_common_task_kwargs = {
    'provide_context': True,
    'dag': dag_twitter,
}

# Extract step
run_extract = PythonOperator(
    task_id='complete_twitter_extract',
    python_callable=run_twitter_extract,
    **_common_task_kwargs
)

# Transform step
run_transform = PythonOperator(
    task_id='complete_twitter_transform',
    python_callable=run_twitter_transform,
    **_common_task_kwargs
)

# Load step
run_load = PythonOperator(
    task_id='complete_twitter_load',
    python_callable=run_twitter_load,
    **_common_task_kwargs
)

# Wire the pipeline in order: extract -> transform -> load
run_extract.set_downstream(run_transform)
run_transform.set_downstream(run_load)
