In [None]:
from datetime import datetime

from airflow import DAG
from airflow.providers.google.cloud.operators.bigquery import BigQueryInsertJobOperator
# NOTE(review): `BigQueryOperator` and the `cloud_storage` operators module do
# not appear in current apache-airflow-providers-google releases -- verify that
# the next two imports resolve in the deployed provider version.
from airflow.providers.google.cloud.operators.bigquery import BigQueryOperator
from airflow.providers.google.cloud.operators.cloud_storage import GCSFileTransferOperator
from airflow.providers.google.cloud.transfers.gcs_to_bigquery import GCSToBigQueryOperator

# Arguments handed to the DAG as `default_args`; tasks inherit them unless
# they override a value explicitly.
default_args = dict(
    owner='airflow',
    start_date=datetime(2023, 1, 1),
    retries=3,
)

# DAG definition for the hospital readmission ETL pipeline.
# It is scheduled once per day, and `catchup=False` asks the scheduler not to
# backfill runs for intervals that elapsed since `start_date`.
dag = DAG(
    dag_id='hospital_readmission_etl',
    schedule_interval='@daily',
    catchup=False,
    default_args=default_args,
)

# Task to load the preprocessed CSV file from GCS into BigQuery.
#
# This runs as a BigQuery load job rather than a SQL statement: the previous
# `LOAD DATA INFILE ... INTO TABLE` text is MySQL syntax that BigQuery does not
# accept, and a query job cannot combine a load statement with a destination
# table / write disposition.
upload_data_to_bq = GCSToBigQueryOperator(
    task_id='upload_data_to_bq',
    # Source object: gs://[BUCKET_NAME]/hospital_readmissions_preprocessed.csv
    bucket='[BUCKET_NAME]',
    source_objects=['hospital_readmissions_preprocessed.csv'],
    source_format='CSV',
    destination_project_dataset_table='your_project_id.your_dataset_id.hospital_readmissions',
    # No schema was supplied originally, so let BigQuery infer it.
    # NOTE(review): supply `schema_fields` and `skip_leading_rows` instead if
    # the column types / header row of the CSV are known.
    autodetect=True,
    # Append each daily load; create the table on the first run.
    write_disposition='WRITE_APPEND',
    create_disposition='CREATE_IF_NEEDED',
    dag=dag,
)

# Task to run the transformation queries in BigQuery.
#
# Submits a standard-SQL query job whose text is read from
# `your_sql_transformation_queries.sql` (resolved through the DAG's template
# search path) and stores the result in the `transformed_data` table.
run_transformation = BigQueryInsertJobOperator(
    task_id='run_transformation_queries',
    configuration={
        'query': {
            # Jinja include pulls in the SQL file's contents at render time.
            'query': "{% include 'your_sql_transformation_queries.sql' %}",
            'useLegacySql': False,
            'destinationTable': {
                'projectId': 'your_project_id',
                'datasetId': 'your_dataset_id',
                'tableId': 'transformed_data',
            },
            'createDisposition': 'CREATE_IF_NEEDED',
            # The default (WRITE_EMPTY) fails once the table holds rows, which
            # would break every daily run after the first; rebuild the table
            # on each run instead so the task is re-runnable.
            'writeDisposition': 'WRITE_TRUNCATE',
        },
    },
    dag=dag,
)

# Task ordering: the transformation runs only after the load task.
upload_data_to_bq.set_downstream(run_transformation)
