Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 16 additions & 15 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,12 @@ env:
HOST_GID: 1000
_AIRFLOW_WWW_USER_USERNAME: ci
_AIRFLOW_WWW_USER_PASSWORD: ci
# PSQL_USER_MAIN: ${{ secrets.CI_PSQL_USER }}
# PSQL_PASSWORD_MAIN: ${{ secrets.CI_PSQL_PASSWORD }}
# PSQL_HOST_MAIN: ${{ secrets.CI_PSQL_HOST }}
# PSQL_PORT_MAIN: ${{ secrets.CI_PSQL_PORT }}
# PSQL_DB_MAIN: ${{ secrets.CI_PSQL_DB }}
AIRFLOW_PSQL_USER_MAIN: ${{ secrets.CI_PSQL_USER }}
AIRFLOW_PSQL_PASSWORD_MAIN: ${{ secrets.CI_PSQL_PASSWORD }}
AIRFLOW_PSQL_HOST_MAIN: ${{ secrets.CI_PSQL_HOST }}
AIRFLOW_PSQL_PORT_MAIN: ${{ secrets.CI_PSQL_PORT }}
AIRFLOW_PSQL_DB_MAIN: ${{ secrets.CI_PSQL_DB }}
EPISCANNER_HOST_DATA: /home/runner/work/AlertFlow/AlertFlow/alertflow/episcanner-downloader/epi_scanner/data/

jobs:
main:
Expand All @@ -40,17 +41,17 @@ jobs:
- name: Check if repository is a fork
run: |
if [[ "${{ github.event.repository.fork }}" == "true" ]]; then
export PSQL_USER_MAIN="${{ secrets.CI_PSQL_USER_FORK }}"
export PSQL_PASSWORD_MAIN="${{ secrets.CI_PSQL_PASSWORD_FORK }}"
export PSQL_HOST_MAIN="${{ secrets.CI_PSQL_HOST_FORK }}"
export PSQL_PORT_MAIN="${{ secrets.CI_PSQL_PORT_FORK }}"
export PSQL_DB_MAIN="${{ secrets.CI_PSQL_DB_FORK }}"
export AIRFLOW_PSQL_USER_MAIN="${{ secrets.CI_PSQL_USER_FORK }}"
export AIRFLOW_PSQL_PASSWORD_MAIN="${{ secrets.CI_PSQL_PASSWORD_FORK }}"
export AIRFLOW_PSQL_HOST_MAIN="${{ secrets.CI_PSQL_HOST_FORK }}"
export AIRFLOW_PSQL_PORT_MAIN="${{ secrets.CI_PSQL_PORT_FORK }}"
export AIRFLOW_PSQL_DB_MAIN="${{ secrets.CI_PSQL_DB_FORK }}"
else
export PSQL_USER_MAIN="${{ secrets.CI_PSQL_USER }}"
export PSQL_PASSWORD_MAIN="${{ secrets.CI_PSQL_PASSWORD }}"
export PSQL_HOST_MAIN="${{ secrets.CI_PSQL_HOST }}"
export PSQL_PORT_MAIN="${{ secrets.CI_PSQL_PORT }}"
export PSQL_DB_MAIN="${{ secrets.CI_PSQL_DB }}"
export AIRFLOW_PSQL_USER_MAIN="${{ secrets.CI_PSQL_USER }}"
export AIRFLOW_PSQL_PASSWORD_MAIN="${{ secrets.CI_PSQL_PASSWORD }}"
export AIRFLOW_PSQL_HOST_MAIN="${{ secrets.CI_PSQL_HOST }}"
export AIRFLOW_PSQL_PORT_MAIN="${{ secrets.CI_PSQL_PORT }}"
export AIRFLOW_PSQL_DB_MAIN="${{ secrets.CI_PSQL_DB }}"
fi

- name: Semantic Release Title Check
Expand Down
103 changes: 103 additions & 0 deletions alertflow/dags/episcanner/episcanner_export_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import os
from datetime import datetime, timedelta

from airflow import DAG
from airflow.models import Variable
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from dotenv import dotenv_values, set_key


def set_airflow_variables():
    """
    Copy the Episcanner PostgreSQL connection settings from the process
    environment into Airflow Variables and persist them to the
    episcanner-downloader ``.env`` file.

    Raises
    ------
    ValueError
        If any required ``AIRFLOW_PSQL_*_MAIN`` environment variable is
        unset, instead of silently passing ``None`` to ``Variable.set``
        / ``set_key``.
    """
    # Target variable name -> source environment variable.
    env_map = {
        'PSQL_USER': 'AIRFLOW_PSQL_USER_MAIN',
        'PSQL_PASSWORD': 'AIRFLOW_PSQL_PASSWORD_MAIN',
        'PSQL_HOST': 'AIRFLOW_PSQL_HOST_MAIN',
        'PSQL_PORT': 'AIRFLOW_PSQL_PORT_MAIN',
        'PSQL_DB': 'AIRFLOW_PSQL_DB_MAIN',
    }

    values = {}
    for var_name, env_name in env_map.items():
        value = os.environ.get(env_name)
        if value is None:
            # set_key() cannot serialize None (and Variable.set would
            # store the string 'None'), so fail fast with a clear error.
            raise ValueError(
                f'Missing required environment variable: {env_name}'
            )
        values[var_name] = value

    # Expose the settings to other tasks/DAGs via Airflow Variables.
    for var_name, value in values.items():
        Variable.set(var_name, value)

    # Persist only the five connection keys to the downloader's .env
    # file; set_key() already preserves any other keys in that file, so
    # there is no need to read and rewrite the whole file.
    dotenv_path = '/opt/airflow/episcanner-downloader/.env'
    for var_name, value in values.items():
        set_key(dotenv_path, var_name, value)


# Default arguments applied to every task in the DAG below.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2023, 5, 21),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

with DAG(
    'EPISCANNER_DOWNLOADER',
    default_args=default_args,
    schedule_interval='0 3 * * 0',  # Every Sunday at 3 AM
    catchup=False,
) as dag:
    # NOTE: tasks created inside the ``with DAG(...)`` context manager
    # are attached to the DAG automatically, so ``dag=dag`` is redundant
    # and has been removed from every operator below.

    # Clone a shallow copy of the episcanner-downloader repository.
    clone_repository = BashOperator(
        task_id='clone_repository',
        bash_command=(
            'git clone --branch main --single-branch --depth 1 '
            'https://github.com/AlertaDengue/episcanner-downloader.git '
            '/opt/airflow/episcanner-downloader'
        ),
    )

    # Set variables for the Episcanner-PostgreSQL connection.
    set_connection_variables = PythonOperator(
        task_id='set_connection_variables',
        python_callable=set_airflow_variables,
    )

    # Install the Episcanner package using Poetry inside its conda env.
    install_episcanner = BashOperator(
        task_id='install_episcanner',
        bash_command=(
            'source /home/airflow/mambaforge/bin/activate'
            ' episcanner-downloader && '
            'cd /opt/airflow/episcanner-downloader && '
            'poetry install'
        ),
    )

    # Download all data to the specified directory.
    # Fixed: the original string concatenation was missing a space after
    # '&&' ('...downloader &&' 'python ...' -> '&&python'), which bash
    # happens to tokenize correctly but is fragile and hard to read.
    episcanner_downloader = BashOperator(
        task_id='episcanner_downloader',
        bash_command=(
            'source /home/airflow/mambaforge/bin/activate'
            ' episcanner-downloader && '
            'cd /opt/airflow/episcanner-downloader && '
            'python epi_scanner/downloader/export_data.py '
            '-s all -d dengue chikungunya -o /opt/airflow/episcanner_data'
        ),
    )

    # Remove the episcanner-downloader repository once the run is done.
    remove_repository = BashOperator(
        task_id='remove_repository',
        bash_command='rm -rf /opt/airflow/episcanner-downloader',
    )

    # Linear pipeline: clone -> configure -> install -> download -> clean up.
    (
        clone_repository
        >> set_connection_variables
        >> install_episcanner
        >> episcanner_downloader
        >> remove_repository
    )
14 changes: 12 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ RUN apt-get update \
vim \
sed \
tar \
gcc \
make \
lzma \
libssl-dev \
libtk8.6 \
Expand All @@ -34,6 +32,8 @@ RUN apt-get update \
libsqlite3-dev \
postgresql-client \
wget \
gettext \
build-essential \
&& rm -rf /var/lib/apt/lists/*


Expand Down Expand Up @@ -61,6 +61,7 @@ COPY --chown=airflow alertflow/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg
COPY --chown=airflow docker/scripts/entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh


USER airflow

WORKDIR ${AIRFLOW_HOME}
Expand All @@ -73,4 +74,13 @@ RUN /usr/local/bin/python -m virtualenv /opt/envs/py310 --python="/opt/py310/bin
"satellite-weather-downloader >= 1.8.2" \
psycopg2

# Install conda and create environment
RUN curl -LO https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh \
&& bash Mambaforge-Linux-x86_64.sh -b -p /home/airflow/mambaforge \
&& rm Mambaforge-Linux-x86_64.sh \
&& /home/airflow/mambaforge/bin/mamba create -y -n episcanner-downloader python=3.11 poetry psycopg2 python-dotenv \
&& chown -R ${HOST_UID}:${HOST_GID} ${AIRFLOW_HOME}/ /home/airflow/mambaforge/

RUN echo "alias activate_episcanner='source /home/airflow/mambaforge/bin/activate episcanner-downloader'" >> /home/airflow/.bashrc

ENTRYPOINT [ "/entrypoint.sh" ]
10 changes: 10 additions & 0 deletions docker/compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,22 @@ x-airflow-common:
AIRFLOW_HOME: ${AIRFLOW_HOME:-/opt/airflow}
AIRFLOW_VAR_PSQL_MAIN_URI: '{"PSQL_MAIN_URI":"${PSQL_URI_MAIN}"}'
AIRFLOW_VAR_CDSAPI_KEY: '{"CDSAPI_KEY":"${CDSAPI_KEY}"}'
#
HOST_UID: ${HOST_UID}
HOST_GID: ${HOST_GID}
      # Episcanner variables
AIRFLOW_PSQL_USER_MAIN: ${AIRFLOW_PSQL_USER_MAIN}
AIRFLOW_PSQL_PASSWORD_MAIN: ${AIRFLOW_PSQL_PASSWORD_MAIN}
AIRFLOW_PSQL_HOST_MAIN: ${AIRFLOW_PSQL_HOST_MAIN}
AIRFLOW_PSQL_PORT_MAIN: ${AIRFLOW_PSQL_PORT_MAIN}
AIRFLOW_PSQL_DB_MAIN: ${AIRFLOW_PSQL_DB_MAIN}
volumes:
- ${AIRFLOW_PROJ_DIR:-.}/alertflow/dags:${AIRFLOW_HOME}/dags
- ${AIRFLOW_PROJ_DIR:-.}/alertflow/logs:${AIRFLOW_HOME}/logs
- ${AIRFLOW_PROJ_DIR:-.}/alertflow/plugins:${AIRFLOW_HOME}/plugins
# Episcanner
- ${EPISCANNER_HOST_DATA}:${AIRFLOW_HOME}/episcanner_data

user: "${AIRFLOW_UID:-50000}:0"
depends_on:
&airflow-common-depends-on
Expand Down