(fix) Bump to airflow 1.10.12 based on PR puckel#623
Melle Boersma authored and MelleB committed Jun 23, 2021
1 parent bed7779 commit 0efc863
Showing 5 changed files with 101 additions and 16 deletions.
Dockerfile (5 changes: 3 additions & 2 deletions)
@@ -1,5 +1,6 @@
# VERSION 1.10.9
# VERSION 1.10.12
# AUTHOR: Matthieu "Puckel_" Roisil
# UPGRADE BY David Wong
# DESCRIPTION: Basic Airflow container
# BUILD: docker build --rm -t puckel/docker-airflow .
# SOURCE: https://github.com/puckel/docker-airflow
@@ -12,7 +13,7 @@ ENV DEBIAN_FRONTEND noninteractive
ENV TERM linux

# Airflow
ARG AIRFLOW_VERSION=1.10.9
ARG AIRFLOW_VERSION=1.10.12
ARG AIRFLOW_USER_HOME=/usr/local/airflow
ARG AIRFLOW_DEPS=""
ARG PYTHON_DEPS=""
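
For reference, the bumped image can be rebuilt with the build command from the Dockerfile header; the AIRFLOW_DEPS and PYTHON_DEPS build args are the ones declared above, and the dependency names below are only illustrative, not something this commit requires:

docker build --rm -t puckel/docker-airflow:1.10.12 .
# Optionally pass extras at build time (example values only):
docker build --rm \
  --build-arg AIRFLOW_DEPS="gcp,statsd" \
  --build-arg PYTHON_DEPS="flask_oauthlib>=0.9" \
  -t puckel/docker-airflow:1.10.12 .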
config/airflow.cfg (96 changes: 90 additions & 6 deletions)
@@ -110,6 +110,12 @@ sql_alchemy_pool_pre_ping = True
# SqlAlchemy supports databases with the concept of multiple schemas.
sql_alchemy_schema =

# Import path for connect args in SqlAlchemy. Default to an empty dict.
# This is useful when you want to configure db engine args that SqlAlchemy won't parse
# in connection string.
# See https://docs.sqlalchemy.org/en/13/core/engines.html#sqlalchemy.create_engine.params.connect_args
# sql_alchemy_connect_args =

# The amount of parallelism as a setting to the executor. This defines
# the max number of task instances that should run simultaneously
# on this airflow installation
@@ -124,11 +130,16 @@ dags_are_paused_at_creation = True
# The maximum number of active DAG runs per DAG
max_active_runs_per_dag = 16

# Whether to load the examples that ship with Airflow. It's good to
# Whether to load the DAG examples that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_examples = True

# Whether to load the default connections that ship with Airflow. It's good to
# get started, but you probably want to set this to False in a production
# environment
load_default_connections = False

# Where your Airflow plugins are stored
plugins_folder = /usr/local/airflow/plugins

@@ -184,17 +195,51 @@ dag_discovery_safe_mode = True
# The number of retries each task is going to have by default. Can be overridden at dag or task level.
default_task_retries = 0

# Whether to serialises DAGs and persist them in DB.
# Whether to serialise DAGs and persist them in DB.
# If set to True, Webserver reads from DB instead of parsing DAG files
# More details: https://airflow.apache.org/docs/stable/dag-serialization.html
store_serialized_dags = False

# Updating serialized DAG can not be faster than a minimum interval to reduce database write rate.
min_serialized_dag_update_interval = 30

# Fetching serialized DAG can not be faster than a minimum interval to reduce database
# read rate. This config controls when your DAGs are updated in the Webserver
min_serialized_dag_fetch_interval = 10

# Whether to persist DAG files code in DB.
# If set to True, Webserver reads file contents from DB instead of
# trying to access files in a DAG folder. Defaults to same as the
# ``store_serialized_dags`` setting.
# Example: store_dag_code = False
# store_dag_code =

# Maximum number of Rendered Task Instance Fields (Template Fields) per task to store
# in the Database.
# When Dag Serialization is enabled (``store_serialized_dags=True``), all the template_fields
# for each of Task Instance are stored in the Database.
# Keeping this number small may cause an error when you try to view ``Rendered`` tab in
# TaskInstance view for older tasks.
max_num_rendered_ti_fields_per_task = 30

# On each dagrun check against defined SLAs
check_slas = True

# Path to custom XCom class that will be used to store and resolve operators results
# Example: xcom_backend = path.to.CustomXCom
xcom_backend = airflow.models.xcom.BaseXCom

[secrets]
# Full class name of secrets backend to enable (will precede env vars and metastore in search path)
# Example: backend = airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend
backend =

# The backend_kwargs param is loaded into a dictionary and passed to __init__ of secrets backend class.
# See documentation for the secrets backend you are using. JSON is expected.
# Example for AWS Systems Manager ParameterStore:
# ``{{"connections_prefix": "/airflow/connections", "profile_name": "default"}}``
backend_kwargs =

[cli]
# In what way should the cli access the API. The LocalClient will use the
# database directly, while the json_client will use the api running on the
@@ -212,7 +257,9 @@ endpoint_url = http://localhost:8080
fail_fast = False

[api]
# How to authenticate users of the API
# How to authenticate users of the API. See
# https://airflow.apache.org/docs/stable/security.html for possible values.
# ("airflow.api.auth.backend.default" allows all requests for historic reasons)
auth_backend = airflow.api.auth.backend.default

[lineage]
@@ -245,6 +292,12 @@ default_hive_mapred_queue =
# airflow sends to point links to the right web server
base_url = http://localhost:8080

# Default timezone to display all dates in the RBAC UI, can be UTC, system, or
# any IANA timezone string (e.g. Europe/Amsterdam). If left empty the
# default value of core/default_timezone will be used
# Example: default_ui_timezone = America/New_York
default_ui_timezone =

# The ip specified when starting the web server
web_server_host = 0.0.0.0

@@ -273,6 +326,10 @@ worker_refresh_batch_size = 1
# Number of seconds to wait before refreshing a batch of workers.
worker_refresh_interval = 30

# If set to True, Airflow will track files in plugins_folder directory. When it detects changes,
# then reload the gunicorn.
reload_on_plugin_change = False

# Secret key used to run your flask app
# It should be as random as possible
secret_key = temporary_key
@@ -734,18 +791,30 @@ verify_certs = True
[kubernetes]
# The repository, tag and imagePullPolicy of the Kubernetes Image for the Worker to Run
worker_container_repository =

# Path to the YAML pod file. If set, all other kubernetes-related fields are ignored.
# (This feature is experimental)
pod_template_file =
worker_container_tag =
worker_container_image_pull_policy = IfNotPresent

# If True (default), worker pods will be deleted upon termination
# If True, all worker pods will be deleted upon termination
delete_worker_pods = True

# If False (and delete_worker_pods is True),
# failed worker pods will not be deleted so users can investigate them.
delete_worker_pods_on_failure = False

# Number of Kubernetes Worker Pod creation calls per scheduler loop
worker_pods_creation_batch_size = 1

# The Kubernetes namespace where airflow workers should be created. Defaults to ``default``
namespace = default

# Allows users to launch pods in multiple namespaces.
# Will require creating a cluster-role for the scheduler
multi_namespace_mode = False

# The name of the Kubernetes ConfigMap containing the Airflow Configuration (this file)
# Example: airflow_configmap = airflow-configmap
airflow_configmap =
@@ -782,6 +851,9 @@ dags_in_image = False
# For either git sync or volume mounted DAGs, the worker will look in this subpath for DAGs
dags_volume_subpath =

# For either git sync or volume mounted DAGs, the worker will mount the volume in this path
dags_volume_mount_point =

# For DAGs mounted via a volume claim (mutually exclusive with git-sync and host path)
dags_volume_claim =

@@ -810,6 +882,10 @@ env_from_secret_ref =
# Git credentials and repository for DAGs mounted via Git (mutually exclusive with volume claim)
git_repo =
git_branch =

# Use a shallow clone with a history truncated to the specified number of commits.
# 0 - do not use shallow clone.
git_sync_depth = 1
git_subpath =

# The specific rev or hash the git_sync init container will checkout
@@ -931,10 +1007,18 @@ tolerations =
# Note that if no _request_timeout is specified, the kubernetes client will wait indefinitely
# for kubernetes api responses, which will cause the scheduler to hang.
# The timeout is specified as [connect timeout, read timeout]
kube_client_request_args = {{"_request_timeout" : [60,60] }}
kube_client_request_args =

# Optional keyword arguments to pass to the ``delete_namespaced_pod`` kubernetes client
# ``core_v1_api`` method when using the Kubernetes Executor.
# This should be an object and can contain any of the options listed in the ``v1DeleteOptions``
# class defined here:
# https://github.com/kubernetes-client/python/blob/41f11a09995efcd0142e25946adc7591431bfb2f/kubernetes/client/models/v1_delete_options.py#L19
# Example: delete_option_kwargs = {{"grace_period_seconds": 10}}
delete_option_kwargs =

# Specifies the uid to run the first process of the worker pods containers as
run_as_user =
run_as_user = 50000

# Specifies a gid to associate with all containers in the worker pods
# if using a git_ssh_key_secret_name use an fs_group
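
Most of the options added above do not have to be hard-coded in config/airflow.cfg: Airflow also reads them from AIRFLOW__<SECTION>__<KEY> environment variables, the same mechanism the compose files below use for the executor and broker URL. A rough sketch with illustrative values, using the secrets backend class named in the [secrets] section comment:

docker run -d -p 8080:8080 \
  -e AIRFLOW__CORE__LOAD_DEFAULT_CONNECTIONS=False \
  -e AIRFLOW__WEBSERVER__DEFAULT_UI_TIMEZONE=Europe/Amsterdam \
  -e AIRFLOW__SECRETS__BACKEND=airflow.contrib.secrets.aws_systems_manager.SystemsManagerParameterStoreBackend \
  -e AIRFLOW__SECRETS__BACKEND_KWARGS='{"connections_prefix": "/airflow/connections", "profile_name": "default"}' \
  puckel/docker-airflow:1.10.12 webserver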
docker-compose-CeleryExecutor.yml (8 changes: 4 additions & 4 deletions)
@@ -16,7 +16,7 @@ services:
# - ./pgdata:/var/lib/postgresql/data/pgdata

webserver:
image: puckel/docker-airflow:1.10.9
image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- postgres
@@ -43,7 +43,7 @@ services:
retries: 3

flower:
image: puckel/docker-airflow:1.10.9
image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- redis
@@ -55,7 +55,7 @@ services:
command: flower

scheduler:
image: puckel/docker-airflow:1.10.9
image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- webserver
@@ -74,7 +74,7 @@ services:
command: scheduler

worker:
image: puckel/docker-airflow:1.10.9
image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- scheduler
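
The updated Celery stack can then be pulled and started with the usual docker-compose workflow; nothing here is specific to this commit:

docker-compose -f docker-compose-CeleryExecutor.yml pull
docker-compose -f docker-compose-CeleryExecutor.yml up -d
docker-compose -f docker-compose-CeleryExecutor.yml ps    # webserver, scheduler, worker, flower, redis, postgres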
docker-compose-LocalExecutor.yml (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@ services:
max-file: "3"

webserver:
image: puckel/docker-airflow:1.10.9
image: puckel/docker-airflow:1.10.12
restart: always
depends_on:
- postgres
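
Likewise for the LocalExecutor variant; the container-name filter below is an assumption about the default compose service name, used only as a convenience to confirm the bumped version:

docker-compose -f docker-compose-LocalExecutor.yml up -d
docker exec -ti $(docker ps -qf "name=webserver") airflow version    # expect 1.10.12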
script/entrypoint.sh (6 changes: 3 additions & 3 deletions)
@@ -37,7 +37,7 @@ wait_for_port() {
echo >&2 "$(date) - $host:$port still not reachable, giving up"
exit 1
fi
echo "$(date) - waiting for $name... $j/$TRY_LOOP"
echo "$(date) - waiting for $name($host:$port)... $j/$TRY_LOOP"
sleep 5
done
}
@@ -100,8 +100,8 @@ if [ "$AIRFLOW__CORE__EXECUTOR" = "CeleryExecutor" ]; then
else
# Derive useful variables from the AIRFLOW__ variables provided explicitly by the user
REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')
REDIS_HOST=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f1)
REDIS_PORT=$(echo -n "$POSTGRES_ENDPOINT" | cut -d ':' -f2)
REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)
REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)
fi

wait_for_port "Redis" "$REDIS_HOST" "$REDIS_PORT"
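
To illustrate the fix above: the host and port are now parsed from the Redis broker URL rather than from POSTGRES_ENDPOINT. A quick sketch with a made-up broker URL shows what the corrected lines produce:

AIRFLOW__CELERY__BROKER_URL="redis://:s3cret@redis:6379/1"
REDIS_ENDPOINT=$(echo -n "$AIRFLOW__CELERY__BROKER_URL" | cut -d '/' -f3 | sed -e 's,.*@,,')   # -> redis:6379
REDIS_HOST=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f1)                                       # -> redis
REDIS_PORT=$(echo -n "$REDIS_ENDPOINT" | cut -d ':' -f2)                                       # -> 6379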