diff --git a/.circleci/config.yml b/.circleci/config.yml index b5ebaaea5f..9abada0936 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -83,7 +83,7 @@ executors: default: "2015-01-01" disaster_end_date: type: string - default: "2015-01-01" + default: "2015-01-01" environment: POSTGRES_PASSWORD: flowflow MPLBACKEND: "agg" @@ -560,8 +560,8 @@ jobs: docker push flowminder/<>:$CIRCLE_SHA1 run_flowetl_tests: - docker: - - image: circleci/python:3.7 + machine: + image: circleci/classic:201808-01 environment: AIRFLOW_HOME: ./test_airflow_home TESTING: "true" @@ -569,7 +569,6 @@ jobs: steps: - checkout: path: /home/circleci/project/ - - setup_remote_docker - restore_cache: key: flowetl-deps-1-{{ checksum "flowetl/Pipfile.lock"}} - run: @@ -583,7 +582,7 @@ jobs: name: run flowetl integration tests command: | cd flowetl - FLOWETL_TAG=$CIRCLE_SHA1 pipenv run pytest --junit-xml=test_results/pytest/results_integration.xml ./tests + TAG=$CIRCLE_SHA1 pipenv run pytest --junit-xml=test_results/pytest/results_integration.xml ./tests - run: name: run etl module unit tests command: | @@ -874,6 +873,7 @@ workflows: - run_flowetl_tests: requires: - build_flowetl + - build_flowdb <<: *run_always_org_context - build_python_wheel: name: build_flowclient_wheel diff --git a/development_environment b/development_environment index 77cdcd114a..46e68ec77e 100644 --- a/development_environment +++ b/development_environment @@ -4,15 +4,15 @@ ## FlowAuth settings # Database that stores users and servers, defaults to temporary sqlite -#SQLALCHEMY_DATABASE_URI +#SQLALCHEMY_DATABASE_URI # Flask secret key for CSRF protection -SECRET_KEY=secret +SECRET_KEY=secret # At rest encryption of server secret keys -FLOWAUTH_FERNET_KEY="XU-J5xNOtkaUKAoqWT7_VoT3zk2OTuoqKPBN3l0pOFg=" +FLOWAUTH_FERNET_KEY="XU-J5xNOtkaUKAoqWT7_VoT3zk2OTuoqKPBN3l0pOFg=" # Creates demo data when running DEMO_MODE=true # Required by flask -FLASK_APP=flowauth +FLASK_APP=flowauth # Enable/disable flask debug/autoreload FLASK_DEBUG=1 FLOWAUTH_PORT=9091 @@ -22,13 +22,13 @@ FLOWAUTH_LOG_LEVEL=debug ## FlowAPI settings # JWT secret key -JWT_SECRET_KEY=secret +JWT_SECRET_KEY=secret # Shouldn't be relevant as serving now done by hypercorn? -QUART_APP="flowapi.main:create_app()" +QUART_APP="flowapi.main:create_app()" # Shouldn't be relevant as serving now done by hypercorn? 
-QUART_DEBUG=1 +QUART_DEBUG=1 # Error & debug log level -FLOWAPI_LOG_LEVEL=debug +FLOWAPI_LOG_LEVEL=debug # Flowmachine ZMQ host FLOWMACHINE_HOST=localhost FLOWAPI_PORT=9090 @@ -38,25 +38,25 @@ FLOWAPI_IDENTIFIER=TEST_SERVER # Hostname to connect to flowdb FLOWDB_HOST=localhost # Port to connect to flowdb -FLOWDB_PORT=9000 +FLOWDB_PORT=9000 ## FlowMachine settings # To avoid mpl errors -MPLBACKEND="agg" +MPLBACKEND="agg" # Password for redis -REDIS_PASSWORD=fm_redis +REDIS_PASSWORD=fm_redis # Error and debugging log level FLOWMACHINE_LOG_LEVEL=debug # Number of connections to keep open to flowdb -#DB_CONNECTION_POOL_SIZE +#DB_CONNECTION_POOL_SIZE # Number of connections to open in addition if needed -#DB_CONNECTION_POOL_OVERFLOW +#DB_CONNECTION_POOL_OVERFLOW # Hostname of redis -REDIS_HOST=localhost +REDIS_HOST=localhost # Port to connect to redis -REDIS_PORT=6379 +REDIS_PORT=6379 ## FlowMachine server settings #asyncio debug mode @@ -67,41 +67,41 @@ FLOWMACHINE_PORT=5555 ## FlowDB Settings # Superuser -POSTGRES_USER=flowdb +POSTGRES_USER=flowdb # Superuser password -POSTGRES_PASSWORD=flowflow +POSTGRES_PASSWORD=flowflow # Flowmachine user username -FLOWMACHINE_FLOWDB_USER=flowmachine +FLOWMACHINE_FLOWDB_USER=flowmachine # Flowapi user username -FLOWAPI_FLOWDB_USER=flowapi +FLOWAPI_FLOWDB_USER=flowapi # Off by default, enables pldebugger FLOWDB_ENABLE_POSTGRES_DEBUG_MODE=False # Password for flowmachine user -FLOWMACHINE_FLOWDB_PASSWORD=foo +FLOWMACHINE_FLOWDB_PASSWORD=foo # Password for flowapi user -FLOWAPI_FLOWDB_PASSWORD=foo +FLOWAPI_FLOWDB_PASSWORD=foo # Size in bytes to limit cache to CACHE_SIZE="" # Decay rate for cache records CACHE_HALF_LIFE=10000 # Max number of CPUs to use -#MAX_CPUS +#MAX_CPUS # Max number of worker processes to use -#MAX_WORKERS +#MAX_WORKERS # Max number of worker processes to use per query -#MAX_WORKERS_PER_GATHER +#MAX_WORKERS_PER_GATHER # Size of postgres shared memory buffers #SHARED_BUFFERS_SIZE # Set the postgres 'effective_cache_size' parameter #EFFECTIVE_CACHE_SIZE # Enable/disable postgres JIT -#JIT +#JIT # Default stats target for tables -#STATS_TARGET +#STATS_TARGET # Synthetic data settings # Which data generator to use -SYNTHETIC_DATA_GENERATOR=sql +SYNTHETIC_DATA_GENERATOR=sql N_SITES=200 N_CELLS=1000 N_SUBSCRIBERS=50000 @@ -133,6 +133,14 @@ FLOWETL_POSTGRES_PASSWORD=flowetl FLOWETL_POSTGRES_USER=flowetl FLOWETL_POSTGRES_HOST=flowetl_db FLOWETL_POSTGRES_DB=flowetl +FLOWETL_POSTGRES_PORT=9001 + +MOUNT_HOME=/mounts +HOST_CONFIG_DIR=./flowetl/mounts/config +HOST_DUMP_DIR=./flowetl/mounts/dump +HOST_ARCHIVE_DIR=./flowetl/mounts/archive +HOST_QUARANTINE_DIR=./flowetl/mounts/quarantine +HOST_INGEST_DIR=./flowetl/mounts/ingest # Worked examples WORKED_EXAMPLES_PORT=8888 diff --git a/docker-compose.yml b/docker-compose.yml index b9200c639c..0c322a55de 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,7 @@ networks: flowetl_db: - + services: flowdb: @@ -193,15 +193,23 @@ services: stdin_open: true ports: - ${FLOWETL_PORT:?Must set FLOWETL_PORT env var}:8080 + volumes: + - ${HOST_CONFIG_DIR:?Must set HOST_CONFIG_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/config:ro + - ${HOST_DUMP_DIR:?Must set HOST_DUMP_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/dump:rw + - ${HOST_ARCHIVE_DIR:?Must set HOST_ARCHIVE_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/archive:rw + - ${HOST_QUARANTINE_DIR:?Must set HOST_QUARANTINE_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/quarantine:rw + - 
${HOST_INGEST_DIR:?Must set HOST_INGEST_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/ingest:rw environment: AIRFLOW__CORE__EXECUTOR: LocalExecutor AIRFLOW__CORE__SQL_ALCHEMY_CONN: ${SQL_ALCHEMY_CONN:?Must set SQL_ALCHEMY_CONN env var} + AIRFLOW_CONN_FLOWDB: postgres://$POSTGRES_USER:$POSTGRES_PASSWORD@flowdb:5432/flowdb AIRFLOW__CORE__FERNET_KEY: ${FLOWETL_FERNET_KEY:?Must set FLOWETL_FERNET_KEY env var} POSTGRES_USER: ${FLOWETL_POSTGRES_USER:?Must set FLOWETL_POSTGRES_USER env var} POSTGRES_PASSWORD: ${FLOWETL_POSTGRES_PASSWORD:?Must set FLOWETL_POSTGRES_PASSWORD env var} POSTGRES_HOST: ${FLOWETL_POSTGRES_HOST:?Must set FLOWETL_POSTGRES_HOST env var} POSTGRES_DB: ${FLOWETL_POSTGRES_DB:?Must set FLOWETL_POSTGRES_DB env var} + MOUNT_HOME: ${MOUNT_HOME:?Must set MOUNT_HOME env var} networks: - db @@ -216,8 +224,8 @@ services: restart: always ports: - - 5433:5432 - + - ${FLOWETL_POSTGRES_PORT:?Must set FLOWETL_POSTGRES_PORT env var}:5432 + environment: POSTGRES_USER: ${FLOWETL_POSTGRES_USER:?Must set FLOWETL_POSTGRES_USER env var} POSTGRES_PASSWORD: ${FLOWETL_POSTGRES_PASSWORD:?Must set FLOWETL_POSTGRES_PASSWORD env var} diff --git a/flowdb/sql/04_schema_other.sql b/flowdb/sql/04_schema_other.sql index bf8084eccf..d73a189102 100644 --- a/flowdb/sql/04_schema_other.sql +++ b/flowdb/sql/04_schema_other.sql @@ -69,15 +69,12 @@ Schema used for temp storage during etl. CREATE SCHEMA IF NOT EXISTS etl; -CREATE TYPE cdrtype AS ENUM ('voice', 'sms', 'mds', 'topups'); -CREATE TYPE etl_status AS ENUM ('in_process', 'done', 'quarantine'); -CREATE TABLE etl.etl ( +CREATE TABLE etl.etl_records ( id SERIAL NOT NULL, - file_name VARCHAR, - cdr_type cdrtype, + cdr_type VARCHAR, cdr_date DATE, - status etl_status, - time_stamp TIMESTAMP WITH TIME ZONE, + state VARCHAR, + timestamp TIMESTAMP WITH TIME ZONE, PRIMARY KEY (id) ); diff --git a/flowetl/Pipfile b/flowetl/Pipfile index d4d883bb31..aecaec1cab 100644 --- a/flowetl/Pipfile +++ b/flowetl/Pipfile @@ -5,12 +5,13 @@ verify_ssl = true [dev-packages] black = "==19.3b0" -apache-airflow = "==1.10.3" +apache-airflow = {extras = ["postgres"],version = "==1.10.3"} pylint = "*" pytest = "*" docker = "*" ipython = "*" etl = {editable = true,path = "./etl"} +pytest-cov = "*" [packages] diff --git a/flowetl/Pipfile.lock b/flowetl/Pipfile.lock index 3a0474106a..e8f5262fd1 100644 --- a/flowetl/Pipfile.lock +++ b/flowetl/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "c9bac173295496664e2fc55ae75ee8e2b96c7f9604d9bd0c728a8f396fe3801d" + "sha256": "77a73b1040d7ed3b315e08413f50ede39822c98b24eda3ea9b9558290bc6c61a" }, "pipfile-spec": 6, "requires": { @@ -24,6 +24,9 @@ "version": "==0.9.10" }, "apache-airflow": { + "extras": [ + "postgres" + ], "hashes": [ "sha256:73840fa3d2f3a523010ce685aa15714d01a4cf42bd6bd77716c4321f82bab140", "sha256:b90eeed0439c055d3d3eedfb20d1366aeffdb679f8df8685a625883d4bfe5d14" @@ -125,6 +128,42 @@ ], "version": "==3.5.3" }, + "coverage": { + "hashes": [ + "sha256:3684fabf6b87a369017756b551cef29e505cb155ddb892a7a29277b978da88b9", + "sha256:39e088da9b284f1bd17c750ac672103779f7954ce6125fd4382134ac8d152d74", + "sha256:3c205bc11cc4fcc57b761c2da73b9b72a59f8d5ca89979afb0c1c6f9e53c7390", + "sha256:465ce53a8c0f3a7950dfb836438442f833cf6663d407f37d8c52fe7b6e56d7e8", + "sha256:48020e343fc40f72a442c8a1334284620f81295256a6b6ca6d8aa1350c763bbe", + "sha256:5296fc86ab612ec12394565c500b412a43b328b3907c0d14358950d06fd83baf", + "sha256:5f61bed2f7d9b6a9ab935150a6b23d7f84b8055524e7be7715b6513f3328138e", + 
"sha256:68a43a9f9f83693ce0414d17e019daee7ab3f7113a70c79a3dd4c2f704e4d741", + "sha256:6b8033d47fe22506856fe450470ccb1d8ba1ffb8463494a15cfc96392a288c09", + "sha256:7ad7536066b28863e5835e8cfeaa794b7fe352d99a8cded9f43d1161be8e9fbd", + "sha256:7bacb89ccf4bedb30b277e96e4cc68cd1369ca6841bde7b005191b54d3dd1034", + "sha256:839dc7c36501254e14331bcb98b27002aa415e4af7ea039d9009409b9d2d5420", + "sha256:8f9a95b66969cdea53ec992ecea5406c5bd99c9221f539bca1e8406b200ae98c", + "sha256:932c03d2d565f75961ba1d3cec41ddde00e162c5b46d03f7423edcb807734eab", + "sha256:988529edadc49039d205e0aa6ce049c5ccda4acb2d6c3c5c550c17e8c02c05ba", + "sha256:998d7e73548fe395eeb294495a04d38942edb66d1fa61eb70418871bc621227e", + "sha256:9de60893fb447d1e797f6bf08fdf0dbcda0c1e34c1b06c92bd3a363c0ea8c609", + "sha256:9e80d45d0c7fcee54e22771db7f1b0b126fb4a6c0a2e5afa72f66827207ff2f2", + "sha256:a545a3dfe5082dc8e8c3eb7f8a2cf4f2870902ff1860bd99b6198cfd1f9d1f49", + "sha256:a5d8f29e5ec661143621a8f4de51adfb300d7a476224156a39a392254f70687b", + "sha256:aca06bfba4759bbdb09bf52ebb15ae20268ee1f6747417837926fae990ebc41d", + "sha256:bb23b7a6fd666e551a3094ab896a57809e010059540ad20acbeec03a154224ce", + "sha256:bfd1d0ae7e292105f29d7deaa9d8f2916ed8553ab9d5f39ec65bcf5deadff3f9", + "sha256:c62ca0a38958f541a73cf86acdab020c2091631c137bd359c4f5bddde7b75fd4", + "sha256:c709d8bda72cf4cd348ccec2a4881f2c5848fd72903c185f363d361b2737f773", + "sha256:c968a6aa7e0b56ecbd28531ddf439c2ec103610d3e2bf3b75b813304f8cb7723", + "sha256:df785d8cb80539d0b55fd47183264b7002077859028dfe3070cf6359bf8b2d9c", + "sha256:f406628ca51e0ae90ae76ea8398677a921b36f0bd71aab2099dfed08abd0322f", + "sha256:f46087bbd95ebae244a0eda01a618aff11ec7a069b15a3ef8f6b520db523dcf1", + "sha256:f8019c5279eb32360ca03e9fac40a12667715546eed5c5eb59eb381f2f501260", + "sha256:fc5f4d209733750afd2714e9109816a29500718b32dd9a5db01c0cb3a019b96a" + ], + "version": "==4.5.3" + }, "croniter": { "hashes": [ "sha256:0d905dbe6f131a910fd3dde792f0129788cd2cb3a8048c5f7aaa212670b0cef2", @@ -591,6 +630,41 @@ ], "version": "==5.6.3" }, + "psycopg2": { + "hashes": [ + "sha256:02445ebbb3a11a3fe8202c413d5e6faf38bb75b4e336203ee144ca2c46529f94", + "sha256:0e9873e60f98f0c52339abf8f0339d1e22bfe5aae0bcf7aabd40c055175035ec", + "sha256:1148a5eb29073280bf9057c7fc45468592c1bb75a28f6df1591adb93c8cb63d0", + "sha256:259a8324e109d4922b0fcd046e223e289830e2568d6f4132a3702439e5fd532b", + "sha256:28dffa9ed4595429e61bacac41d3f9671bb613d1442ff43bcbec63d4f73ed5e8", + "sha256:314a74302d4737a3865d40ea50e430ce1543c921ba10f39d562e807cfe2edf2a", + "sha256:36b60201b6d215d7658a71493fdf6bd5e60ad9a0cffed39906627ff9f4f3afd3", + "sha256:3f9d532bce54c4234161176ff3b8688ff337575ca441ea27597e112dfcd0ee0c", + "sha256:5d222983847b40af989ad96c07fc3f07e47925e463baa5de716be8f805b41d9b", + "sha256:6757a6d2fc58f7d8f5d471ad180a0bd7b4dd3c7d681f051504fbea7ae29c8d6f", + "sha256:6a0e0f1e74edb0ab57d89680e59e7bfefad2bfbdf7c80eb38304d897d43674bb", + "sha256:6ca703ccdf734e886a1cf53eb702261110f6a8b0ed74bcad15f1399f74d3f189", + "sha256:8513b953d8f443c446aa79a4cc8a898bd415fc5e29349054f03a7d696d495542", + "sha256:9262a5ce2038570cb81b4d6413720484cb1bc52c064b2f36228d735b1f98b794", + "sha256:97441f851d862a0c844d981cbee7ee62566c322ebb3d68f86d66aa99d483985b", + "sha256:a07feade155eb8e69b54dd6774cf6acf2d936660c61d8123b8b6b1f9247b67d6", + "sha256:a9b9c02c91b1e3ec1f1886b2d0a90a0ea07cc529cb7e6e472b556bc20ce658f3", + "sha256:ae88216f94728d691b945983140bf40d51a1ff6c7fe57def93949bf9339ed54a", + "sha256:b360ffd17659491f1a6ad7c928350e229c7b7bd83a2b922b6ee541245c7a776f", + 
"sha256:b4221957ceccf14b2abdabef42d806e791350be10e21b260d7c9ce49012cc19e", + "sha256:b90758e49d5e6b152a460d10b92f8a6ccf318fcc0ee814dcf53f3a6fc5328789", + "sha256:c669ea986190ed05fb289d0c100cc88064351f2b85177cbfd3564c4f4847d18c", + "sha256:d1b61999d15c79cf7f4f7cc9021477aef35277fc52452cf50fd13b713c84424d", + "sha256:de7bb043d1adaaf46e38d47e7a5f703bb3dab01376111e522b07d25e1a79c1e1", + "sha256:e393568e288d884b94d263f2669215197840d097c7e5b0acd1a51c1ea7d1aba8", + "sha256:ed7e0849337bd37d89f2c2b0216a0de863399ee5d363d31b1e5330a99044737b", + "sha256:f153f71c3164665d269a5d03c7fa76ba675c7a8de9dc09a4e2c2cdc9936a7b41", + "sha256:f1fb5a8427af099beb7f65093cbdb52e021b8e6dbdfaf020402a623f4181baf5", + "sha256:f36b333e9f86a2fba960c72b90c34be6ca71819e300f7b1fc3d2b0f0b2c546cd", + "sha256:f4526d078aedd5187d0508aa5f9a01eae6a48a470ed678406da94b4cd6524b7e" + ], + "version": "==2.7.7" + }, "ptyprocess": { "hashes": [ "sha256:923f299cc5ad920c68f2bc0bc98b75b9f838b93b599941a6b63ddbc2476394c0", @@ -635,6 +709,14 @@ "index": "pypi", "version": "==4.6.3" }, + "pytest-cov": { + "hashes": [ + "sha256:2b097cde81a302e1047331b48cadacf23577e431b61e9c6f49a1170bbe3d3da6", + "sha256:e00ea4fdde970725482f1f35630d12f074e121a23801aabf2ae154ec6bdd343a" + ], + "index": "pypi", + "version": "==2.7.1" + }, "python-daemon": { "hashes": [ "sha256:261c859be5c12ae7d4286dc6951e87e9e1a70a882a8b41fd926efc1ec4214f73", diff --git a/flowetl/dags/etl.py b/flowetl/dags/etl.py index 7087346650..02344ef716 100644 --- a/flowetl/dags/etl.py +++ b/flowetl/dags/etl.py @@ -9,26 +9,47 @@ import logging import os +from pathlib import Path + # need to import and not use so that airflow looks here for a DAG from airflow import DAG # pylint: disable=unused-import from pendulum import parse from etl.dag_task_callable_mappings import ( - TEST_TASK_CALLABLES, - PRODUCTION_TASK_CALLABLES, + TEST_ETL_TASK_CALLABLES, + PRODUCTION_ETL_TASK_CALLABLES, ) -from etl.etl_utils import construct_etl_dag +from etl.etl_utils import construct_etl_dag, CDRType +from etl.config_parser import validate_config, get_config_from_file default_args = {"owner": "flowminder", "start_date": parse("1900-01-01")} # Determine if we are in a testing environment - use dummy callables if so if os.environ.get("TESTING", "") == "true": - task_callable_mapping = TEST_TASK_CALLABLES + task_callable_mapping = TEST_ETL_TASK_CALLABLES logging.info("running in testing environment") + dag = construct_etl_dag( + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type="testing", + ) else: - task_callable_mapping = PRODUCTION_TASK_CALLABLES + task_callable_mapping = PRODUCTION_ETL_TASK_CALLABLES logging.info("running in production environment") -dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args -) + # read and validate the config file before creating the DAGs + global_config_dict = get_config_from_file( + config_filepath=os.environ["MOUNT_HOME"] / Path("config/config.yml") + ) + validate_config(global_config_dict=global_config_dict) + + default_args = global_config_dict["default_args"] + + # create DAG for each cdr_type + for cdr_type in CDRType: + + globals()[f"etl_{cdr_type}"] = construct_etl_dag( + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, + ) diff --git a/flowetl/dags/etl_sensor.py b/flowetl/dags/etl_sensor.py index bdd292a141..e2a9d6d99e 100644 --- a/flowetl/dags/etl_sensor.py +++ b/flowetl/dags/etl_sensor.py @@ -3,32 +3,27 @@ # file, You can obtain one at 
http://mozilla.org/MPL/2.0/. # -*- coding: utf-8 -*- -""" -Skeleton specification for ETL sensor DAG -""" import logging -import uuid +import os from airflow import DAG -from airflow.models import DagRun -from airflow.operators.python_operator import PythonOperator -from airflow.api.common.experimental.trigger_dag import trigger_dag +from pendulum import parse -from pendulum import now, parse +from etl.etl_utils import construct_etl_sensor_dag +from etl.dag_task_callable_mappings import ( + TEST_ETL_SENSOR_TASK_CALLABLE, + PRODUCTION_ETL_SENSOR_TASK_CALLABLE, +) default_args = {"owner": "flowminder", "start_date": parse("1900-01-01")} -# pylint: disable=unused-argument -def dummy_trigger_callable(*, dag_run: DagRun, **kwargs): - """ - Dummy callable that triggers ETL dag - """ - logging.info(dag_run) - trigger_dag("etl", run_id=str(uuid.uuid1()), execution_date=now()) - - -with DAG(dag_id="etl_sensor", schedule_interval=None, default_args=default_args) as dag: - - sense = PythonOperator( - task_id="sense", python_callable=dummy_trigger_callable, provide_context=True +if os.environ.get("TESTING", "") == "true": + logging.info("running in testing environment") + dag = construct_etl_sensor_dag( + callable=TEST_ETL_SENSOR_TASK_CALLABLE, default_args=default_args + ) +else: + logging.info("running in production environment") + dag = construct_etl_sensor_dag( + callable=PRODUCTION_ETL_SENSOR_TASK_CALLABLE, default_args=default_args ) diff --git a/flowetl/etl/etl/config_constant.py b/flowetl/etl/etl/config_constant.py new file mode 100644 index 0000000000..a593926cac --- /dev/null +++ b/flowetl/etl/etl/config_constant.py @@ -0,0 +1,23 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +""" +Canonical specification of sample config +""" + +calls_config = {"pattern": r"CALLS_(\d{4}[01]\d[0123]\d).csv.gz", "concurrency": 4} +sms_config = {"pattern": r"SMS_(\d{4}[01]\d[0123]\d).csv.gz", "concurrency": 4} +mds_config = {"pattern": r"MDS_(\d{4}[01]\d[0123]\d).csv.gz", "concurrency": 4} +topups_config = {"pattern": r"TOPUPS_(\d{4}[01]\d[0123]\d).csv.gz", "concurrency": 4} + +config = { + "default_args": {"owner": "flowminder", "start_date": "1900-01-01"}, + "etl": { + "calls": calls_config, + "sms": sms_config, + "mds": mds_config, + "topups": topups_config, + }, +} diff --git a/flowetl/etl/etl/config_parser.py b/flowetl/etl/etl/config_parser.py new file mode 100644 index 0000000000..2ec032937f --- /dev/null +++ b/flowetl/etl/etl/config_parser.py @@ -0,0 +1,72 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +""" +functions used for parsing global config +""" +import yaml + +from pathlib import Path + +from etl.etl_utils import CDRType + + +def validate_config(*, global_config_dict: dict) -> Exception: + """ + Function used to validate the config.yml file. Makes sure we + have entries for each CDR type in CDRType enum and that each + entry has expected information. Either raises Exceptions or + passes silently. 
+ + Parameters + ---------- + global_config_dict : dict + dict containing global config for ETL + """ + keys = global_config_dict.keys() + + exceptions = [] + if "etl" not in keys: + exceptions.append(ValueError("etl must be a toplevel key in the config file")) + + if "default_args" not in keys: + exceptions.append( + ValueError("default_args must be a toplevel key in the config file") + ) + + etl_keys = global_config_dict.get("etl", {}).keys() + if etl_keys != CDRType._value2member_map_.keys(): + exceptions.append( + ValueError(f"etl section must contain subsections for {list(CDRType)}") + ) + + for key, value in global_config_dict.get("etl", {}).items(): + if set(list(value.keys())) != set(["pattern", "concurrency"]): + exc_msg = f""" + Each etl subsection must contain a pattern and concurrency + subsection - not present for {key}. + """ + exceptions.append(ValueError(exc_msg)) + + if exceptions != []: + raise ValueError(exceptions) + + +def get_config_from_file(*, config_filepath: Path) -> dict: + """ + Function used to load configuration from YAML file. + + Parameters + ---------- + config_filepath : Path + Location of the file config.yml + + Returns + ------- + dict + Yaml config loaded into a python dict + """ + content = open(config_filepath, "r").read() + return yaml.load(content, Loader=yaml.FullLoader) diff --git a/flowetl/etl/etl/dag_task_callable_mappings.py b/flowetl/etl/etl/dag_task_callable_mappings.py index 700bafefc6..5a7e75cb3f 100644 --- a/flowetl/etl/etl/dag_task_callable_mappings.py +++ b/flowetl/etl/etl/dag_task_callable_mappings.py @@ -7,11 +7,43 @@ Mapping task id to a python callable. Allows for the specification of a set of dummy callables to be used for testing. """ -from etl.dummy_task_callables import dummy__callable, dummy_failing__callable -from etl.production_task_callables import success_branch__callable +import os +from functools import partial +from pathlib import Path + +from airflow.hooks.postgres_hook import PostgresHook + +from etl.config_parser import get_config_from_file +from etl.dummy_task_callables import ( + dummy__callable, + dummy_failing__callable, + dummy_trigger__callable, +) +from etl.production_task_callables import ( + move_file_and_record_ingestion_state__callable, + render_and_run_sql__callable, + success_branch__callable, + trigger__callable, +) + +mount_paths = { + "dump": Path(f"{os.environ.get('MOUNT_HOME','')}/dump"), + "ingest": Path(f"{os.environ.get('MOUNT_HOME','')}/ingest"), + "archive": Path(f"{os.environ.get('MOUNT_HOME','')}/archive"), + "quarantine": Path(f"{os.environ.get('MOUNT_HOME','')}/quarantine"), +} + +db_hook = PostgresHook(postgres_conn_id="flowdb") +config_path = Path(f"{os.environ.get('MOUNT_HOME','')}/config") +try: + config = get_config_from_file(config_filepath=config_path / "config.yml") +except FileNotFoundError: + # If we are testing then there will be no config file and we + # don't actually need any! 
+ config = {} # callables to be used when testing the structure of the ETL DAG -TEST_TASK_CALLABLES = { +TEST_ETL_TASK_CALLABLES = { "init": dummy__callable, "extract": dummy__callable, "transform": dummy__callable, @@ -24,14 +56,58 @@ } # callables to be used in production -PRODUCTION_TASK_CALLABLES = { - "init": dummy__callable, - "extract": dummy__callable, - "transform": dummy__callable, - "load": dummy__callable, +PRODUCTION_ETL_TASK_CALLABLES = { + "init": partial( + move_file_and_record_ingestion_state__callable, + mount_paths=mount_paths, + from_dir="dump", + to_dir="ingest", + ), + "extract": partial( + render_and_run_sql__callable, + db_hook=db_hook, + config_path=config_path, + template_name="extract", + ), + "transform": partial( + render_and_run_sql__callable, + db_hook=db_hook, + config_path=config_path, + template_name="transform", + ), + "load": partial( + render_and_run_sql__callable, + db_hook=db_hook, + config_path=config_path, + template_name="load", + fixed_sql=True, + ), "success_branch": success_branch__callable, - "archive": dummy__callable, - "quarantine": dummy__callable, - "clean": dummy__callable, + "archive": partial( + move_file_and_record_ingestion_state__callable, + mount_paths=mount_paths, + from_dir="ingest", + to_dir="archive", + ), + "quarantine": partial( + move_file_and_record_ingestion_state__callable, + mount_paths=mount_paths, + from_dir="ingest", + to_dir="quarantine", + ), + "clean": partial( + render_and_run_sql__callable, + db_hook=db_hook, + config_path=config_path, + template_name="clean", + fixed_sql=True, + ), "fail": dummy_failing__callable, } + +TEST_ETL_SENSOR_TASK_CALLABLE = dummy_trigger__callable +PRODUCTION_ETL_SENSOR_TASK_CALLABLE = partial( + trigger__callable, + dump_path=mount_paths["dump"], + cdr_type_config=config.get("etl", {}), +) diff --git a/flowetl/etl/etl/dummy_task_callables.py b/flowetl/etl/etl/dummy_task_callables.py index e88e3fc3cf..5927f6d0c1 100644 --- a/flowetl/etl/etl/dummy_task_callables.py +++ b/flowetl/etl/etl/dummy_task_callables.py @@ -10,7 +10,10 @@ import os import logging +from uuid import uuid1 +from pendulum import utcnow from airflow.models import DagRun, TaskInstance +from airflow.api.common.experimental.trigger_dag import trigger_dag # pylint: disable=unused-argument def dummy__callable(*, dag_run: DagRun, task_instance: TaskInstance, **kwargs): @@ -20,7 +23,6 @@ def dummy__callable(*, dag_run: DagRun, task_instance: TaskInstance, **kwargs): silently. """ logging.info(dag_run) - logging.info(kwargs) if os.environ.get("TASK_TO_FAIL", "") == task_instance.task_id: raise Exception @@ -31,3 +33,12 @@ def dummy_failing__callable(*, dag_run: DagRun, **kwargs): """ logging.info(dag_run) raise Exception + + +def dummy_trigger__callable(*, dag_run: DagRun, **kwargs): + """ + In test env we just want to trigger the etl_testing DAG with + no config. 
+ """ + logging.info(dag_run) + trigger_dag("etl_testing", run_id=str(uuid1()), execution_date=utcnow()) diff --git a/flowetl/etl/etl/etl_utils.py b/flowetl/etl/etl/etl_utils.py index 27c36a615a..3101023b15 100644 --- a/flowetl/etl/etl/etl_utils.py +++ b/flowetl/etl/etl/etl_utils.py @@ -6,13 +6,57 @@ """ Contains utility functions for use in the ETL dag and it's callables """ +import re +import os + +from uuid import UUID +from typing import List, Callable from enum import Enum +from pathlib import Path +from pendulum import parse +from pendulum.date import Date as pendulumDate + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker from airflow import DAG +from airflow.hooks.postgres_hook import PostgresHook from airflow.operators.python_operator import BranchPythonOperator, PythonOperator +from etl import model + + +def construct_etl_sensor_dag(*, callable: Callable, default_args: dict) -> DAG: + """ + This function constructs the sensor single task DAG that triggers ETL + DAGS with correct config based on filename. + + Parameters + ---------- + callable : Callable + The sense callable that deals with finding files and triggering + ETL DAGs + default_args : dict + Default arguments for DAG -def construct_etl_dag(*, task_callable_mapping: dict, default_args: dict) -> DAG: + Returns + ------- + DAG + Airflow DAG + """ + with DAG( + dag_id=f"etl_sensor", schedule_interval=None, default_args=default_args + ) as dag: + sense = PythonOperator( + task_id="sense", python_callable=callable, provide_context=True + ) + + return dag + + +def construct_etl_dag( + *, task_callable_mapping: dict, default_args: dict, cdr_type: str +) -> DAG: """ This function returns an Airflow DAG object of the structure required for ETL. By passing a dictionary mapping task ID to @@ -39,6 +83,8 @@ def construct_etl_dag(*, task_callable_mapping: dict, default_args: dict) -> DAG A set of default args to pass to all callables. Must containt at least "owner" key and "start" key (which must be a pendulum date object) + cdr_type : str + The type of CDR that this ETL DAG will process. Returns ------- @@ -69,7 +115,9 @@ def construct_etl_dag(*, task_callable_mapping: dict, default_args: dict) -> DAG if set(task_callable_mapping.keys()) != expected_keys: raise TypeError("task_callable_mapping argument does not contain correct keys") - with DAG(dag_id="etl", schedule_interval=None, default_args=default_args) as dag: + with DAG( + dag_id=f"etl_{cdr_type}", schedule_interval=None, default_args=default_args + ) as dag: init = PythonOperator( task_id="init", @@ -128,14 +176,37 @@ def construct_etl_dag(*, task_callable_mapping: dict, default_args: dict) -> DAG return dag -def get_session(): +def get_session(*, postgres_conn_id="flowdb"): """ - Dummy for now will get us a session to flowdb + Constructs a sqlalchmy session for use with + the ETLRecord model in flowdb. Can construct + connection to other DB's specified by + postgres_conn_id and reflected in the ENV by + variables of the form AIRFLOW_CONN_${postgres_conn_id} + + Parameters + ---------- + postgres_conn_id : str, optional + The ID of a connection known to airflow. + Should exist as an ENV var of the form + AIRFLOW_CONN_${postgres_conn_id}. 
+ default "flowdb" """ - return "session" + conn_env_var = f"AIRFLOW_CONN_{postgres_conn_id.upper()}" + if conn_env_var not in os.environ: + raise ValueError(f"{conn_env_var} not set") + + engine = create_engine( + "postgresql://", creator=PostgresHook(postgres_conn_id).get_conn + ) + return sessionmaker(bind=engine)() class CDRType(str, Enum): + """ + CDR type enum + """ + CALLS = "calls" SMS = "sms" MDS = "mds" @@ -143,6 +214,171 @@ class CDRType(str, Enum): class State(str, Enum): + """ + ETL state enum + """ + INGEST = "ingest" ARCHIVE = "archive" QUARANTINE = "quarantine" + + +def find_files(*, dump_path: Path, ignore_filenames=["README.md"]) -> List[Path]: + """ + Returns a list of Path objects for all files + found in the dump location. + + Parameters + ---------- + dump_path : Path + The location of the dump path + + ignore_filenames : Path + List of filenames to ignore + + Returns + ------- + List[Path] + List of files found + """ + files = filter(lambda file: file.name not in ignore_filenames, dump_path.glob("*")) + return list(files) + + +def filter_files(*, found_files: List, cdr_type_config: dict): + """ + Takes a list of files and filters them based on two + factors; + 1. Does the filename match any CDR type pattern if not remove + 2. Has the file been successfully ingested if so remove + + Parameters + ---------- + found_files : List + List of found files should be Path objects + cdr_type_config : dict + config dict containing patterns for + each cdr type + + + Returns + ------- + List + Files that can be processed by ETL DAG + """ + filtered_files = [] + for file in found_files: + try: + # try to parse file name + parsed_file_name_config = parse_file_name( + file_name=file.name, cdr_type_config=cdr_type_config + ) + except ValueError: + # couldnt parse moving on + continue + + cdr_type = parsed_file_name_config["cdr_type"] + cdr_date = parsed_file_name_config["cdr_date"] + + session = get_session() + if model.ETLRecord.can_process( + cdr_type=cdr_type, cdr_date=cdr_date, session=session + ): + filtered_files.append(file) + + return filtered_files + + +def get_config(*, file_name: str, cdr_type_config: dict) -> dict: + """ + Create DAG config that is based on filename + + Parameters + ---------- + file_name : str + name of file to construct config for + cdr_type_config : dict + config dict containing patterns for + each cdr type + + Returns + ------- + dict + Dictionary with config for this filename + """ + parsed_file_name_config = parse_file_name( + file_name=file_name, cdr_type_config=cdr_type_config + ) + template_path = f"etl/{parsed_file_name_config['cdr_type']}" + other_config = {"file_name": file_name, "template_path": template_path} + return {**parsed_file_name_config, **other_config} + + +def generate_table_names( + *, cdr_type: CDRType, cdr_date: pendulumDate, uuid: UUID +) -> dict: + """ + Generates table names for the various stages of the ETL process. + + Parameters + ---------- + cdr_type : CDRType + The type of CDR we are dealing with - used to + construct table name for the data's final + resting place. 
+ cdr_date: pendulumDate + The date associated to this files data + uuid : UUID + A uuid to be used in generating table names + + Returns + ------- + dict + [description] + """ + uuid_sans_underscore = str(uuid).replace("-", "") + + extract_table = f"etl.x{uuid_sans_underscore}" + transform_table = f"etl.t{uuid_sans_underscore}" + load_table = f"events.{cdr_type}_{str(cdr_date.date()).replace('-','')}" + + return { + "extract_table": extract_table, + "transform_table": transform_table, + "load_table": load_table, + } + + +def parse_file_name(*, file_name: str, cdr_type_config: dict) -> dict: + """ + Function to parse date of data and cdr type from filename. + Makes use of patterns specified in global config. + + Parameters + ---------- + file_name : str + The file name to parse + cdr_type_config : dict + The config for each CDR type which contains + patterns to match against. + + Returns + ------- + dict + contains files cdr type and the date associated + to the data + """ + file_cdr_type, file_cdr_date = None, None + for cdr_type in CDRType: + pattern = cdr_type_config[cdr_type]["pattern"] + m = re.fullmatch(pattern, file_name) + if m: + file_cdr_type = cdr_type + file_cdr_date = parse(m.groups()[0]) + + if file_cdr_type and file_cdr_date: + parsed_file_info = {"cdr_type": file_cdr_type, "cdr_date": file_cdr_date} + else: + raise ValueError("No pattern match found") + + return parsed_file_info diff --git a/flowetl/etl/etl/model.py b/flowetl/etl/etl/model.py index 50375fdb3a..e0c8f12763 100644 --- a/flowetl/etl/etl/model.py +++ b/flowetl/etl/etl/model.py @@ -14,7 +14,7 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm.session import Session -from etl.etl_utils import CDRType, State +from etl import etl_utils Base = declarative_base() @@ -32,13 +32,13 @@ class ETLRecord(Base): cdr_type = Column(String) cdr_date = Column(Date) state = Column(String) - timestamp = Column(DateTime) + timestamp = Column(DateTime(timezone=True)) def __init__(self, *, cdr_type: str, cdr_date: pendulumDate, state: str): - self.cdr_type = CDRType(cdr_type) + self.cdr_type = etl_utils.CDRType(cdr_type) self.cdr_date = cdr_date - self.state = State(state) + self.state = etl_utils.State(state) self.timestamp = pendulum.utcnow() @classmethod @@ -63,3 +63,38 @@ def set_state( row = cls(cdr_type=cdr_type, cdr_date=cdr_date, state=state) session.add(row) session.commit() + + @classmethod + def can_process(cls, *, cdr_type: str, cdr_date: pendulumDate, session: Session): + """ + Method that determines if a given cdr_type, cdr_date pair is ok to process. + If we have never seen the pair then should process or if pair has been seen but + its current state is quarantine. + + Parameters + ---------- + cdr_type : str + The type of the CDR data + cdr_date : pendulumDate + The date of the CDR data + session : Session + A sqlalchemy session for a DB in which this model exists. + + Returns + ------- + bool + OK to process the pair? 
+ """ + res = ( + session.query(cls) + .filter(cls.cdr_type == cdr_type, cls.cdr_date == cdr_date) + .order_by(cls.timestamp.desc()) + .first() + ) + + if (res is None) or (res.state == "quarantine"): + process = True + else: + process = False + + return process diff --git a/flowetl/etl/etl/production_task_callables.py b/flowetl/etl/etl/production_task_callables.py index 2ee4b7df0c..f90efc6616 100644 --- a/flowetl/etl/etl/production_task_callables.py +++ b/flowetl/etl/etl/production_task_callables.py @@ -10,12 +10,21 @@ import shutil from pathlib import Path +from uuid import uuid1 +from pendulum import utcnow from airflow.models import DagRun, BaseOperator from airflow.hooks.dbapi_hook import DbApiHook +from airflow.api.common.experimental.trigger_dag import trigger_dag from etl.model import ETLRecord -from etl.etl_utils import get_session +from etl.etl_utils import ( + get_session, + find_files, + filter_files, + get_config, + generate_table_names, +) # pylint: disable=unused-argument def render_and_run_sql__callable( @@ -25,6 +34,7 @@ def render_and_run_sql__callable( db_hook: DbApiHook, config_path: Path, template_name: str, + fixed_sql=False, **kwargs, ): """ @@ -48,16 +58,22 @@ def render_and_run_sql__callable( The file name sans .sql that we wish to template. Most likely the same as the task_id. """ - # dag_run.conf["template_path"] -> where the sql templates - # for this dag run live. Determined by the type of the CDR - # this dag is ingesting. If this is voice then template_path - # will be 'etl/voice'. - template_path = config_path / dag_run.conf["template_path"] - - # template name matches the task_id this is being used - # in. If this is the transform task then it will be 'transform' - # and thus the template we use will be 'etl/voice/transform.sql' - template_path = template_path / f"{template_name}.sql" + if fixed_sql: + # for clean and load the sql used will always be the same + # so here we just read that fixed file... + template_path = config_path / f"fixed_sql/{template_name}.sql" + else: + # dag_run.conf["template_path"] -> where the sql templates + # for this dag run live. Determined by the type of the CDR + # this dag is ingesting. If this is voice then template_path + # will be 'etl/voice'. + template_path = config_path / dag_run.conf["template_path"] + + # template name matches the task_id this is being used + # in. If this is the transform task then it will be 'transform' + # and thus the template we use will be 'etl/voice/transform.sql' + template_path = template_path / f"{template_name}.sql" + template = open(template_path).read() # make use of the operator's templating functionality @@ -130,3 +146,57 @@ def success_branch__callable(*, dag_run: DagRun, **kwargs): branch = "archive" return branch + + +def trigger__callable( + *, dag_run: DagRun, dump_path: Path, cdr_type_config: dict, **kwargs +): + """ + Function that determines which files in dump should be processed + and triggers the correct ETL dag with config based on filename. + + Parameters + ---------- + dag_run : DagRun + Passed as part of the Dag context - contains the config. + dump_path : Path + Location of dump directory + cdr_type_config : dict + ETL config for each cdr type + """ + + found_files = find_files(dump_path=dump_path) + logging.info(found_files) + + # remove files that either do not match a pattern + # or have been processed successfully allready... 
+ filtered_files = filter_files( + found_files=found_files, cdr_type_config=cdr_type_config + ) + logging.info(filtered_files) + + # what to do with these!? + bad_files = list(set(found_files) - set(filtered_files)) + logging.info(bad_files) + + configs = [ + (file, get_config(file_name=file.name, cdr_type_config=cdr_type_config)) + for file in filtered_files + ] + logging.info(configs) + + for file, config in configs: + + cdr_type = config["cdr_type"] + cdr_date = config["cdr_date"] + uuid = uuid1() + table_names = generate_table_names( + cdr_type=cdr_type, cdr_date=cdr_date, uuid=uuid + ) + trigger_dag( + f"etl_{cdr_type}", + execution_date=utcnow(), + run_id=f"{file.name}-{str(uuid)}", + conf={**config, **table_names}, + replace_microseconds=False, + ) diff --git a/flowetl/etl/setup.py b/flowetl/etl/setup.py index 6fe96d52ec..8544230c6a 100644 --- a/flowetl/etl/setup.py +++ b/flowetl/etl/setup.py @@ -13,5 +13,5 @@ include_package_data=True, zip_safe=False, # pinning airflow version so that it is the same as in the Dockerfile - install_requires=["apache-airflow==1.10.3"], + install_requires=["apache-airflow[postgres]==1.10.3"], ) diff --git a/flowetl/etl/tests/conftest.py b/flowetl/etl/tests/conftest.py index 754b55c747..38bcd87bb8 100644 --- a/flowetl/etl/tests/conftest.py +++ b/flowetl/etl/tests/conftest.py @@ -28,8 +28,9 @@ class FakeTaskInstance: A fake TaskInstance object """ - def __init__(self, task_id): + def __init__(self, task_id=None, state=None): self.task_id = task_id + self.state = state @pytest.fixture(scope="function") @@ -52,8 +53,8 @@ def create_fake_task_instance(): with specific task_id """ - def fake_task_instance(*, task_id): - return FakeTaskInstance(task_id=task_id) + def fake_task_instance(*, task_id=None, state=None): + return FakeTaskInstance(task_id=task_id, state=state) return fake_task_instance diff --git a/flowetl/etl/tests/test_config.py b/flowetl/etl/tests/test_config.py new file mode 100644 index 0000000000..c97125127c --- /dev/null +++ b/flowetl/etl/tests/test_config.py @@ -0,0 +1,236 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +""" +Tests for configuration parsing +""" +import pytest +import yaml + +from copy import deepcopy +from pathlib import Path +from unittest.mock import Mock, patch +from uuid import uuid1 +from pendulum import parse + +from etl.model import ETLRecord +from etl.config_constant import config +from etl.config_parser import validate_config, get_config_from_file +from etl.etl_utils import ( + CDRType, + find_files, + generate_table_names, + parse_file_name, + filter_files, +) + + +def test_config_validation(): + """ + Check that with valid config dict we get no exception + """ + validate_config(global_config_dict=config) + + +def test_config_validation_fails_no_etl_section(): + """ + Check that we get an exception raised if etl subsection + missing. The exception will also contain two other exceptions. + One for missing etl section and one for missing etl subsections. + """ + bad_config = deepcopy(config) + bad_config.pop("etl") + + with pytest.raises(ValueError) as raised_exception: + validate_config(global_config_dict=bad_config) + + assert len(raised_exception.value.args[0]) == 2 + + +def test_config_validation_fails_no_default_args_section(): + """ + Check that we get an exception raised if default args + subsection missing. 
+ """ + bad_config = deepcopy(config) + bad_config.pop("default_args") + + with pytest.raises(ValueError) as raised_exception: + validate_config(global_config_dict=bad_config) + + assert len(raised_exception.value.args[0]) == 1 + + +def test_config_validation_fails_bad_etl_subsection(): + """ + Check that we get an exception raised if an etl subsection + does not contain correct keys. + """ + bad_config = deepcopy(config) + bad_config["etl"]["calls"].pop("pattern") + + with pytest.raises(ValueError) as raised_exception: + validate_config(global_config_dict=bad_config) + + assert len(raised_exception.value.args[0]) == 1 + + +def test_find_files_default_filter(tmpdir): + """ + Test that find files returns correct files + with default filter argument. + """ + tmpdir.join("A.txt").write("content") + tmpdir.join("B.txt").write("content") + tmpdir.join("README.md").write("content") + + tmpdir_path_obj = Path(dump_path=tmpdir) + + files = find_files(tmpdir_path_obj) + + assert set([file.name for file in files]) == set(["A.txt", "B.txt"]) + + +def test_find_files_default_filter(tmpdir): + """ + Test that find files returns correct files + with non-default filter argument. + """ + tmpdir.join("A.txt").write("content") + tmpdir.join("B.txt").write("content") + tmpdir.join("README.md").write("content") + + tmpdir_path_obj = Path(tmpdir) + + files = find_files(dump_path=tmpdir_path_obj, ignore_filenames=["B.txt", "A.txt"]) + + assert set([file.name for file in files]) == set(["README.md"]) + + +@pytest.mark.parametrize( + "cdr_type", [CDRType("calls"), CDRType("sms"), CDRType("mds"), CDRType("topups")] +) +def test_generate_table_names(cdr_type): + """ + Test that we are able to generate correct temp table names for each cdr_type + """ + uuid = uuid1() + cdr_date = parse("2016-01-01") + + table_names = generate_table_names(uuid=uuid, cdr_type=cdr_type, cdr_date=cdr_date) + + uuid_sans_underscore = str(uuid).replace("-", "") + assert table_names == { + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type}_{str(cdr_date.date()).replace('-','')}", + } + + +@pytest.mark.parametrize( + "file_name,want", + [ + ( + "CALLS_20160101.csv.gz", + {"cdr_type": CDRType("calls"), "cdr_date": parse("20160101")}, + ), + ( + "SMS_20160101.csv.gz", + {"cdr_type": CDRType("sms"), "cdr_date": parse("20160101")}, + ), + ( + "MDS_20160101.csv.gz", + {"cdr_type": CDRType("mds"), "cdr_date": parse("20160101")}, + ), + ( + "TOPUPS_20160101.csv.gz", + {"cdr_type": CDRType("topups"), "cdr_date": parse("20160101")}, + ), + ], +) +def test_parse_file_name(file_name, want): + """ + Test we can parse cdr_type and cdr_date + from filenames based on cdr type config. 
+ """ + cdr_type_config = config["etl"] + got = parse_file_name(file_name=file_name, cdr_type_config=cdr_type_config) + assert got == want + + +def test_parse_file_name_exception(): + """ + Test that we get a value error if filename does + not match any pattern + """ + cdr_type_config = config["etl"] + file_name = "bob.csv" + with pytest.raises(ValueError): + parse_file_name(file_name=file_name, cdr_type_config=cdr_type_config) + + +def test_filter_files(session, monkeypatch): + """ + testing the filter files function + + - set calls on 20160101 to quarantine so should not be filtered + - set calls on 20160102 to archive so should be filtered + - no record for sms 20160101 so should not be filtered + - bad_file.bad doesn't match any pattern so should be filtered + """ + cdr_type_config = config["etl"] + + file1 = Path("./CALLS_20160101.csv.gz") + file2 = Path("./CALLS_20160102.csv.gz") + file3 = Path("./SMS_20160101.csv.gz") + file4 = Path("./bad_file.bad") + + found_files = [file1, file2, file3, file4] + + # add a some etl records + file1_data = { + "cdr_type": "calls", + "cdr_date": parse("2016-01-01").date(), + "state": "quarantine", + } + + file2_data = { + "cdr_type": "calls", + "cdr_date": parse("2016-01-02").date(), + "state": "archive", + } + + ETLRecord.set_state( + cdr_type=file1_data["cdr_type"], + cdr_date=file1_data["cdr_date"], + state=file1_data["state"], + session=session, + ) + + ETLRecord.set_state( + cdr_type=file2_data["cdr_type"], + cdr_date=file2_data["cdr_date"], + state=file2_data["state"], + session=session, + ) + + monkeypatch.setattr("etl.etl_utils.get_session", lambda: session) + filtered_files = filter_files( + found_files=found_files, cdr_type_config=cdr_type_config + ) + assert filtered_files == [file1, file3] + + +def test_get_config_from_file(tmpdir): + """ + Test that we can load yaml to dict from file + """ + sample_dict = {"A": 23, "B": [1, 2, 34], "C": {"A": "bob"}} + config_dir = tmpdir.mkdir("config") + config_file = config_dir.join("config.yml") + config_file.write(yaml.dump(sample_dict)) + + config = get_config_from_file(config_filepath=Path(config_file)) + assert config == sample_dict diff --git a/flowetl/etl/tests/test_construct_etl_dag.py b/flowetl/etl/tests/test_construct_etl_dag.py index 433ca8d15a..46551c43ba 100644 --- a/flowetl/etl/tests/test_construct_etl_dag.py +++ b/flowetl/etl/tests/test_construct_etl_dag.py @@ -14,43 +14,53 @@ from etl.etl_utils import construct_etl_dag from etl.dag_task_callable_mappings import ( - TEST_TASK_CALLABLES, - PRODUCTION_TASK_CALLABLES, + TEST_ETL_TASK_CALLABLES, + PRODUCTION_ETL_TASK_CALLABLES, ) def test_construct_etl_dag_with_test_callables(): """ Make sure that the DAG returned has the correct task callables as - specified in the task_callable_mapping argument. Use TEST_TASK_CALLABLES + specified in the task_callable_mapping argument. Use TEST_ETL_TASK_CALLABLES mapping. 
""" default_args = {"owner": "bob", "start_date": parse("1900-01-01")} - task_callable_mapping = TEST_TASK_CALLABLES + task_callable_mapping = TEST_ETL_TASK_CALLABLES + cdr_type = "spaghetti" dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) + assert dag.dag_id == f"etl_{cdr_type}" + dag_task_callable_mapping = {t.task_id: t.python_callable for t in dag.tasks} - assert dag_task_callable_mapping == TEST_TASK_CALLABLES + assert dag_task_callable_mapping == TEST_ETL_TASK_CALLABLES def test_construct_etl_dag_with_production_callables(): """ Make sure that the DAG returned has the correct task callables as - specified in the task_callable_mapping argument. Use PRODUCTION_TASK_CALLABLES + specified in the task_callable_mapping argument. Use PRODUCTION_ETL_TASK_CALLABLES mapping. """ default_args = {"owner": "bob", "start_date": parse("1900-01-01")} - task_callable_mapping = PRODUCTION_TASK_CALLABLES + task_callable_mapping = PRODUCTION_ETL_TASK_CALLABLES + cdr_type = "spaghetti" dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) + assert dag.dag_id == f"etl_{cdr_type}" + dag_task_callable_mapping = {t.task_id: t.python_callable for t in dag.tasks} - assert dag_task_callable_mapping == PRODUCTION_TASK_CALLABLES + assert dag_task_callable_mapping == PRODUCTION_ETL_TASK_CALLABLES def test_construct_etl_dag_fails_with_no_start_date(): @@ -58,12 +68,15 @@ def test_construct_etl_dag_fails_with_no_start_date(): Make sure we get an exception if default_args does not contain a start_date """ default_args = {"owner": "bob"} - task_callable_mapping = TEST_TASK_CALLABLES + task_callable_mapping = TEST_ETL_TASK_CALLABLES + cdr_type = "spaghetti" # pylint: disable=unused-variable with pytest.raises(AirflowException): dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) @@ -73,10 +86,13 @@ def test_construct_etl_dag_with_no_owner_defaults_to_airflow(): Airflow. 
""" default_args = {"start_date": parse("1900-01-01")} - task_callable_mapping = TEST_TASK_CALLABLES + task_callable_mapping = TEST_ETL_TASK_CALLABLES + cdr_type = "spaghetti" dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) assert dag.owner == "Airflow" @@ -87,12 +103,15 @@ def test_construct_etl_dag_fails_with_bad_start_date(): If the start_date is not a valid date we get an error """ default_args = {"owner": "bob", "start_date": "bob_time"} - task_callable_mapping = TEST_TASK_CALLABLES + task_callable_mapping = TEST_ETL_TASK_CALLABLES + cdr_type = "spaghetti" # pylint: disable=unused-variable with pytest.raises(ParserError): dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) @@ -103,9 +122,12 @@ def test_construct_etl_dag_fails_with_incorrect_mapping_keys(): """ default_args = {"owner": "bob", "start_date": "bob_time"} task_callable_mapping = {} + cdr_type = "spaghetti" # pylint: disable=unused-variable with pytest.raises(TypeError): dag = construct_etl_dag( - task_callable_mapping=task_callable_mapping, default_args=default_args + task_callable_mapping=task_callable_mapping, + default_args=default_args, + cdr_type=cdr_type, ) diff --git a/flowetl/etl/tests/test_construct_etl_sensor_dag.py b/flowetl/etl/tests/test_construct_etl_sensor_dag.py new file mode 100644 index 0000000000..660b9263a2 --- /dev/null +++ b/flowetl/etl/tests/test_construct_etl_sensor_dag.py @@ -0,0 +1,45 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +from pendulum import parse +from etl.etl_utils import construct_etl_sensor_dag +from etl.dag_task_callable_mappings import ( + TEST_ETL_SENSOR_TASK_CALLABLE, + PRODUCTION_ETL_SENSOR_TASK_CALLABLE, +) + + +def test_construct_etl_sensor_dag_with_test_callable(): + """ + Make sure we get the python callables we expect when using + construct_etl_sensor_dag with testing callables. + """ + default_args = {"owner": "bob", "start_date": parse("1900-01-01")} + task_callable = TEST_ETL_SENSOR_TASK_CALLABLE + + dag = construct_etl_sensor_dag(callable=task_callable, default_args=default_args) + + assert dag.dag_id == f"etl_sensor" + + assert len(dag.tasks) == 1 + + assert dag.tasks[0].python_callable is task_callable + + +def test_construct_etl_sensor_dag_with_production_callable(): + """ + Make sure we get the python callables we expect when using + construct_etl_sensor_dag with production callables. + """ + default_args = {"owner": "bob", "start_date": parse("1900-01-01")} + task_callable = PRODUCTION_ETL_SENSOR_TASK_CALLABLE + + dag = construct_etl_sensor_dag(callable=task_callable, default_args=default_args) + + assert dag.dag_id == f"etl_sensor" + + assert len(dag.tasks) == 1 + + assert dag.tasks[0].python_callable is task_callable diff --git a/flowetl/etl/tests/test_dummy_callables.py b/flowetl/etl/tests/test_dummy_callables.py index 7613d7bde0..1ffb1fddf9 100644 --- a/flowetl/etl/tests/test_dummy_callables.py +++ b/flowetl/etl/tests/test_dummy_callables.py @@ -6,14 +6,20 @@ """ Tests that the testing dummy callables behave as expected. 
""" -from unittest.mock import patch - import pytest +from unittest.mock import patch, Mock +from uuid import uuid1 +from pendulum import utcnow +from airflow.exceptions import DagNotFound -from etl.dummy_task_callables import dummy__callable, dummy_failing__callable +from etl.dummy_task_callables import ( + dummy__callable, + dummy_failing__callable, + dummy_trigger__callable, +) -def test_dummy_callable_succeeds_with_no_TASK_TO_FAIL_env_var_set( +def test_dummy__callable_succeeds_with_no_TASK_TO_FAIL_env_var_set( create_fake_task_instance ): """ @@ -27,7 +33,7 @@ def test_dummy_callable_succeeds_with_no_TASK_TO_FAIL_env_var_set( dummy__callable(dag_run=dag_run, task_instance=task_instance) -def test_dummy_callable_succeeds_when_TASK_TO_FAIL_env_var_is_not_same_as_task_id( +def test_dummy__callable_succeeds_when_TASK_TO_FAIL_env_var_is_not_same_as_task_id( create_fake_task_instance ): """ @@ -43,7 +49,7 @@ def test_dummy_callable_succeeds_when_TASK_TO_FAIL_env_var_is_not_same_as_task_i dummy__callable(dag_run=dag_run, task_instance=task_instance) -def test_dummy_callable_fails_when_TASK_TO_FAIL_env_var_is_same_as_task_id( +def test_dummy__callable_fails_when_TASK_TO_FAIL_env_var_is_same_as_task_id( create_fake_task_instance ): """ @@ -60,9 +66,33 @@ def test_dummy_callable_fails_when_TASK_TO_FAIL_env_var_is_same_as_task_id( dummy__callable(dag_run=dag_run, task_instance=task_instance) -def test_dummy_failing_callable(): +def test_dummy_failing__callable(): """ Test that the dummy_failing_callable raises an Exception """ with pytest.raises(Exception): dummy_failing__callable(dag_run={}) + + +def test_dummy_trigger__callable(monkeypatch): + """ + Test that the dummy trigger callable works as expected. + """ + mock_trigger_dag = Mock() + monkeypatch.setattr("etl.dummy_task_callables.trigger_dag", mock_trigger_dag) + + uuid = uuid1() + monkeypatch.setattr("etl.dummy_task_callables.uuid1", lambda: uuid) + + now = utcnow() + monkeypatch.setattr("etl.dummy_task_callables.utcnow", lambda: now) + + try: + dummy_trigger__callable(dag_run={}) + except DagNotFound: + # the testing etl dag doesn't exist! + pass + + mock_trigger_dag.assert_called_once_with( + "etl_testing", run_id=str(uuid), execution_date=now + ) diff --git a/flowetl/etl/tests/test_get_session.py b/flowetl/etl/tests/test_get_session.py new file mode 100644 index 0000000000..fda108e43e --- /dev/null +++ b/flowetl/etl/tests/test_get_session.py @@ -0,0 +1,106 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +# -*- coding: utf-8 -*- +import os +import pytest + +from unittest.mock import Mock +from etl.etl_utils import get_session + + +def test_get_session_default(monkeypatch): + """ + Make sure we are picking up the flowdb connection string from env + """ + db_name = "bob" + host = "steve" + password = "jimmy" + port = 6666 + user = "sarah" + + monkeypatch.setattr( + "os.environ", + { + **os.environ, + **{ + "AIRFLOW_CONN_FLOWDB": f"postgres://{user}:{password}@{host}:{port}/{db_name}" + }, + }, + ) + mock_psycopg2_connect = Mock() + monkeypatch.setattr("psycopg2.connect", mock_psycopg2_connect) + + s = get_session() + + try: + s.connection() + except TypeError: + # we get an exception because not a real + # connection catching and ignoring + pass + + mock_psycopg2_connect.assert_called_once_with( + dbname=db_name, host=host, password=password, port=port, user=user + ) + + +def test_get_session_fails_if_env_not_set(monkeypatch): + """ + Make sure if env not set we get ValueError + """ + mock_psycopg2_connect = Mock() + monkeypatch.setattr("psycopg2.connect", mock_psycopg2_connect) + + with pytest.raises(ValueError): + s = get_session() + + +def test_get_session_non_default(monkeypatch): + """ + Make sure we are picking up the connection string from env + when non default postgres_conn_id is used + """ + db_name = "bob" + host = "steve" + password = "jimmy" + port = 6666 + user = "sarah" + + monkeypatch.setattr( + "os.environ", + { + **os.environ, + **{ + "AIRFLOW_CONN_SOMEID": f"postgres://{user}:{password}@{host}:{port}/{db_name}" + }, + }, + ) + mock_psycopg2_connect = Mock() + monkeypatch.setattr("psycopg2.connect", mock_psycopg2_connect) + + s = get_session(postgres_conn_id="someid") + + try: + s.connection() + except TypeError: + # we get an exception because not a real + # connection catching and ignoring + pass + + mock_psycopg2_connect.assert_called_once_with( + dbname=db_name, host=host, password=password, port=port, user=user + ) + + +def test_get_session_non_default_fails_if_env_not_set(monkeypatch): + """ + Make sure if env not set we get ValueError when using non default + postgres_conn_id + """ + mock_psycopg2_connect = Mock() + monkeypatch.setattr("psycopg2.connect", mock_psycopg2_connect) + + with pytest.raises(ValueError): + s = get_session(postgres_conn_id="someid") diff --git a/flowetl/etl/tests/test_render_and_run_sql__callable.py b/flowetl/etl/tests/test_render_and_run_sql__callable.py index 10b2d96994..97a079dcc3 100644 --- a/flowetl/etl/tests/test_render_and_run_sql__callable.py +++ b/flowetl/etl/tests/test_render_and_run_sql__callable.py @@ -69,3 +69,46 @@ def test_render_and_run_sql__callable(tmpdir, create_fake_dag_run): # assert that the db_hook was called with correct sql _, _, kwargs = mock_pghook.mock_calls[0] assert kwargs == {"sql": f"select {conf['number']}"} + + +def test_render_and_run_sql__callable_with_fixed_sql(tmpdir, create_fake_dag_run): + """ + Test that the render sql callable, when fixed_sql=True, is able to + construct the correct sql from a template in the correct + location and issues this sql to the db_hook run command. 
+ """ + config_dir = tmpdir.mkdir("config") + fixed_sql_dir = config_dir.mkdir("fixed_sql") + + template_name = "clean" + init_template_path = fixed_sql_dir.join(f"{template_name}.sql") + init_template = "select {{number}}" + init_template_path.write(init_template) + + # Mocks so we can use the templating feature of the operator this + # callable runs in and a mock of the db_hook which will run the sql + fake_dag = DAG(dag_id="testing", default_args={"start_date": "2016-01-01"}) + fake_task_op = BaseOperator(task_id="testing", dag=fake_dag) + mock_pghook = Mock() + + # Mock of the config passed to the dag_run + conf = {"number": 23} + fake_dag_run = create_fake_dag_run(conf=conf) + + # make sure the mock has not been called some other way + assert mock_pghook.mock_calls == [] + + render_and_run_sql__callable( + dag_run=fake_dag_run, + db_hook=mock_pghook, + task=fake_task_op, + config_path=Path(config_dir), + template_name=template_name, + fixed_sql=True, + ) + + assert len(mock_pghook.mock_calls) == 1 + + # assert that the db_hook was called with correct sql + _, _, kwargs = mock_pghook.mock_calls[0] + assert kwargs == {"sql": f"select {conf['number']}"} diff --git a/flowetl/etl/tests/test_success_branch__callable.py b/flowetl/etl/tests/test_success_branch__callable.py new file mode 100644 index 0000000000..fd2eefd663 --- /dev/null +++ b/flowetl/etl/tests/test_success_branch__callable.py @@ -0,0 +1,53 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +from unittest.mock import Mock + +from etl.production_task_callables import success_branch__callable + + +def test_success_branch__callable_success(create_fake_task_instance): + """ + Test that we get the archive branch when all downstream tasks + have been successful + """ + init_ti = create_fake_task_instance(state="success") + extract_ti = create_fake_task_instance(state="success") + transform_ti = create_fake_task_instance(state="success") + load_ti = create_fake_task_instance(state="success") + task_dict = { + "init": init_ti, + "extract": extract_ti, + "transform": transform_ti, + "load": load_ti, + } + + fake_dag_run = Mock() + fake_dag_run.get_task_instance.side_effect = lambda task_id: task_dict[task_id] + + branch = success_branch__callable(dag_run=fake_dag_run) + assert branch == "archive" + + +def test_success_branch__callable_fail(create_fake_task_instance): + """ + Test that if one downstream task fails we get the quarantine branch + """ + init_ti = create_fake_task_instance(state="success") + extract_ti = create_fake_task_instance(state="success") + transform_ti = create_fake_task_instance(state="failed") + load_ti = create_fake_task_instance(state="success") + task_dict = { + "init": init_ti, + "extract": extract_ti, + "transform": transform_ti, + "load": load_ti, + } + + fake_dag_run = Mock() + fake_dag_run.get_task_instance.side_effect = lambda task_id: task_dict[task_id] + + branch = success_branch__callable(dag_run=fake_dag_run) + assert branch == "quarantine" diff --git a/flowetl/etl/tests/test_trigger__callable.py b/flowetl/etl/tests/test_trigger__callable.py new file mode 100644 index 0000000000..44efd71983 --- /dev/null +++ b/flowetl/etl/tests/test_trigger__callable.py @@ -0,0 +1,267 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# -*- coding: utf-8 -*- +from unittest.mock import Mock +from pathlib import Path +from pendulum import utcnow, parse +from uuid import uuid1 + +from etl.model import ETLRecord +from etl.etl_utils import CDRType +from etl.config_constant import config +from etl.production_task_callables import trigger__callable + + +def test_trigger__callable_bad_file_filtered(tmpdir, session, monkeypatch): + """ + Test that the trigger callable picks up files in dump and suitably filters + them. In this case we have one unseen file and one file that matches no + pattern. We expect a single call of the trigger_dag_mock for the unseen file. + """ + dump = tmpdir.mkdir("dump") + + file1 = dump.join("SMS_20160101.csv.gz") + file1.write("blah") + file2 = dump.join("bad_file.bad") + file2.write("blah") + + cdr_type_config = config["etl"] + fake_dag_run = {} + trigger_dag_mock = Mock() + now = utcnow() + uuid = uuid1() + uuid_sans_underscore = str(uuid).replace("-", "") + + monkeypatch.setattr("etl.production_task_callables.uuid1", lambda: uuid) + monkeypatch.setattr("etl.production_task_callables.utcnow", lambda: now) + monkeypatch.setattr("etl.etl_utils.get_session", lambda: session) + monkeypatch.setattr("etl.production_task_callables.trigger_dag", trigger_dag_mock) + + trigger__callable( + dag_run=fake_dag_run, cdr_type_config=cdr_type_config, dump_path=Path(dump) + ) + + assert trigger_dag_mock.call_count == 1 + + cdr_type = CDRType("sms") + cdr_date = parse("2016-01-01") + expected_conf = { + "cdr_type": cdr_type, + "cdr_date": cdr_date, + "file_name": "SMS_20160101.csv.gz", + "template_path": "etl/sms", + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type}_{str(cdr_date.date()).replace('-','')}", + } + + trigger_dag_mock.assert_called_with( + "etl_sms", + conf=expected_conf, + execution_date=now, + run_id=f"SMS_20160101.csv.gz-{uuid}", + replace_microseconds=False, + ) + + +def test_trigger__callable_quarantined_file_not_filtered(tmpdir, session, monkeypatch): + """ + Test that the trigger callable picks up files in dump and suitably filters + them. In this case we have one previously seen and quarantined file. + We expect a single call of the trigger_dag_mock for the quarantined file. 
+ """ + dump = tmpdir.mkdir("dump") + + file1 = dump.join("SMS_20160101.csv.gz") + file1.write("blah") + + # add an etl record + file1_data = { + "cdr_type": "sms", + "cdr_date": parse("2016-01-01").date(), + "state": "quarantine", + } + + ETLRecord.set_state( + cdr_type=file1_data["cdr_type"], + cdr_date=file1_data["cdr_date"], + state=file1_data["state"], + session=session, + ) + + cdr_type_config = config["etl"] + fake_dag_run = {} + trigger_dag_mock = Mock() + now = utcnow() + uuid = uuid1() + uuid_sans_underscore = str(uuid).replace("-", "") + + monkeypatch.setattr("etl.production_task_callables.uuid1", lambda: uuid) + monkeypatch.setattr("etl.production_task_callables.utcnow", lambda: now) + monkeypatch.setattr("etl.etl_utils.get_session", lambda: session) + monkeypatch.setattr("etl.production_task_callables.trigger_dag", trigger_dag_mock) + + trigger__callable( + dag_run=fake_dag_run, cdr_type_config=cdr_type_config, dump_path=Path(dump) + ) + + assert trigger_dag_mock.call_count == 1 + + cdr_type = CDRType("sms") + cdr_date = parse("2016-01-01") + expected_conf = { + "cdr_type": cdr_type, + "cdr_date": cdr_date, + "file_name": "SMS_20160101.csv.gz", + "template_path": "etl/sms", + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type}_{str(cdr_date.date()).replace('-','')}", + } + + trigger_dag_mock.assert_called_with( + "etl_sms", + conf=expected_conf, + execution_date=now, + run_id=f"SMS_20160101.csv.gz-{uuid}", + replace_microseconds=False, + ) + + +def test_trigger__callable_archive_file_filtered(tmpdir, session, monkeypatch): + """ + Test that the trigger callable picks up files in dump and suitably filters + them. In this case we have one previously seen file and one never seen file. + We expect a single call of the trigger_dag_mock for the unseen file. 
+ """ + dump = tmpdir.mkdir("dump") + + file1 = dump.join("SMS_20160101.csv.gz") + file1.write("blah") + file2 = dump.join("SMS_20160102.csv.gz") + file2.write("blah") + + # add an etl record + file1_data = { + "cdr_type": "sms", + "cdr_date": parse("2016-01-01").date(), + "state": "archive", + } + + ETLRecord.set_state( + cdr_type=file1_data["cdr_type"], + cdr_date=file1_data["cdr_date"], + state=file1_data["state"], + session=session, + ) + + cdr_type_config = config["etl"] + fake_dag_run = {} + trigger_dag_mock = Mock() + now = utcnow() + uuid = uuid1() + uuid_sans_underscore = str(uuid).replace("-", "") + + monkeypatch.setattr("etl.production_task_callables.uuid1", lambda: uuid) + monkeypatch.setattr("etl.production_task_callables.utcnow", lambda: now) + monkeypatch.setattr("etl.etl_utils.get_session", lambda: session) + monkeypatch.setattr("etl.production_task_callables.trigger_dag", trigger_dag_mock) + + trigger__callable( + dag_run=fake_dag_run, cdr_type_config=cdr_type_config, dump_path=Path(dump) + ) + + assert trigger_dag_mock.call_count == 1 + + cdr_type = CDRType("sms") + cdr_date = parse("2016-01-02") + expected_conf = { + "cdr_type": cdr_type, + "cdr_date": cdr_date, + "file_name": "SMS_20160102.csv.gz", + "template_path": "etl/sms", + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type}_{str(cdr_date.date()).replace('-','')}", + } + + trigger_dag_mock.assert_called_with( + "etl_sms", + conf=expected_conf, + execution_date=now, + run_id=f"SMS_20160102.csv.gz-{uuid}", + replace_microseconds=False, + ) + + +def test_trigger__callable_multiple_triggers(tmpdir, session, monkeypatch): + """ + Test that the trigger callable picks up files in dump and is able to trigger + multiple etl dag runs. 
+ """ + dump = tmpdir.mkdir("dump") + + file1 = dump.join("SMS_20160101.csv.gz") + file1.write("blah") + file2 = dump.join("CALLS_20160102.csv.gz") + file2.write("blah") + + cdr_type_config = config["etl"] + fake_dag_run = {} + trigger_dag_mock = Mock() + now = utcnow() + uuid = uuid1() + uuid_sans_underscore = str(uuid).replace("-", "") + + monkeypatch.setattr("etl.production_task_callables.uuid1", lambda: uuid) + monkeypatch.setattr("etl.production_task_callables.utcnow", lambda: now) + monkeypatch.setattr("etl.etl_utils.get_session", lambda: session) + monkeypatch.setattr("etl.production_task_callables.trigger_dag", trigger_dag_mock) + + trigger__callable( + dag_run=fake_dag_run, cdr_type_config=cdr_type_config, dump_path=Path(dump) + ) + + assert trigger_dag_mock.call_count == 2 + + cdr_type_file1 = CDRType("sms") + cdr_date_file1 = parse("2016-01-01") + expected_conf_file1 = { + "cdr_type": cdr_type_file1, + "cdr_date": cdr_date_file1, + "file_name": "SMS_20160101.csv.gz", + "template_path": "etl/sms", + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type_file1}_{str(cdr_date_file1.date()).replace('-','')}", + } + + cdr_type_file2 = CDRType("calls") + cdr_date_file2 = parse("2016-01-02") + expected_conf_file2 = { + "cdr_type": cdr_type_file2, + "cdr_date": cdr_date_file2, + "file_name": "CALLS_20160102.csv.gz", + "template_path": "etl/calls", + "extract_table": f"etl.x{uuid_sans_underscore}", + "transform_table": f"etl.t{uuid_sans_underscore}", + "load_table": f"events.{cdr_type_file2}_{str(cdr_date_file2.date()).replace('-','')}", + } + + trigger_dag_mock.assert_any_call( + "etl_sms", + conf=expected_conf_file1, + execution_date=now, + run_id=f"SMS_20160101.csv.gz-{uuid}", + replace_microseconds=False, + ) + + trigger_dag_mock.assert_any_call( + "etl_calls", + conf=expected_conf_file2, + execution_date=now, + run_id=f"CALLS_20160102.csv.gz-{uuid}", + replace_microseconds=False, + ) diff --git a/flowetl/mounts/archive/README.md b/flowetl/mounts/archive/README.md new file mode 100644 index 0000000000..b67dfb1358 --- /dev/null +++ b/flowetl/mounts/archive/README.md @@ -0,0 +1 @@ +# Archive location diff --git a/flowetl/mounts/config/config.yml b/flowetl/mounts/config/config.yml new file mode 100644 index 0000000000..67048d99d1 --- /dev/null +++ b/flowetl/mounts/config/config.yml @@ -0,0 +1,16 @@ +default_args: + owner: flowminder + start_date: '1900-01-01' +etl: + calls: + concurrency: 4 + pattern: CALLS_(\d{4}[01]\d[0123]\d).csv.gz + mds: + concurrency: 4 + pattern: MDS_(\d{4}[01]\d[0123]\d).csv.gz + sms: + concurrency: 4 + pattern: SMS_(\d{4}[01]\d[0123]\d).csv.gz + topups: + concurrency: 4 + pattern: TOPUPS_(\d{4}[01]\d[0123]\d).csv.gz diff --git a/flowetl/mounts/config/etl/calls/extract.sql b/flowetl/mounts/config/etl/calls/extract.sql new file mode 100644 index 0000000000..5cb52db13e --- /dev/null +++ b/flowetl/mounts/config/etl/calls/extract.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS {{ extract_table }}; +CREATE TABLE {{ extract_table }} (LIKE events.{{cdr_type.name.lower()}} INCLUDING ALL) diff --git a/flowetl/mounts/config/etl/calls/transform.sql b/flowetl/mounts/config/etl/calls/transform.sql new file mode 100644 index 0000000000..607d1597ee --- /dev/null +++ b/flowetl/mounts/config/etl/calls/transform.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS {{ transform_table }}; +CREATE TABLE {{ transform_table }} AS ( + SELECT + * + FROM + {{ extract_table }} +); + +ALTER TABLE {{transform_table}} + ALTER 
COLUMN msisdn SET NOT NULL, + ALTER COLUMN datetime SET NOT NULL; diff --git a/flowetl/mounts/config/etl/mds/extract.sql b/flowetl/mounts/config/etl/mds/extract.sql new file mode 100644 index 0000000000..5cb52db13e --- /dev/null +++ b/flowetl/mounts/config/etl/mds/extract.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS {{ extract_table }}; +CREATE TABLE {{ extract_table }} (LIKE events.{{cdr_type.name.lower()}} INCLUDING ALL) diff --git a/flowetl/mounts/config/etl/mds/transform.sql b/flowetl/mounts/config/etl/mds/transform.sql new file mode 100644 index 0000000000..607d1597ee --- /dev/null +++ b/flowetl/mounts/config/etl/mds/transform.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS {{ transform_table }}; +CREATE TABLE {{ transform_table }} AS ( + SELECT + * + FROM + {{ extract_table }} +); + +ALTER TABLE {{transform_table}} + ALTER COLUMN msisdn SET NOT NULL, + ALTER COLUMN datetime SET NOT NULL; diff --git a/flowetl/mounts/config/etl/sms/extract.sql b/flowetl/mounts/config/etl/sms/extract.sql new file mode 100644 index 0000000000..5cb52db13e --- /dev/null +++ b/flowetl/mounts/config/etl/sms/extract.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS {{ extract_table }}; +CREATE TABLE {{ extract_table }} (LIKE events.{{cdr_type.name.lower()}} INCLUDING ALL) diff --git a/flowetl/mounts/config/etl/sms/transform.sql b/flowetl/mounts/config/etl/sms/transform.sql new file mode 100644 index 0000000000..607d1597ee --- /dev/null +++ b/flowetl/mounts/config/etl/sms/transform.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS {{ transform_table }}; +CREATE TABLE {{ transform_table }} AS ( + SELECT + * + FROM + {{ extract_table }} +); + +ALTER TABLE {{transform_table}} + ALTER COLUMN msisdn SET NOT NULL, + ALTER COLUMN datetime SET NOT NULL; diff --git a/flowetl/mounts/config/etl/topups/extract.sql b/flowetl/mounts/config/etl/topups/extract.sql new file mode 100644 index 0000000000..5cb52db13e --- /dev/null +++ b/flowetl/mounts/config/etl/topups/extract.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS {{ extract_table }}; +CREATE TABLE {{ extract_table }} (LIKE events.{{cdr_type.name.lower()}} INCLUDING ALL) diff --git a/flowetl/mounts/config/etl/topups/transform.sql b/flowetl/mounts/config/etl/topups/transform.sql new file mode 100644 index 0000000000..607d1597ee --- /dev/null +++ b/flowetl/mounts/config/etl/topups/transform.sql @@ -0,0 +1,11 @@ +DROP TABLE IF EXISTS {{ transform_table }}; +CREATE TABLE {{ transform_table }} AS ( + SELECT + * + FROM + {{ extract_table }} +); + +ALTER TABLE {{transform_table}} + ALTER COLUMN msisdn SET NOT NULL, + ALTER COLUMN datetime SET NOT NULL; diff --git a/flowetl/mounts/config/fixed_sql/clean.sql b/flowetl/mounts/config/fixed_sql/clean.sql new file mode 100644 index 0000000000..0fb3a2e60e --- /dev/null +++ b/flowetl/mounts/config/fixed_sql/clean.sql @@ -0,0 +1,2 @@ +DROP TABLE IF EXISTS {{ extract_table }}; +DROP TABLE IF EXISTS {{ transform_table }}; diff --git a/flowetl/mounts/config/fixed_sql/load.sql b/flowetl/mounts/config/fixed_sql/load.sql new file mode 100644 index 0000000000..d4b20e7f2c --- /dev/null +++ b/flowetl/mounts/config/fixed_sql/load.sql @@ -0,0 +1,3 @@ +ALTER TABLE {{transform_table}} RENAME TO {{load_table.split(".")[-1]}}; +ALTER TABLE etl.{{load_table.split(".")[-1]}} SET SCHEMA events; +ALTER TABLE events.{{load_table.split(".")[-1]}} INHERIT events.{{cdr_type.name.lower()}}; diff --git a/flowetl/mounts/dump/README.md b/flowetl/mounts/dump/README.md new file mode 100644 index 0000000000..972dfe9dc3 --- /dev/null +++ b/flowetl/mounts/dump/README.md @@ -0,0 +1 @@ +# Dump 
location diff --git a/flowetl/mounts/ingest/README.md b/flowetl/mounts/ingest/README.md new file mode 100644 index 0000000000..42279ef1c0 --- /dev/null +++ b/flowetl/mounts/ingest/README.md @@ -0,0 +1 @@ +# Ingest location diff --git a/flowetl/mounts/quarantine/README.md b/flowetl/mounts/quarantine/README.md new file mode 100644 index 0000000000..04ba06b5ef --- /dev/null +++ b/flowetl/mounts/quarantine/README.md @@ -0,0 +1 @@ +# Quarantine location diff --git a/flowetl/tests/integration/conftest.py b/flowetl/tests/integration/conftest.py index 7bfaf1472f..176a76f53a 100644 --- a/flowetl/tests/integration/conftest.py +++ b/flowetl/tests/integration/conftest.py @@ -8,30 +8,283 @@ """ import os import shutil +import logging +import pytest +from itertools import chain +from pathlib import Path from time import sleep from subprocess import DEVNULL, Popen from pendulum import now, Interval from airflow.models import DagRun - -import pytest -import docker +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, scoped_session +from docker import from_env, APIClient +from docker.types import Mount +from shutil import rmtree @pytest.fixture(scope="session") def docker_client(): """ - docker client object + docker client object - used to run containers + """ + return from_env() + + +@pytest.fixture(scope="session") +def docker_APIClient(): + """ + docker APIClient object - needed to inspect containers """ - return docker.from_env() + return APIClient() @pytest.fixture(scope="session") -def flowetl_tag(): +def tag(): + """ + Get tag to use for containers + """ + return os.environ.get("TAG", "latest") + + +@pytest.fixture(scope="session") +def container_env(): + """ + Environments for each container + """ + flowdb = { + "POSTGRES_USER": "flowdb", + "POSTGRES_PASSWORD": "flowflow", + "POSTGRES_DB": "flowdb", + "FLOWMACHINE_FLOWDB_USER": "flowmachine", + "FLOWAPI_FLOWDB_USER": "flowapi", + "FLOWMACHINE_FLOWDB_PASSWORD": "flowmachine", + "FLOWAPI_FLOWDB_PASSWORD": "flowapi", + } + + flowetl_db = { + "POSTGRES_USER": "flowetl", + "POSTGRES_PASSWORD": "flowetl", + "POSTGRES_DB": "flowetl", + } + + flowetl = { + "AIRFLOW__CORE__EXECUTOR": "LocalExecutor", + "AIRFLOW__CORE__FERNET_KEY": "ssgBqImdmQamCrM9jNhxI_IXSzvyVIfqvyzES67qqVU=", + "AIRFLOW__CORE__SQL_ALCHEMY_CONN": f"postgres://{flowetl_db['POSTGRES_USER']}:{flowetl_db['POSTGRES_PASSWORD']}@flowetl_db:5432/{flowetl_db['POSTGRES_DB']}", + "AIRFLOW_CONN_FLOWDB": f"postgres://{flowdb['POSTGRES_USER']}:{flowdb['POSTGRES_PASSWORD']}@flowdb:5432/flowdb", + "MOUNT_HOME": "/mounts", + "POSTGRES_USER": "flowetl", + "POSTGRES_PASSWORD": "flowetl", + "POSTGRES_DB": "flowetl", + "POSTGRES_HOST": "flowetl_db", + "AIRFLOW__WEBSERVER__WEB_SERVER_HOST": "0.0.0.0", # helpful for circle debugging + } + + return {"flowetl": flowetl, "flowdb": flowdb, "flowetl_db": flowetl_db} + + +@pytest.fixture(scope="session") +def container_ports(): + """ + Exposed ports for flowetl_db and flowdb + """ + flowetl_db_host_port = 9000 + flowdb_host_port = 9001 + + return {"flowetl_db": flowetl_db_host_port, "flowdb": flowdb_host_port} + + +@pytest.fixture(scope="function") +def container_network(docker_client): + """ + A docker network for containers to communicate on + """ + network = docker_client.networks.create("testing", driver="bridge") + yield + network.remove() + + +@pytest.fixture(scope="function") +def data_dir(): + """ + Creates and cleans up a directory for storing pg data. 
+ Used by Flowdb because on unix changing flowdb user is + incompatible with using a volume for the DB's data. + """ + path = f"{os.getcwd()}/pg_data" + if not os.path.exists(path): + os.makedirs(path) + yield path + rmtree(path) + + +@pytest.fixture(scope="function") +def mounts(data_dir): + """ + Various mount objects needed by containers + """ + config_mount = Mount("/mounts/config", f"{os.getcwd()}/mounts/config", type="bind") + archive_mount = Mount( + "/mounts/archive", f"{os.getcwd()}/mounts/archive", type="bind" + ) + dump_mount = Mount("/mounts/dump", f"{os.getcwd()}/mounts/dump", type="bind") + ingest_mount = Mount("/mounts/ingest", f"{os.getcwd()}/mounts/ingest", type="bind") + quarantine_mount = Mount( + "/mounts/quarantine", f"{os.getcwd()}/mounts/quarantine", type="bind" + ) + flowetl_mounts = [ + config_mount, + archive_mount, + dump_mount, + ingest_mount, + quarantine_mount, + ] + + data_mount = Mount("/var/lib/postgresql/data", data_dir, type="bind") + ingest_mount = Mount("/ingest", f"{os.getcwd()}/mounts/ingest", type="bind") + flowdb_mounts = [data_mount, ingest_mount] + + return {"flowetl": flowetl_mounts, "flowdb": flowdb_mounts} + + +@pytest.fixture(scope="function") +def flowdb_container( + docker_client, + docker_APIClient, + tag, + container_env, + container_ports, + container_network, + mounts, +): + """ + Starts flowdb (and cleans up) and waits until healthy + - so that we can be sure connections to the DB will work. + Setting user to uid/gid of user running the tests - necessary + for ingestion. + """ + user = f"{os.getuid()}:{os.getgid()}" + container = docker_client.containers.run( + f"flowminder/flowdb:{tag}", + environment=container_env["flowdb"], + ports={"5432": container_ports["flowdb"]}, + name="flowdb", + network="testing", + mounts=mounts["flowdb"], + healthcheck={"test": "pg_isready -h localhost -U flowdb"}, + user=user, + detach=True, + ) + + healthy = False + while not healthy: + container_info = docker_APIClient.inspect_container(container.id) + healthy = container_info["State"]["Health"]["Status"] == "healthy" + + yield + container.kill() + container.remove() + + +@pytest.fixture(scope="function") +def flowetl_db_container( + docker_client, container_env, container_ports, container_network +): + """ + Start (and clean up) flowetl_db just a vanilla pg 11 + """ + container = docker_client.containers.run( + f"postgres:11.0", + environment=container_env["flowetl_db"], + ports={"5432": container_ports["flowetl_db"]}, + name="flowetl_db", + network="testing", + detach=True, + ) + yield + container.kill() + container.remove() + + +@pytest.fixture(scope="function") +def flowetl_container( + flowdb_container, + flowetl_db_container, + docker_client, + tag, + container_env, + container_network, + mounts, +): + """ + Start (and clean up) flowetl. Setting user to uid/gid of + user running the tests - necessary for moving files between + host directories. + """ + user = f"{os.getuid()}:{os.getgid()}" + container = docker_client.containers.run( + f"flowminder/flowetl:{tag}", + environment=container_env["flowetl"], + name="flowetl", + network="testing", + restart_policy={"Name": "always"}, + ports={"8080": "8080"}, + mounts=mounts["flowetl"], + user=user, + detach=True, + ) + sleep(10) # BADDD but no clear way to know that airflow scheduler is ready! 
+ yield container + container.kill() + container.remove() + + +@pytest.fixture(scope="function") +def trigger_dags(): """ - Get flowetl tag to use + Returns a function that unpauses all DAGs and then triggers + the etl_sensor DAG. """ - return os.environ.get("FLOWETL_TAG", "latest") + + def trigger_dags_function(*, flowetl_container): + + dags = ["etl_sensor", "etl_sms", "etl_mds", "etl_calls", "etl_topups"] + + for dag in dags: + flowetl_container.exec_run(f"airflow unpause {dag}") + + flowetl_container.exec_run("airflow trigger_dag etl_sensor") + + return trigger_dags_function + + +@pytest.fixture(scope="function") +def write_files_to_dump(): + """ + Returns a function that allows for writing a list + of empty files to the dump location. Also cleans + up dump, archive and quarantine. + """ + dump_dir = f"{os.getcwd()}/mounts/dump" + archive_dir = f"{os.getcwd()}/mounts/archive" + quarantine_dir = f"{os.getcwd()}/mounts/quarantine" + + def write_files_to_dump_function(*, file_names): + for file_name in file_names: + Path(f"{dump_dir}/{file_name}").touch() + + yield write_files_to_dump_function + + files_to_remove = chain( + Path(dump_dir).glob("*"), + Path(archive_dir).glob("*"), + Path(quarantine_dir).glob("*"), + ) + files_to_remove = filter(lambda file: file.name != "README.md", files_to_remove) + + [file.unlink() for file in files_to_remove] @pytest.fixture(scope="module") @@ -119,7 +372,7 @@ def run_func(extra_env): p.wait() p = Popen( - "airflow unpause etl".split(), + "airflow unpause etl_testing".split(), shell=False, stdout=DEVNULL, stderr=DEVNULL, @@ -151,14 +404,82 @@ def wait_for_completion(): seems OK...) three minutes raises a TimeoutError. """ - def wait_func(end_state): - time_out = Interval(minutes=3) - t0 = now() - while not DagRun.find("etl", state=end_state): - sleep(1) - t1 = now() - if (t1 - t0) > time_out: - raise TimeoutError - return end_state + def wait_func( + end_state, dag_id, session=None, count=1, time_out=Interval(minutes=3) + ): + # if you actually pass None to DagRun.find it thinks None is the session + # you want to use - need to not pass at all if you want airflow to pick + # up correct session using it's provide_session decorator... + if session is None: + t0 = now() + while len(DagRun.find(dag_id, state=end_state)) != count: + sleep(1) + t1 = now() + if (t1 - t0) > time_out: + raise TimeoutError + return end_state + else: + t0 = now() + while len(DagRun.find(dag_id, state=end_state, session=session)) != count: + sleep(1) + t1 = now() + if (t1 - t0) > time_out: + raise TimeoutError + return end_state return wait_func + + +@pytest.fixture(scope="function") +def flowdb_connection_engine(container_env, container_ports): + """ + Engine for flowdb + """ + conn_str = f"postgresql://{container_env['flowdb']['POSTGRES_USER']}:{container_env['flowdb']['POSTGRES_PASSWORD']}@localhost:{container_ports['flowdb']}/flowdb" + engine = create_engine(conn_str) + + return engine + + +@pytest.fixture(scope="function") +def flowdb_connection(flowdb_connection_engine): + """ + Connection for flowdb - allowing for execution of + raw sql. 
+ """ + connection = flowdb_connection_engine.connect() + trans = connection.begin() + yield connection, trans + connection.close() + + +@pytest.fixture(scope="function") +def flowdb_session(flowdb_connection_engine): + """ + sqlalchmy session for flowdb - used for ORM models + """ + session = sessionmaker(bind=flowdb_connection_engine)() + yield session + session.close() + + +@pytest.fixture(scope="function") +def flowetl_db_connection_engine(container_env, container_ports): + """ + Engine for flowetl_db + """ + conn_str = f"postgresql://{container_env['flowetl_db']['POSTGRES_USER']}:{container_env['flowetl_db']['POSTGRES_PASSWORD']}@localhost:{container_ports['flowetl_db']}/{container_env['flowetl_db']['POSTGRES_DB']}" + logging.info(conn_str) + engine = create_engine(conn_str) + + return engine + + +@pytest.fixture(scope="function") +def flowetl_db_session(flowetl_db_connection_engine): + """ + sqlalchmy session for flowetl - used for ORM models + """ + session = sessionmaker(bind=flowetl_db_connection_engine)() + yield session + session.close() diff --git a/flowetl/tests/integration/test_dag_run.py b/flowetl/tests/integration/test_dag_run.py index c8116cdf4b..4f1afbe949 100644 --- a/flowetl/tests/integration/test_dag_run.py +++ b/flowetl/tests/integration/test_dag_run.py @@ -12,33 +12,6 @@ from airflow.models import DagRun -def test_archive_branch(airflow_local_pipeline_run, wait_for_completion): - """ - Tests that correct tasks run when ETL is succesful - """ - end_state = "success" - - # passing empty TASK_TO_FAIL to signal no task should fail - airflow_local_pipeline_run({"TASK_TO_FAIL": ""}) - final_etl_state = wait_for_completion(end_state) - assert final_etl_state == end_state - - etl_dag = DagRun.find("etl", state=end_state)[0] - - task_states = {task.task_id: task.state for task in etl_dag.get_task_instances()} - assert task_states == { - "init": "success", - "extract": "success", - "transform": "success", - "success_branch": "success", - "load": "success", - "archive": "success", - "quarantine": "skipped", - "clean": "success", - "fail": "skipped", - } - - @pytest.mark.parametrize( "task_to_fail,expected_task_states", [ @@ -56,48 +29,48 @@ def test_archive_branch(airflow_local_pipeline_run, wait_for_completion): "fail": "failed", }, ), - ( - "extract", - { - "init": "success", - "extract": "failed", - "transform": "upstream_failed", - "success_branch": "success", - "load": "upstream_failed", - "archive": "skipped", - "quarantine": "success", - "clean": "success", - "fail": "failed", - }, - ), - ( - "transform", - { - "init": "success", - "extract": "success", - "transform": "failed", - "success_branch": "success", - "load": "upstream_failed", - "archive": "skipped", - "quarantine": "success", - "clean": "success", - "fail": "failed", - }, - ), - ( - "load", - { - "init": "success", - "extract": "success", - "transform": "success", - "success_branch": "success", - "load": "failed", - "archive": "skipped", - "quarantine": "success", - "clean": "success", - "fail": "failed", - }, - ), + # ( + # "extract", + # { + # "init": "success", + # "extract": "failed", + # "transform": "upstream_failed", + # "success_branch": "success", + # "load": "upstream_failed", + # "archive": "skipped", + # "quarantine": "success", + # "clean": "success", + # "fail": "failed", + # }, + # ), + # ( + # "transform", + # { + # "init": "success", + # "extract": "success", + # "transform": "failed", + # "success_branch": "success", + # "load": "upstream_failed", + # "archive": "skipped", + # "quarantine": 
"success", + # "clean": "success", + # "fail": "failed", + # }, + # ), + # ( + # "load", + # { + # "init": "success", + # "extract": "success", + # "transform": "success", + # "success_branch": "success", + # "load": "failed", + # "archive": "skipped", + # "quarantine": "success", + # "clean": "success", + # "fail": "failed", + # }, + # ), ], ) def test_quarantine_branch( @@ -109,10 +82,10 @@ def test_quarantine_branch( """ end_state = "failed" airflow_local_pipeline_run({"TASK_TO_FAIL": task_to_fail}) - final_etl_state = wait_for_completion(end_state) + final_etl_state = wait_for_completion(end_state, dag_id="etl_testing") assert final_etl_state == end_state - etl_dag = DagRun.find("etl", state=end_state)[0] + etl_dag = DagRun.find("etl_testing", state=end_state)[0] task_states = {task.task_id: task.state for task in etl_dag.get_task_instances()} assert task_states == expected_task_states diff --git a/flowetl/tests/integration/test_dags_present.py b/flowetl/tests/integration/test_dags_present.py index d3bc19cd52..89803e2893 100644 --- a/flowetl/tests/integration/test_dags_present.py +++ b/flowetl/tests/integration/test_dags_present.py @@ -17,7 +17,7 @@ def test_dags_present(airflow_local_setup): Test that the correct dags are parsed """ assert set(DagBag(dag_folder="./dags", include_examples=False).dag_ids) == set( - ["etl", "etl_sensor"] + ["etl_testing", "etl_sensor"] ) @@ -25,7 +25,7 @@ def test_dags_present(airflow_local_setup): "dag_name,expected_task_list", [ ( - "etl", + "etl_testing", [ "init", "extract", diff --git a/flowetl/tests/integration/test_fixuid.py b/flowetl/tests/integration/test_fixuid.py index 246ac784a8..bbb02abb74 100644 --- a/flowetl/tests/integration/test_fixuid.py +++ b/flowetl/tests/integration/test_fixuid.py @@ -8,7 +8,7 @@ """ -def test_uid(docker_client, flowetl_tag): +def test_uid(docker_client, tag): """ test that we can run the flowetl container with a specific user. Check UID is correct. @@ -16,12 +16,12 @@ def test_uid(docker_client, flowetl_tag): user = "1002:1003" out = docker_client.containers.run( - f"flowminder/flowetl:{flowetl_tag}", "bash -c 'id -u'", user=user + f"flowminder/flowetl:{tag}", "bash -c 'id -u'", user=user ) assert out.decode("utf-8").strip() == "1002" -def test_gid(docker_client, flowetl_tag): +def test_gid(docker_client, tag): """ test that we can run the flowetl container with a specific user. Check GID is correct. @@ -29,18 +29,18 @@ def test_gid(docker_client, flowetl_tag): user = "1002:1003" out = docker_client.containers.run( - f"flowminder/flowetl:{flowetl_tag}", "bash -c 'id -g'", user=user + f"flowminder/flowetl:{tag}", "bash -c 'id -g'", user=user ) assert out.decode("utf-8").strip() == "1003" -def test_uid_is_airflow(docker_client, flowetl_tag): +def test_uid_is_airflow(docker_client, tag): """ Test that the user we run the container with is the airflow user. 
""" user = "1002:1003" out = docker_client.containers.run( - f"flowminder/flowetl:{flowetl_tag}", "bash -c 'id -u | id -nu'", user=user + f"flowminder/flowetl:{tag}", "bash -c 'id -u | id -nu'", user=user ) assert out.decode("utf-8").strip() == "airflow" diff --git a/flowetl/tests/integration/test_full_pipeline.py b/flowetl/tests/integration/test_full_pipeline.py new file mode 100644 index 0000000000..a276c576f7 --- /dev/null +++ b/flowetl/tests/integration/test_full_pipeline.py @@ -0,0 +1,171 @@ +import os + +from pathlib import Path +from pendulum import parse + +from etl.model import ETLRecord + + +def test_single_file_previously_quarantined( + flowetl_container, + write_files_to_dump, + trigger_dags, + wait_for_completion, + flowetl_db_session, + flowdb_session, + flowdb_connection, +): + """ + Test for full pipeline. We want to test the following things; + + 1. Do files in the dump location get picked up? + 2. Do files that do not match a configuration pattern get ignored? + 3. Do files (cdr_type, cdr_date pairs) that have a state of archive + in etl.etl_records get ignored? + 4. Do files (cdr_type, cdr_date pairs) that have a state of quarantine + in etl.etl_records get picked up to be reprocessed? + 5. Do files of different CDR types cause the correct etl_{cdr_type} + DAG to run? + 6. Do child tables get created under the associated parent table in + the events schema? + """ + write_files_to_dump( + file_names=[ + "CALLS_20160101.csv.gz", + "CALLS_20160102.csv.gz", + "SMS_20160101.csv.gz", + "bad_file.bad", + "MDS_20160101.csv.gz", + "TOPUPS_20160101.csv.gz", + ] + ) + + # set CALLS_20160101 as archived and SMS_20160101 as quarantined + CALLS_20160101_record = { + "cdr_type": "calls", + "cdr_date": parse("2016-01-01").date(), + "state": "archive", + } + ETLRecord.set_state( + cdr_type=CALLS_20160101_record["cdr_type"], + cdr_date=CALLS_20160101_record["cdr_date"], + state=CALLS_20160101_record["state"], + session=flowdb_session, + ) + + SMS_20160101_record = { + "cdr_type": "sms", + "cdr_date": parse("2016-01-01").date(), + "state": "quarantine", + } + ETLRecord.set_state( + cdr_type=SMS_20160101_record["cdr_type"], + cdr_date=SMS_20160101_record["cdr_date"], + state=SMS_20160101_record["state"], + session=flowdb_session, + ) + + trigger_dags(flowetl_container=flowetl_container) + + # 1 calls, 1 sms, 1 mds and 1 topups DAG should run and we wait for + # their completion + wait_for_completion("success", "etl_calls", session=flowetl_db_session) + wait_for_completion("success", "etl_sms", session=flowetl_db_session) + wait_for_completion("success", "etl_mds", session=flowetl_db_session) + wait_for_completion("success", "etl_topups", session=flowetl_db_session) + + # make sure files are where they should be + + dump_files = ["CALLS_20160101.csv.gz", "bad_file.bad"] # should have been ignored + archive_files = [ + "CALLS_20160102.csv.gz", + "SMS_20160101.csv.gz", + "MDS_20160101.csv.gz", + "TOPUPS_20160101.csv.gz", + ] # ingested so now in archive + + dump = [file.name for file in Path(f"{os.getcwd()}/mounts/dump").glob("*")] + archive = [file.name for file in Path(f"{os.getcwd()}/mounts/archive").glob("*")] + quarantine = [ + file.name for file in Path(f"{os.getcwd()}/mounts/quarantine").glob("*") + ] + ingest = [file.name for file in Path(f"{os.getcwd()}/mounts/ingest").glob("*")] + + assert set(dump_files) == (set(dump) - set(["README.md"])) + assert set(archive_files) == (set(archive) - set(["README.md"])) + + # quarantine and ingest should be empty + assert set() == (set(quarantine) 
- set(["README.md"])) + assert set() == (set(ingest) - set(["README.md"])) + + # make sure tables expected exist in flowdb + connection, _ = flowdb_connection + sql = """ + select + count(*) + from + information_schema.tables + where + table_schema = 'events' + and + table_name = '{table_name}' + """ + # calls_20160102 + res = connection.execute(sql.format(table_name="calls_20160102")).fetchone()[0] + assert res == 1 + + # sms_20160101 + res = connection.execute(sql.format(table_name="sms_20160101")).fetchone()[0] + assert res == 1 + + # mds_20160101 + res = connection.execute(sql.format(table_name="mds_20160101")).fetchone()[0] + assert res == 1 + + # topups_20160101 + res = connection.execute(sql.format(table_name="topups_20160101")).fetchone()[0] + assert res == 1 + + # make sure etl_records table is what we expect + + # calls,20160101 -> archive + res = ( + flowdb_session.query(ETLRecord.state) + .filter(ETLRecord.cdr_type == "calls", ETLRecord.cdr_date == "2016-01-01") + .all() + ) + assert set([row[0] for row in res]) == set(["archive"]) + + # calls,20160102 -> ingest + archive + res = ( + flowdb_session.query(ETLRecord.state) + .filter(ETLRecord.cdr_type == "calls", ETLRecord.cdr_date == "2016-01-02") + .all() + ) + assert sorted([row[0] for row in res]) == sorted(["ingest", "archive"]) + + # sms,20160101 -> quarantine + ingest + archive + res = ( + flowdb_session.query(ETLRecord.state) + .filter(ETLRecord.cdr_type == "sms", ETLRecord.cdr_date == "2016-01-01") + .all() + ) + assert sorted([row[0] for row in res]) == sorted( + ["quarantine", "ingest", "archive"] + ) + + # mds,20160101 -> ingest + archive + res = ( + flowdb_session.query(ETLRecord.state) + .filter(ETLRecord.cdr_type == "mds", ETLRecord.cdr_date == "2016-01-01") + .all() + ) + assert sorted([row[0] for row in res]) == sorted(["ingest", "archive"]) + + # topups,20160101 -> ingest + archive + res = ( + flowdb_session.query(ETLRecord.state) + .filter(ETLRecord.cdr_type == "topups", ETLRecord.cdr_date == "2016-01-01") + .all() + ) + assert sorted([row[0] for row in res]) == sorted(["ingest", "archive"])