Skip to content

Commit

Permalink
Merge pull request #858 from Flowminder/flowetl-config
Browse files Browse the repository at this point in the history
Flowetl - basically working....
  • Loading branch information
mergify[bot] committed Jun 13, 2019
2 parents d3ce53f + 31d2b6d commit 9a74291
Show file tree
Hide file tree
Showing 45 changed files with 2,210 additions and 230 deletions.
10 changes: 5 additions & 5 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ executors:
default: "2015-01-01"
disaster_end_date:
type: string
default: "2015-01-01"
default: "2015-01-01"
environment:
POSTGRES_PASSWORD: flowflow
MPLBACKEND: "agg"
Expand Down Expand Up @@ -560,16 +560,15 @@ jobs:
docker push flowminder/<<parameters.component>>:$CIRCLE_SHA1
run_flowetl_tests:
docker:
- image: circleci/python:3.7
machine:
image: circleci/classic:201808-01
environment:
AIRFLOW_HOME: ./test_airflow_home
TESTING: "true"
working_directory: /home/circleci/project/
steps:
- checkout:
path: /home/circleci/project/
- setup_remote_docker
- restore_cache:
key: flowetl-deps-1-{{ checksum "flowetl/Pipfile.lock"}}
- run:
Expand All @@ -583,7 +582,7 @@ jobs:
name: run flowetl integration tests
command: |
cd flowetl
FLOWETL_TAG=$CIRCLE_SHA1 pipenv run pytest --junit-xml=test_results/pytest/results_integration.xml ./tests
TAG=$CIRCLE_SHA1 pipenv run pytest --junit-xml=test_results/pytest/results_integration.xml ./tests
- run:
name: run etl module unit tests
command: |
Expand Down Expand Up @@ -874,6 +873,7 @@ workflows:
- run_flowetl_tests:
requires:
- build_flowetl
- build_flowdb
<<: *run_always_org_context
- build_python_wheel:
name: build_flowclient_wheel
Expand Down
62 changes: 35 additions & 27 deletions development_environment
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

## FlowAuth settings
# Database that stores users and servers, defaults to temporary sqlite
#SQLALCHEMY_DATABASE_URI
#SQLALCHEMY_DATABASE_URI
# Flask secret key for CSRF protection
SECRET_KEY=secret
SECRET_KEY=secret
# At rest encryption of server secret keys
FLOWAUTH_FERNET_KEY="XU-J5xNOtkaUKAoqWT7_VoT3zk2OTuoqKPBN3l0pOFg="
FLOWAUTH_FERNET_KEY="XU-J5xNOtkaUKAoqWT7_VoT3zk2OTuoqKPBN3l0pOFg="
# Creates demo data when running
DEMO_MODE=true
# Required by flask
FLASK_APP=flowauth
FLASK_APP=flowauth
# Enable/disable flask debug/autoreload
FLASK_DEBUG=1
FLOWAUTH_PORT=9091
Expand All @@ -22,13 +22,13 @@ FLOWAUTH_LOG_LEVEL=debug

## FlowAPI settings
# JWT secret key
JWT_SECRET_KEY=secret
JWT_SECRET_KEY=secret
# Shouldn't be relevant, as serving is now done by hypercorn?
QUART_APP="flowapi.main:create_app()"
QUART_APP="flowapi.main:create_app()"
# Shouldn't be relevant, as serving is now done by hypercorn?
QUART_DEBUG=1
QUART_DEBUG=1
# Error & debug log level
FLOWAPI_LOG_LEVEL=debug
FLOWAPI_LOG_LEVEL=debug
# Flowmachine ZMQ host
FLOWMACHINE_HOST=localhost
FLOWAPI_PORT=9090
Expand All @@ -38,25 +38,25 @@ FLOWAPI_IDENTIFIER=TEST_SERVER
# Hostname to connect to flowdb
FLOWDB_HOST=localhost
# Port to connect to flowdb
FLOWDB_PORT=9000
FLOWDB_PORT=9000

## FlowMachine settings
# To avoid mpl errors
MPLBACKEND="agg"
MPLBACKEND="agg"
# Password for redis
REDIS_PASSWORD=fm_redis
REDIS_PASSWORD=fm_redis
# Error and debugging log level
FLOWMACHINE_LOG_LEVEL=debug

# Number of connections to keep open to flowdb
#DB_CONNECTION_POOL_SIZE
#DB_CONNECTION_POOL_SIZE
# Number of connections to open in addition if needed
#DB_CONNECTION_POOL_OVERFLOW
#DB_CONNECTION_POOL_OVERFLOW

# Hostname of redis
REDIS_HOST=localhost
REDIS_HOST=localhost
# Port to connect to redis
REDIS_PORT=6379
REDIS_PORT=6379

## FlowMachine server settings
# asyncio debug mode
Expand All @@ -67,41 +67,41 @@ FLOWMACHINE_PORT=5555

## FlowDB Settings
# Superuser
POSTGRES_USER=flowdb
POSTGRES_USER=flowdb
# Superuser password
POSTGRES_PASSWORD=flowflow
POSTGRES_PASSWORD=flowflow
# Flowmachine user username
FLOWMACHINE_FLOWDB_USER=flowmachine
FLOWMACHINE_FLOWDB_USER=flowmachine
# Flowapi user username
FLOWAPI_FLOWDB_USER=flowapi
FLOWAPI_FLOWDB_USER=flowapi
# Off by default, enables pldebugger
FLOWDB_ENABLE_POSTGRES_DEBUG_MODE=False
# Password for flowmachine user
FLOWMACHINE_FLOWDB_PASSWORD=foo
FLOWMACHINE_FLOWDB_PASSWORD=foo
# Password for flowapi user
FLOWAPI_FLOWDB_PASSWORD=foo
FLOWAPI_FLOWDB_PASSWORD=foo
# Size in bytes to limit cache to
CACHE_SIZE=""
# Decay rate for cache records
CACHE_HALF_LIFE=10000
# Max number of CPUs to use
#MAX_CPUS
#MAX_CPUS
# Max number of worker processes to use
#MAX_WORKERS
#MAX_WORKERS
# Max number of worker processes to use per query
#MAX_WORKERS_PER_GATHER
#MAX_WORKERS_PER_GATHER
# Size of postgres shared memory buffers
#SHARED_BUFFERS_SIZE
# Set the postgres 'effective_cache_size' parameter
#EFFECTIVE_CACHE_SIZE
# Enable/disable postgres JIT
#JIT
#JIT
# Default stats target for tables
#STATS_TARGET
#STATS_TARGET

# Synthetic data settings
# Which data generator to use
SYNTHETIC_DATA_GENERATOR=sql
SYNTHETIC_DATA_GENERATOR=sql
N_SITES=200
N_CELLS=1000
N_SUBSCRIBERS=50000
Expand Down Expand Up @@ -133,6 +133,14 @@ FLOWETL_POSTGRES_PASSWORD=flowetl
FLOWETL_POSTGRES_USER=flowetl
FLOWETL_POSTGRES_HOST=flowetl_db
FLOWETL_POSTGRES_DB=flowetl
FLOWETL_POSTGRES_PORT=9001

MOUNT_HOME=/mounts
HOST_CONFIG_DIR=./flowetl/mounts/config
HOST_DUMP_DIR=./flowetl/mounts/dump
HOST_ARCHIVE_DIR=./flowetl/mounts/archive
HOST_QUARANTINE_DIR=./flowetl/mounts/quarantine
HOST_INGEST_DIR=./flowetl/mounts/ingest

# Worked examples
WORKED_EXAMPLES_PORT=8888
14 changes: 11 additions & 3 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ networks:
flowetl_db:



services:

flowdb:
Expand Down Expand Up @@ -193,15 +193,23 @@ services:
stdin_open: true
ports:
- ${FLOWETL_PORT:?Must set FLOWETL_PORT env var}:8080
volumes:
- ${HOST_CONFIG_DIR:?Must set HOST_CONFIG_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/config:ro
- ${HOST_DUMP_DIR:?Must set HOST_DUMP_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/dump:rw
- ${HOST_ARCHIVE_DIR:?Must set HOST_ARCHIVE_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/archive:rw
- ${HOST_QUARANTINE_DIR:?Must set HOST_QUARANTINE_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/quarantine:rw
- ${HOST_INGEST_DIR:?Must set HOST_INGEST_DIR env var}:${MOUNT_HOME:?Must set MOUNT_HOME env var}/ingest:rw

environment:
AIRFLOW__CORE__EXECUTOR: LocalExecutor
AIRFLOW__CORE__SQL_ALCHEMY_CONN: ${SQL_ALCHEMY_CONN:?Must set SQL_ALCHEMY_CONN env var}
AIRFLOW_CONN_FLOWDB: postgres://$POSTGRES_USER:$POSTGRES_PASSWORD@flowdb:5432/flowdb
AIRFLOW__CORE__FERNET_KEY: ${FLOWETL_FERNET_KEY:?Must set FLOWETL_FERNET_KEY env var}
POSTGRES_USER: ${FLOWETL_POSTGRES_USER:?Must set FLOWETL_POSTGRES_USER env var}
POSTGRES_PASSWORD: ${FLOWETL_POSTGRES_PASSWORD:?Must set FLOWETL_POSTGRES_PASSWORD env var}
POSTGRES_HOST: ${FLOWETL_POSTGRES_HOST:?Must set FLOWETL_POSTGRES_HOST env var}
POSTGRES_DB: ${FLOWETL_POSTGRES_DB:?Must set FLOWETL_POSTGRES_DB env var}
MOUNT_HOME: ${MOUNT_HOME:?Must set MOUNT_HOME env var}

networks:
- db
Expand All @@ -216,8 +224,8 @@ services:
restart: always

ports:
- 5433:5432
- ${FLOWETL_POSTGRES_PORT:?Must set FLOWETL_POSTGRES_PORT env var}:5432

environment:
POSTGRES_USER: ${FLOWETL_POSTGRES_USER:?Must set FLOWETL_POSTGRES_USER env var}
POSTGRES_PASSWORD: ${FLOWETL_POSTGRES_PASSWORD:?Must set FLOWETL_POSTGRES_PASSWORD env var}
Expand Down
11 changes: 4 additions & 7 deletions flowdb/sql/04_schema_other.sql
Original file line number Diff line number Diff line change
Expand Up @@ -69,15 +69,12 @@ Schema used for temp storage during etl.

CREATE SCHEMA IF NOT EXISTS etl;

CREATE TYPE cdrtype AS ENUM ('voice', 'sms', 'mds', 'topups');
CREATE TYPE etl_status AS ENUM ('in_process', 'done', 'quarantine');
CREATE TABLE etl.etl (
CREATE TABLE etl.etl_records (
id SERIAL NOT NULL,
file_name VARCHAR,
cdr_type cdrtype,
cdr_type VARCHAR,
cdr_date DATE,
status etl_status,
time_stamp TIMESTAMP WITH TIME ZONE,
state VARCHAR,
timestamp TIMESTAMP WITH TIME ZONE,
PRIMARY KEY (id)
);

Expand Down
3 changes: 2 additions & 1 deletion flowetl/Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ verify_ssl = true

[dev-packages]
black = "==19.3b0"
apache-airflow = "==1.10.3"
apache-airflow = {extras = ["postgres"],version = "==1.10.3"}
pylint = "*"
pytest = "*"
docker = "*"
ipython = "*"
etl = {editable = true,path = "./etl"}
pytest-cov = "*"

[packages]

Expand Down
84 changes: 83 additions & 1 deletion flowetl/Pipfile.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9a74291

Please sign in to comment.