diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..7f7f49d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,8 @@ +# Changelog + +## [0.0.1] + +### Added + +- Initial draft #2 +- Changelog #6 diff --git a/Makefile b/Makefile index 261cecf..c6edd7e 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,6 @@ +# todo: refactor +LAST_COMMIT_HASH = $(shell git rev-parse --verify HEAD) + include dev/.env export export PATH := $(shell pwd)/tmp:$(PATH) @@ -28,20 +31,32 @@ example: custom_ca ifdef CI # CI is set in Github Actions cd test_example; SSL_CERT_FILE=${SSL_CERT_FILE} CURL_CA_BUNDLE=${CURL_CA_BUNDLE} vagrant up --provision else - cp -f docker/conf/certificates/*.crt test_example/docker/conf/certificates + if [ -f "docker/conf/certificates/*.crt" ]; then cp -f docker/conf/certificates/*.crt test_example/docker/conf/certificates; fi cd test_example; SSL_CERT_FILE=${SSL_CERT_FILE} CURL_CA_BUNDLE=${CURL_CA_BUNDLE} CUSTOM_CA=${CUSTOM_CA} ANSIBLE_ARGS='--extra-vars "local_test=true"' vagrant up --provision endif # clean commands -destroy-box: +destroy: vagrant destroy -f + rm terraform.tfstate || true + rm terraform.tfstate.backup || true + rm example/terraform.tfstate || true +status: + vagrant global-status +format: + ./format.sh remove-tmp: rm -rf ./tmp -clean: destroy-box remove-tmp +clean: destroy remove-tmp # helper commands update-box: @SSL_CERT_FILE=${SSL_CERT_FILE} CURL_CA_BUNDLE=${CURL_CA_BUNDLE} vagrant box update || (echo '\n\nIf you get an SSL error you might be behind a transparent proxy. 
\nMore info https://github.com/fredrikhgrelland/vagrant-hashistack/blob/master/README.md#if-you-are-behind-a-transparent-proxy\n\n' && exit 2) +proxy-to-minio: + consul connect proxy -service minio-local -upstream minio:9000 -log-level debug +docker-local-build: + docker build docker -t fredrikhgrelland/hive:${LAST_COMMIT_HASH} + docker tag fredrikhgrelland/hive:${LAST_COMMIT_HASH} fredrikhgrelland/hive:test diff --git a/README.md b/README.md index 95c07e4..56178da 100644 --- a/README.md +++ b/README.md @@ -1,90 +1,12 @@ -# Starter template for `fredrikhgrelland/hashistack` +# Terraform-nomad-hive -This repository can be used as a base for developing services on the hashistack. -On github, you may use the ["Use this template"](https://github.com/fredrikhgrelland/vagrant-hashistack-template/generate) button to generate a new repository from this template. - -If you found this in `fredrikhgrelland/vagrant-hashistack`, you may be interested in this separate repository [vagrant-hashistack-template](https://github.com/fredrikhgrelland/vagrant-hashistack-template/generate) button to start a new repository from this repo -. - -Documentation on [parent repository](https://github.com/fredrikhgrelland/vagrant-hashistack#usage). - -## Customizing and using the vagrant box - -### Building and testing docker image -See docker [README.md](docker/README.md). - -### Starting a box -The vagrant box ships with a default startup scheme. `make` from this directory will start the box, and it will run all books in [dev/ansible](dev/ansible) in lexical order (NB: `playbook.yml` is run first, but is only used to run all other playbooks) after the bootstrap-process for the hashistack is done. In the [example](test_example/dev/ansible/playbook.yml) we use it to start terraform which then starts a nomad-job. - -### Pre and post hashistack procedure -You may change the hashistack configuration or add aditional pre and post steps to the startup procedure to match your needs. 
-Detailed documentation in [dev/vagrant/conf/README.md](dev/vagrant/conf/README.md) - -### Pre packaged configuration switches - -The box comes standard with a set of environment switches to simplify testing of different scenarios and enable staged development efforts. - -NB: All lowercase variables will automatically get a corresponding TF_VAR_ prepended variant for use directly in terraform. -To change from the default value, you may add the environment variable to [.env](dev/.env) - -#### Enterprise vs Open Source Software (OSS) -As long as Enterprise is not set to `true` the box will utilise OSS version of the binaries. - -#### Nomad - -| default | environment variable | value | -|:---------:|:----------------------|:-------:| -| | nomad_enterprise | true | -| x | nomad_enterprise | false | -| | nomad_acl | true | -| x | nomad_acl | false | - -When ACLs in Nomad are enabled the bootstrap token will be available in vault under `secret/nomad/management-token` with the two key-value pairs `accessor-id` and `secret-id`. `secret-id` is the token itself. These can be accessed in several ways: -- From inside the vagrant box with `vault kv get secret/nomad-bootstrap-token` -- From local machine with `vagrant ssh -c vault kv get secret/nomad-bootstrap-token"` -- By going to vault's UI on `localhost:8200`, and signing in with the root token. 
- -#### Consul - -| default | environment variable | value | -|:---------:|:---------------------------------|:-------:| -| | consul_enterprise | true | -| x | consul_enterprise | false | -| x | consul_acl | true | -| | consul_acl | false | -| x | consul_acl_default_policy | allow | -| | consul_acl_default_policy | deny | - -#### Vault - -| default | environment variable | value | -|:---------:|:---------------------------------|:-------:| -| | vault_enterprise | true | -| x | vault_enterprise | false | - -##### Consul secrets engine - -If `consul_acl_default_policy` has value `deny`, it will also enable [consul secrets engine](https://www.vaultproject.io/docs/secrets/consul) in vault. -Ansible will provision additional custom roles (admin-team, dev-team), [policies](../ansible/templates/consul-policies) and tokens for test purpose with different access level. - -How to generate token: ```text -# generate token for dev team member -vagrant ssh -c 'vault read consul/creds/dev-team' - -# generate token for admin team member -vagrant ssh -c 'vault read consul/creds/admin-team' +make test ``` -*Tokens can be used to access UI (different access level depends on role) +## Modes +Hive can be run in two modes: +- [hivemetastore](./docker/bin/hivemetastore) +- [hiveserver](./docker/bin/hiveserver) -## Vagrant box life-cycle -1. `/home/vagrant/.env_default` - _preloaded_ - default variables -1. `vagrant/.env` - _user provided_ - variables override -1. `vagrant/.env_override` - _system provided_ - variables are overridden for test purposes -1. `vagrant/dev/vagrant/conf/pre_ansible.sh` - _user provided_ - script running before ansible bootstrap procedure -1. `vagrant/dev/vagrant/conf/pre_bootstrap/*.yml` - _user provided_ - pre bootstrap tasks, running before hashistack software runs and ready -1. `/etc/ansible/bootstrap.yml` - _preloaded_ - verify ansible variables and software configuration, run hashistack software & verify that it started correctly -1. 
`vagrant/conf/post_bootstrap/*.yml` - _user provided_ - poststart scripts, running after hasistack software runs and ready -1. `vagrant/dev/conf/pre_ansible.sh` - _user provided_ - script running after ansible bootstrap procedure -1. `vagrant/ansible/*.yml` - _user provided_ - ansible tasks included in playbook +`NB!` current implementation supports only [`hivemetastore`](./conf/nomad/hive.hcl#L99) diff --git a/Vagrantfile b/Vagrantfile index 3ad3025..8c6d4b0 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -3,10 +3,11 @@ Vagrant.configure("2") do |config| config.vm.box_version = ">= 0.3, < 0.4" config.vm.provider "virtualbox" do |vb| vb.linked_clone = true - vb.memory = 2048 + vb.memory = 4096 + vb.cpus = 2 + end + config.vm.provision "ansible_local" do |ansible| + ansible.provisioning_path = "/vagrant/dev/ansible" + ansible.playbook = "playbook.yml" # Note this playbook is, in this context, /ansible/playbook.yml end - config.vm.provision "ansible_local" do |ansible| - ansible.provisioning_path = "/vagrant/dev/ansible" - ansible.playbook = "playbook.yml" # Note this playbook is, in this context, /ansible/playbook.yml - end end diff --git a/conf/nomad/hive.hcl b/conf/nomad/hive.hcl new file mode 100644 index 0000000..28ab1d3 --- /dev/null +++ b/conf/nomad/hive.hcl @@ -0,0 +1,145 @@ +job "${service_name}" { + + type = "service" + datacenters = "${datacenters}" + namespace = "${namespace}" + + update { + max_parallel = 1 + health_check = "checks" + min_healthy_time = "10s" + healthy_deadline = "10m" + progress_deadline = "12m" + auto_revert = true + auto_promote = true + canary = 1 + stagger = "30s" + } + + group "metastoreserver" { + count = 1 + + service { + name = "${service_name}" + port = "${port}" + + check { + name = "beeline" + type = "script" + task = "metastoreserver" + command = "/bin/bash" + args = [ + "-c", + "beeline -u jdbc:hive2:// -e \"SHOW DATABASES;\" &> /tmp/check_script_beeline_metastoreserver && echo \"return code $?\""] + interval = "30s" + 
timeout = "120s" + } + + connect { + sidecar_service { + proxy { + upstreams { + destination_name = "${postgres_service_name}" + local_bind_port = "${postgres_local_bind_port}" + } + upstreams { + destination_name = "${minio_service_name}" + local_bind_port = "${minio_local_bind_port}" + } + } + } + } + } + + network { + mode = "bridge" + } + + task "waitfor-hive-database" { + restart { + attempts = 5 + delay = "15s" + } + lifecycle { + hook = "prestart" + } + driver = "docker" + resources { + memory = 32 + } + config { + image = "consul:latest" + entrypoint = ["/bin/sh"] + args = ["-c", "jq /dev/null || true && echo "NOTE: CA warnings suppressed." \ + #Test download ( does ssl trust work ) + && curl -s -I -o /dev/null $HIVE_DOWNLOAD || echo -e "\n###############\nERROR: You are probably behind a corporate proxy. Add your custom ca .crt in the ca_certificates docker build folder\n###############\n" \ + #Download and unpack hive + && curl -s -L $HIVE_DOWNLOAD | tar -xz --transform s/apache-hive-$HIVE_VERSION-bin/hive/ -C /opt/ \ + #Download postgres jdbc driver + && curl -s -L https://jdbc.postgresql.org/download/postgresql-$POSTGRES_JDBC_VERSION.jar -o $HIVE_HOME/lib/postgresql-jdbc.jar \ + #Download json serializer/deserializer + #https://stackoverflow.com/questions/26644351/cannot-validate-serde-org-openx-data-jsonserde-jsonserde + && curl -s -L http://www.congiu.net/hive-json-serde/1.3.8/cdh5/json-serde-1.3.8-jar-with-dependencies.jar -o $HIVE_HOME/lib/json-serde-1.3.8-jar-with-dependencies.jar \ + && mkdir -p $HIVE_HOME/extlib \ + #Install Atlas hooks + #&& tar xf /tmp/apache-atlas-2.0.0-hive-hook.tar.gz --strip-components 1 -C $HIVE_HOME/extlib \ + # TODO: remove me... + ; for file in $(find $HIVE_HOME/extlib/ -name '*.jar' -print); do ln -s $file $HIVE_HOME/lib/; done;\ + #Install AWS s3 drivers + ln -s $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-*.jar $HIVE_HOME/lib/. 
\ + && ln -s $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-$HADOOP_VERSION.jar $HIVE_HOME/lib/. \ + && ln -s $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-*.jar $HADOOP_HOME/share/hadoop/common/lib/. \ + && ln -s $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-$HADOOP_VERSION.jar $HADOOP_HOME/share/hadoop/common/lib/. \ + #Remove libs causing error from hive - duplicated from hadoop + && rm /opt/hive/lib/log4j-slf4j-impl-*.jar \ + && mv /var/tmp/conf/* $HIVE_HOME/conf/ \ + && chmod +x /var/tmp/bin/* \ + && mv /var/tmp/bin/* /usr/local/bin/ \ + && rm -rf /var/tmp/* + +EXPOSE 10000 +EXPOSE 10002 +EXPOSE 9083 + +WORKDIR /opt +ENTRYPOINT ["entrypoint.sh"] +CMD hiveserver diff --git a/docker/bin/entrypoint.sh b/docker/bin/entrypoint.sh new file mode 100644 index 0000000..7a2b65e --- /dev/null +++ b/docker/bin/entrypoint.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +function addProperty() { + local path=$1 + local name=$2 + local value=$3 + + local entry="$name${value}" + local escapedEntry + escapedEntry=$(echo "$entry" | sed 's/\//\\\//g') + sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" "$path" +} + +function configure() { + local path=$1 + local module=$2 + local envPrefix=$3 + + local var + local value + + echo "Configuring $module" + for c in $(printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix="$envPrefix"); do + name=$(echo "${c}" | perl -pe 's/___/-/g; s/__/_/g; s/_/./g') + var="${envPrefix}_${c}" + value=${!var} + echo " - Setting $name=$value" + addProperty "$path" "$name" "$value" + done +} + +configure /etc/hadoop/core-site.xml core CORE_CONF +configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF +configure /etc/hadoop/yarn-site.xml yarn YARN_CONF +configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF +configure /etc/hadoop/kms-site.xml kms KMS_CONF +configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF +configure /opt/hive/conf/hive-site.xml hive HIVE_SITE_CONF + +echo "127.0.0.1 $(hostname)">/etc/hosts + +tail -F 
/tmp/root/hive.log& + +exec "$@" diff --git a/docker/bin/hivemetastore b/docker/bin/hivemetastore new file mode 100644 index 0000000..59511ef --- /dev/null +++ b/docker/bin/hivemetastore @@ -0,0 +1,6 @@ +#!/bin/bash + +#TODO: init or upgrade only when needed +hive --service schemaTool -initSchema -dbType postgres --verbose | true +hive --service schemaTool -upgradeSchema -dbType postgres --verbose | true +hive --service metastore \ No newline at end of file diff --git a/docker/bin/hiveserver b/docker/bin/hiveserver new file mode 100644 index 0000000..4379621 --- /dev/null +++ b/docker/bin/hiveserver @@ -0,0 +1,5 @@ +#!/bin/bash + +cd $HIVE_HOME/bin +./hiveserver2 --hiveconf hive.server2.enable.doAs=false + diff --git a/docker/conf/beeline-log4j2.properties b/docker/conf/beeline-log4j2.properties new file mode 100644 index 0000000..103d722 --- /dev/null +++ b/docker/conf/beeline-log4j2.properties @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +status = INFO +name = BeelineLog4j2 +packages = org.apache.hadoop.hive.ql.log + +# list of properties +property.hive.log.level = WARN +property.hive.root.logger = console + +# list of all appenders +appenders = console + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n + +# list of all loggers +loggers = HiveConnection + +# HiveConnection logs useful info for dynamic service discovery +logger.HiveConnection.name = org.apache.hive.jdbc.HiveConnection +logger.HiveConnection.level = INFO + +# root logger +rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} diff --git a/docker/conf/hive-env.sh b/docker/conf/hive-env.sh new file mode 100644 index 0000000..8987b15 --- /dev/null +++ b/docker/conf/hive-env.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Set Hive and Hadoop environment variables here. These variables can be used +# to control the execution of Hive. 
It should be used by admins to configure +# the Hive installation (so that users do not have to set environment variables +# or set command line parameters to get correct behavior). +# +# The hive service being invoked (CLI/HWI etc.) is available via the environment +# variable SERVICE + + +# Hive Client memory usage can be an issue if a large number of clients +# are running at the same time. The flags below have been useful in +# reducing memory usage: +# +# if [ "$SERVICE" = "cli" ]; then +# if [ -z "$DEBUG" ]; then +# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit" +# else +# export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit" +# fi +# fi + +# The heap size of the jvm started by hive shell script can be controlled via: +# +export HADOOP_HEAPSIZE=1024 +# +# Larger heap size may be required when running queries over large number of files or partitions. +# By default hive shell scripts use a heap size of 256 (MB). Larger heap size would also be +# appropriate for hive server (hwi etc). + + +# Set HADOOP_HOME to point to a specific hadoop install directory +# HADOOP_HOME=${bin}/../../hadoop + +# Hive Configuration Directory can be controlled by: +# export HIVE_CONF_DIR= + +# Folder containing extra libraries required for hive compilation/execution can be controlled by: +export HIVE_AUX_JARS_PATH=/opt/hive/extlib diff --git a/docker/conf/hive-exec-log4j2.properties b/docker/conf/hive-exec-log4j2.properties new file mode 100644 index 0000000..4fba04c --- /dev/null +++ b/docker/conf/hive-exec-log4j2.properties @@ -0,0 +1,66 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +status = INFO +name = HiveExecLog4j2 +packages = org.apache.hadoop.hive.ql.log + +# list of properties +property.hive.log.level = INFO +property.hive.root.logger = FA +property.hive.query.id = hadoop +property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} +property.hive.log.file = ${sys:hive.query.id}.log + +# list of all appenders +appenders = console, FA + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n + +# simple file appender +appender.FA.type = File +appender.FA.name = FA +appender.FA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file} +appender.FA.layout.type = PatternLayout +appender.FA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n + +# list of all loggers +loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +# root logger 
+rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:hive.root.logger} diff --git a/docker/conf/hive-log4j2.properties b/docker/conf/hive-log4j2.properties new file mode 100644 index 0000000..12cd9ac --- /dev/null +++ b/docker/conf/hive-log4j2.properties @@ -0,0 +1,73 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +status = INFO +name = HiveLog4j2 +packages = org.apache.hadoop.hive.ql.log + +# list of properties +property.hive.log.level = INFO +property.hive.root.logger = DRFA +property.hive.log.dir = ${sys:java.io.tmpdir}/${sys:user.name} +property.hive.log.file = hive.log + +# list of all appenders +appenders = console, DRFA + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t]: %p %c{2}: %m%n + +# daily rolling file appender +appender.DRFA.type = RollingFile +appender.DRFA.name = DRFA +appender.DRFA.fileName = ${sys:hive.log.dir}/${sys:hive.log.file} +# Use %pid in the filePattern to append @ to the filename if you want separate log files for different CLI session +appender.DRFA.filePattern = ${sys:hive.log.dir}/${sys:hive.log.file}.%d{yyyy-MM-dd} +appender.DRFA.layout.type = PatternLayout +appender.DRFA.layout.pattern = %d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n +appender.DRFA.policies.type = Policies +appender.DRFA.policies.time.type = TimeBasedTriggeringPolicy +appender.DRFA.policies.time.interval = 1 +appender.DRFA.policies.time.modulate = true +appender.DRFA.strategy.type = DefaultRolloverStrategy +appender.DRFA.strategy.max = 30 + +# list of all loggers +loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +# root logger +rootLogger.level = ${sys:hive.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = 
${sys:hive.root.logger} diff --git a/docker/conf/hive-site.xml b/docker/conf/hive-site.xml new file mode 100644 index 0000000..60f3935 --- /dev/null +++ b/docker/conf/hive-site.xml @@ -0,0 +1,18 @@ + + + diff --git a/docker/conf/ivysettings.xml b/docker/conf/ivysettings.xml new file mode 100644 index 0000000..aa10f43 --- /dev/null +++ b/docker/conf/ivysettings.xml @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docker/conf/llap-daemon-log4j2.properties b/docker/conf/llap-daemon-log4j2.properties new file mode 100644 index 0000000..5051ca5 --- /dev/null +++ b/docker/conf/llap-daemon-log4j2.properties @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +status = INFO +name = LlapDaemonLog4j2 +packages = org.apache.hadoop.hive.ql.log + +# list of properties +property.llap.daemon.log.level = INFO +property.llap.daemon.root.logger = console +property.llap.daemon.log.dir = . 
+property.llap.daemon.log.file = llapdaemon.log +property.llap.daemon.historylog.file = llapdaemon_history.log +property.llap.daemon.log.maxfilesize = 256MB +property.llap.daemon.log.maxbackupindex = 20 + +# list of all appenders +appenders = console, RFA, HISTORYAPPENDER + +# console appender +appender.console.type = Console +appender.console.name = console +appender.console.target = SYSTEM_ERR +appender.console.layout.type = PatternLayout +appender.console.layout.pattern = %d{yy/MM/dd HH:mm:ss} [%t%x] %p %c{2} : %m%n + +# rolling file appender +appender.RFA.type = RollingFile +appender.RFA.name = RFA +appender.RFA.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file} +appender.RFA.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.log.file}_%i +appender.RFA.layout.type = PatternLayout +appender.RFA.layout.pattern = %d{ISO8601} %-5p [%t%x]: %c{2} (%F:%M(%L)) - %m%n +appender.RFA.policies.type = Policies +appender.RFA.policies.size.type = SizeBasedTriggeringPolicy +appender.RFA.policies.size.size = ${sys:llap.daemon.log.maxfilesize} +appender.RFA.strategy.type = DefaultRolloverStrategy +appender.RFA.strategy.max = ${sys:llap.daemon.log.maxbackupindex} + +# history file appender +appender.HISTORYAPPENDER.type = RollingFile +appender.HISTORYAPPENDER.name = HISTORYAPPENDER +appender.HISTORYAPPENDER.fileName = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file} +appender.HISTORYAPPENDER.filePattern = ${sys:llap.daemon.log.dir}/${sys:llap.daemon.historylog.file}_%i +appender.HISTORYAPPENDER.layout.type = PatternLayout +appender.HISTORYAPPENDER.layout.pattern = %m%n +appender.HISTORYAPPENDER.policies.type = Policies +appender.HISTORYAPPENDER.policies.size.type = SizeBasedTriggeringPolicy +appender.HISTORYAPPENDER.policies.size.size = ${sys:llap.daemon.log.maxfilesize} +appender.HISTORYAPPENDER.strategy.type = DefaultRolloverStrategy +appender.HISTORYAPPENDER.strategy.max = ${sys:llap.daemon.log.maxbackupindex} + +# list of all loggers 
+loggers = NIOServerCnxn, ClientCnxnSocketNIO, DataNucleus, Datastore, JPOX, HistoryLogger + +logger.NIOServerCnxn.name = org.apache.zookeeper.server.NIOServerCnxn +logger.NIOServerCnxn.level = WARN + +logger.ClientCnxnSocketNIO.name = org.apache.zookeeper.ClientCnxnSocketNIO +logger.ClientCnxnSocketNIO.level = WARN + +logger.DataNucleus.name = DataNucleus +logger.DataNucleus.level = ERROR + +logger.Datastore.name = Datastore +logger.Datastore.level = ERROR + +logger.JPOX.name = JPOX +logger.JPOX.level = ERROR + +logger.HistoryLogger.name = org.apache.hadoop.hive.llap.daemon.HistoryLogger +logger.HistoryLogger.level = INFO +logger.HistoryLogger.additivity = false +logger.HistoryLogger.appenderRefs = HistoryAppender +logger.HistoryLogger.appenderRef.HistoryAppender.ref = HISTORYAPPENDER + +# root logger +rootLogger.level = ${sys:llap.daemon.log.level} +rootLogger.appenderRefs = root +rootLogger.appenderRef.root.ref = ${sys:llap.daemon.root.logger} diff --git a/example/main.tf b/example/main.tf new file mode 100644 index 0000000..59a6360 --- /dev/null +++ b/example/main.tf @@ -0,0 +1,78 @@ +module "minio" { + source = "github.com/fredrikhgrelland/terraform-nomad-minio.git?ref=0.0.2" + + # nomad + nomad_datacenters = ["dc1"] + nomad_namespace = "default" + + # minio + service_name = "minio" + host = "127.0.0.1" + port = 9000 + container_image = "minio/minio:latest" # todo: avoid using tag latest in future releases + access_key = "minio" + secret_key = "minio123" + buckets = ["default", "hive"] + container_environment_variables = ["JUST_EXAMPLE_VAR1=some-value", "ANOTHER_EXAMPLE2=some-other-value"] + + # mc + mc_service_name = "mc" + mc_container_image = "minio/mc:latest" # todo: avoid using tag latest in future releases + mc_container_environment_variables = ["JUST_EXAMPLE_VAR3=some-value", "ANOTHER_EXAMPLE4=some-other-value"] +} + +module "postgres" { + source = "github.com/fredrikhgrelland/terraform-nomad-postgres.git?ref=0.0.1" + + # nomad + nomad_datacenters = 
["dc1"] + nomad_namespace = "default" + + # postgres + postgres_service_name = "postgres" + postgres_container_image = "postgres:12-alpine" + postgres_container_port = 5432 + postgres_admin_user = "hive" + postgres_admin_password = "hive" + postgres_database = "metastore" + postgres_container_environment_variables = ["PGDATA=/var/lib/postgresql/data"] +} + +module "hive" { + source = "./.." + + # nomad + nomad_datacenters = ["dc1"] + nomad_namespace = "default" + + # hive + hive_service_name = "hive-metastore" + hive_container_port = 9083 + hive_container_environment_variables = ["SOME_EXAMPLE=example-value"] + + # hive - minio + hive_bucket = { + default = "default", + hive = "hive" + } + minio_service = { + service_name = module.minio.minio_service_name, + port = 9000, # todo: minio 0.0.1 does not have output variable port + access_key = module.minio.minio_access_key, + secret_key = module.minio.minio_secret_key, + } + + # hive - postgres + postgres_service = { + service_name = module.postgres.service_name + port = module.postgres.port + database_name = module.postgres.database_name + username = module.postgres.username + password = module.postgres.password + } + + depends_on = [ + module.minio, + module.postgres + ] +} diff --git a/example/nomad_acl_test.tf b/example/nomad_acl_test.tf new file mode 100644 index 0000000..2a74819 --- /dev/null +++ b/example/nomad_acl_test.tf @@ -0,0 +1,11 @@ +data "vault_generic_secret" "nomad_secret_id" { + # Set count of this data source to 1 if ACLs are enabled in Nomad, and 0 if not + count = var.nomad_acl ? 1 : 0 + path = "nomad/creds/write" +} + +provider "nomad" { + address = "http://127.0.0.1:4646" + # Add a secret_id if ACLs are enabled in nomad + secret_id = var.nomad_acl ? 
data.vault_generic_secret.nomad_secret_id[0].data.secret_id : null +} diff --git a/example/variables.tf b/example/variables.tf new file mode 100644 index 0000000..55cbd1b --- /dev/null +++ b/example/variables.tf @@ -0,0 +1,3 @@ +variable "nomad_acl" { + type = bool +} \ No newline at end of file diff --git a/format.sh b/format.sh new file mode 100755 index 0000000..a4d2d1b --- /dev/null +++ b/format.sh @@ -0,0 +1,2 @@ +#!/bin/bash +terraform fmt -recursive diff --git a/main.tf b/main.tf index e69de29..7291f49 100644 --- a/main.tf +++ b/main.tf @@ -0,0 +1,45 @@ +locals { + datacenters = join(",", var.nomad_datacenters) + buckets = var.hive_bucket # output variable for presto + hive_env_vars = join("\n", + concat([ + "JUST_EXAMPLE_ENV=some-value", + ], var.hive_container_environment_variables) + ) +} + +data "template_file" "template-nomad-job-hive" { + + template = file("${path.module}/conf/nomad/hive.hcl") + + vars = { + service_name = var.hive_service_name + datacenters = local.datacenters + namespace = var.nomad_namespace + + // image = var.postgres_container_image #todo: optional rendering + port = var.hive_container_port + envs = local.hive_env_vars + + hive_bucket = var.hive_bucket.hive + default_bucket = var.hive_bucket.default + + # postgres + postgres_service_name = var.postgres_service.service_name + postgres_local_bind_port = var.postgres_service.port + postgres_database_name = var.postgres_service.database_name + postgres_username = var.postgres_service.username + postgres_password = var.postgres_service.password + + # minio + minio_service_name = var.minio_service.service_name + minio_local_bind_port = var.minio_service.port + minio_access_key = var.minio_service.access_key + minio_secret_key = var.minio_service.secret_key + } +} + +resource "nomad_job" "nomad-job-hive" { + jobspec = data.template_file.template-nomad-job-hive.rendered + detach = false +} diff --git a/outputs.tf b/outputs.tf index e69de29..0a97e7b 100644 --- a/outputs.tf +++ b/outputs.tf 
@@ -0,0 +1,8 @@ +output "service_name" { + description = "Hive service name" + value = data.template_file.template-nomad-job-hive.vars.service_name +} + +output "buckets" { + value = local.buckets +} diff --git a/test_example/example/nomad_acl_test.tf b/test_example/example/nomad_acl_test.tf index 2ed9a35..bcf3edf 100644 --- a/test_example/example/nomad_acl_test.tf +++ b/test_example/example/nomad_acl_test.tf @@ -1,7 +1,7 @@ data "vault_generic_secret" "nomad_secret_id" { # Set count of this data source to 1 if ACLs are enabled in Nomad, and 0 if not count = var.nomad_acl ? 1 : 0 - path = "nomad/creds/write" + path = "nomad/creds/write" } provider "nomad" { diff --git a/test_example/main.tf b/test_example/main.tf index 4a066ce..3a0df53 100644 --- a/test_example/main.tf +++ b/test_example/main.tf @@ -1,4 +1,4 @@ resource "nomad_job" "countdash" { jobspec = file("${path.module}/conf/nomad/countdash.hcl") - detach = false + detach = false } \ No newline at end of file diff --git a/variables.tf b/variables.tf index e69de29..9435aec 100644 --- a/variables.tf +++ b/variables.tf @@ -0,0 +1,67 @@ +# Nomad +variable "nomad_provider_address" { + type = string + description = "Nomad address" + default = "http://127.0.0.1:4646" +} +variable "nomad_datacenters" { + type = list(string) + description = "Nomad data centers" + default = ["dc1"] +} +variable "nomad_namespace" { + type = string + description = "[Enterprise] Nomad namespace" + default = "default" +} + +# Hive +variable "hive_service_name" { + type = string + description = "Hive service name" + default = "hive-metastore" +} + +variable "hive_container_port" { + type = number + description = "Hive port" + default = 9083 +} + +variable "hive_container_environment_variables" { + type = list(string) + description = "Hive environment variables" + default = [""] +} + +variable "hive_bucket" { + type = object({ + default = string, + hive = string + }) + description = "Hive requires minio buckets" +} + +# Minio +variable 
"minio_service" { + type = object({ + service_name = string, + port = number, + access_key = string, + secret_key = string, + }) + description = "Minio data-object contains service_name, port, access_key and secret_key" +} + +# Postgres +variable "postgres_service" { + type = object({ + service_name = string, + port = number, + database_name = string, + username = string, + password = string + }) + description = "Postgres data-object contains service_name, port, database_name, username and password" +} + diff --git a/versions.tf b/versions.tf new file mode 100644 index 0000000..960bd4b --- /dev/null +++ b/versions.tf @@ -0,0 +1,13 @@ +terraform { + required_providers { + nomad = { + source = "hashicorp/nomad" + version = "~> 1.4.9" + } + template = { + source = "hashicorp/template" + version = "~> 2.1.2" + } + } + required_version = ">= 0.13" +}