diff --git a/.travis.yml b/.travis.yml index 55333e13..c43d254c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,10 @@ cache: - "$HOME/.m2" - "$HOME/.jdk" sudo: false +addons: + apt: + packages: + - rpm env: global: - JDKW=https://raw.githubusercontent.com/vjkoskela/jdk-wrapper/master/jdk-wrapper.sh @@ -15,5 +19,5 @@ install: true before_script: - gpg --import arpnetworking.key script: -- curl -s ${JDKW} | bash /dev/stdin ./mvnw clean install --settings settings.xml -P ci -Dgpg.skip=true -U +- curl -s ${JDKW} | bash /dev/stdin ./mvnw clean install --settings settings.xml -P ci -P rpm -Dgpg.skip=true -U - if [ \( -n "${TRAVIS_TAG}" \) -a "${TRAVIS_PULL_REQUEST}" = "false" ]; then curl -s ${JDKW} | bash /dev/stdin ./mvnw deploy --settings settings.xml -P ci -DskipAllVerification=true; fi; diff --git a/config/clusterPipeline.json b/config/clusterPipeline.json new file mode 100644 index 00000000..d44d3673 --- /dev/null +++ b/config/clusterPipeline.json @@ -0,0 +1,5 @@ +{ + "sinks": + [ + ] +} diff --git a/config/config.json b/config/config.json new file mode 100644 index 00000000..44855c9d --- /dev/null +++ b/config/config.json @@ -0,0 +1,93 @@ +{ + "monitoringCluster": "tsd_aggregator_dev", + "httpPort": 7066, + "httpHost": "0.0.0.0", + "httpStatusPath": "/status", + "aggregationHost": "0.0.0.0", + "aggregationPort": 7065, + "logDirectory": "logs", + "hostPipelineConfiguration": "/opt/cluster-aggregator/config/hostPipeline.json", + "clusterPipelineConfiguration": "/opt/cluster-aggregator/config/clusterPipeline.json", + "maxConnectionTimeout": "PT2M", + "minConnectionTimeout": "PT1M", + "clusterHostSuffix": ".cluster", + "jvmMetricsCollectionInterval": "PT.5S", + "rebalanceConfiguration": { + "maxParallel": 100, + "threshold": 500 + }, + "databaseConfigurations": { + "metrics_clusteragg": { + "jdbcUrl": "jdbc:h2:/opt/cluster-aggregator/data/metrics:clusteragg;AUTO_SERVER=TRUE;AUTO_SERVER_PORT=7067;MODE=PostgreSQL;INIT=create schema if not exists 
clusteragg;DB_CLOSE_DELAY=-1", + "driverName": "org.h2.Driver", + "username": "sa", + "password": "secret", + "maximumPoolSize": 2, + "minimumIdle": 2, + "idleTimeout": 0, + "modelPackages": [ "com.arpnetworking.clusteraggregator.models.ebean" ] + }, + "metrics_clusteragg_ddl": { + "jdbcUrl": "jdbc:h2:/opt/cluster-aggregator/data/metrics:clusteragg;AUTO_SERVER=TRUE;AUTO_SERVER_PORT=7067;MODE=PostgreSQL;INIT=create schema if not exists clusteragg;DB_CLOSE_DELAY=-1", + "driverName": "org.h2.Driver", + "username": "sa", + "password": "secret", + "migrationLocations": ["db/migration/metrics_clusteragg/common"], + "migrationSchemas": ["clusteragg"], + "maximumPoolSize": 2, + "minimumIdle": 0, + "idleTimeout": 10000, + "modelPackages": [ "com.arpnetworking.clusteraggregator.models.ebean" ] + } + }, + "akkaConfiguration": { + "akka": { + "loggers": ["akka.event.slf4j.Slf4jLogger"], + "loglevel": "DEBUG", + "stdout-loglevel": "DEBUG", + "logging-filter": "akka.event.slf4j.Slf4jLoggingFilter", + "actor": { + "debug": { + "unhandled": "on" + }, + "provider": "akka.cluster.ClusterActorRefProvider" + }, + "cluster": { + "seed-nodes": [ + "akka.tcp://Metrics@127.0.0.1:2551" + ], + "auto-down-unreachable-after": "300s", + "sharding": { + "guardian-name": "sharding", + "role": "", + "retry-interval": "2 s", + "buffer-size": 100000, + "handoff-timeout": "60 s", + "rebalance-interval": "10 s", + "snapshot-interval": "720 s", + "least-shard-allocation-strategy": { + "rebalance-threshold": 10, + "max-simultaneous-rebalance": 3 + } + } + }, + "remote": { + "log-remote-lifecycle-events": "on", + "netty": { + "tcp": { + "hostname": "127.0.0.1", + "port": 2551 + } + } + }, + "persistence": { + "journal": { + "plugin": "akka.persistence.journal.leveldb", + "leveldb" : { + "dir": "/opt/cluster-aggregator/data/journal" + } + } + } + } + } +} diff --git a/config/hostPipeline.json b/config/hostPipeline.json new file mode 100644 index 00000000..d44d3673 --- /dev/null +++ 
b/config/hostPipeline.json @@ -0,0 +1,5 @@ +{ + "sinks": + [ + ] +} diff --git a/config/logback.xml b/config/logback.xml new file mode 100644 index 00000000..9932af83 --- /dev/null +++ b/config/logback.xml @@ -0,0 +1,51 @@ + + + + + /opt/cluster-aggregator/logs/cluster-aggregator.log + + + 900000 + 100MB + + /opt/cluster-aggregator/logs/cluster-aggregator.%d{yyyy-MM-dd_HH}.%i.log.gz + 4 + true + + + true + true + + + + true + + + + + + + 50 + 500 + + + + + + + + diff --git a/pom.xml b/pom.xml index 83e84459..afdf9eb6 100644 --- a/pom.xml +++ b/pom.xml @@ -126,6 +126,7 @@ 4.7.1 1.8.9 1.8 + 2.1.5 ${project.basedir}/findbugs.exclude.xml @@ -204,7 +205,7 @@ cluster-aggregator - com.arpnetworking.metrics.clusteraggregator.Main + com.arpnetworking.clusteraggregator.Main @@ -603,7 +604,6 @@ test - krasserm @@ -611,4 +611,85 @@ http://dl.bintray.com/krasserm/maven + + + rpm + + false + + + + + org.codehaus.mojo + rpm-maven-plugin + ${rpm.maven.plugin.version} + + + generate-rpm + + rpm + + + + + cluster-aggregator + Apache 2 License + false + System Environment/Daemons + + rpm/scripts/pre.sh + + + rpm/scripts/post.sh + + + rpm/scripts/preun.sh + + + rpm/scripts/postun.sh + + + java >= ${jdk.version} + + + + /opt/cluster-aggregator + + + target/appassembler + + + + + /opt/cluster-aggregator/config + + + config + + + + + / + + + rpm/fs + + + + + + /usr/local/bin + + + /opt/cluster-aggregator/bin/cluster-aggregator + + + + + + + + + + diff --git a/rpm/fs/etc/init.d/cluster-aggregator b/rpm/fs/etc/init.d/cluster-aggregator new file mode 100755 index 00000000..33348fd1 --- /dev/null +++ b/rpm/fs/etc/init.d/cluster-aggregator @@ -0,0 +1,169 @@ +#!/bin/sh +# +# cluster-aggregator cluster level metrics aggregation +# +# chkconfig: - 60 40 +# description: cluster-aggregator is an aggregation daemon that computes \ +# cluster-level metrics from disparate hosts and services +# processname: cluster-aggregator +# config: /opt/cluster-aggregator/config/config.json +# pidfile: 
/var/run/cluster-aggregator/cluster-aggregator.pid
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+user="cagg"
+prog="cluster-aggregator"
+dir="/opt/cluster-aggregator"
+log_dir="$dir/logs"
+config="/opt/cluster-aggregator/config/config.json"
+logging_config="-Dlogback.configurationFile=/opt/cluster-aggregator/config/logback.xml"
+exec="/opt/cluster-aggregator/bin/cluster-aggregator"
+pid_file="/var/run/cluster-aggregator/cluster-aggregator.pid"
+export JAVA_HOME="$(dirname $(dirname $(readlink -f $(which java))))"
+export JAVA_OPTS="$logging_config"
+
+[ -e /etc/sysconfig/$prog ] && . /etc/sysconfig/$prog
+
+lockfile=/var/lock/subsys/$prog
+# Print the contents of $pid_file when it exists, followed by an empty line.
+get_pid() {
+    if [ -f "$pid_file" ]; then
+        cat "$pid_file" 2> /dev/null
+    fi
+    echo ""
+}
+# Return 0 when $pid_file exists and its PID accepts signal 0; uses POSIX redirection, not bash-only "&>".
+is_running() {
+    [ ! -f "$pid_file" ] && return 1
+    kill -0 `get_pid` > /dev/null 2>&1 && return 0 || return 1
+}
+# Start $exec as $user in the background via su, write its PID to $pid_file, touch $lockfile when retval is 0.
+start() {
+    [ -f $config ] || exit 6
+    echo -n $"Starting $prog: "
+    if is_running; then
+        warning "$prog: already running"
+    else
+        cd "$dir"
+        su "$user" -s /bin/sh -c "nice $exec $config >> $log_dir/system.out 2>&1 & echo \$! > $pid_file"
+        if ! is_running; then
+            failure "$prog did not start"
+            retval=1
+        else
+            success
+            retval=0
+        fi
+    fi
+    echo
+    [ $retval -eq 0 ] && touch $lockfile
+    return $retval
+}
+# Send TERM, poll up to 10 times one second apart, escalate to KILL, then remove $pid_file and $lockfile.
+stop() {
+    echo -n $"Stopping $prog: "
+    if is_running; then
+        kill `get_pid`
+        for i in 1 2 3 4 5 6 7 8 9 10
+        do
+            if ! is_running; then
+                break
+            fi
+
+            echo -n "."
+            sleep 1
+        done
+        if is_running; then
+            warning
+            echo
+            echo -n "Killing $prog: "
+            kill -9 `get_pid`
+            for i in 1 2 3 4 5 6 7 8 9 10
+            do
+                if ! is_running; then
+                    break
+                fi
+
+                echo -n "."
+                sleep 1
+            done
+        fi
+
+        if is_running; then
+            failure
+            echo
+            retval=1
+        else
+            success
+            echo
+            if [ -f "$pid_file" ]; then
+                rm "$pid_file"
+            fi
+            retval=0
+        fi
+    else
+        warning
+        echo
+        echo "$prog: not running"
+        if [ -f "$pid_file" ]; then
+            rm "$pid_file"
+        fi
+        retval=0
+    fi
+    [ $retval -eq 0 ] && rm -f $lockfile
+    return $retval
+}
+# Stop the service, then start it again.
+restart() {
+    stop
+    start
+}
+# Reload is implemented as a full restart.
+reload() {
+    restart
+}
+# Forced reload is likewise implemented as a full restart.
+force_reload() {
+    restart
+}
+# Report service state through the exit status of is_running.
+rh_status() {
+    is_running
+}
+# Same as rh_status with stdout and stderr discarded.
+rh_status_q() {
+    rh_status >/dev/null 2>&1
+}
+
+# Dispatch on the requested action; start and stop exit 0 early when already in the target state.
+case "$1" in
+    start)
+        rh_status_q && exit 0
+        $1
+        ;;
+    stop)
+        rh_status_q || exit 0
+        $1
+        ;;
+    restart)
+        $1
+        ;;
+    reload)
+        rh_status_q || exit 7
+        $1
+        ;;
+    force-reload)
+        force_reload
+        ;;
+    status)
+        rh_status
+        ;;
+    condrestart|try-restart)
+        rh_status_q || exit 0
+        restart
+        ;;
+    *)
+        echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
+        exit 2
+esac
+exit $?
diff --git a/rpm/scripts/post.sh b/rpm/scripts/post.sh
new file mode 100644
index 00000000..2c74afa9
--- /dev/null
+++ b/rpm/scripts/post.sh
@@ -0,0 +1,9 @@
+# Register the init script with chkconfig and create the log, run and data directories owned by cagg.
+# "mkdir -p" succeeds silently when a directory already exists, so re-running this script is safe.
+/sbin/chkconfig --add cluster-aggregator
+mkdir -p /opt/cluster-aggregator/logs
+chown cagg:cagg /opt/cluster-aggregator/logs
+mkdir -p /var/run/cluster-aggregator
+chown cagg:cagg /var/run/cluster-aggregator
+mkdir -p /opt/cluster-aggregator/data
+chown cagg:cagg /opt/cluster-aggregator/data
diff --git a/rpm/scripts/postun.sh b/rpm/scripts/postun.sh
new file mode 100644
index 00000000..ac046a70
--- /dev/null
+++ b/rpm/scripts/postun.sh
@@ -0,0 +1,5 @@
+# Conditionally restart the service when the first argument is at least 1; always exit 0.
+if [ "$1" -ge 1 ]; then
+    /sbin/service cluster-aggregator condrestart > /dev/null 2>&1
+fi
+exit 0
diff --git a/rpm/scripts/pre.sh b/rpm/scripts/pre.sh
new file mode 100644
index 00000000..61e24e49
--- /dev/null
+++ b/rpm/scripts/pre.sh
@@ -0,0 +1,5 @@
+# Create the cagg system group and user unless getent already finds them.
+getent group cagg >/dev/null || groupadd -r cagg
+getent passwd cagg >/dev/null || \
+    useradd -r -g cagg -d /opt/cluster-aggregator -s /sbin/nologin \
+    -c "Account used for isolation of metrics cluster aggregator" cagg
diff --git a/rpm/scripts/preun.sh b/rpm/scripts/preun.sh
new file mode 100644
index 00000000..0cd9b345
--- /dev/null
+++ b/rpm/scripts/preun.sh
@@ -0,0 +1,6 @@
+# When the first argument is 0, stop the service and remove it from chkconfig; always exit 0.
+if [ "$1" = 0 ]; then
+    /sbin/service cluster-aggregator stop > /dev/null 2>&1
+    /sbin/chkconfig --del cluster-aggregator
+fi
+exit 0