From f1fb64971395110fecb9c862f4bd2ff28ad53c08 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Fri, 12 Feb 2016 15:28:39 +0100 Subject: [PATCH 01/14] add history server and nginx proxy --- bin/make-docker.sh | 2 +- ...dispatcher.properties => log4j.properties} | 3 +- docker/Dockerfile | 15 +++++--- docker/{ => hadoop}/core-site.xml | 0 docker/{ => hadoop}/hdfs-site.xml | 0 docker/{ => hadoop}/mesos-site.xml | 0 docker/html/index.html | 14 ++++++++ docker/nginx/conf.d/spark.conf.template | 36 +++++++++++++++++++ docker/nginx/nginx.conf | 30 ++++++++++++++++ docker/runit/init.sh | 20 +++++++++++ docker/runit/service/history-server/log/run | 7 ++++ docker/runit/service/history-server/run | 16 +++++++++ docker/runit/service/nginx/log/run | 7 ++++ docker/runit/service/nginx/run | 15 ++++++++ docker/runit/service/spark/log/run | 7 ++++ docker/runit/service/spark/run | 22 ++++++++++++ package/config.json | 16 +++++++++ package/marathon.json.mustache | 13 +++++-- 18 files changed, 213 insertions(+), 10 deletions(-) rename conf/{log4j-dispatcher.properties => log4j.properties} (94%) rename docker/{ => hadoop}/core-site.xml (100%) rename docker/{ => hadoop}/hdfs-site.xml (100%) rename docker/{ => hadoop}/mesos-site.xml (100%) create mode 100644 docker/html/index.html create mode 100644 docker/nginx/conf.d/spark.conf.template create mode 100644 docker/nginx/nginx.conf create mode 100755 docker/runit/init.sh create mode 100755 docker/runit/service/history-server/log/run create mode 100755 docker/runit/service/history-server/run create mode 100755 docker/runit/service/nginx/log/run create mode 100755 docker/runit/service/nginx/run create mode 100755 docker/runit/service/spark/log/run create mode 100755 docker/runit/service/spark/run diff --git a/bin/make-docker.sh b/bin/make-docker.sh index 07609fe0e9d2c..02cea6c571f66 100755 --- a/bin/make-docker.sh +++ b/bin/make-docker.sh @@ -4,7 +4,7 @@ # ./bin/make-docker.sh rm -rf build/docker -mkdir -p build/docker +mkdir -p 
build/docker/dist cp -r "$1/." build/docker/dist cp -r conf/* build/docker/dist/conf cp -r docker/* build/docker diff --git a/conf/log4j-dispatcher.properties b/conf/log4j.properties similarity index 94% rename from conf/log4j-dispatcher.properties rename to conf/log4j.properties index 3bbde79fd1c61..0ab7a4a5ad06f 100644 --- a/conf/log4j-dispatcher.properties +++ b/conf/log4j.properties @@ -1,5 +1,5 @@ # Set everything to be logged to the console -log4j.rootCategory=TRACE, console +log4j.rootCategory=INFO, console log4j.appender.console=org.apache.log4j.ConsoleAppender log4j.appender.console.target=System.err log4j.appender.console.layout=org.apache.log4j.PatternLayout @@ -10,3 +10,4 @@ log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO + diff --git a/docker/Dockerfile b/docker/Dockerfile index 7655d42061647..e1989adca21ef 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -27,7 +27,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN "true" # Upgrade package index and install basic commands. 
RUN apt-get update && \ - apt-get install -y software-properties-common + apt-get install -y software-properties-common runit nginx RUN add-apt-repository ppa:openjdk-r/ppa RUN apt-get update && \ apt-get install -y openjdk-8-jdk curl @@ -39,10 +39,11 @@ ENV HADOOP_CONF_DIR /etc/hadoop RUN mkdir /etc/hadoop ADD dist /opt/spark/dist - -ADD hdfs-site.xml /etc/hadoop/hdfs-site.xml -ADD core-site.xml /etc/hadoop/core-site.xml -ADD mesos-site.xml /etc/hadoop/mesos-site.xml +ADD hadoop/* /etc/hadoop/ +ADD runit/service /var/lib/runit/service +ADD runit/init.sh /sbin/init.sh +ADD nginx /etc/nginx +ADD html /usr/html #RUN ln -sf /usr/lib/libmesos.so /usr/lib/libmesos-0.23.1.so @@ -53,4 +54,8 @@ RUN ln -s /bin/bash /usr/bin/bash RUN ln -s /bin/ls /usr/bin/ls RUN ln -s /bin/grep /usr/bin/grep +RUN ln -s /var/lib/runit/service/spark /etc/service/spark +RUN ln -s /var/lib/runit/service/nginx /etc/service/nginx + WORKDIR /opt/spark/dist + diff --git a/docker/core-site.xml b/docker/hadoop/core-site.xml similarity index 100% rename from docker/core-site.xml rename to docker/hadoop/core-site.xml diff --git a/docker/hdfs-site.xml b/docker/hadoop/hdfs-site.xml similarity index 100% rename from docker/hdfs-site.xml rename to docker/hadoop/hdfs-site.xml diff --git a/docker/mesos-site.xml b/docker/hadoop/mesos-site.xml similarity index 100% rename from docker/mesos-site.xml rename to docker/hadoop/mesos-site.xml diff --git a/docker/html/index.html b/docker/html/index.html new file mode 100644 index 0000000000000..47443d83ef424 --- /dev/null +++ b/docker/html/index.html @@ -0,0 +1,14 @@ + + + + + + + + Spark Dispatcher UI +
+ Spark History Server + + + + diff --git a/docker/nginx/conf.d/spark.conf.template b/docker/nginx/conf.d/spark.conf.template new file mode 100644 index 0000000000000..3e592f3725029 --- /dev/null +++ b/docker/nginx/conf.d/spark.conf.template @@ -0,0 +1,36 @@ +server { + listen ; + port_in_redirect off; + + server_name ""; + + root /usr/html; + index index.html; + + location /v1 { + proxy_pass ; + + # Don't try to be intelligent, only proxy + proxy_buffering off; + proxy_redirect off; + proxy_next_upstream off; + + # Set long timeout + proxy_connect_timeout 159s; + proxy_send_timeout 600s; + proxy_read_timeout 600s; + } + + location /ui/ { + proxy_redirect $scheme://$host $scheme://$host/ui; + proxy_pass /; + proxy_set_header Host $host; + } + + location /history/ { + proxy_redirect $scheme://$host $scheme://$host/history; + proxy_pass /; + proxy_set_header Host $host; + } +} + diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf new file mode 100644 index 0000000000000..d9dd49f0fb499 --- /dev/null +++ b/docker/nginx/nginx.conf @@ -0,0 +1,30 @@ +worker_processes 1; +daemon off; + +events { + worker_connections 1024; +} + +error_log /dev/stdout warn; +pid /var/run/nginx.pid; + +http { + include /etc/nginx/mime.types; + default_type application/octet-stream; + + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + access_log /dev/stdout main; + + sendfile on; + #tcp_nopush on; + + keepalive_timeout 65; + + gzip on; + + include /etc/nginx/conf.d/*.conf; +} + diff --git a/docker/runit/init.sh b/docker/runit/init.sh new file mode 100755 index 0000000000000..1cea13031f7fa --- /dev/null +++ b/docker/runit/init.sh @@ -0,0 +1,20 @@ +#!/bin/sh +set -e +set -x + +export DISPATCHER_PORT="${PORT0}" +export DISPATCHER_UI_PORT="${PORT1}" +export HISTORY_SERVER_PORT="${PORT2}" +export SPARK_PROXY_PORT="${PORT3}" +export 
WEBUI_URL="http://${FRAMEWORK_NAME}${DNS_SUFFIX}:${SPARK_PROXY_PORT}" + +# TODO(sur) remove, debug only +env | sort + +if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then + ln -s /var/lib/runit/service/history-server /etc/service/history-server +fi + +# start service +exec runsvdir -P /etc/service + diff --git a/docker/runit/service/history-server/log/run b/docker/runit/service/history-server/log/run new file mode 100755 index 0000000000000..516fee519dbdf --- /dev/null +++ b/docker/runit/service/history-server/log/run @@ -0,0 +1,7 @@ +#!/bin/sh +set -e +set -x + +mkdir -p "${MESOS_SANDBOX}"/history-server +exec svlogd "${MESOS_SANDBOX}"/history-server + diff --git a/docker/runit/service/history-server/run b/docker/runit/service/history-server/run new file mode 100755 index 0000000000000..bf779f24456a2 --- /dev/null +++ b/docker/runit/service/history-server/run @@ -0,0 +1,16 @@ +#!/bin/sh +set -e +set -x + +exec 2>&1 + +export APPLICATION_WEB_PROXY_BASE="/service/spark/history" + +# TODO(sur) remove, debug only +env | sort + +export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR}" + +cd /opt/spark/dist +exec /opt/spark/dist/bin/spark-class org.apache.spark.deploy.history.HistoryServer + diff --git a/docker/runit/service/nginx/log/run b/docker/runit/service/nginx/log/run new file mode 100755 index 0000000000000..ddd89e051b93b --- /dev/null +++ b/docker/runit/service/nginx/log/run @@ -0,0 +1,7 @@ +#!/bin/sh +set -e +set -x + +mkdir -p "${MESOS_SANDBOX}"/nginx +exec svlogd "${MESOS_SANDBOX}"/nginx + diff --git a/docker/runit/service/nginx/run b/docker/runit/service/nginx/run new file mode 100755 index 0000000000000..863bc213d45d4 --- /dev/null +++ b/docker/runit/service/nginx/run @@ -0,0 +1,15 @@ +#!/bin/sh +set -e +set -x + +exec 2>&1 +vars="s,,${SPARK_PROXY_PORT},;s,,http://${HOST}:${DISPATCHER_PORT},;s,,http://${HOST}:${DISPATCHER_UI_PORT},;s,,http://${HOST}:${HISTORY_SERVER_PORT}," + +sed 
"${vars}" /etc/nginx/conf.d/spark.conf.template >/etc/nginx/conf.d/spark.conf + +# TODO(sur) remove, debug only +cat /etc/nginx/conf.d/spark.conf +env | sort + +exec /usr/sbin/nginx -c /etc/nginx/nginx.conf + diff --git a/docker/runit/service/spark/log/run b/docker/runit/service/spark/log/run new file mode 100755 index 0000000000000..98d11d0018391 --- /dev/null +++ b/docker/runit/service/spark/log/run @@ -0,0 +1,7 @@ +#!/bin/sh +set -e +set -x + +mkdir -p "${MESOS_SANDBOX}"/spark +exec svlogd "${MESOS_SANDBOX}"/spark + diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run new file mode 100755 index 0000000000000..796d0958868d2 --- /dev/null +++ b/docker/runit/service/spark/run @@ -0,0 +1,22 @@ +#!/bin/sh +set -e +set -x + +exec 2>&1 + +export APPLICATION_WEB_PROXY_BASE="/service/spark/ui" + +# TODO(sur) remove, debug only +env | sort + +cd /opt/spark/dist +exec /opt/spark/dist/bin/spark-class \ + org.apache.spark.deploy.mesos.MesosClusterDispatcher \ + --port "${DISPATCHER_PORT}" \ + --webui-port "${DISPATCHER_UI_PORT}" \ + --master "${MESOS_MASTER}" \ + --zk "${ZK}" \ + --host "${HOST}" \ + --name "${FRAMEWORK_NAME}" \ + --webui-url "${WEBUI_URL}" + diff --git a/package/config.json b/package/config.json index e51dce991ca55..7194ea21e1b6a 100644 --- a/package/config.json +++ b/package/config.json @@ -19,6 +19,22 @@ "description": "Spark Framework Configuration Properties", "type": "object", "properties": { + "enable-history-server": { + "description": "Set to true to enable the Spark history server.", + "type": "boolean", + "default": false + }, + "history-log-dir": { + "description": "Base directory in which Spark events are logged if enable-history-server is set to true. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server. 
Note that this directory must exist prior to installing the Spark package.", + "type": "string", + "default": "hdfs://hdfs/history" + }, + "dns-suffix": { + "default": ".marathon.mesos", + "description": "This value is appended to the framework-name value to form the canonical DNS name for the Spark components.", + "type": "string", + "pattern": "^(?:\\.[a-z][a-z0-9]*?(?:-[a-z0-9]+)*)+$" + }, "framework-name": { "description": "The name of the framework. Until this is configurable, please do not change this from it's default value.", "type": "string", diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index 262aa264366d0..cb755540428d2 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -3,12 +3,19 @@ "cpus": {{spark.cpus}}, "mem": {{spark.mem}}, "instances": {{spark.instances}}, - "cmd": "mv ./conf/log4j-dispatcher.properties ./conf/log4j.properties && ./bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher --port $PORT0 --webui-port $PORT1 --master {{mesos.master}} --zk {{spark.zookeeper}} --host {{spark.host}} --name {{spark.framework-name}}", + "cmd": "/sbin/init.sh", "env": { - "APPLICATION_WEB_PROXY_BASE": "/service/spark", - "SPARK_USER": "{{spark.mesos-user}}" + "SPARK_USER": "{{spark.mesos-user}}", + "MESOS_MASTER": "{{mesos.master}}", + "ZK": "{{spark.zookeeper}}", + "FRAMEWORK_NAME": "{{spark.framework-name}}", + "DNS_SUFFIX": "{{spark.dns-suffix}}", + "ENABLE_HISTORY_SERVER": "{{spark.enable-history-server}}", + "HISTORY_LOG_DIR": "{{spark.history-log-dir}}" }, "ports": [ + 0, + 0, 0, 0 ], From aade331f143167ac54bff1de76e0190a8504e00b Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 29 Feb 2016 12:22:30 +0100 Subject: [PATCH 02/14] cluster dispatcher: remove webui-url, use properties-file --- conf/mesos-cluster-dispatcher.properties.template | 2 ++ docker/runit/service/spark/run | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 
conf/mesos-cluster-dispatcher.properties.template diff --git a/conf/mesos-cluster-dispatcher.properties.template b/conf/mesos-cluster-dispatcher.properties.template new file mode 100644 index 0000000000000..e9197082c6ffd --- /dev/null +++ b/conf/mesos-cluster-dispatcher.properties.template @@ -0,0 +1,2 @@ +spark.mesos.dispatcher.webui.url= + diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index 796d0958868d2..d59e2e2db6a62 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -10,6 +10,9 @@ export APPLICATION_WEB_PROXY_BASE="/service/spark/ui" env | sort cd /opt/spark/dist + +sed "s,,${WEBUI_URL}," conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties + exec /opt/spark/dist/bin/spark-class \ org.apache.spark.deploy.mesos.MesosClusterDispatcher \ --port "${DISPATCHER_PORT}" \ @@ -18,5 +21,5 @@ exec /opt/spark/dist/bin/spark-class \ --zk "${ZK}" \ --host "${HOST}" \ --name "${FRAMEWORK_NAME}" \ - --webui-url "${WEBUI_URL}" + --properties-file "conf/mesos-cluster-dispatcher.properties" From a36912effe27a9d13ba6c2afd2b2d579a05134cd Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 29 Feb 2016 15:56:21 +0100 Subject: [PATCH 03/14] remove debug entries --- docker/runit/init.sh | 3 --- docker/runit/service/history-server/run | 4 ---- docker/runit/service/nginx/run | 5 ----- docker/runit/service/spark/run | 3 --- 4 files changed, 15 deletions(-) diff --git a/docker/runit/init.sh b/docker/runit/init.sh index 1cea13031f7fa..e1996f832e3df 100755 --- a/docker/runit/init.sh +++ b/docker/runit/init.sh @@ -8,9 +8,6 @@ export HISTORY_SERVER_PORT="${PORT2}" export SPARK_PROXY_PORT="${PORT3}" export WEBUI_URL="http://${FRAMEWORK_NAME}${DNS_SUFFIX}:${SPARK_PROXY_PORT}" -# TODO(sur) remove, debug only -env | sort - if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then ln -s /var/lib/runit/service/history-server /etc/service/history-server fi diff --git 
a/docker/runit/service/history-server/run b/docker/runit/service/history-server/run index bf779f24456a2..207966b9dee5e 100755 --- a/docker/runit/service/history-server/run +++ b/docker/runit/service/history-server/run @@ -5,10 +5,6 @@ set -x exec 2>&1 export APPLICATION_WEB_PROXY_BASE="/service/spark/history" - -# TODO(sur) remove, debug only -env | sort - export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR}" cd /opt/spark/dist diff --git a/docker/runit/service/nginx/run b/docker/runit/service/nginx/run index 863bc213d45d4..676d65adfbf1f 100755 --- a/docker/runit/service/nginx/run +++ b/docker/runit/service/nginx/run @@ -4,12 +4,7 @@ set -x exec 2>&1 vars="s,,${SPARK_PROXY_PORT},;s,,http://${HOST}:${DISPATCHER_PORT},;s,,http://${HOST}:${DISPATCHER_UI_PORT},;s,,http://${HOST}:${HISTORY_SERVER_PORT}," - sed "${vars}" /etc/nginx/conf.d/spark.conf.template >/etc/nginx/conf.d/spark.conf -# TODO(sur) remove, debug only -cat /etc/nginx/conf.d/spark.conf -env | sort - exec /usr/sbin/nginx -c /etc/nginx/nginx.conf diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index d59e2e2db6a62..1dacb00741729 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -6,9 +6,6 @@ exec 2>&1 export APPLICATION_WEB_PROXY_BASE="/service/spark/ui" -# TODO(sur) remove, debug only -env | sort - cd /opt/spark/dist sed "s,,${WEBUI_URL}," conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties From 56085f14643c3e19f7f105018fe8fc42cdf6805d Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Thu, 3 Mar 2016 14:11:18 +0100 Subject: [PATCH 04/14] configure history server url --- conf/mesos-cluster-dispatcher.properties.template | 1 + docker/runit/service/spark/run | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/conf/mesos-cluster-dispatcher.properties.template b/conf/mesos-cluster-dispatcher.properties.template index 
e9197082c6ffd..4ff2dcf0a2b34 100644 --- a/conf/mesos-cluster-dispatcher.properties.template +++ b/conf/mesos-cluster-dispatcher.properties.template @@ -1,2 +1,3 @@ spark.mesos.dispatcher.webui.url= +spark.mesos.historyServer.url= diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index 1dacb00741729..aede094ab9725 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -8,7 +8,8 @@ export APPLICATION_WEB_PROXY_BASE="/service/spark/ui" cd /opt/spark/dist -sed "s,,${WEBUI_URL}," conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties +sed "s,,${WEBUI_URL},;s,,/service/spark/history," \ + conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties exec /opt/spark/dist/bin/spark-class \ org.apache.spark.deploy.mesos.MesosClusterDispatcher \ From 179c2dbf3d1fec4d12590de2bf4919ff52c3ab98 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 7 Mar 2016 12:43:37 +0100 Subject: [PATCH 05/14] history server: configure cleaner --- docker/runit/service/history-server/run | 2 +- package/config.json | 15 +++++++++++++++ package/marathon.json.mustache | 5 ++++- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docker/runit/service/history-server/run b/docker/runit/service/history-server/run index 207966b9dee5e..9075ffbd31026 100755 --- a/docker/runit/service/history-server/run +++ b/docker/runit/service/history-server/run @@ -5,7 +5,7 @@ set -x exec 2>&1 export APPLICATION_WEB_PROXY_BASE="/service/spark/history" -export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR}" +export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR} -Dspark.history.fs.cleaner.enabled=${HISTORY_CLEANER_ENABLED} -Dspark.history.fs.cleaner.interval=${HISTORY_CLEANER_INTERVAL} 
-Dspark.history.fs.cleaner.maxAge=${HISTORY_CLEANER_MAXAGE}" cd /opt/spark/dist exec /opt/spark/dist/bin/spark-class org.apache.spark.deploy.history.HistoryServer diff --git a/package/config.json b/package/config.json index 7194ea21e1b6a..44cf8cada6e98 100644 --- a/package/config.json +++ b/package/config.json @@ -29,6 +29,21 @@ "type": "string", "default": "hdfs://hdfs/history" }, + "history-cleaner-enabled": { + "description": "Specifies whether the History Server should periodically clean up event logs from storage.", + "type": "boolean", + "default": false + }, + "history-cleaner-interval": { + "default": "1d", + "description": "How often the job history cleaner checks for files to delete. Files are only deleted if they are older than history-cleaner-maxage.", + "type": "string" + }, + "history-cleaner-maxage": { + "default": "7d", + "description": "Job history files older than this will be deleted when the history cleaner runs.", + "type": "string" + }, "dns-suffix": { "default": ".marathon.mesos", "description": "This value is appended to the framework-name value to form the canonical DNS name for the Spark components.", diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index cb755540428d2..229bda8044f02 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -11,7 +11,10 @@ "FRAMEWORK_NAME": "{{spark.framework-name}}", "DNS_SUFFIX": "{{spark.dns-suffix}}", "ENABLE_HISTORY_SERVER": "{{spark.enable-history-server}}", - "HISTORY_LOG_DIR": "{{spark.history-log-dir}}" + "HISTORY_LOG_DIR": "{{spark.history-log-dir}}", + "HISTORY_CLEANER_ENABLED": "{{spark.history-cleaner-enabled}}", + "HISTORY_CLEANER_INTERVAL": "{{spark.history-cleaner-interval}}", + "HISTORY_CLEANER_MAXAGE": "{{spark.history-cleaner-maxage}}" }, "ports": [ 0, From 919bdbef00986cdf4167d6f7a7efcd3fe245b66b Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 10:57:40 +0100 Subject: [PATCH 06/14] dispatcher/history: use 
FRAMEWORK_NAME in web proxy base settings --- docker/runit/init.sh | 2 ++ docker/runit/service/history-server/run | 2 +- docker/runit/service/spark/run | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docker/runit/init.sh b/docker/runit/init.sh index e1996f832e3df..64312bf017e88 100755 --- a/docker/runit/init.sh +++ b/docker/runit/init.sh @@ -7,6 +7,8 @@ export DISPATCHER_UI_PORT="${PORT1}" export HISTORY_SERVER_PORT="${PORT2}" export SPARK_PROXY_PORT="${PORT3}" export WEBUI_URL="http://${FRAMEWORK_NAME}${DNS_SUFFIX}:${SPARK_PROXY_PORT}" +export HISTORY_SERVER_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/history" +export DISPATCHER_UI_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/ui" if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then ln -s /var/lib/runit/service/history-server /etc/service/history-server diff --git a/docker/runit/service/history-server/run b/docker/runit/service/history-server/run index 9075ffbd31026..b9a5d80bbd0a7 100755 --- a/docker/runit/service/history-server/run +++ b/docker/runit/service/history-server/run @@ -4,7 +4,7 @@ set -x exec 2>&1 -export APPLICATION_WEB_PROXY_BASE="/service/spark/history" +export APPLICATION_WEB_PROXY_BASE="${HISTORY_SERVER_WEB_PROXY_BASE}" export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR} -Dspark.history.fs.cleaner.enabled=${HISTORY_CLEANER_ENABLED} -Dspark.history.fs.cleaner.interval=${HISTORY_CLEANER_INTERVAL} -Dspark.history.fs.cleaner.maxAge=${HISTORY_CLEANER_MAXAGE}" cd /opt/spark/dist diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index aede094ab9725..0348db22c3362 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -4,11 +4,11 @@ set -x exec 2>&1 -export APPLICATION_WEB_PROXY_BASE="/service/spark/ui" +export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}" cd /opt/spark/dist -sed "s,,${WEBUI_URL},;s,,/service/spark/history," \ +sed 
"s,,${WEBUI_URL},;s,,${HISTORY_SERVER_WEB_PROXY_BASE}," \ conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties exec /opt/spark/dist/bin/spark-class \ From 763987dff8f02300c8a157ee65f588e38355f0db Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 11:05:28 +0100 Subject: [PATCH 07/14] remove newlines --- conf/log4j.properties | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/conf/log4j.properties b/conf/log4j.properties index 0ab7a4a5ad06f..af023283100f6 100644 --- a/conf/log4j.properties +++ b/conf/log4j.properties @@ -9,5 +9,4 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: log4j.logger.org.eclipse.jetty=WARN log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO -log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO - +log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO \ No newline at end of file From 7217a92ae03f3eb3b785d56acb56b4de25ebe670 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 11:53:06 +0100 Subject: [PATCH 08/14] history-server: separate configuration namespace --- package/config.json | 54 +++++++++++++++++++--------------- package/marathon.json.mustache | 10 +++---- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/package/config.json b/package/config.json index 44cf8cada6e98..7f83ba96f1298 100644 --- a/package/config.json +++ b/package/config.json @@ -19,30 +19,36 @@ "description": "Spark Framework Configuration Properties", "type": "object", "properties": { - "enable-history-server": { - "description": "Set to true to enable the Spark history server.", - "type": "boolean", - "default": false - }, - "history-log-dir": { - "description": "Base directory in which Spark events are logged if enable-history-server is set to true. 
Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server. Note that this directory must exist prior to installing the Spark package.", - "type": "string", - "default": "hdfs://hdfs/history" - }, - "history-cleaner-enabled": { - "description": "Specifies whether the History Server should periodically clean up event logs from storage.", - "type": "boolean", - "default": false - }, - "history-cleaner-interval": { - "default": "1d", - "description": "How often the job history cleaner checks for files to delete. Files are only deleted if they are older than history-cleaner-maxage.", - "type": "string" - }, - "history-cleaner-maxage": { - "default": "7d", - "description": "Job history files older than this will be deleted when the history cleaner runs.", - "type": "string" + "history-server": { + "description": "Spark-History Server configuration properties", + "type": "object", + "properties": { + "enabled": { + "description": "Set to true to enable the Spark history server.", + "type": "boolean", + "default": false + }, + "log-dir": { + "description": "Base directory in which Spark events are logged if spark.history-server.enabled is set to true. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server. Note that this directory must exist prior to installing the Spark package.", + "type": "string", + "default": "hdfs://hdfs/history" + }, + "cleaner-enabled": { + "description": "Specifies whether the History Server should periodically clean up event logs from storage.", + "type": "boolean", + "default": false + }, + "cleaner-interval": { + "default": "1d", + "description": "How often the job history cleaner checks for files to delete. 
Files are only deleted if they are older than spark.history-server.cleaner-maxage.", + "type": "string" + }, + "cleaner-maxage": { + "default": "7d", + "description": "Job history files older than this will be deleted when the history cleaner runs.", + "type": "string" + } + } }, "dns-suffix": { "default": ".marathon.mesos", diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index 229bda8044f02..c1bef81c83b9d 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -10,11 +10,11 @@ "ZK": "{{spark.zookeeper}}", "FRAMEWORK_NAME": "{{spark.framework-name}}", "DNS_SUFFIX": "{{spark.dns-suffix}}", - "ENABLE_HISTORY_SERVER": "{{spark.enable-history-server}}", - "HISTORY_LOG_DIR": "{{spark.history-log-dir}}", - "HISTORY_CLEANER_ENABLED": "{{spark.history-cleaner-enabled}}", - "HISTORY_CLEANER_INTERVAL": "{{spark.history-cleaner-interval}}", - "HISTORY_CLEANER_MAXAGE": "{{spark.history-cleaner-maxage}}" + "ENABLE_HISTORY_SERVER": "{{spark.history-server.enabled}}", + "HISTORY_LOG_DIR": "{{spark.history-server.log-dir}}", + "HISTORY_CLEANER_ENABLED": "{{spark.history-server.cleaner-enabled}}", + "HISTORY_CLEANER_INTERVAL": "{{spark.history-server.cleaner-interval}}", + "HISTORY_CLEANER_MAXAGE": "{{spark.history-server.cleaner-maxage}}" }, "ports": [ 0, From 97fa75cccb7d9334c69082a455c4689c319ee8a4 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 11:56:26 +0100 Subject: [PATCH 09/14] dispatcher: configure /dispatcher as root URL --- docker/html/index.html | 2 +- docker/nginx/conf.d/spark.conf.template | 4 ++-- docker/runit/init.sh | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/html/index.html b/docker/html/index.html index 47443d83ef424..cad9865d5a14a 100644 --- a/docker/html/index.html +++ b/docker/html/index.html @@ -5,7 +5,7 @@ - Spark Dispatcher UI + Spark Dispatcher UI
Spark History Server diff --git a/docker/nginx/conf.d/spark.conf.template b/docker/nginx/conf.d/spark.conf.template index 3e592f3725029..4616713d80a56 100644 --- a/docker/nginx/conf.d/spark.conf.template +++ b/docker/nginx/conf.d/spark.conf.template @@ -21,8 +21,8 @@ server { proxy_read_timeout 600s; } - location /ui/ { - proxy_redirect $scheme://$host $scheme://$host/ui; + location /dispatcher/ { + proxy_redirect $scheme://$host $scheme://$host/dispatcher; proxy_pass /; proxy_set_header Host $host; } diff --git a/docker/runit/init.sh b/docker/runit/init.sh index 64312bf017e88..4eb944f7e1d52 100755 --- a/docker/runit/init.sh +++ b/docker/runit/init.sh @@ -8,7 +8,7 @@ export HISTORY_SERVER_PORT="${PORT2}" export SPARK_PROXY_PORT="${PORT3}" export WEBUI_URL="http://${FRAMEWORK_NAME}${DNS_SUFFIX}:${SPARK_PROXY_PORT}" export HISTORY_SERVER_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/history" -export DISPATCHER_UI_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/ui" +export DISPATCHER_UI_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/dispatcher" if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then ln -s /var/lib/runit/service/history-server /etc/service/history-server From 333653522594813117a02f4029268d59f4aef612 Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 12:01:29 +0100 Subject: [PATCH 10/14] configure DCOS_SERVICE --- package/marathon.json.mustache | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index c1bef81c83b9d..dcb1f6471b8de 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -42,6 +42,7 @@ ], "labels": { {{#hdfs.config-url}}"SPARK_HDFS_CONFIG_URL": "{{hdfs.config-url}}",{{/hdfs.config-url}} - "SPARK_URI": "{{spark.uri}}" + "SPARK_URI": "{{spark.uri}}", + "DCOS_SERVICE": "http:spark:4" } } From 76e3cf925c5990dd4a32a1e84ac0af7f5f2016ac Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 17:33:19 +0100 Subject: 
[PATCH 11/14] use spark.framework-name for DCOS_SERVICE --- package/marathon.json.mustache | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index dcb1f6471b8de..fc4fe30e4b683 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -43,6 +43,6 @@ "labels": { {{#hdfs.config-url}}"SPARK_HDFS_CONFIG_URL": "{{hdfs.config-url}}",{{/hdfs.config-url}} "SPARK_URI": "{{spark.uri}}", - "DCOS_SERVICE": "http:spark:4" + "DCOS_SERVICE": "http:{{spark.framework-name}}:4" } } From 538afac895f4f479aaecdd3ef70131b5431f0faf Mon Sep 17 00:00:00 2001 From: Sergiusz Urbaniak Date: Mon, 14 Mar 2016 17:39:09 +0100 Subject: [PATCH 12/14] better history server config --- conf/mesos-cluster-dispatcher.properties.template | 2 +- docker/runit/service/spark/run | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/conf/mesos-cluster-dispatcher.properties.template b/conf/mesos-cluster-dispatcher.properties.template index 4ff2dcf0a2b34..73af653b23e0c 100644 --- a/conf/mesos-cluster-dispatcher.properties.template +++ b/conf/mesos-cluster-dispatcher.properties.template @@ -1,3 +1,3 @@ spark.mesos.dispatcher.webui.url= -spark.mesos.historyServer.url= + diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index 0348db22c3362..2c4f1497ca744 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -8,7 +8,12 @@ export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}" cd /opt/spark/dist -sed "s,,${WEBUI_URL},;s,,${HISTORY_SERVER_WEB_PROXY_BASE}," \ +HISTORY_SERVER_CONF="" +if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then + HISTORY_SERVER_CONF="spark.mesos.historyServer.url=${HISTORY_SERVER_WEB_PROXY_BASE}" +fi + +sed "s,,${WEBUI_URL},;s,,${HISTORY_SERVER_CONF}," \ conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties exec /opt/spark/dist/bin/spark-class \ From 
903abaa119414e33d6024356f1ba2d216d0bb9e5 Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Mon, 29 Feb 2016 16:39:28 -0800 Subject: [PATCH 13/14] Set zookeeper deploy dir to allow multiple spark dispatchers. --- bin/make-package.py | 2 ++ conf/mesos-cluster-dispatcher.properties.template | 2 -- docker/runit/service/spark/run | 11 +++++++++-- package/config.json | 9 +++++++++ package/marathon.json.mustache | 2 ++ 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/bin/make-package.py b/bin/make-package.py index 648e8fc85ccae..10a2a5c481333 100755 --- a/bin/make-package.py +++ b/bin/make-package.py @@ -29,6 +29,8 @@ def main(): output_file.write(template) + print("Package built successfully.") + if __name__ == '__main__': main() diff --git a/conf/mesos-cluster-dispatcher.properties.template b/conf/mesos-cluster-dispatcher.properties.template index 73af653b23e0c..27c4da90b215c 100644 --- a/conf/mesos-cluster-dispatcher.properties.template +++ b/conf/mesos-cluster-dispatcher.properties.template @@ -1,3 +1 @@ -spark.mesos.dispatcher.webui.url= - diff --git a/docker/runit/service/spark/run b/docker/runit/service/spark/run index 2c4f1497ca744..a92dbd7fcd020 100755 --- a/docker/runit/service/spark/run +++ b/docker/runit/service/spark/run @@ -8,12 +8,20 @@ export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}" cd /opt/spark/dist +if [ "$FRAMEWORK_NAME" != "spark" ]; then + export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_$FRAMEWORK_NAME" +fi + +if [ "$SPARK_DISPATCHER_MESOS_ROLE" != "" ]; then + export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.role=$SPARK_DISPATCHER_MESOS_ROLE" +fi + HISTORY_SERVER_CONF="" if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then HISTORY_SERVER_CONF="spark.mesos.historyServer.url=${HISTORY_SERVER_WEB_PROXY_BASE}" fi -sed "s,,${WEBUI_URL},;s,,${HISTORY_SERVER_CONF}," \ +sed "s,,${HISTORY_SERVER_CONF}," \ 
conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties exec /opt/spark/dist/bin/spark-class \ @@ -25,4 +33,3 @@ exec /opt/spark/dist/bin/spark-class \ --host "${HOST}" \ --name "${FRAMEWORK_NAME}" \ --properties-file "conf/mesos-cluster-dispatcher.properties" - diff --git a/package/config.json b/package/config.json index 7f83ba96f1298..3ea7a8a6fb438 100644 --- a/package/config.json +++ b/package/config.json @@ -89,6 +89,15 @@ "type": "string", "default": "root" }, + "mesos-role": { + "description": "Mesos role the Dispatcher will be registered with.", + "type": "string", + "default": "*" + }, + "dispatcher-java-opts": { + "description": "Java options set when starting the Spark dispatcher.", + "type": "string" + }, "zookeeper": { "type": "string", "description": "URL to the Zookeeper that Spark cluster framework connects to persist state to. (l.e: zk://0.0.0.0:2181)", diff --git a/package/marathon.json.mustache b/package/marathon.json.mustache index fc4fe30e4b683..f4e7fbb70989d 100644 --- a/package/marathon.json.mustache +++ b/package/marathon.json.mustache @@ -10,6 +10,8 @@ "ZK": "{{spark.zookeeper}}", "FRAMEWORK_NAME": "{{spark.framework-name}}", "DNS_SUFFIX": "{{spark.dns-suffix}}", + "SPARK_DISPATCHER_MESOS_ROLE": "{{spark.mesos-role}}", + "SPARK_DAEMON_JAVA_OPTS": "{{spark.dispatcher-java-opts}}", "ENABLE_HISTORY_SERVER": "{{spark.history-server.enabled}}", "HISTORY_LOG_DIR": "{{spark.history-server.log-dir}}", "HISTORY_CLEANER_ENABLED": "{{spark.history-server.cleaner-enabled}}", From f804bb1154154eedf60e3e34fbee6059e8e6541f Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Mon, 14 Mar 2016 13:49:54 -0700 Subject: [PATCH 14/14] Only run zip in docker if it's not available. 
---
 bin/make-universe.sh | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/bin/make-universe.sh b/bin/make-universe.sh
index 938f58c062be9..45116522d08fd 100755
--- a/bin/make-universe.sh
+++ b/bin/make-universe.sh
@@ -24,5 +24,10 @@ pushd build/spark-universe
 ./scripts/build.sh
 popd
 
-# TODO: remove the docker wrapper once `zip` is available on TC
-docker run -v $(pwd)/build/:/build/ ubuntu:latest sh -c "apt-get install -y zip && cd /build/ && zip -r spark-universe.zip spark-universe"
+if [ -x "$(command -v zip)" ]; then
+  # Zip from inside build/ so entries are rooted at spark-universe/, matching the docker fallback below.
+  (cd build && zip -r spark-universe.zip spark-universe)
+else
+  # TODO: remove the docker wrapper once `zip` is available on TC
+  docker run -v "$(pwd)/build/:/build/" ubuntu:latest sh -c "apt-get update && apt-get install -y zip && cd /build/ && zip -r spark-universe.zip spark-universe"
+fi