Merge pull request apache#10 from mesosphere/multiple_spark
[SPARK-110] Support multiple spark dispatchers
tnachen committed Mar 14, 2016
2 parents 4064999 + f804bb1 commit b3c6281
Showing 21 changed files with 262 additions and 14 deletions.
2 changes: 1 addition & 1 deletion bin/make-docker.sh
@@ -4,7 +4,7 @@
# ./bin/make-docker.sh <spark-dist-dir> <image>

rm -rf build/docker
mkdir -p build/docker
mkdir -p build/docker/dist
cp -r "$1/." build/docker/dist
cp -r conf/* build/docker/dist/conf
cp -r docker/* build/docker
2 changes: 2 additions & 0 deletions bin/make-package.py
@@ -29,6 +29,8 @@ def main():

output_file.write(template)

print("Package built successfully.")


if __name__ == '__main__':
main()
8 changes: 6 additions & 2 deletions bin/make-universe.sh
@@ -24,5 +24,9 @@ pushd build/spark-universe
./scripts/build.sh
popd

# TODO: remove the docker wrapper once `zip` is available on TC
docker run -v $(pwd)/build/:/build/ ubuntu:latest sh -c "apt-get install -y zip && cd /build/ && zip -r spark-universe.zip spark-universe"
if [ -x "$(command -v zip)" ]; then
zip -r build/spark-universe.zip build/spark-universe
else
# TODO: remove the docker wrapper once `zip` is available on TC
docker run -v $(pwd)/build/:/build/ ubuntu:latest sh -c "apt-get install -y zip && cd /build/ && zip -r spark-universe.zip spark-universe"
fi
4 changes: 2 additions & 2 deletions conf/log4j-dispatcher.properties → conf/log4j.properties
@@ -1,5 +1,5 @@
# Set everything to be logged to the console
log4j.rootCategory=TRACE, console
log4j.rootCategory=INFO, console
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
@@ -9,4 +9,4 @@ log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}:
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
1 change: 1 addition & 0 deletions conf/mesos-cluster-dispatcher.properties.template
@@ -0,0 +1 @@
<HISTORY_SERVER_CONF>
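
Note: the dispatcher run script (docker/runit/service/spark/run, below) fills this placeholder with sed at container start. With the history server enabled and the default framework name "spark", the rendered conf/mesos-cluster-dispatcher.properties would contain roughly:

    spark.mesos.historyServer.url=/service/spark/history

With the history server disabled, the placeholder is replaced by an empty string.
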
15 changes: 10 additions & 5 deletions docker/Dockerfile
@@ -27,7 +27,7 @@ ENV DEBCONF_NONINTERACTIVE_SEEN "true"

# Upgrade package index and install basic commands.
RUN apt-get update && \
apt-get install -y software-properties-common
apt-get install -y software-properties-common runit nginx
RUN add-apt-repository ppa:openjdk-r/ppa
RUN apt-get update && \
apt-get install -y openjdk-8-jdk curl
@@ -39,10 +39,11 @@ ENV HADOOP_CONF_DIR /etc/hadoop
RUN mkdir /etc/hadoop

ADD dist /opt/spark/dist

ADD hdfs-site.xml /etc/hadoop/hdfs-site.xml
ADD core-site.xml /etc/hadoop/core-site.xml
ADD mesos-site.xml /etc/hadoop/mesos-site.xml
ADD hadoop/* /etc/hadoop/
ADD runit/service /var/lib/runit/service
ADD runit/init.sh /sbin/init.sh
ADD nginx /etc/nginx
ADD html /usr/html

#RUN ln -sf /usr/lib/libmesos.so /usr/lib/libmesos-0.23.1.so

@@ -53,4 +54,8 @@ RUN ln -s /bin/bash /usr/bin/bash
RUN ln -s /bin/ls /usr/bin/ls
RUN ln -s /bin/grep /usr/bin/grep

RUN ln -s /var/lib/runit/service/spark /etc/service/spark
RUN ln -s /var/lib/runit/service/nginx /etc/service/nginx

WORKDIR /opt/spark/dist

File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 14 additions & 0 deletions docker/html/index.html
@@ -0,0 +1,14 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
</head>

<body>
<a href="dispatcher">Spark Dispatcher UI</a>
<br/>
<a href="history">Spark History Server</a>
</body>

</html>

36 changes: 36 additions & 0 deletions docker/nginx/conf.d/spark.conf.template
@@ -0,0 +1,36 @@
server {
listen <PORT>;
port_in_redirect off;

server_name "";

root /usr/html;
index index.html;

location /v1 {
proxy_pass <DISPATCHER_URL>;

# Don't try to be intelligent, only proxy
proxy_buffering off;
proxy_redirect off;
proxy_next_upstream off;

# Set long timeout
proxy_connect_timeout 159s;
proxy_send_timeout 600s;
proxy_read_timeout 600s;
}

location /dispatcher/ {
proxy_redirect $scheme://$host $scheme://$host/dispatcher;
proxy_pass <DISPATCHER_UI_URL>/;
proxy_set_header Host $host;
}

location /history/ {
proxy_redirect $scheme://$host $scheme://$host/history;
proxy_pass <HISTORY_SERVER_URL>/;
proxy_set_header Host $host;
}
}
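
Note: the nginx run script (docker/runit/service/nginx/run, below) substitutes <PORT>, <DISPATCHER_URL>, <DISPATCHER_UI_URL>, and <HISTORY_SERVER_URL> with sed at container start. With hypothetical values SPARK_PROXY_PORT=11000, HOST=10.0.0.5, DISPATCHER_PORT=8081, DISPATCHER_UI_PORT=8082, and HISTORY_SERVER_PORT=18080, the key rendered directives would read roughly:

    listen 11000;
    proxy_pass http://10.0.0.5:8081;      # location /v1
    proxy_pass http://10.0.0.5:8082/;     # location /dispatcher/
    proxy_pass http://10.0.0.5:18080/;    # location /history/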

30 changes: 30 additions & 0 deletions docker/nginx/nginx.conf
@@ -0,0 +1,30 @@
worker_processes 1;
daemon off;

events {
worker_connections 1024;
}

error_log /dev/stdout warn;
pid /var/run/nginx.pid;

http {
include /etc/nginx/mime.types;
default_type application/octet-stream;

log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';

access_log /dev/stdout main;

sendfile on;
#tcp_nopush on;

keepalive_timeout 65;

gzip on;

include /etc/nginx/conf.d/*.conf;
}

19 changes: 19 additions & 0 deletions docker/runit/init.sh
@@ -0,0 +1,19 @@
#!/bin/sh
set -e
set -x

export DISPATCHER_PORT="${PORT0}"
export DISPATCHER_UI_PORT="${PORT1}"
export HISTORY_SERVER_PORT="${PORT2}"
export SPARK_PROXY_PORT="${PORT3}"
export WEBUI_URL="http://${FRAMEWORK_NAME}${DNS_SUFFIX}:${SPARK_PROXY_PORT}"
export HISTORY_SERVER_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/history"
export DISPATCHER_UI_WEB_PROXY_BASE="/service/${FRAMEWORK_NAME}/dispatcher"

if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then
ln -s /var/lib/runit/service/history-server /etc/service/history-server
fi

# start service
exec runsvdir -P /etc/service
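
Note: a worked example of the port wiring, with all values hypothetical. If Marathon assigns PORT0=8081, PORT1=8082, PORT2=18080, and PORT3=11000, and the package defaults framework-name=spark and dns-suffix=.marathon.mesos are in effect, the script exports:

    DISPATCHER_PORT=8081
    DISPATCHER_UI_PORT=8082
    HISTORY_SERVER_PORT=18080
    SPARK_PROXY_PORT=11000
    WEBUI_URL=http://spark.marathon.mesos:11000
    HISTORY_SERVER_WEB_PROXY_BASE=/service/spark/history
    DISPATCHER_UI_WEB_PROXY_BASE=/service/spark/dispatcher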

7 changes: 7 additions & 0 deletions docker/runit/service/history-server/log/run
@@ -0,0 +1,7 @@
#!/bin/sh
set -e
set -x

mkdir -p "${MESOS_SANDBOX}"/history-server
exec svlogd "${MESOS_SANDBOX}"/history-server

12 changes: 12 additions & 0 deletions docker/runit/service/history-server/run
@@ -0,0 +1,12 @@
#!/bin/sh
set -e
set -x

exec 2>&1

export APPLICATION_WEB_PROXY_BASE="${HISTORY_SERVER_WEB_PROXY_BASE}"
export SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${HISTORY_SERVER_PORT} -Dspark.history.fs.logDirectory=${HISTORY_LOG_DIR} -Dspark.history.fs.cleaner.enabled=${HISTORY_CLEANER_ENABLED} -Dspark.history.fs.cleaner.interval=${HISTORY_CLEANER_INTERVAL} -Dspark.history.fs.cleaner.maxAge=${HISTORY_CLEANER_MAXAGE}"

cd /opt/spark/dist
exec /opt/spark/dist/bin/spark-class org.apache.spark.deploy.history.HistoryServer
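
Note: with the package/config.json defaults (log-dir hdfs://hdfs/history, cleaner disabled, 1d interval, 7d max age) and a hypothetical HISTORY_SERVER_PORT of 18080, the exported options expand to roughly:

    SPARK_HISTORY_OPTS="-Dspark.history.ui.port=18080 -Dspark.history.fs.logDirectory=hdfs://hdfs/history -Dspark.history.fs.cleaner.enabled=false -Dspark.history.fs.cleaner.interval=1d -Dspark.history.fs.cleaner.maxAge=7d"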

7 changes: 7 additions & 0 deletions docker/runit/service/nginx/log/run
@@ -0,0 +1,7 @@
#!/bin/sh
set -e
set -x

mkdir -p "${MESOS_SANDBOX}"/nginx
exec svlogd "${MESOS_SANDBOX}"/nginx

10 changes: 10 additions & 0 deletions docker/runit/service/nginx/run
@@ -0,0 +1,10 @@
#!/bin/sh
set -e
set -x

exec 2>&1
vars="s,<PORT>,${SPARK_PROXY_PORT},;s,<DISPATCHER_URL>,http://${HOST}:${DISPATCHER_PORT},;s,<DISPATCHER_UI_URL>,http://${HOST}:${DISPATCHER_UI_PORT},;s,<HISTORY_SERVER_URL>,http://${HOST}:${HISTORY_SERVER_PORT},"
sed "${vars}" /etc/nginx/conf.d/spark.conf.template >/etc/nginx/conf.d/spark.conf

exec /usr/sbin/nginx -c /etc/nginx/nginx.conf

7 changes: 7 additions & 0 deletions docker/runit/service/spark/log/run
@@ -0,0 +1,7 @@
#!/bin/sh
set -e
set -x

mkdir -p "${MESOS_SANDBOX}"/spark
exec svlogd "${MESOS_SANDBOX}"/spark

35 changes: 35 additions & 0 deletions docker/runit/service/spark/run
@@ -0,0 +1,35 @@
#!/bin/sh
set -e
set -x

exec 2>&1

export APPLICATION_WEB_PROXY_BASE="${DISPATCHER_UI_WEB_PROXY_BASE}"

cd /opt/spark/dist

if [ "$FRAMEWORK_NAME" != "spark" ]; then
export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_$FRAMEWORK_NAME"
fi

if [ "$SPARK_DISPATCHER_MESOS_ROLE" != "" ]; then
export SPARK_DAEMON_JAVA_OPTS="$SPARK_DAEMON_JAVA_OPTS -Dspark.mesos.role=$SPARK_DISPATCHER_MESOS_ROLE"
fi

HISTORY_SERVER_CONF=""
if [ "${ENABLE_HISTORY_SERVER:=false}" = "true" ]; then
HISTORY_SERVER_CONF="spark.mesos.historyServer.url=${HISTORY_SERVER_WEB_PROXY_BASE}"
fi

sed "s,<HISTORY_SERVER_CONF>,${HISTORY_SERVER_CONF}," \
conf/mesos-cluster-dispatcher.properties.template >conf/mesos-cluster-dispatcher.properties

exec /opt/spark/dist/bin/spark-class \
org.apache.spark.deploy.mesos.MesosClusterDispatcher \
--port "${DISPATCHER_PORT}" \
--webui-port "${DISPATCHER_UI_PORT}" \
--master "${MESOS_MASTER}" \
--zk "${ZK}" \
--host "${HOST}" \
--name "${FRAMEWORK_NAME}" \
--properties-file "conf/mesos-cluster-dispatcher.properties"
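
Note: this script is the core of the multiple-dispatcher support. Each dispatcher with a non-default framework name gets its own ZooKeeper recovery directory, and an optional Mesos role isolates the offers it accepts. For example, with hypothetical values FRAMEWORK_NAME=spark-dev and SPARK_DISPATCHER_MESOS_ROLE=dev, the script appends roughly:

    SPARK_DAEMON_JAVA_OPTS="... -Dspark.deploy.zookeeper.dir=/spark_mesos_dispatcher_spark-dev -Dspark.mesos.role=dev"
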
46 changes: 46 additions & 0 deletions package/config.json
@@ -19,6 +19,43 @@
"description": "Spark Framework Configuration Properties",
"type": "object",
"properties": {
"history-server": {
"description": "Spark-History Server configuration properties",
"type": "object",
"properties": {
"enabled": {
"description": "Set to true to enable the Spark history server.",
"type": "boolean",
"default": false
},
"log-dir": {
"description": "Base directory in which Spark events are logged if spark.history-server.enabled is set to true. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server. Note that this directory must exist prior to installing the Spark package.",
"type": "string",
"default": "hdfs://hdfs/history"
},
"cleaner-enabled": {
"description": "Specifies whether the History Server should periodically clean up event logs from storage.",
"type": "boolean",
"default": false
},
"cleaner-interval": {
"default": "1d",
"description": "How often the job history cleaner checks for files to delete. Files are only deleted if they are older than spark.history-server.cleaner-maxage.",
"type": "string"
},
"cleaner-maxage": {
"default": "7d",
"description": "Job history files older than this will be deleted when the history cleaner runs.",
"type": "string"
}
}
},
"dns-suffix": {
"default": ".marathon.mesos",
"description": "This value is appended to the framework-name value to form the canonical DNS name for the Spark components.",
"type": "string",
"pattern": "^(?:\\.[a-z][a-z0-9]*?(?:-[a-z0-9]+)*)+$"
},
"framework-name": {
"description": "The name of the framework. Until this is configurable, please do not change this from it's default value.",
"type": "string",
@@ -52,6 +89,15 @@
"type": "string",
"default": "root"
},
"mesos-role": {
"description": "Mesos role the Dispatcher will be registered with.",
"type": "string",
"default": "*"
},
"dispatcher-java-opts": {
"description": "Java options set when starting the Spark dispatcher.",
"type": "string"
},
"zookeeper": {
"type": "string",
"description": "URL to the Zookeeper that Spark cluster framework connects to persist state to. (l.e: zk://0.0.0.0:2181)",
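
Note: taken together, these schema additions are what an operator sets at install time to run a second dispatcher. A minimal sketch, assuming the standard DC/OS packaging flow (the options.json file name is arbitrary):

    $ cat options.json
    {
      "spark": {
        "framework-name": "spark-dev",
        "mesos-role": "dev",
        "history-server": {
          "enabled": true,
          "log-dir": "hdfs://hdfs/history"
        }
      }
    }
    $ dcos package install spark --options=options.json
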
21 changes: 17 additions & 4 deletions package/marathon.json.mustache
@@ -3,12 +3,24 @@
"cpus": {{spark.cpus}},
"mem": {{spark.mem}},
"instances": {{spark.instances}},
"cmd": "mv ./conf/log4j-dispatcher.properties ./conf/log4j.properties && ./bin/spark-class org.apache.spark.deploy.mesos.MesosClusterDispatcher --port $PORT0 --webui-port $PORT1 --master {{mesos.master}} --zk {{spark.zookeeper}} --host {{spark.host}} --name {{spark.framework-name}}",
"cmd": "/sbin/init.sh",
"env": {
"APPLICATION_WEB_PROXY_BASE": "/service/spark",
"SPARK_USER": "{{spark.mesos-user}}"
"SPARK_USER": "{{spark.mesos-user}}",
"MESOS_MASTER": "{{mesos.master}}",
"ZK": "{{spark.zookeeper}}",
"FRAMEWORK_NAME": "{{spark.framework-name}}",
"DNS_SUFFIX": "{{spark.dns-suffix}}",
"SPARK_DISPATCHER_MESOS_ROLE": "{{spark.mesos-role}}",
"SPARK_DAEMON_JAVA_OPTS": "{{spark.dispatcher-java-opts}}",
"ENABLE_HISTORY_SERVER": "{{spark.history-server.enabled}}",
"HISTORY_LOG_DIR": "{{spark.history-server.log-dir}}",
"HISTORY_CLEANER_ENABLED": "{{spark.history-server.cleaner-enabled}}",
"HISTORY_CLEANER_INTERVAL": "{{spark.history-server.cleaner-interval}}",
"HISTORY_CLEANER_MAXAGE": "{{spark.history-server.cleaner-maxage}}"
},
"ports": [
0,
0,
0,
0
],
@@ -32,6 +44,7 @@
],
"labels": {
{{#hdfs.config-url}}"SPARK_HDFS_CONFIG_URL": "{{hdfs.config-url}}",{{/hdfs.config-url}}
"SPARK_URI": "{{spark.uri}}"
"SPARK_URI": "{{spark.uri}}",
"DCOS_SERVICE": "http:{{spark.framework-name}}:4"
}
}
