diff --git a/5-run-spark-mesos-client-ipython.sh b/5-run-spark-mesos-client-ipython.sh index 3f9d36d..ff86f42 100755 --- a/5-run-spark-mesos-client-ipython.sh +++ b/5-run-spark-mesos-client-ipython.sh @@ -48,10 +48,10 @@ __dns=$(dns_detect) echo "starting $__image..." __container=$(docker run -d \ --net="host" \ --env "SPARK_MASTER=$__spark_master" \ --env "SPARK_BINARY=$__spark_binary" \ - --env "SPARK_RAM_DRIVER=8G" \ - --env "SPARK_RAM_WORKER=4G" \ + --env "SPARK_RAM_DRIVER=64G" \ + --env "SPARK_RAM_WORKER=8G" \ --env "CONTAINER_USER=$__spark_user" \ --volume=$__host_dir_hadoop_conf:/etc/hadoop/conf \ --volume=$__host_dir_hive_conf:/etc/hive/conf \ diff --git a/dockerfiles/lab41/spark-mesos-client-ipython/Dockerfile b/dockerfiles/lab41/spark-mesos-client-ipython/Dockerfile index 6a5c600..91b486f 100644 --- a/dockerfiles/lab41/spark-mesos-client-ipython/Dockerfile +++ b/dockerfiles/lab41/spark-mesos-client-ipython/Dockerfile @@ -24,8 +24,8 @@ RUN apt-get install --assume-yes libstdc++6 RUN pip install --upgrade pyzmq # update spark libraries latest standalone install -RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.3.1-bin-hadoop2.6.tgz | tar -xz -C /usr/local/ && \ - cd /usr/local && rm spark && ln -s spark-1.3.1-bin-hadoop2.6 spark && \ +RUN curl http://d3kbcqa49mib13.cloudfront.net/spark-1.4.1-bin-hadoop2.4.tgz | tar -xz -C /usr/local/ && \ + cd /usr/local && rm spark && ln -s spark-1.4.1-bin-hadoop2.4 spark && \ rm /usr/bin/spark-shell && \ ln --symbolic /usr/local/spark/bin/spark-shell /usr/bin/spark-shell @@ -48,3 +48,9 @@ RUN export CONFIGURE_OPTS=--disable-audit && \ # add mesos-specific pyspark shell.py ADD config/pyspark/shell.py $SPARK_HOME/python/pyspark/shell.py + +# expose the IPython notebook port +EXPOSE 8888 + +# add data volume +VOLUME ["/data"] diff --git a/dockerfiles/lab41/spark-mesos-client-ipython/config/pyspark/shell.py b/dockerfiles/lab41/spark-mesos-client-ipython/config/pyspark/shell.py index
9292d83..e76e8ed 100644 --- a/dockerfiles/lab41/spark-mesos-client-ipython/config/pyspark/shell.py +++ b/dockerfiles/lab41/spark-mesos-client-ipython/config/pyspark/shell.py @@ -40,6 +40,35 @@ from pyspark.sql import SQLContext, HiveContext from pyspark.storagelevel import StorageLevel + +# find an available port for SparkUI +def ui_get_available_port(): + import socket; + + # default UI host/port + host = "127.0.0.1" + port = 4040 + + # connect_ex() result; 0 means the connect succeeded, i.e. the port is already taken + check = notfound = 0 + + # find the first available unoccupied port + while (check==notfound): + + # check if available + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + check = sock.connect_ex((host, port)) + # close the probe socket so each attempt does not leak a descriptor + sock.close() + + # try again if port unavailable + if check == notfound: + port += 1 + + # return the first available port + return port + + # this is the deprecated equivalent of ADD_JARS add_files = None if os.environ.get("ADD_FILES") is not None: @@ -50,10 +79,22 @@ # setup mesos-based connection conf = (SparkConf() - .setMaster(os.environ["SPARK_MASTER"]) - .set("spark.executor.uri", os.environ["SPARK_BINARY"]) - .set("spark.driver.memory", os.environ["SPARK_RAM_DRIVER"]) - .set("spark.executor.memory", os.environ["SPARK_RAM_WORKER"])) + .setMaster(os.environ["SPARK_MASTER"])) + +# optionally set memory limits +if os.environ.get("SPARK_RAM_DRIVER"): + conf.set("spark.driver.memory", os.environ["SPARK_RAM_DRIVER"]) +if os.environ.get("SPARK_RAM_WORKER"): + conf.set("spark.executor.memory", os.environ["SPARK_RAM_WORKER"]) + +# set the UI port +conf.set("spark.ui.port", ui_get_available_port()) + +# optionally set the Spark binary +if os.environ.get("SPARK_BINARY"): + conf.set("spark.executor.uri", os.environ["SPARK_BINARY"]) + +# establish config-based context sc = SparkContext(appName="DockerIPythonShell", pyFiles=add_files, conf=conf) atexit.register(lambda: sc.stop()) diff --git a/runtime/ipython/notebooks/Example_Pi.ipynb b/runtime/ipython/notebooks/Example_Pi.ipynb index 82b5f2e..37ef03d 100644 ---
a/runtime/ipython/notebooks/Example_Pi.ipynb +++ b/runtime/ipython/notebooks/Example_Pi.ipynb @@ -45,26 +45,7 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "# update SparkContext to use HDFS Spark binary\n", - "# TODO: setup this portion to configure on server startup\n", - "from pyspark import SparkConf, SparkContext\n", - "sc.stop()\n", - "conf = (SparkConf()\n", - " .setMaster(\"mesos://mesos-master-fqdn:5050\")\n", - " .setAppName(\"Calculate Pi\")\n", - " .set(\"spark.executor.uri\", \"hdfs:///spark/spark-1.3.0-bin-hadoop2.4.tgz\"))\n", - "sc = SparkContext(conf = conf)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": { "collapsed": false }, @@ -73,7 +54,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Pi is roughly 3.036000\n" + "Pi is roughly 3.068000\n" ] } ], @@ -90,6 +71,28 @@ "count = sc.parallelize(xrange(0, NUM_SAMPLES)).map(sample).reduce(lambda a, b: a + b)\n", "print \"Pi is roughly %f\" % (4.0 * count / NUM_SAMPLES)" ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "u'4041'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "conf.get(\"spark.ui.port\")" + ] } ], "metadata": { diff --git a/runtime/ipython/profile_spark/static/base/images/logo.png b/runtime/ipython/profile_spark/static/base/images/logo.png new file mode 100644 index 0000000..1a41d4d Binary files /dev/null and b/runtime/ipython/profile_spark/static/base/images/logo.png differ diff --git a/runtime/ipython/profile_spark/static/custom/custom.css b/runtime/ipython/profile_spark/static/custom/custom.css index 9f4abda..c7d1a49 100644 --- a/runtime/ipython/profile_spark/static/custom/custom.css +++ b/runtime/ipython/profile_spark/static/custom/custom.css @@ -1,7 +1,138 @@ /* -Placeholder for 
custom user CSS -mainly to be overridden in profile/static/custom/custom.css + Theme modified from Base16 harmonic16 Dark by Jannik Siebert (https://github.com/janniks) -This will always be an empty file in IPython -*/ \ No newline at end of file + CodeMirror template adapted for IPython Notebook by Nikhil Sonnad (https://github.com/nsonnad/base16-ipython-notebook) + CodeMirror template by Jan T. Sott (https://github.com/idleberg/base16-chrome-devtools) + Original Base16 color scheme by Chris Kempson (https://github.com/chriskempson/base16) + +*/ + +/* Uncomment to use a custom font +div#notebook, div.CodeMirror, div.output_area pre, div.output_wrapper, div.prompt { + font-family: 'Custom Font Name', monospace !important; +} +*/ + +#notebook img{ /* every image under #notebook becomes a 500x115 block with logo.png as its background */ + display:block; + background: url(/static/custom/logo.png) no-repeat; + width: 500px; + height: 115px; + padding-left: 233px; + -moz-box-sizing: border-box; + box-sizing: border-box; +} + +.notebook_app { /* same #222 backdrop as body */ + background-color: #222; +} + +/* GLOBALS */ +body {background-color: #222;color: #FFF} +a {color: #FF8282;} + +/* INTRO PAGE */ +.toolbar_info, .list_container { + color: #e5ebf1; +} + +#running .panel-group .panel .panel-heading, +.panel-heading, +.list_header { /* list and panel headings share the header's #0E3A42 background */ + background-color: #0E3A42; +} + +#header { + background-color: #0E3A42; + border-bottom: 1px solid maroon; + padding: 10px 0; +} + +#header .header-bar { /* 1px transparent strip; the -1px bottom margin cancels its height */ + width: 100%; + height: 1px; + background: transparent; + margin-bottom: -1px; +} + +.panel { + background-color: transparent; +} + +#notebook-container { /* light #EEE sheet on top of the #222 page */ + background-color: #EEE; +} + +.checkpoint_status, +.autosave_status { + color: #AAA; + font-size: 100% !
important; +} + +#kernel_logo_widget { + display: none; +} + +/* NOTEBOOK */ + +/* the rule below is disabled, so the toolbar and header stay visible; uncomment it to hide them */ +/*div#maintoolbar, div#header {display: none !important;}*/ +div#notebook {border-top: none;} + +div.input_prompt {color: #bf5656;} +div.output_prompt {color: #bf568b;} +div.input_area { + border-radius: 0px; + border: 1px solid #405c79; +} +div.output_area pre {font-weight: normal; color: #3F474F;} +div.output_subarea {font-weight: normal; color: #3F474F;} + +.rendered_html table, .rendered_html th, .rendered_html tr, .rendered_html td { + border: 1px #3F474F solid; + color: #3F474F; +} +div.output_html { font-family: sans-serif; } +table.dataframe tr {border: 1px solid #3F474F;} + +div.cell.selected {border-radius: 0px;} +div.cell.edit_mode {border-radius: 0px; border: thin solid #bf568b;} +div.text_cell_render, div.output_html {color: #3F474F;} + +span.ansiblack {color: #223b54;} +span.ansiblue {color: #568bbf;} +span.ansigray {color: #aabcce;} +span.ansigreen {color: #56bf8b;} +span.ansipurple {color: #bf568b;} +span.ansired {color: #bf8b56;} +span.ansiyellow {color: #8bbf56;} + +div.output_stderr {background-color: #bf8b56;} +div.output_stderr pre {color: #e5ebf1;} + +.cm-s-ipython.CodeMirror {background: #0b1c2c; color: #e5ebf1;} +.cm-s-ipython div.CodeMirror-selected {background: #223b54 !important;} +.cm-s-ipython .CodeMirror-gutters {background: #0b1c2c; border-right: 0px;} +.cm-s-ipython .CodeMirror-linenumber {color: #627e99;} +.cm-s-ipython .CodeMirror-cursor {border-left: 1px solid #aabcce !important;} + +.cm-s-ipython span.cm-comment {color: #bf5656;} +.cm-s-ipython span.cm-atom {color: #bf568b;} +.cm-s-ipython span.cm-number {color: #bf568b;} + +.cm-s-ipython span.cm-property, .cm-s-ipython span.cm-attribute {color: #56bf8b;} +.cm-s-ipython span.cm-keyword {color: #bf8b56;} +.cm-s-ipython span.cm-string {color: #8bbf56;} +.cm-s-ipython span.cm-operator {color: #bf5656;} +.cm-s-ipython span.cm-builtin {color: #bf568b;} +
+.cm-s-ipython span.cm-variable {color: #56bf8b;} +.cm-s-ipython span.cm-variable-2 {color: #8b56bf;} +.cm-s-ipython span.cm-def {color: #bfbf56;} +.cm-s-ipython span.cm-error {background: #bf8b56; color: #aabcce;} /* the only token rule here that also sets a background */ +.cm-s-ipython span.cm-bracket {color: #3F474F;} +.cm-s-ipython span.cm-tag {color: #bf8b56;} +.cm-s-ipython span.cm-link {color: #bf568b;} + +.cm-s-ipython .CodeMirror-matchingbracket { text-decoration: underline; color: #e5ebf1 !important;} /* matched brackets: underlined, color forced with !important */