Merge branch 'master' into versioning/get-note-revision-api
khalidhuseynov committed Jul 19, 2016
2 parents 683b481 + 473dc72 commit f1ab994
Showing 95 changed files with 3,018 additions and 2,028 deletions.
8 changes: 6 additions & 2 deletions .travis.yml
@@ -33,9 +33,13 @@ addons:

matrix:
include:
# Test all modules
# Test all modules with scala 2.10
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples" BUILD_FLAG="package -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS="-Dpython.test.exclude=''"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples" BUILD_FLAG="package -Dscala-2.10 -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""

# Test all modules with scala 2.11
- jdk: "oraclejdk7"
env: SPARK_VER="1.6.1" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Pr -Phadoop-2.3 -Ppyspark -Psparkr -Pscalding -Pexamples -Pscala-2.11" BUILD_FLAG="package -Dscala-2.11 -Pbuild-distr" TEST_FLAG="verify -Pusing-packaged-distr" TEST_PROJECTS=""

# Test spark module for 1.5.2
- jdk: "oraclejdk7"
17 changes: 9 additions & 8 deletions README.md
@@ -69,15 +69,10 @@ First of all, set your proxy configuration on Maven `settings.xml`.

Then, run these commands from shell.
```
export http_proxy=http://localhost:3128
export https_proxy=http://localhost:3128
export HTTP_PROXY=http://localhost:3128
export HTTPS_PROXY=http://localhost:3128
npm config set proxy http://localhost:3128
npm config set https-proxy http://localhost:3128
npm config set registry "http://registry.npmjs.org/"
npm config set strict-ssl false
npm cache clean
git config --global http.proxy http://localhost:3128
git config --global https.proxy http://localhost:3128
git config --global url."http://".insteadOf git://
@@ -94,9 +89,7 @@ git config --global --unset url."http://".insteadOf

_Notes:_
- If you are behind NTLM proxy you can use [Cntlm Authentication Proxy](http://cntlm.sourceforge.net/).
- If you are on Windows replace `export` with `set` to set env variables
- Replace `localhost:3128` with the standard pattern `http://user:pwd@host:port`
- For zeppelin-web: currently there is no way to reach Bower main repo through NTLM proxy
- Replace `localhost:3128` with the standard pattern `http://user:pwd@host:port`.

#### Install maven
```
@@ -294,6 +287,14 @@ And browse [localhost:8080](localhost:8080) in your browser.

For configuration details check __`./conf`__ subdirectory.

### Building for Scala 2.11

To produce a Zeppelin package compiled with Scala 2.11, use the `-Pscala-2.11` profile:

```
mvn clean package -Pspark-1.6 -Phadoop-2.4 -Pyarn -Ppyspark -Pscala-2.11 -DskipTests
```

### Package
To package the final distribution including the compressed archive, run:

4 changes: 1 addition & 3 deletions cassandra/pom.xml
@@ -38,14 +38,11 @@
<cassandra.driver.version>3.0.1</cassandra.driver.version>
<snappy.version>1.0.5.4</snappy.version>
<lz4.version>1.3.0</lz4.version>
<scala.version>2.10.4</scala.version>
<scala.binary.version>2.10</scala.binary.version>
<commons-lang.version>3.3.2</commons-lang.version>
<scalate.version>1.7.1</scalate.version>
<cassandra.guava.version>16.0.1</cassandra.guava.version>

<!--TEST-->
<scalatest.version>2.2.4</scalatest.version>
<junit.version>4.12</junit.version>
<achilles.version>3.2.4-Zeppelin</achilles.version>
<assertj.version>1.7.0</assertj.version>
@@ -173,6 +170,7 @@
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>compile</id>
10 changes: 5 additions & 5 deletions docs/development/writingzeppelininterpreter.md
@@ -40,15 +40,15 @@ In 'Separate Interpreter(scoped / isolated) for each note' mode which you can se
## Make your own Interpreter

Creating a new interpreter is quite simple. Just extend [org.apache.zeppelin.interpreter](https://github.com/apache/zeppelin/blob/master/zeppelin-interpreter/src/main/java/org/apache/zeppelin/interpreter/Interpreter.java) abstract class and implement some methods.
You can include `org.apache.zeppelin:zeppelin-interpreter:[VERSION]` artifact in your build system. And you should your jars under your interpreter directory with specific directory name. Zeppelin server reads interpreter directories recursively and initializes interpreters including your own interpreter.
You can include `org.apache.zeppelin:zeppelin-interpreter:[VERSION]` artifact in your build system. And you should put your jars under your interpreter directory with a specific directory name. Zeppelin server reads interpreter directories recursively and initializes interpreters including your own interpreter.
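
As an illustration only (not part of this commit), a minimal interpreter might look like the sketch below. The package and class names are hypothetical, and the exact set of methods to override may vary between Zeppelin versions:

```java
package your.own.interpreter;

import java.util.Collections;
import java.util.List;
import java.util.Properties;

import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;

/**
 * Hypothetical example interpreter: simply echoes the paragraph text back
 * as the paragraph result.
 */
public class EchoInterpreter extends Interpreter {

  public EchoInterpreter(Properties property) {
    super(property);
  }

  @Override
  public void open() {
    // Allocate any connection or process your interpreter needs here.
  }

  @Override
  public void close() {
    // Release resources acquired in open().
  }

  @Override
  public InterpreterResult interpret(String st, InterpreterContext context) {
    // Evaluate the paragraph text and return the result to Zeppelin.
    return new InterpreterResult(InterpreterResult.Code.SUCCESS, st);
  }

  @Override
  public void cancel(InterpreterContext context) {
    // Stop a running paragraph, if the backend supports it.
  }

  @Override
  public FormType getFormType() {
    return FormType.SIMPLE;
  }

  @Override
  public int getProgress(InterpreterContext context) {
    // Report 0-100; 0 means progress is not tracked.
    return 0;
  }

  @Override
  public List<InterpreterCompletion> completion(String buf, int cursor) {
    // No auto-completion in this sketch.
    return Collections.emptyList();
  }
}
```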

There are three locations where you can store your interpreter group, name and other information. Zeppelin server tries to find the location below. Next, Zeppelin tries to find `interpareter-setting.json` in your interpreter jar.
There are three locations where you can store your interpreter group, name and other information. Zeppelin server checks the location below first, and then tries to find `interpreter-setting.json` inside your interpreter jar.

```
{ZEPPELIN_INTERPRETER_DIR}/{YOUR_OWN_INTERPRETER_DIR}/interpreter-setting.json
```

Here is an example of `interpareter-setting.json` on your own interpreter.
Here is an example of `interpreter-setting.json` on your own interpreter.

```json
[
@@ -57,7 +57,7 @@ Here is an example of `interpareter-setting.json` on your own interpreter.
"name": "your-name",
"className": "your.own.interpreter.class",
"properties": {
"propertiies1": {
"properties1": {
"envName": null,
"propertyName": "property.1.name",
"defaultValue": "propertyDefaultValue",
@@ -216,4 +216,4 @@ We welcome contribution to a new interpreter. Please follow these few steps:
- Add documentation on how to use your interpreter under `docs/interpreter/`. Follow the Markdown style as this [example](https://github.com/apache/zeppelin/blob/master/docs/interpreter/elasticsearch.md). Make sure you list config settings and provide working examples on using your interpreter in code boxes in Markdown. Link to images as appropriate (images should go to `docs/assets/themes/zeppelin/img/docs-img/`). And add a link to your documentation in the navigation menu (`docs/_includes/themes/zeppelin/_navigation.html`).
- Most importantly, ensure the licenses of the transitive closure of all dependencies are listed in the [license file](https://github.com/apache/zeppelin/blob/master/zeppelin-distribution/src/bin_license/LICENSE).
- Commit your changes and open a [Pull Request](https://github.com/apache/zeppelin/pulls) on the project [Mirror on GitHub](https://github.com/apache/zeppelin); check to make sure Travis CI build is passing.


35 changes: 32 additions & 3 deletions docs/interpreter/python.md
@@ -46,7 +46,7 @@ To access the help, type **help()**
## Python modules
The interpreter can use all modules already installed (with pip, easy_install...)

## Use Zeppelin Dynamic Forms
## Using Zeppelin Dynamic Forms
You can leverage [Zeppelin Dynamic Form]({{BASE_PATH}}/manual/dynamicform.html) inside your Python code.

**Zeppelin Dynamic Form can only be used if py4j Python library is installed in your system. If not, you can install it with `pip install py4j`.**
@@ -65,6 +65,7 @@ print (z.select("f1",[("o1","1"),("o2","2")],"2"))
print("".join(z.checkbox("f3", [("o1","1"), ("o2","2")],["1"])))
```


## Zeppelin features not fully supported by the Python Interpreter

* Interrupting a paragraph execution (the `cancel()` method) is currently only supported on Linux and MacOS. If the interpreter runs on another operating system (for instance MS Windows), interrupting a paragraph will close the whole interpreter. A JIRA ticket ([ZEPPELIN-893](https://issues.apache.org/jira/browse/ZEPPELIN-893)) is open to implement this feature in a future release of the interpreter.
@@ -94,7 +95,7 @@ z.show(plt, height='150px')


## Pandas integration
[Zeppelin Display System]({{BASE_PATH}}/displaysystem/basicdisplaysystem.html#table) provides simple API to visualize data in Pandas DataFrames, same as in Matplotlib.
Apache Zeppelin [Table Display System]({{BASE_PATH}}/displaysystem/basicdisplaysystem.html#table) provides built-in data visualization capabilities. The Python interpreter leverages it to visualize Pandas DataFrames through the same `z.show()` API as the [Matplotlib integration](#matplotlib-integration).

Example:

@@ -104,6 +105,34 @@ rates = pd.read_csv("bank.csv", sep=";")
z.show(rates)
```

## SQL over Pandas DataFrames

There is a convenience `%python.sql` interpreter that matches the Apache Spark experience in Zeppelin and enables the use of SQL to query [Pandas DataFrames](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html), with results visualized through the built-in [Table Display System]({{BASE_PATH}}/displaysystem/basicdisplaysystem.html#table).

**Prerequisites**

- Pandas `pip install pandas`
- PandaSQL `pip install -U pandasql`

If the default bound interpreter is Python (first in the interpreter list, under the _Gear Icon_), you can use it as plain `%sql`, i.e.

- first paragraph

```python
import pandas as pd
rates = pd.read_csv("bank.csv", sep=";")
```

- next paragraph

```sql
%sql
SELECT * FROM rates WHERE age < 40
```

Otherwise it can be referred to explicitly as `%python.sql`, as in the example below.
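
A minimal sketch of the explicit form, assuming the same `rates` DataFrame defined in the first paragraph above:

```sql
%python.sql
SELECT * FROM rates WHERE age < 40
```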


## Technical description

For in-depth technical details on current implementation plese reffer [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md).
For in-depth technical details on current implementation please refer to [python/README.md](https://github.com/apache/zeppelin/blob/master/python/README.md).
@@ -103,19 +103,6 @@ public class ElasticsearchInterpreter extends Interpreter {
public static final String ELASTICSEARCH_CLUSTER_NAME = "elasticsearch.cluster.name";
public static final String ELASTICSEARCH_RESULT_SIZE = "elasticsearch.result.size";

static {
Interpreter.register(
"elasticsearch",
"elasticsearch",
ElasticsearchInterpreter.class.getName(),
new InterpreterPropertyBuilder()
.add(ELASTICSEARCH_HOST, "localhost", "The host for Elasticsearch")
.add(ELASTICSEARCH_PORT, "9300", "The port for Elasticsearch")
.add(ELASTICSEARCH_CLUSTER_NAME, "elasticsearch", "The cluster name for Elasticsearch")
.add(ELASTICSEARCH_RESULT_SIZE, "10", "The size of the result set of a search query")
.build());
}

private final Gson gson = new GsonBuilder().setPrettyPrinting().create();
private Client client;
private String host = "localhost";
@@ -128,7 +115,13 @@ public ElasticsearchInterpreter(Properties property) {
this.host = getProperty(ELASTICSEARCH_HOST);
this.port = Integer.parseInt(getProperty(ELASTICSEARCH_PORT));
this.clusterName = getProperty(ELASTICSEARCH_CLUSTER_NAME);
this.resultSize = Integer.parseInt(getProperty(ELASTICSEARCH_RESULT_SIZE));
try {
this.resultSize = Integer.parseInt(getProperty(ELASTICSEARCH_RESULT_SIZE));
} catch (NumberFormatException e) {
this.resultSize = 10;
logger.error("Unable to parse " + ELASTICSEARCH_RESULT_SIZE + " : " +
property.get(ELASTICSEARCH_RESULT_SIZE), e);
}
}

@Override
33 changes: 33 additions & 0 deletions elasticsearch/src/main/resources/interpreter-setting.json
@@ -0,0 +1,33 @@
[
{
"group": "elasticsearch",
"name": "elasticsearch",
"className": "org.apache.zeppelin.elasticsearch.ElasticsearchInterpreter",
"properties": {
"elasticsearch.host": {
"envName": "ELASTICSEARCH_HOST",
"propertyName": "elasticsearch.host",
"defaultValue": "localhost",
"description": "The host for Elasticsearch"
},
"elasticsearch.port": {
"envName": "ELASTICSEARCH_PORT",
"propertyName": "elasticsearch.port",
"defaultValue": "9300",
"description": "The port for Elasticsearch"
},
"elasticsearch.cluster.name": {
"envName": "ELASTICSEARCH_CLUSTER_NAME",
"propertyName": "elasticsearch.cluster.name",
"defaultValue": "elasticsearch",
"description": "The cluster name for Elasticsearch"
},
"elasticsearch.result.size": {
"envName": "ELASTICSEARCH_RESULT_SIZE",
"propertyName": "elasticsearch.result.size",
"defaultValue": "10",
"description": "The size of the result set of a search query"
}
}
}
]
@@ -262,19 +262,19 @@ public boolean isDirectory(String path) {
@Override
public List<InterpreterCompletion> completion(String buf, int cursor) {
logger.info("Completion request at position\t" + cursor + " in string " + buf);
final List suggestions = new ArrayList<>();
final List<InterpreterCompletion> suggestions = new ArrayList<>();
if (StringUtils.isEmpty(buf)) {
suggestions.add("ls");
suggestions.add("cd");
suggestions.add("pwd");
suggestions.add(new InterpreterCompletion("ls", "ls"));
suggestions.add(new InterpreterCompletion("cd", "cd"));
suggestions.add(new InterpreterCompletion("pwd", "pwd"));
return suggestions;
}

//part of a command == no spaces
if (buf.split(" ").length == 1){
if ("cd".contains(buf)) suggestions.add("cd");
if ("ls".contains(buf)) suggestions.add("ls");
if ("pwd".contains(buf)) suggestions.add("pwd");
if ("cd".contains(buf)) suggestions.add(new InterpreterCompletion("cd", "cd"));
if ("ls".contains(buf)) suggestions.add(new InterpreterCompletion("ls", "ls"));
if ("pwd".contains(buf)) suggestions.add(new InterpreterCompletion("pwd", "pwd"));

return suggestions;
}
@@ -311,7 +311,7 @@ public List<InterpreterCompletion> completion(String buf, int cursor) {
String beforeLastPeriod = unfinished.substring(0, unfinished.lastIndexOf('.') + 1);
//beforeLastPeriod should be the start of fs.pathSuffix, so take the end of it.
String suggestedFinish = fs.pathSuffix.substring(beforeLastPeriod.length());
suggestions.add(suggestedFinish);
suggestions.add(new InterpreterCompletion(suggestedFinish, suggestedFinish));
}
}
return suggestions;
@@ -22,9 +22,13 @@
import junit.framework.TestCase;
import static org.junit.Assert.*;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.junit.Test;
import org.slf4j.Logger;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Properties;
import java.lang.Override;
import java.lang.String;
@@ -100,6 +104,17 @@ public void test() {
// we should be back to first result after all this navigation
assertEquals(result1.message(), result11.message());

// auto completion test
List<InterpreterCompletion> expectedResultOne = Arrays.asList(
new InterpreterCompletion("ls", "ls"));
List<InterpreterCompletion> expectedResultTwo = Arrays.asList(
new InterpreterCompletion("pwd", "pwd"));
List<InterpreterCompletion> resultOne = t.completion("l", 0);
List<InterpreterCompletion> resultTwo = t.completion("p", 0);

assertEquals(expectedResultOne, resultOne);
assertEquals(expectedResultTwo, resultTwo);

t.close();
}
}
