# Elasticsearch datasource example with Spark 1.6
The following notebook shows how to use the Elasticsearch data source connector from PySpark.

In [1]:
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

In [2]:
# Stop any SparkContext that already exists so that a new one, carrying the
# custom configuration built in the next cell, can be created.
# `sc` is not defined anywhere earlier in this notebook; it is presumably
# injected by the PySpark kernel/shell startup (TODO confirm for your kernel).
try:
    sc.stop()
except NameError:
    # No pre-created context (e.g. a plain Python kernel): nothing to stop.
    pass

In [3]:
# Build the Spark configuration step by step (each setter mutates `conf`):
#   - load defaults from the environment / system properties,
#   - run locally using all available cores,
#   - point the elasticsearch-hadoop connector at the "elasticsearch" host.
conf = SparkConf(loadDefaults=True)
conf.setMaster("local[*]")
conf.setAppName("juptyer pyspark")
conf.set("es.nodes", "elasticsearch")

# Create the contexts used by every cell below.
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

### Load some test data, then store it in HDFS.

In [5]:
# Read the JSON file into a DataFrame (schema is inferred by Spark).
# NOTE(review): the inferred schema contains an `index` struct with an `_id`
# field, and the preview query further down returns alternating all-null rows.
# This looks like accounts.json is in Elasticsearch bulk format (an action line
# followed by a document line) - confirm whether the action rows should be
# filtered out before the data is written anywhere.
accounts = sqlContext.read.format("json").load("accounts.json")

# Expose the DataFrame to Spark SQL under the table name "accounts".
accounts.registerTempTable("accounts")

accounts.printSchema()

root
 |-- account_number: long (nullable = true)
 |-- address: string (nullable = true)
 |-- age: long (nullable = true)
 |-- balance: long (nullable = true)
 |-- city: string (nullable = true)
 |-- email: string (nullable = true)
 |-- employer: string (nullable = true)
 |-- firstname: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- index: struct (nullable = true)
 |    |-- _id: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- state: string (nullable = true)



In [6]:
# Preview a few columns of the registered "accounts" table via Spark SQL.
preview = sqlContext.sql("select account_number, firstname, lastname, balance from accounts limit 5")
preview.show()

+--------------+---------+--------+-------+
|account_number|firstname|lastname|balance|
+--------------+---------+--------+-------+
|          null|     null|    null|   null|
|             1|    Amber|    Duke|  39225|
|          null|     null|    null|   null|
|             6|   Hattie|    Bond|   5686|
|          null|     null|    null|   null|
+--------------+---------+--------+-------+



In [7]:
# Persist the DataFrame to HDFS, replacing any data already at that path.
# No format is given, so Spark's default data source is used
# (Parquet unless `spark.sql.sources.default` has been overridden).
accounts.write.save("hdfs://hadoop:9000/user/hive/warehouse/accounts", mode="overwrite")

### Check HDFS to see the written data files
http://localhost:50070/explorer.html#/user/hive/warehouse