# Cassandra datasource example with Spark 1.6
This notebook shows how to use the Spark Cassandra connector as a DataFrame data source from PySpark.

In [1]:
from pyspark import SparkConf, SparkContext
from pyspark.sql import SQLContext

#### We need to stop the default SparkContext (sc) and create one configured for Cassandra.

In [2]:
# Stop the SparkContext currently bound to `sc` so that a new one can be
# created with a custom SparkConf.
sc.stop()

#### Configure Spark to use the Cassandra connector

In [3]:
# Spark settings for this notebook: local mode using every available core,
# with the Cassandra connector pointed at the host named "cassandra" and
# reads issued at consistency level LOCAL_ONE.
conf = (
    SparkConf(True)  # True: also pick up spark.* Java system properties
    .setMaster("local[*]")
    .setAppName("jupyter pyspark")  # name shown in the Spark UI and logs
    .set("spark.cassandra.connection.host", "cassandra")
    .set("spark.cassandra.input.consistency.level", "LOCAL_ONE")
)

# Create the SparkContext from this configuration and wrap it in a SQLContext
# for DataFrame / SQL access.
sc = SparkContext(conf=conf)
sqlContext = SQLContext(sc)

#### Create the test keyspace and table and insert a test row.

In [4]:
from cassandra.cluster import Cluster

# Seed the demo data through the Python driver. The driver connection is only
# needed for these three statements, so it is shut down afterwards instead of
# being left open for the lifetime of the kernel.
cluster = Cluster(['cassandra'])
try:
    session = cluster.connect()
    # The DDL statements use IF NOT EXISTS, so re-running this cell does not
    # fail when the keyspace and table are already present.
    session.execute("CREATE KEYSPACE IF NOT EXISTS test WITH replication={'class':'SimpleStrategy', 'replication_factor':1}")
    session.execute("CREATE TABLE IF NOT EXISTS test.users (username text PRIMARY KEY, emails SET<text>)")
    session.execute("INSERT INTO test.users (username,emails) VALUES('someone',{'someone@email.com', 's@email.com'})")
finally:
    # Closes the cluster's sessions and connections, even if a statement failed.
    cluster.shutdown()

<cassandra.cluster.ResultSet at 0x7f26b8063ad0>

#### Load the test data from Cassandra.

In [5]:
# Read the test.users table through the Cassandra data source. The keyword
# options are handed to the connector as data source options.
users = sqlContext.read.format("org.apache.spark.sql.cassandra").options(
    cluster="Cluster Dock",
    keyspace="test",
    table="users"
).load()

# Register the DataFrame under the name "users" so it can be queried with SQL,
# then print the schema Spark derived for it.
sqlContext.registerDataFrameAsTable(users, "users")
users.printSchema()

root
 |-- username: string (nullable = true)
 |-- emails: array (nullable = true)
 |    |-- element: string (containsNull = true)



In [6]:
# Query the registered "users" table with Spark SQL and print the result as a
# text table (long cell values are truncated in the printed output).
sqlContext.sql("select * from users").show()

+--------+--------------------+
|username|              emails|
+--------+--------------------+
| someone|[s@email.com, som...|
+--------+--------------------+

