# Pre Test Setup

In [1]:
import random
import os
from os import listdir

import seaborn as sns

import pyspark
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext
from pyspark.sql.types import *
from pyspark.sql.window import Window
import pyspark.sql.functions as F

from ipywidgets import interact, widgets

# Relative locations of the test data directory and its zip archive.
DATA_PATH = "./data"
DATA_FILE = "./data.zip"

# host:port string for the Kudu master.
# NOTE(review): no visible cell reads this constant — confirm it is still needed.
KUDU_MASTER = "kudu-master-1:7051"

# Kudu Test

In [2]:
# Arguments handed to spark-submit when PySpark launches its JVM: the Kudu and
# Neo4j connector packages, the extra repository to resolve them from, and the
# mandatory trailing 'pyspark-shell' token. Must be set before the session is created.
os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--packages '
    'org.apache.kudu:kudu-spark3_2.12:1.13.0.7.1.5.17-1,'
    'org.neo4j:neo4j-connector-apache-spark_2.12:5.0.1_for_spark_3 '
    '--repositories https://repository.cloudera.com/artifactory/cloudera-repos/ '
    'pyspark-shell'
)

In [3]:
# Create (or reuse) the Spark session with the Kudu and Neo4j connectors.
spark = (
    SparkSession.builder
    # 'spark.jars.packages' is the property Spark reads for Maven coordinates;
    # the previous key 'spark.packages' is not a Spark setting and had no effect.
    .config(
        'spark.jars.packages',
        'org.apache.kudu:kudu-spark3_2.12:1.13.0.7.1.5.17-1,'
        'org.neo4j:neo4j-connector-apache-spark_2.12:5.0.1_for_spark_3',
    )
    # Same repository the PYSPARK_SUBMIT_ARGS cell passes via --repositories,
    # so the package list above can be resolved from this config alone.
    .config(
        'spark.jars.repositories',
        'https://repository.cloudera.com/artifactory/cloudera-repos/',
    )
    .getOrCreate()
)

# Use the context owned by the session instead of a separate lookup.
sc = spark.sparkContext

# Turn off Spark log output for the rest of the notebook.
sc.setLogLevel('OFF')

# Neo4j Test

## Initial read from Neo4j

In [4]:
# Load all nodes with the Person label through the Neo4j Spark connector,
# connecting over Bolt with authentication disabled.
df = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .options(**{
        "url": "bolt://neo4j:7687",
        "authentication.type": "none",
        "labels": "Person",
    })
    .load()
)

display(df)              # column names and types
df.show(truncate=False)  # rows, without truncating long values

DataFrame[<id>: bigint, <labels>: array<string>, name: string, id: bigint]

+----+--------+------+---+
|<id>|<labels>|name  |id |
+----+--------+------+---+
|0   |[Person]|John  |1  |
|1   |[Person]|Thomas|2  |
|2   |[Person]|Carlos|3  |
|3   |[Person]|Jarles|4  |
+----+--------+------+---+



## Writing 2 nodes into Neo4j

In [5]:
# Build a two-row DataFrame of (id, name) pairs to push into Neo4j.
df = spark.createDataFrame(
    data=[(3, "Carlos"), (4, "Jarles")],
    schema=["id", "name"],
)

# Write the rows as :Person nodes, using `id` as the node key.
# NOTE(review): Overwrite together with node.keys presumably matches existing
# nodes on `id` instead of duplicating them — confirm in the connector docs.
(
    df.write
    .format("org.neo4j.spark.DataSource")
    .option("url", "bolt://neo4j:7687")
    .option("authentication.type", "none")
    .option("labels", ":Person")
    .option("node.keys", "id")
    .mode("Overwrite")
    .save()
)

## Reading Neo4j again: this should return the new nodes inserted in the cell above

In [6]:
# Re-read the Person nodes so the rows written by the previous cell can be
# checked; same connector settings as the initial read.
df = (
    spark.read
    .format("org.neo4j.spark.DataSource")
    .options(**{
        "url": "bolt://neo4j:7687",
        "authentication.type": "none",
        "labels": "Person",
    })
    .load()
)

display(df)              # column names and types
df.show(truncate=False)  # rows, without truncating long values

DataFrame[<id>: bigint, <labels>: array<string>, name: string, id: bigint]

+----+--------+------+---+
|<id>|<labels>|name  |id |
+----+--------+------+---+
|0   |[Person]|John  |1  |
|1   |[Person]|Thomas|2  |
|2   |[Person]|Carlos|3  |
|3   |[Person]|Jarles|4  |
+----+--------+------+---+

