In [1]:
from pyspark.sql import SparkSession
from pyspark import SparkConf
from pyspark.sql.types import StructType, StructField, LongType, StringType, DoubleType
from time import sleep

# Assemble the whole Spark configuration in one chained expression: the
# standalone master URL, the application name, and the resource settings
# for the driver and the executors.
sparkConf = (
    SparkConf()
    .setMaster("spark://spark-master:7077")
    .setAppName("Lab9_1")
    .setAll([
        ("spark.driver.memory", "2g"),
        ("spark.executor.cores", "1"),
        ("spark.driver.cores", "1"),
    ])
)

# The Spark session is the entry point to the Spark SQL engine.
# getOrCreate() hands back an existing session when one is already active.
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()

# Explicit schema for the streamed JSON records. Every field is declared
# nullable (third StructField argument is True); only name and type vary,
# so the fields are generated from (name, type) pairs in declaration order.
dataSchema = StructType([
    StructField(fieldName, fieldType, True)
    for fieldName, fieldType in (
        ("Arrival_Time", LongType()),
        ("Creation_Time", LongType()),
        ("Device", StringType()),
        ("Index", LongType()),
        ("Model", StringType()),
        ("User", StringType()),
        ("gt", StringType()),
        ("x", DoubleType()),
        ("y", DoubleType()),
        ("z", DoubleType()),
    )
])

# Source: stream the JSON files under the activity-data directory using the
# explicit schema; maxFilesPerTrigger=1 limits each trigger to one new file.
sdf = (
    spark.readStream
    .schema(dataSchema)
    .option("maxFilesPerTrigger", 1)
    .json("/home/jovyan/data/activity-data")
)

# Transformation: running number of records for each distinct "gt" value.
activityCounts = sdf.groupBy("gt").count()

# Sink: an in-memory table (for testing only). The query name,
# "activity_counts", doubles as the Spark SQL table name to query it by.
# "complete" output mode rewrites the full aggregated result on every trigger.
activityQuery = (
    activityCounts.writeStream
    .format("memory")
    .queryName("activity_counts")
    .outputMode("complete")
    .start()
)
# Testing: print the current contents of the in-memory result table ten
# times, pausing five seconds between snapshots so the stream can advance.
for _ in range(10):
    snapshot = spark.sql("SELECT * FROM activity_counts")
    snapshot.show()
    sleep(5)

+---+-----+
| gt|count|
+---+-----+
+---+-----+

+---+-----+
| gt|count|
+---+-----+
+---+-----+

+---+-----+
| gt|count|
+---+-----+
+---+-----+

+---+-----+
| gt|count|
+---+-----+
+---+-----+

+---+-----+
| gt|count|
+---+-----+
+---+-----+

+----------+-----+
|        gt|count|
+----------+-----+
|  stairsup|10452|
|       sit|12309|
|     stand|11384|
|      walk|13256|
|      bike|10796|
|stairsdown| 9365|
|      null|10449|
+----------+-----+

+----------+-----+
|        gt|count|
+----------+-----+
|  stairsup|20905|
|       sit|24619|
|     stand|22769|
|      walk|26512|
|      bike|21593|
|stairsdown|18729|
|      null|20896|
+----------+-----+

+----------+-----+
|        gt|count|
+----------+-----+
|  stairsup|20905|
|       sit|24619|
|     stand|22769|
|      walk|26512|
|      bike|21593|
|stairsdown|18729|
|      null|20896|
+----------+-----+

+----------+-----+
|        gt|count|
+----------+-----+
|  stairsup|20905|
|       sit|24619|
|     stand|22769|
|      walk

In [2]:
# Stop every streaming query that is still active before tearing down the
# session, so no query is left running against a session that is going away.
# spark.streams.active lists the queries currently running on this session.
for activeQuery in spark.streams.active:
    activeQuery.stop()

# Stop the Spark session (and the Spark context underneath it).
spark.stop()