In [26]:
from delta import *
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [27]:
# Assemble the session builder one setting at a time: application name,
# cluster master URL, executor memory, and the Delta Lake SQL extension/catalog.
builder = SparkSession.builder.appName("connect-kafka-streaming")
builder = builder.master("spark://spark-master:7077")
builder = builder.config("spark.executor.memory", "512m")
builder = builder.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
builder = builder.config(
    "spark.sql.catalog.spark_catalog",
    "org.apache.spark.sql.delta.catalog.DeltaCatalog",
)

# Hand the builder to the Delta helper together with the Kafka source package,
# then create (or reuse) the session.
spark = configure_spark_with_delta_pip(
    builder,
    ['org.apache.spark:spark-sql-kafka-0-10_2.12:3.4.1'],
).getOrCreate()

# Restrict driver logging to ERROR-level messages.
spark.sparkContext.setLogLevel("ERROR")

In [28]:
df = (spark.readStream
      .format("kafka")
      .option("kafka.bootstrap.servers", "kafka:9092")
      .option("subscribe", "users")
      .option("startingOffsets", "earliest")
      .load())

In [29]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType

schema = StructType([
    StructField('id', IntegerType(), True),
    StructField('name', StringType(), True),
    StructField('age', IntegerType(), True),
    StructField('gender', StringType(), True),
    StructField('country', StringType(), True)])

df = df.withColumn('value', from_json(col('value').cast("STRING"), schema))

In [30]:
from pyspark.sql.functions import col

df = df.select(
    col('value.id').alias('id'),
    col('value.name').alias('name'),
    col('value.age').alias('age'),
    col('value.gender').alias('gender'),
    col('value.country').alias('country'))

In [31]:
# Start the streaming query: write `df` to the console sink in append mode and
# keep the returned handle so the query can be stopped later.
query = df.writeStream.format('console').outputMode('append').start()

                                                                                

-------------------------------------------
Batch: 0
-------------------------------------------
+---+-------+---+------+---------+
| id|   name|age|gender|  country|
+---+-------+---+------+---------+
|100| user43| 65|     F|Australia|
| 61| user68| 27|     M|Australia|
| 35| user54| 37|     M|   Brazil|
|  9| user30| 18|     M|    China|
|  9| user82| 24|     M|   Canada|
| 22| user40| 19|     F|   Brazil|
| 78| user81| 43|     F|Australia|
|  4| user90| 60|     F|    China|
| 16| user31| 54|     F|Australia|
| 36|user100| 48|     M|Australia|
| 20| user14| 23|     F|    China|
| 82| user21| 60|     M|      USA|
| 90| user39| 50|     M|Australia|
| 52|  user6| 51|     M|       UK|
| 86| user61| 65|     F|       UK|
| 46| user38| 46|     F|    China|
|  3| user54| 45|     F|   Canada|
| 32| user24| 49|     M|       UK|
| 29|  user2| 59|     M|    India|
| 57| user72| 46|     F|   Brazil|
+---+-------+---+------+---------+
only showing top 20 rows



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 38| user3| 47|     F|  India|
|  6|user67| 53|     F|  China|
+---+------+---+------+-------+



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 33|user15| 28|     M|     UK|
+---+------+---+------+-------+



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 72|user21| 46|     M|    USA|
+---+------+---+------+-------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+---+------+---+------+---------+
| id|  name|age|gender|  country|
+---+------+---+------+---------+
| 69|user39| 47|     F|Australia|
+---+------+---+------+---------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+---+------+---+------+---------+
| id|  name|age|gender|  country|
+---+------+---+------+---------+
| 44|user20| 37|     M|Australia|
+---+------+---+------+---------+

-------------------------------------------
Batch: 6
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 70|user95| 65|     M| Canada|
+---+------+---+------+-------+

-------------------------------------------
Batch: 7
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 79|user76| 63|     F|  India|
+---+------+---+------+-------+

-------------------------------------------
Batch: 8
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 82|user35| 61|     F|

                                                                                

-------------------------------------------
Batch: 9
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 11|user18| 42|     M| Canada|
+---+------+---+------+-------+

-------------------------------------------
Batch: 10
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 23|user60| 45|     M| Brazil|
+---+------+---+------+-------+

-------------------------------------------
Batch: 11
-------------------------------------------
+---+------+---+------+---------+
| id|  name|age|gender|  country|
+---+------+---+------+---------+
| 42|user95| 50|     M|Australia|
+---+------+---+------+---------+

-------------------------------------------
Batch: 12
-------------------------------------------
+---+------+---+------+-------+
| id|  name|age|gender|country|
+---+------+---+------+-------+
| 15|user36| 53|    

In [32]:
# Stop the streaming query started by the console-sink cell above.
query.stop()

In [None]:
# Stop the Spark session created at the top of the notebook.
spark.stop()