In [2]:
# Instala o findspark
!pip install findspark

Defaulting to user installation because normal site-packages is not writeable


In [3]:
# Importa o findspark e inicializa
import findspark
findspark.init()

# Import required modules
import pyspark
from pyspark.streaming import StreamingContext
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from pyspark.sql.functions import col, from_json

In [4]:
# Conector
import os
os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0 pyspark-shell'

In [5]:
# Cria a sessão Spark
spark = SparkSession.builder.appName("projeto").getOrCreate()

24/05/13 19:08:55 WARN Utils: Your hostname, cj resolves to a loopback address: 127.0.1.1; using 192.168.15.34 instead (on interface enp2s0)
24/05/13 19:08:55 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address


:: loading settings :: url = jar:file:/opt/spark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml


Ivy Default Cache set to: /home/cj/.ivy2/cache
The jars for the packages stored in: /home/cj/.ivy2/jars
org.apache.spark#spark-sql-kafka-0-10_2.12 added as a dependency
:: resolving dependencies :: org.apache.spark#spark-submit-parent-f3cb5e16-060c-476a-960a-0c5f540370b7;1.0
	confs: [default]
	found org.apache.spark#spark-sql-kafka-0-10_2.12;3.3.0 in central
	found org.apache.spark#spark-token-provider-kafka-0-10_2.12;3.3.0 in central
	found org.apache.kafka#kafka-clients;2.8.1 in central
	found org.lz4#lz4-java;1.8.0 in central
	found org.xerial.snappy#snappy-java;1.1.8.4 in central
	found org.slf4j#slf4j-api;1.7.32 in central
	found org.apache.hadoop#hadoop-client-runtime;3.3.2 in central
	found org.spark-project.spark#unused;1.0.0 in central
	found org.apache.hadoop#hadoop-client-api;3.3.2 in central
	found commons-logging#commons-logging;1.1.3 in central
	found com.google.code.findbugs#jsr305;3.0.0 in central
	found org.apache.commons#commons-pool2;2.11.1 in central
:: resolution r

In [6]:
# Vamos criar uma subscrição no tópico que tem o streaming de dados que desejamos "puxar" os dados.
df = spark \
  .readStream \
  .format("kafka") \
  .option("kafka.bootstrap.servers", "localhost:9092") \
  .option("subscribe", "vandas-deshboard-bronze") \
  .load()

In [7]:
# Definimos o schema dos dados que desejamos capturar para análise (temperatura)
esquema_dados_temp = StructType([StructField("leitura", 
                                             StructType([StructField("temperatura", DoubleType(), True)]), True)])

In [8]:
# Definimos o schema global dos dados no streaming
esquema_dados = StructType([ 
    StructField("id_sensor", StringType(), True), 
    StructField("id_equipamento", StringType(), True), 
    StructField("sensor", StringType(), True), 
    StructField("data_evento", StringType(), True), 
    StructField("padrao", esquema_dados_temp, True)
])

In [9]:
# Capturamos cada linha de dado (cada valor) como string
df_conversao = df.selectExpr("CAST(value AS STRING)")

# Parse do formato JSON em dataframe
df_conversao = df_conversao.withColumn("jsonData", from_json(col("value"), esquema_dados)).select("jsonData.*")

df_conversao.printSchema()

root
 |-- id_sensor: string (nullable = true)
 |-- id_equipamento: string (nullable = true)
 |-- sensor: string (nullable = true)
 |-- data_evento: string (nullable = true)
 |-- padrao: struct (nullable = true)
 |    |-- leitura: struct (nullable = true)
 |    |    |-- temperatura: double (nullable = true)



In [10]:
# Renomeamos as colunas para simplificar nossa análise
df_conversao_temp_sensor = df_conversao.select(col("padrao.leitura.temperatura").alias("temperatura"), 
                                               col("sensor"))

In [11]:
# Aqui temos o objeto que irá conter nossa análise, o cálculo da média das temperaturas por sensor
df_media_temp_sensor = df_conversao_temp_sensor.groupby("sensor").mean("temperatura")

# Renomeamos as colunas para simplificar nossa análise
df_media_temp_sensor = df_media_temp_sensor.select(col("sensor").alias("sensor"), 
                                                   col("avg(temperatura)").alias("media_temp"))

In [12]:
# Objeto que inicia a consulta ao streaming com formato de console
query = df_media_temp_sensor.writeStream.outputMode("complete").format("console").start()

24/05/13 19:09:02 WARN ResolveWriteToStream: Temporary checkpoint location created which is deleted normally when the query didn't fail: /tmp/temporary-df681c48-1301-491f-ac26-6b4cfacf2768. If it's required to delete it under any circumstances, please set spark.sql.streaming.forceDeleteTempCheckpointLocation to true. Important to know deleting temp checkpoint folder is best effort.
24/05/13 19:09:02 WARN ResolveWriteToStream: spark.sql.adaptive.enabled is not supported in streaming DataFrames/Datasets and will be disabled.
24/05/13 19:09:03 WARN AdminClientConfig: The configuration 'key.deserializer' was supplied but isn't a known config.
24/05/13 19:09:03 WARN AdminClientConfig: The configuration 'value.deserializer' was supplied but isn't a known config.
24/05/13 19:09:03 WARN AdminClientConfig: The configuration 'enable.auto.commit' was supplied but isn't a known config.
24/05/13 19:09:03 WARN AdminClientConfig: The configuration 'max.poll.records' was supplied but isn't a known con

-------------------------------------------
Batch: 0
-------------------------------------------
+------+----------+
|sensor|media_temp|
+------+----------+
+------+----------+



                                                                                

-------------------------------------------
Batch: 1
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



                                                                                

-------------------------------------------
Batch: 2
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



                                                                                

-------------------------------------------
Batch: 3
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



                                                                                

-------------------------------------------
Batch: 4
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



                                                                                

-------------------------------------------
Batch: 5
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



                                                                                

-------------------------------------------
Batch: 6
-------------------------------------------
+--------+----------+
|  sensor|media_temp|
+--------+----------+
|    NULL|      NULL|
| sensor8|      48.3|
|sensor43|      57.5|
| sensor3|      28.8|
|sensor15|      45.2|
|sensor36|       8.2|
|sensor22|      47.5|
|sensor16|      58.5|
|sensor21|      59.3|
+--------+----------+



In [None]:
{"id_sensor":"S-DSA-MP6-CAP15-02468-374DM","id_equipamento":"E-DSA-MP6-CAP15-13579-374DM","sensor":"sensor25", "data_evento":"2022-11-05T15:22:16.968007Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":42.0}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-476JW","id_equipamento":"E-DSA-MP6-CAP15-13579-476JW","sensor":"sensor21", "data_evento":"2022-11-05T15:22:16.968353Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":59.3}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-377TT","id_equipamento":"E-DSA-MP6-CAP15-13579-377TT","sensor":"sensor16", "data_evento":"2022-11-05T15:22:16.968423Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":58.5}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-417ZG","id_equipamento":"E-DSA-MP6-CAP15-13579-417ZG","sensor":"sensor3", "data_evento":"2022-11-05T15:22:16.968476Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":28.8}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-806CG","id_equipamento":"E-DSA-MP6-CAP15-13579-806CG","sensor":"sensor15", "data_evento":"2022-11-05T15:22:16.968526Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":45.2}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-851CT","id_equipamento":"E-DSA-MP6-CAP15-13579-851CT","sensor":"sensor22", "data_evento":"2022-11-05T15:22:16.968578Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":47.5}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-872ZA","id_equipamento":"E-DSA-MP6-CAP15-13579-872ZA","sensor":"sensor43", "data_evento":"2022-11-05T15:22:16.968625Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":57.5}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-037JC","id_equipamento":"E-DSA-MP6-CAP15-13579-037JC","sensor":"sensor8", "data_evento":"2022-11-05T15:22:16.968673Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":48.3}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-560NJ","id_equipamento":"E-DSA-MP6-CAP15-13579-560NJ","sensor":"sensor36", "data_evento":"2022-11-05T15:22:16.968722Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":8.2}}}
{"id_sensor":"S-DSA-MP6-CAP15-02468-458UK","id_equipamento":"E-DSA-MP6-CAP15-13579-458UK","sensor":"sensor35", "data_evento":"2022-11-05T15:22:16.968773Z", "padrao":{"formato":"iot:leitura:sensor:temp", "leitura":{"temperatura":76.0}}}