In [1]:
import pandas as pd
import tqdm
import random
import numpy as np

In [2]:

import pyspark.pandas as ps
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, FloatType




In [3]:
class SparkData():
    def __init__(self, session_name, schema) -> None:
        self.session_name = session_name
        self.schema = schema
        self.spark = SparkSession.builder.appName(self.session_name).getOrCreate()
        self.dataframe = self.spark.createDataFrame(self.spark.sparkContext.emptyRDD(), schema)
    

    def add_line(self,line):
        new_line = self.spark.createDataFrame(line, self.schema)
        self.dataframe = self.dataframe.union(new_line)
    
    def save_parquet(self, path_save):
        self.dataframe.write.mode("overwrite").parquet(path_save)


In [20]:
import findspark
findspark.init()

In [4]:
# Defina o esquema do DataFrame
schema = StructType([
    StructField("data", StringType(), True),
    StructField("temperatura", FloatType(), True),
    StructField("umidade", FloatType(), True)
])


In [5]:
myspark = SparkData(session_name='teste',schema=schema)

24/09/03 14:48:59 WARN Utils: Your hostname, debian resolves to a loopback address: 127.0.1.1; using 192.168.100.4 instead (on interface wlp4s0)
24/09/03 14:48:59 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/09/03 14:48:59 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/09/03 14:49:00 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [6]:
for i in tqdm.tqdm(range(0,100)):
    temperatura = random.random()
    umidade = random.random()
    myspark.add_line(line=[( "2024-09-03", temperatura, umidade )])

100%|██████████| 100/100 [00:02<00:00, 40.43it/s]


In [16]:
myspark.dataframe.show(50)



+----------+------------+-----------+
|      data| temperatura|    umidade|
+----------+------------+-----------+
|2024-09-03|  0.30986512|   0.780457|
|2024-09-03|   0.7466063| 0.86541337|
|2024-09-03|  0.58183557| 0.04761888|
|2024-09-03|  0.29605022|  0.4119609|
|2024-09-03| 0.029822059|   0.602383|
|2024-09-03|  0.36185965| 0.17977515|
|2024-09-03|  0.02846358| 0.48384297|
|2024-09-03|  0.02631612|  0.9758619|
|2024-09-03|  0.37667465| 0.65551656|
|2024-09-03|  0.55032027|  0.5316353|
|2024-09-03|   0.1181135|0.078024514|
|2024-09-03|  0.15258212| 0.12880369|
|2024-09-03|  0.19670658| 0.95416725|
|2024-09-03|   0.8332387|  0.8670788|
|2024-09-03|  0.96362346| 0.80392283|
|2024-09-03|   0.6376718| 0.77230304|
|2024-09-03|   0.5932955| 0.52701753|
|2024-09-03|  0.67832094| 0.47431764|
|2024-09-03|   0.8677575| 0.52518415|
|2024-09-03|  0.74355125| 0.80102795|
|2024-09-03|    0.821113|  0.8384065|
|2024-09-03|  0.57701856| 0.20602085|
|2024-09-03|   0.6559576|  0.8865137|
|2024-09-03|

                                                                                

In [17]:
myspark.dataframe.limit(50).show(50)



+----------+------------+-----------+
|      data| temperatura|    umidade|
+----------+------------+-----------+
|2024-09-03|  0.30986512|   0.780457|
|2024-09-03|   0.7466063| 0.86541337|
|2024-09-03|  0.58183557| 0.04761888|
|2024-09-03|  0.29605022|  0.4119609|
|2024-09-03| 0.029822059|   0.602383|
|2024-09-03|  0.36185965| 0.17977515|
|2024-09-03|  0.02846358| 0.48384297|
|2024-09-03|  0.02631612|  0.9758619|
|2024-09-03|  0.37667465| 0.65551656|
|2024-09-03|  0.55032027|  0.5316353|
|2024-09-03|   0.1181135|0.078024514|
|2024-09-03|  0.15258212| 0.12880369|
|2024-09-03|  0.19670658| 0.95416725|
|2024-09-03|   0.8332387|  0.8670788|
|2024-09-03|  0.96362346| 0.80392283|
|2024-09-03|   0.6376718| 0.77230304|
|2024-09-03|   0.5932955| 0.52701753|
|2024-09-03|  0.67832094| 0.47431764|
|2024-09-03|   0.8677575| 0.52518415|
|2024-09-03|  0.74355125| 0.80102795|
|2024-09-03|    0.821113|  0.8384065|
|2024-09-03|  0.57701856| 0.20602085|
|2024-09-03|   0.6559576|  0.8865137|
|2024-09-03|

                                                                                