In [34]:
#iniciar spark
from pyspark import SparkConf
from pyspark.sql import SparkSession
import boto3
import json

conf = SparkConf()
conf.set('spark.jars.packages', 'org.apache.hadoop:hadoop-aws:3.2.0')
conf.set('spark.hadoop.fs.s3a.aws.credentials.provider', 'com.amazonaws.auth.InstanceProfileCredentialsProvider')
spark = SparkSession.builder.config(conf=conf).getOrCreate()

In [35]:
#carregar tabela ( atenção ao caminho)
a = spark.read.json('s3a://raw-dev-fernando/167083e4-d01e-002c-43be-91858106a752.json')

In [36]:
#exibir a tabela

a.show()

+--------------------+--------------------+------------------+--------------------+
|                Body|     EnqueuedTimeUtc|        Properties|    SystemProperties|
+--------------------+--------------------+------------------+--------------------+
|{0.031, 0.052, 0....|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.002, 0.024, -0...|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.054, 0.012, 0....|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.022, 0.004, 0....|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{-0.002, -0.013, ...|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.03, 0.006, 0.0...|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.027, 0.01, 0.0...|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.062, 0.033, 0....|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"device...|
|{0.018, 0.01, 0.0...|2023-05-28T23:39:...|{sensor-movimento}|{{"scope":"dev

In [37]:
arquivo_trusted = a.select(a.Body.aceleracao_eixo_x.alias('aceleracao_eixo_x'),
                a.Body.aceleracao_eixo_y.alias('aceleracao_eixo_y'),
                a.Body.aceleracao_eixo_z.alias('aceleracao_eixo_z'),
                a.Body.horario_leitura.alias('horario_leitura'),
                a.Body.bateria_dispositivo.alias('bateria_dispositivo'))
arquivo_trusted.show()

+-----------------+-----------------+-----------------+---------------+-------------------+
|aceleracao_eixo_x|aceleracao_eixo_y|aceleracao_eixo_z|horario_leitura|bateria_dispositivo|
+-----------------+-----------------+-----------------+---------------+-------------------+
|            0.031|            0.052|            0.026|       00:39:25|              92.97|
|            0.002|            0.024|           -0.011|       20:39:30|              92.96|
|            0.054|            0.012|            0.021|       21:39:30|              92.95|
|            0.022|            0.004|            0.015|       22:39:30|              92.94|
|           -0.002|           -0.013|            0.018|       23:39:30|              92.93|
|             0.03|            0.006|            0.027|       00:39:30|              92.92|
|            0.027|             0.01|            0.016|       20:39:36|              92.91|
|            0.062|            0.033|            0.018|       21:39:36|         

In [38]:
json_trusted = json.dumps(arquivo_trusted.collect())
print(json_trusted)

[[0.031, 0.052, 0.026, "00:39:25", 92.97], [0.002, 0.024, -0.011, "20:39:30", 92.96], [0.054, 0.012, 0.021, "21:39:30", 92.95], [0.022, 0.004, 0.015, "22:39:30", 92.94], [-0.002, -0.013, 0.018, "23:39:30", 92.93], [0.03, 0.006, 0.027, "00:39:30", 92.92], [0.027, 0.01, 0.016, "20:39:36", 92.91], [0.062, 0.033, 0.018, "21:39:36", 92.9], [0.018, 0.01, 0.021, "22:39:36", 92.89], [-0.007, 0.09, 0.037, "23:39:36", 92.89], [0.009, -0.004, 0.017, "00:39:36", 92.88], [0.026, 0.006, 0.01, "20:39:51", 92.87], [0.003, 0.02, 0.006, "21:39:51", 92.86], [0.007, 0.006, 0.015, "22:39:51", 92.85], [0.02, -0.018, 0.04, "23:39:51", 92.84], [0.02, 0.031, 0.022, "00:39:51", 92.83], [0.025, 0.031, 0.033, "20:39:56", 92.82], [0.009, 0.021, 0.02, "21:39:56", 92.81], [0.021, 0.011, 0.02, "22:39:56", 92.8], [0.04, 0.039, 0.027, "23:39:56", 92.79], [0.013, 0.019, 0.008, "00:39:56", 92.78]]


In [39]:
session = boto3.Session()

s3 = session.client('s3')

bucket_name = 'trusted-dev-fernando'
file_name = 'trusted-2023/05/28.json'

s3.put_object(Body=json_trusted, Bucket=bucket_name, Key=file_name)

{'ResponseMetadata': {'RequestId': 'GX2ES58AT2E92Z9H',
  'HostId': 'lf5x9zkNv8TVvwUdBB9D5hLyZcpSXeC9ek8YFi4beag1nwI9W/BueKDIMMbsyx67vEIKeNkePjs=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'lf5x9zkNv8TVvwUdBB9D5hLyZcpSXeC9ek8YFi4beag1nwI9W/BueKDIMMbsyx67vEIKeNkePjs=',
   'x-amz-request-id': 'GX2ES58AT2E92Z9H',
   'date': 'Tue, 30 May 2023 00:48:05 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"76d91db6664100144d638fa8d4561436"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"76d91db6664100144d638fa8d4561436"',
 'ServerSideEncryption': 'AES256'}

In [40]:
session = boto3.Session()

s3 = session.client('s3')

bucket_name = 'trusted-dev-fernando'
file_name = 'trusted-2023/05/28.json'

s3.put_object(Body=json_trusted, Bucket=bucket_name, Key=file_name)

{'ResponseMetadata': {'RequestId': 'R4H90VPNMBESX56B',
  'HostId': 'PVq3Gq9Q7toL+FQDqkur+e7p0OWQbQHB1dBq/YKG0Oj7sj44I6Gq2kxWiTgPqwMrymXbUXESjZY=',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'PVq3Gq9Q7toL+FQDqkur+e7p0OWQbQHB1dBq/YKG0Oj7sj44I6Gq2kxWiTgPqwMrymXbUXESjZY=',
   'x-amz-request-id': 'R4H90VPNMBESX56B',
   'date': 'Tue, 30 May 2023 00:48:07 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"76d91db6664100144d638fa8d4561436"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"76d91db6664100144d638fa8d4561436"',
 'ServerSideEncryption': 'AES256'}

In [41]:
#carregar tabela ( atenção ao caminho)
b = spark.read.json('s3a://trusted-dev-fernando/trusted-2023/05/28.json')

In [42]:
arquivo_refined = a.select(a.Body.aceleracao_eixo_x.alias('aceleracao_eixo_x'),
                a.Body.aceleracao_eixo_y.alias('aceleracao_eixo_y'),
                a.Body.aceleracao_eixo_z.alias('aceleracao_eixo_z'),
                a.Body.horario_leitura.alias('horario_leitura'))
arquivo_refined.show()

+-----------------+-----------------+-----------------+---------------+
|aceleracao_eixo_x|aceleracao_eixo_y|aceleracao_eixo_z|horario_leitura|
+-----------------+-----------------+-----------------+---------------+
|            0.031|            0.052|            0.026|       00:39:25|
|            0.002|            0.024|           -0.011|       20:39:30|
|            0.054|            0.012|            0.021|       21:39:30|
|            0.022|            0.004|            0.015|       22:39:30|
|           -0.002|           -0.013|            0.018|       23:39:30|
|             0.03|            0.006|            0.027|       00:39:30|
|            0.027|             0.01|            0.016|       20:39:36|
|            0.062|            0.033|            0.018|       21:39:36|
|            0.018|             0.01|            0.021|       22:39:36|
|           -0.007|             0.09|            0.037|       23:39:36|
|            0.009|           -0.004|            0.017|       00

In [43]:
json_refined = json.dumps(arquivo_refined.collect())
print(json_refined)

[[0.031, 0.052, 0.026, "00:39:25"], [0.002, 0.024, -0.011, "20:39:30"], [0.054, 0.012, 0.021, "21:39:30"], [0.022, 0.004, 0.015, "22:39:30"], [-0.002, -0.013, 0.018, "23:39:30"], [0.03, 0.006, 0.027, "00:39:30"], [0.027, 0.01, 0.016, "20:39:36"], [0.062, 0.033, 0.018, "21:39:36"], [0.018, 0.01, 0.021, "22:39:36"], [-0.007, 0.09, 0.037, "23:39:36"], [0.009, -0.004, 0.017, "00:39:36"], [0.026, 0.006, 0.01, "20:39:51"], [0.003, 0.02, 0.006, "21:39:51"], [0.007, 0.006, 0.015, "22:39:51"], [0.02, -0.018, 0.04, "23:39:51"], [0.02, 0.031, 0.022, "00:39:51"], [0.025, 0.031, 0.033, "20:39:56"], [0.009, 0.021, 0.02, "21:39:56"], [0.021, 0.011, 0.02, "22:39:56"], [0.04, 0.039, 0.027, "23:39:56"], [0.013, 0.019, 0.008, "00:39:56"]]


In [44]:
session = boto3.Session()

s3 = session.client('s3')

bucket_name = 'refined-dev-fernando'
file_name = 'refined-2023/05/28.json'

s3.put_object(Body=json_refined, Bucket=bucket_name, Key=file_name)

{'ResponseMetadata': {'RequestId': 'KNC8MRZRK9T8Q0TT',
  'HostId': 'LS67SegRHsrqV6BeGv4K0tLb8oqnC8me2uKKbP+ihmohmHMRZxiPXqSZIwh71a4EuUq7TkI6D96nAV6tnL8eGXG7YBfPyrYv',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amz-id-2': 'LS67SegRHsrqV6BeGv4K0tLb8oqnC8me2uKKbP+ihmohmHMRZxiPXqSZIwh71a4EuUq7TkI6D96nAV6tnL8eGXG7YBfPyrYv',
   'x-amz-request-id': 'KNC8MRZRK9T8Q0TT',
   'date': 'Tue, 30 May 2023 00:48:27 GMT',
   'x-amz-server-side-encryption': 'AES256',
   'etag': '"e6ab783735149aed7253133351a4f5a1"',
   'server': 'AmazonS3',
   'content-length': '0'},
  'RetryAttempts': 0},
 'ETag': '"e6ab783735149aed7253133351a4f5a1"',
 'ServerSideEncryption': 'AES256'}