In [20]:

from pyspark.sql.types import StringType
from pyspark.sql import SparkSession


# Start (or reuse) a Spark session that runs locally on every available core.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName('spark_dataframe_api')
    .getOrCreate()
)

# Load the semicolon-delimited sample CSV; the first line is the header and
# column types are inferred by Spark.
#
# The option key used to be misspelled ('enconding'), which is not a CSV reader
# option, so the ISO-8859-1 setting presumably never took effect and the file
# was decoded with the reader's default (UTF-8). That default is what allows
# the accented header names ('Data de Criação', 'Usuário', ...) to match in the
# rename step that follows, so the effective encoding is now stated explicitly.
# NOTE(review): if the file is ever re-exported as ISO-8859-1, change the value
# here and confirm the accented headers still match the rename mapping.
base_dataframe = (
    spark.read
    .option('delimiter', ';')
    .option('header', 'true')
    .option('inferSchema', 'true')
    .option('encoding', 'UTF-8')
    .csv('./data/amostragem.csv')
)


# Swap the original Portuguese CSV headers for short snake_case column names.
df = base_dataframe.withColumnsRenamed({
    'Nome da Tarefa': 'nome',
    'Data de Criação': 'data',
    'Data de Conclusão': 'data_conclusao',
    'Status': 'status',
    'Tipo da Tarefa': 'tipo_tarefa',
    'ID do Usuário': 'user_id',
    'Usuário': 'usuario',
})

# Store the status and both date columns as strings (one chained expression
# instead of three separate reassignments).
df = (
    df
    .withColumn('status', df['status'].cast(StringType()))
    .withColumn('data', df['data'].cast(StringType()))
    .withColumn('data_conclusao', df['data_conclusao'].cast(StringType()))
)


# Keep only the rows of user 'Jeferson Klau' whose status is not 3.
# Two chained predicates are equivalent to a single `&` condition.
df = df.where(df['usuario'] == 'Jeferson Klau').where(df['status'] != 3)



In [21]:

from pyspark.sql.functions import when, date_format, lit, col, concat, udf
import uuid

def generate_uuid() -> str:
  """Return a newly generated random UUID (uuid4) as a string."""
  fresh_id = uuid.uuid4()
  return str(fresh_id)

# Expose generate_uuid to Spark as a string-returning UDF.
# Spark assumes UDFs are deterministic and may drop or repeat calls while
# optimizing; this one returns a new value on every call, so it is flagged as
# nondeterministic.
uuid_udf = udf(generate_uuid, StringType()).asNondeterministic()

# Build the item layout that is displayed below:
#   status  -> 'TODO' when the status is 1, 'DONE' for everything else
#   user_id -> the known legacy id is replaced by its new id; any other id is
#              kept as-is (without `otherwise` it would silently become NULL)
#   PK      -> 'LIST#' + creation date formatted as yyyyMMdd
#   SK      -> 'ITEM#' + a random UUID
# NOTE: nothing here is cached, so SK values are generated again whenever an
# action re-evaluates this frame; the ids printed by show() are not guaranteed
# to be the ones a later action sees.
df = (
    df
    .withColumn('status', when(col('status') == 1, 'TODO').otherwise('DONE'))
    .withColumn(
        'user_id',
        when(
            col('user_id') == 'b4853fc1f03a3a4cec530a98a94d89ad',
            '035c6ada-4091-703a-1837-677cad18d4a5',
        ).otherwise(col('user_id')),
    )
    .withColumn('PK', concat(lit('LIST#'), date_format(col('data'), 'yyyyMMdd')))
    .withColumn('SK', concat(lit('ITEM#'), uuid_udf()))
    .select(
        'PK', 'SK', 'user_id', 'usuario', 'nome',
        'data', 'data_conclusao', 'status', 'tipo_tarefa',
    )
)

df.show()

+-------------+--------------------+--------------------+-------------+--------------------+-------------------+-------------------+------+------------------+
|           PK|                  SK|             user_id|      usuario|                nome|               data|     data_conclusao|status|       tipo_tarefa|
+-------------+--------------------+--------------------+-------------+--------------------+-------------------+-------------------+------+------------------+
|LIST#20240718|ITEM#f60553ab-7c1...|035c6ada-4091-703...|Jeferson Klau|Cum iure exercita...|2024-07-18 22:51:51|2025-02-12 22:51:51|  DONE|Tarefa a Ser Feita|
|LIST#20250529|ITEM#9d1944ce-1ec...|035c6ada-4091-703...|Jeferson Klau|    Asperiores iusto|2025-05-29 09:07:49|               NULL|  TODO|Tarefa a Ser Feita|
|LIST#20230912|ITEM#ea5c438c-0fd...|035c6ada-4091-703...|Jeferson Klau|Asperiores pariat...|2023-09-12 19:56:45|               NULL|  TODO|Tarefa a Ser Feita|
|LIST#20241127|ITEM#86594c55-676...|035c6ada-4

In [22]:
import boto3

# Handle to the 'teste' DynamoDB table in region sa-east-1.
dynamodb = boto3.resource('dynamodb', region_name='sa-east-1')
table = dynamodb.Table('teste')

# Bring the Spark result to the driver as a pandas DataFrame.
pandas_df = df.toPandas()

# Write one DynamoDB item per row. to_dict(orient='records') yields plain
# {column: value} dicts directly, avoiding the per-row Series that iterrows()
# would build only to be converted back into a dict.
with table.batch_writer() as batch:
  for item in pandas_df.to_dict(orient='records'):
    batch.put_item(Item=item)

print('isnserido no dynamodb')

isnserido no dynamodb
