In [1]:
from utils import read_file, upload_to_s3
from validacao import Validator 
from datetime import datetime
import pandas as pd

In [2]:
# Pieces of the daily file name: fixed base name and extension, plus a
# date stamp taken from the local clock when this cell runs.
name_arquivo = 'ofertas'
extensao = 'csv'
data_atual = format(datetime.now(), "%Y-%m-%d")

# Resulting pattern: <base>_<YYYY-MM-DD>.<extension>
nome_final = '.'.join((name_arquivo + '_' + data_atual, extensao))

In [3]:
# Where the input file is read from: the layer name and the bucket name
# are both passed to read_file in the next cell.
camada = 'bronze'
bucket_name = 'data-lake-02'

In [4]:
# Load today's file using the bucket, file name and layer configured above.
# NOTE(review): read_file is a project-local helper from `utils`; presumably it
# returns a pandas DataFrame (later cells call .head/.rename/.to_parquet on the
# result) — confirm against its definition.
df = read_file(bucket_name, nome_final, camada)

✅ Arquivo 'ofertas_2025-02-14.csv' lido com sucesso!


In [5]:
# Quick visual sanity check: show only the first two rows of the loaded frame.
df.head(n=2)

Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits,Ticker
0,2025-02-14 00:00:00-05:00,241.070007,245.050003,241.0,244.339996,10832000,0.0,0.0,AAPL
1,2025-02-14 00:00:00-05:00,407.649994,408.709991,406.170013,406.579987,5747192,0.0,0.0,MSFT


In [6]:
# Wrap the raw frame in the project-local Validator; the next cell uses its
# has_data / has_columns / check_null_or_empty checks to gate the transformation.
validator = Validator(df)

In [7]:
# Raw (source) column name -> standardized output column name.
# Single source of truth: the keys drive the required-column check and the
# whole mapping drives the rename, so the two can never drift apart.
colunas_renomeadas = {
    "Date": "dt_referencia",
    "Open": "vl_abertura",
    "High": "vl_max",
    "Low": "vl_min",
    "Close": "vl_close",
    "Volume": "vl_volume",
    "Dividends": "vl_dividends",
    "Stock Splits": "vl_stock",
    "Ticker": "nm_ativo",
}

# Same three checks, in the same short-circuit order, as before.
dados_validos = (
    validator.has_data()
    and validator.has_columns(list(colunas_renomeadas))
    and validator.check_null_or_empty(["Date"])
)

if not dados_validos:
    # Fail loudly instead of silently skipping: otherwise `df_final` would be
    # left undefined (fresh kernel) or stale (re-run), and the next cell would
    # either crash with a NameError or publish outdated data.
    raise ValueError(
        "O DataFrame não passou na validação "
        "(has_data / has_columns / check_null_or_empty)."
    )

df_tratado = df.rename(columns=colunas_renomeadas)

# Keep only the calendar date of the reference timestamp.
df_tratado["dt_referencia"] = pd.to_datetime(df_tratado["dt_referencia"]).dt.date

# Every "vl_" column is cast to float and rounded to 2 decimal places.
colunas_valor = [col for col in df_tratado.columns if col.startswith("vl_")]
df_tratado = df_tratado.astype(dict.fromkeys(colunas_valor, float))
df_tratado = df_tratado.round(dict.fromkeys(colunas_valor, 2))

# Name consumed by the write/upload cell.
df_final = df_tratado

print("O DataFrame está válido!")



O DataFrame está válido!


In [8]:
# Validate the *transformed* frame: the check must run on `validator_final`,
# not on the validator that wraps the raw input.
validator_final = Validator(df_final)

if not validator_final.has_data():
    # Surface the problem instead of silently producing no output file.
    raise ValueError("df_final não possui dados; nada foi gravado nem enviado.")

# Build the output path once so the written file and the uploaded file
# cannot diverge.
caminho_parquet = f'dados_tratados/{name_arquivo}_{data_atual}.parquet'

# Persist the treated data (`df_final`), not the raw `df`.
df_final.to_parquet(caminho_parquet)
print("Arquivo .parquet criado com sucesso!")

# Reuse the configured bucket rather than repeating its name as a literal.
upload_to_s3(caminho_parquet, bucket_name, "silver")

Arquivo .parquet criado com sucesso!
✅ Upload bem-sucedido: dados_tratados/ofertas_2025-02-14.parquet → s3://data-lake-02/silver/2025-02-14/ofertas_2025-02-14.parquet
