In [5]:
import pandas as pd

# Configuração para acesso ao MinIO
s3_url = "http://54.161.237.227:9000/"
access_key = "minio_access_key"
secret_key = "minio_secret_key"

storage_options = {
    "key": access_key,
    "secret": secret_key,
    "client_kwargs": {
        "endpoint_url": s3_url
    }
}

file_path = 's3://bronze/data/customers.csv'

# Tentar ler o arquivo CSV com tratamento para aspas e delimitadores, e lidando com linhas ruins
try:
    df = pd.read_csv(
        file_path,
        sep=',',
        quotechar='"',  # Assegura que as vírgulas dentro de aspas sejam tratadas corretamente
        storage_options=storage_options,
        on_bad_lines='skip'  # Pula linhas que não têm o número correto de delimitadores
    )
    display(df)
except Exception as e:
    print("Erro ao ler o arquivo:", e)


Unnamed: 0,customerID,companyName,contactName,contactTitle,address,city,region,postalCode,country,phone,fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67
...,...,...,...,...,...,...,...,...,...,...,...
62,WANDK,Die Wandernde Kuh,Rita Müller,Sales Representative,Adenauerallee 900,Stuttgart,,70563,Germany,0711-020361,0711-035428
63,WARTH,Wartian Herkku,Pirkko Koskitalo,Accounting Manager,Torikatu 38,Oulu,,90110,Finland,981-443655,981-443655
64,WHITC,White Clover Markets,Karl Jablonski,Owner,305 - 14th Ave. S. Suite 3B,Seattle,WA,98128,USA,(206) 555-4112,(206) 555-4115
65,WILMK,Wilman Kala,Matti Karttunen,Owner/Marketing Assistant,Keskuskatu 45,Helsinki,,21240,Finland,90-224 8858,90-224 8858


In [None]:
df_renamed = df.rename(columns={
    'customerID': 'cd_produto', 
    'companyName': 'nm_produto',
    'contactName':'nm_contato',
    'contactTitle':'nm_titulo_contato',
    'address':'nm_endereco',
    'city':'nm_cidade',
    'region':'nm_estado',
    'postalCode':'cd_postal',
    'country':'nm_pais',
    'phone':'num_telefone', 
    'fax':'num_fax' 
                   })

In [16]:
df_renamed.numero_telefone.replace('(',' ')

0        030-0074321
1       (5) 555-4729
2       (5) 555-3932
3     (171) 555-7788
4      0921-12 34 65
           ...      
62       0711-020361
63        981-443655
64    (206) 555-4112
65       90-224 8858
66     (26) 642-7012
Name: numero_telefone, Length: 67, dtype: object

In [None]:

df_formatted = df_renamed.astype({
    'cd_produto':'string', 
    'nm_produto':'string',
    'nm_contato':'string',
    'nm_titulo_contato':'string',
    'nm_endereco':'string',
    'nm_cidade':'string',
    'nm_estado':'string',
    'cd_postal':'string',
    'nm_pais':'string',
    'num_telefone':'string', 
    'num_fax':'string'
})

display(df_formatted)

Unnamed: 0,cd_produto,nome_produto,nome_contato,titulo_contato,endereco,nome_cidade,nome_estado,cd_postal,nome_pais,numero_telefone,numero_fax
0,ALFKI,Alfreds Futterkiste,Maria Anders,Sales Representative,Obere Str. 57,Berlin,,12209,Germany,030-0074321,030-0076545
1,ANATR,Ana Trujillo Emparedados y helados,Ana Trujillo,Owner,Avda. de la Constitución 2222,México D.F.,,05021,Mexico,(5) 555-4729,(5) 555-3745
2,ANTON,Antonio Moreno Taquería,Antonio Moreno,Owner,Mataderos 2312,México D.F.,,05023,Mexico,(5) 555-3932,
3,AROUT,Around the Horn,Thomas Hardy,Sales Representative,120 Hanover Sq.,London,,WA1 1DP,UK,(171) 555-7788,(171) 555-6750
4,BERGS,Berglunds snabbköp,Christina Berglund,Order Administrator,Berguvsvägen 8,Luleå,,S-958 22,Sweden,0921-12 34 65,0921-12 34 67
...,...,...,...,...,...,...,...,...,...,...,...
62,WANDK,Die Wandernde Kuh,Rita Müller,Sales Representative,Adenauerallee 900,Stuttgart,,70563,Germany,0711-020361,0711-035428
63,WARTH,Wartian Herkku,Pirkko Koskitalo,Accounting Manager,Torikatu 38,Oulu,,90110,Finland,981-443655,981-443655
64,WHITC,White Clover Markets,Karl Jablonski,Owner,305 - 14th Ave. S. Suite 3B,Seattle,WA,98128,USA,(206) 555-4112,(206) 555-4115
65,WILMK,Wilman Kala,Matti Karttunen,Owner/Marketing Assistant,Keskuskatu 45,Helsinki,,21240,Finland,90-224 8858,90-224 8858


In [14]:
parquet_path = 's3://silver/orders.parquet'


try:
    df.to_parquet(
        parquet_path,
        engine='pyarrow',
        index=False,       
        storage_options=storage_options
    )
    print("Arquivo Parquet salvo com sucesso em:", parquet_path)
except Exception as e:
    print("Erro ao salvar o arquivo Parquet:", e)


Arquivo Parquet salvo com sucesso em: s3://silver/orders.parquet
