In [243]:
import pandas as pd
import requests

In [244]:
df = pd.read_csv("../../camada_bronze/acao_petroleo.csv", delimiter=";")

In [245]:
display(df)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2010-01-04 16:56:00,3756762146,3793762146,3743762146,3793762146,1330374387
1,2010-01-05 16:56:00,3815421972,3820421972,3757421972,3777421972,2139651948
2,2010-01-06 16:56:00,3810535714,3880535714,3810535714,3880535714,1872054715
3,2010-01-07 16:56:00,3654592902,3672592902,3634592902,3642592902,1096464619
4,2010-01-08 16:56:00,3524778748,3547778748,3494778748,3503778748,1462407831
...,...,...,...,...,...,...
3647,2024-12-20 16:56:00,3915559512,3934559512,3870559512,3884559512,5927743174
3648,2024-12-23 16:56:00,3946015699,3951015699,3918015699,3932015699,4378553281
3649,2024-12-26 16:56:00,3772760865,3809760865,3769760865,3786760865,2292077753
3650,2024-12-27 16:56:00,364068181,364068181,360168181,360668181,2416728236


In [246]:
df.isna().sum()

Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [247]:
df = df.fillna(0)

In [248]:
df = df.drop_duplicates()

In [249]:
df = df.drop(columns=["Open","High","Low"])

In [250]:
df = df.map(lambda x: x.strip() if isinstance(x, str) else x)

In [251]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652 entries, 0 to 3651
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Date    3652 non-null   object
 1   Close   3652 non-null   object
 2   Volume  3652 non-null   object
dtypes: object(3)
memory usage: 85.7+ KB


In [253]:
colunas = ["Close","Volume"]

df[colunas] = df[colunas].map(lambda x: x.replace(",","."))

In [254]:
df[colunas] = df[colunas].apply(lambda x: pd.to_numeric(x))

In [255]:
df["Date"] = pd.to_datetime(df["Date"])

In [256]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3652 entries, 0 to 3651
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    3652 non-null   datetime64[ns]
 1   Close   3652 non-null   float64       
 2   Volume  3652 non-null   float64       
dtypes: datetime64[ns](1), float64(2)
memory usage: 85.7 KB


In [257]:
df = df.rename(columns={"Date":"Datetime"})

In [258]:
df["Date"] = df["Datetime"].dt.date

In [259]:
last_column = df.columns[-1]
df = df[[last_column] + [col for col in df.columns if col != last_column]]

In [260]:
#Função para coletar dados da cotação do dólar em determinado tempo através de uma API

def get_cotacao_dolar(data):
    
    if isinstance(data, str):
        data = pd.to_datetime(data)

    #Converte o Timestamp para string no formato DD-MM-YYYY
    data_inicial = f"{data.iloc[0].day}/{data.iloc[0].month}/{data.iloc[0].year}"
    data_final = f"{data.iloc[-1].day}/{data.iloc[-1].month}/{data.iloc[-1].year}"

    url = f"https://api.bcb.gov.br/dados/serie/bcdata.sgs.1/dados?dataInicial={data_inicial}&dataFinal={data_final}&formato=json"
    response = requests.get(url)

    if response.status_code == 200:
        try:
            data_dolar = response.json()
            # Verificar se a resposta está vazia
            if data_dolar:
                return data_dolar
            else:
                print(f"Nenhum dado encontrado entre as datas {data_inicial} e {data_final}")
                return None  # Caso não tenha dados para aquela data
        except Exception as e:
            print(f"Erro ao tentar decodificar JSON entre as datas {data_inicial} e {data_final}: {e}")
    else:
        print(f"Erro na requisição entre as datas {data_inicial} e {data_final}, código de status: {response.status_code}")
    return None

In [261]:
df_dolar = pd.DataFrame(get_cotacao_dolar(df["Date"]))

In [262]:
df_dolar.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3766 entries, 0 to 3765
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   data    3766 non-null   object
 1   valor   3766 non-null   object
dtypes: object(2)
memory usage: 59.0+ KB


In [263]:
df_dolar["valor"] = df_dolar["valor"].astype(float)

In [264]:
df["Valor_Dolar"] = df["Close"]/df_dolar["valor"]

In [265]:
# Adicionar uma nova coluna 'PriceR$' para o preço em reais, ignorando datas com erro
#df['PriceR$'] = df['Date'].apply(lambda x: get_cotacao_dolar(x)).multiply(df['Price'])

In [266]:
display(df)

Unnamed: 0,Date,Datetime,Close,Volume,Valor_Dolar
0,2010-01-04,2010-01-04 16:56:00,37.937621,13303743.87,22.005581
1,2010-01-05,2010-01-05 16:56:00,37.774220,21396519.48,21.927335
2,2010-01-06,2010-01-06 16:56:00,38.805357,18720547.15,22.382971
3,2010-01-07,2010-01-07 16:56:00,36.425929,10964646.19,20.918813
4,2010-01-08,2010-01-08 16:56:00,35.037787,14624078.31,20.148239
...,...,...,...,...,...
3647,2024-12-20,2024-12-20 16:56:00,38.845595,59277431.74,7.123841
3648,2024-12-23,2024-12-23 16:56:00,39.320157,43785532.81,7.206377
3649,2024-12-26,2024-12-26 16:56:00,37.867609,22920777.53,6.977118
3650,2024-12-27,2024-12-27 16:56:00,36.066818,24167282.36,6.597186


In [267]:
df.to_csv('../../camada_prata/eyac2_clean.csv', index=False)