# Notebook para atualização do banco de dados.

Cada diretório é um banco de dados com uma resolução temporal diferente. O ajuste é feito no arquivo database.yaml de cada diretório. Para dados horários, o Yahoo impõe um limite de até 60 dias antes da data atual.

Para refazer o banco de dados, apague os arquivos dos papéis no diretório. O banco é gerado novamente, de forma automática, na próxima execução da função de atualização.

Para atualizar, basta executar a função abaixo.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import yfinance as yf
import os.path
import yaml
# NOTE(review): update_data() in this notebook only calls yf.download(), so
# this override looks unnecessary here, and newer yfinance releases may no
# longer provide pdr_override() — confirm before removing.
yf.pdr_override()

In [5]:
# Updates the databases according to the configuration file
def update_data(cfg_file):
    """Bring every ticker CSV stored next to ``cfg_file`` up to date.

    The YAML configuration is loaded with ``yaml.safe_load`` and must provide
    the keys ``tickers``, ``startDate``, ``maxDaysFetch`` and ``interval``.
    Each ticker is kept in one CSV file, named after the ticker, in the
    directory that contains ``cfg_file``. Missing data is downloaded with
    ``yf.download`` in batches of at most ``maxDaysFetch`` days, appended to
    what is already stored, and written back to the same file.

    Args:
        cfg_file: Path to the YAML configuration file.

    Raises:
        ValueError: If ``maxDaysFetch`` is not positive; the download window
            would never advance and the loop would not terminate.
    """
    fpath = os.path.dirname(os.path.abspath(cfg_file))
    with open(cfg_file, "r") as stream:
        data_cfg = yaml.safe_load(stream)

    max_days = data_cfg['maxDaysFetch']
    if max_days <= 0:
        raise ValueError(
            'maxDaysFetch must be positive, got {!r}'.format(max_days)
        )
    batch_span = datetime.timedelta(days=max_days)

    # One CSV file per ticker keeps the storage simple to organise for now.
    for ticker in data_cfg['tickers']:
        print('Updating: ' + ticker)
        fname = os.path.join(fpath, ticker)
        if os.path.isfile(fname):
            print('Reading file {}'.format(fname))
            df = pd.read_csv(fname, index_col=0)
            # infer_datetime_format is deprecated since pandas 2.0; the
            # default parsing already infers the format.
            df.index = pd.DatetimeIndex(pd.to_datetime(df.index))
        else:
            df = pd.DataFrame()

        # Resume one day after the newest stored row, or from the configured
        # start date when nothing has been stored yet.
        if df.empty:
            t = data_cfg['startDate']
            start = datetime.datetime(t.year, t.month, t.day)
        else:
            # Drop any timezone so the value compares with the naive `end`.
            newest = df.index.max().tz_localize(None)
            start = newest + datetime.timedelta(days=1)

        end = datetime.datetime.today()

        # Download in batches of at most maxDaysFetch days; the batch end is
        # recomputed on every pass so no request exceeds that span.
        while start.date() < end.date():
            p_end = min(start + batch_span, end)
            start_str = start.strftime('%Y-%m-%d')
            end_str = p_end.strftime('%Y-%m-%d')

            print('Period {} - {}'.format(start_str, end_str))

            df_new = yf.download(ticker, start=start_str, end=end_str, interval=data_cfg['interval'])
            df = pd.concat([df, df_new])
            start = p_end

        df.to_csv(fname)

## Update do banco de 15 min

In [7]:

# Refresh the database configured in ./data/15m/database.yaml.
update_data("./data/15m/database.yaml")

Updating: WEGE3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\WEGE3.SA
Updating: PETR4.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\PETR4.SA
Updating: VAMO3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\VAMO3.SA
Updating: ITUB4.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\ITUB4.SA
Updating: PRIO3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\PRIO3.SA
Updating: VALE3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\VALE3.SA
Updating: USIM5.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\USIM5.SA
Updating: MGLU3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\MGLU3.SA
Updating: PETZ3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\15m\PETZ3.SA


## Update do banco de 1h

In [9]:
# Refresh the database configured in ./data/1h/database.yaml.
update_data("./data/1h/database.yaml")

Updating: WEGE3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\WEGE3.SA
Updating: PETR4.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\PETR4.SA
Updating: VAMO3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\VAMO3.SA
Updating: ITUB4.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\ITUB4.SA
Updating: PRIO3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\PRIO3.SA
Updating: VALE3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\VALE3.SA
Updating: USIM5.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\USIM5.SA
Updating: MGLU3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\MGLU3.SA
Updating: PETZ3.SA
Reading file c:\Users\jairo\OneDrive\Documentos\GitHub\bovespa\bovespa\data\1h\PETZ3.SA


## Update do banco de 1 dia

In [10]:
# Refresh the database configured in ./data/1d/database.yaml.
update_data("./data/1d/database.yaml")

Updating: WEGE3.SA
Period 2022-01-01 - 2022-03-02
[*********************100%***********************]  1 of 1 completed
Period 2022-03-02 - 2022-05-01
[*********************100%***********************]  1 of 1 completed
Period 2022-05-01 - 2022-06-30
[*********************100%***********************]  1 of 1 completed
Period 2022-06-30 - 2022-08-29
[*********************100%***********************]  1 of 1 completed
Period 2022-08-29 - 2022-10-28
[*********************100%***********************]  1 of 1 completed
Period 2022-10-28 - 2022-11-10
[*********************100%***********************]  1 of 1 completed
Updating: PETR4.SA
Period 2022-01-01 - 2022-03-02
[*********************100%***********************]  1 of 1 completed
Period 2022-03-02 - 2022-05-01
[*********************100%***********************]  1 of 1 completed
Period 2022-05-01 - 2022-06-30
[*********************100%***********************]  1 of 1 completed
Period 2022-06-30 - 2022-08-29
[*********************100%*****