In [372]:
import yfinance as yf
from datetime import datetime, timedelta
from get_all_tickers import get_tickers as gt
import seaborn as sns
import pandas as pd
import numpy as np

## Step 1: Collect and transforming the data from yfinance


In [373]:
br_df = pd.read_html("https://www.dadosdemercado.com.br/bolsa/acoes").copy()
tickers = br_df[0]['Ticker']
tickers = [tickers + ".SA" for tickers in tickers]

In [374]:
# def get_data(stock_df_name, per=None ,start=None , end=None ):
# get all major data to the project
big_data = yf.Ticker('ITUB4.SA')
# start_date = '1990-01-01'
# end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

stock_df = big_data.history(period='max' ).copy()

# Transforming the data to analysis
stock_df = pd.DataFrame(stock_df[["Close", 'Volume', 'Dividends']])
stock_df['Yield'] = stock_df['Dividends'] / stock_df['Close']
stock_df = stock_df.astype('float')

# return stock_df

In [375]:
today = (datetime.now() - timedelta(days=1) ).strftime('%Y-%m-%d')
month_1 = (datetime.now() - timedelta(days=31)).strftime('%Y-%m-%d')
year_1 = (datetime.now() - timedelta(days=366)).strftime('%Y-%m-%d')
year_5 = (datetime.now() - timedelta(days=(365 * 5  + 1))).strftime('%Y-%m-%d')

In [376]:
stock_df


Unnamed: 0_level_0,Close,Volume,Dividends,Yield
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-12-21 00:00:00-02:00,1.688832,74224.0,0.00000,0.000000
2000-12-22 00:00:00-02:00,1.615630,23312.0,0.00000,0.000000
2000-12-25 00:00:00-02:00,1.615630,0.0,0.00000,0.000000
2000-12-26 00:00:00-02:00,1.615159,7851.0,0.00000,0.000000
2000-12-27 00:00:00-02:00,1.669940,26996.0,0.00000,0.000000
...,...,...,...,...
2023-12-26 00:00:00-03:00,33.632519,23135400.0,0.00000,0.000000
2023-12-27 00:00:00-03:00,33.862396,18176500.0,0.00000,0.000000
2023-12-28 00:00:00-03:00,33.952351,12555300.0,0.00000,0.000000
2024-01-02 00:00:00-03:00,33.520000,18948200.0,0.01765,0.000527


In [377]:
# initial and final values from time series to create a condicional line chart color
init_value = stock_df.index.min()
last_value = stock_df.index.max()

## Step 2: Collect and transforming the data from vriconsulting


In [378]:


all_dfs = []
# Create a df with a web data and get all values from currenty month until january 2020

for i in range(0,100):
    url = f"https://www.vriconsulting.com.br/indices/cdi.php?pagina={i}"
    df = pd.read_html(url)[0].copy()
    all_dfs.append(df)
    if 'Jan/2000' in df['Mês/Ano'].values:
        break
cdi_df = pd.concat(all_dfs)
cdi_df.rename(columns={"Índice do mês (em %)": "CDI/Month"}, inplace=True)
cdi_df = cdi_df[["CDI/Month", "Mês/Ano"]]

# Avoid lost of data
cdi_df["CDI/Month"][0] = cdi_df["CDI/Month"][1]
cdi_df["CDI/Month"] =  cdi_df["CDI/Month"].astype('float')


In [379]:
cdi_df["Mês/Ano"].str[:3]

0     dez
1     Nov
2     Out
3     Set
4     Ago
     ... 
35    Ago
36    Jul
37    Jun
38    Mai
39    Abr
Name: Mês/Ano, Length: 321, dtype: object

In [380]:
cdi_df

Unnamed: 0,CDI/Month,Mês/Ano
0,9160.0,dez/2023
1,9160.0,Nov/2023
2,9976.0,Out/2023
3,9729.0,Set/2023
4,11375.0,Ago/2023
...,...,...
35,15808.0,Ago/1997
36,16059.0,Jul/1997
37,15919.0,Jun/1997
38,15784.0,Mai/1997


In [381]:
months = {
    'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
    'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
    'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
}

cdi_df["Mês/Ano"] = cdi_df["Mês/Ano"].str.capitalize().str.strip()
cdi_df["Mês/Ano"] = cdi_df["Mês/Ano"].str[:3].map(months) + cdi_df["Mês/Ano"].str[3:]
cdi_df["Mês/Ano"] = pd.to_datetime(cdi_df['Mês/Ano'])


  cdi_df["Mês/Ano"] = pd.to_datetime(cdi_df['Mês/Ano'])


In [382]:
cdi_df

Unnamed: 0,CDI/Month,Mês/Ano
0,9160.0,2023-12-01
1,9160.0,2023-11-01
2,9976.0,2023-10-01
3,9729.0,2023-09-01
4,11375.0,2023-08-01
...,...,...
35,15808.0,1997-08-01
36,16059.0,1997-07-01
37,15919.0,1997-06-01
38,15784.0,1997-05-01


In [383]:
cdi_df = cdi_df.set_index("Mês/Ano")


In [384]:
# fill the dataframe with day values

cdi_df = cdi_df.resample('D').ffill()
cdi_df

Unnamed: 0_level_0,CDI/Month
Mês/Ano,Unnamed: 1_level_1
1997-04-01,16563.0
1997-04-02,16563.0
1997-04-03,16563.0
1997-04-04,16563.0
1997-04-05,16563.0
...,...
2023-11-27,9160.0
2023-11-28,9160.0
2023-11-29,9160.0
2023-11-30,9160.0


In [385]:
index_list = stock_df.index.strftime('%Y-%m-%d')

cdi_df = cdi_df.asfreq('D', method='ffill', fill_value=None, normalize=True)
cdi_df = cdi_df[cdi_df.index.isin(index_list)]


In [386]:
cdi_df = (cdi_df.astype('float') / 100000)

In [387]:
stock_df.index = stock_df.index.tz_localize(None)
df = pd.merge(cdi_df, stock_df, left_index=True, right_index=True, how='outer')
df['CDI/Month'] = df['CDI/Month'].ffill()

In [388]:
df['Return_Stock'] = (df['Close'] + df['Yield'] - df['Close'].shift(1) ) / df['Close'].loc[df.index.min()] 
df['Return_CDI'] = ((1 + df['CDI/Month']) * df['Close'].loc[df.index.min()] - df['Close'].loc[df.index.min()] )  / 365 / df['Close'].loc[df.index.min()]


In [389]:
df['Return_Stock'] = df['Return_Stock'].cumsum()
df['Return_CDI'] = df['Return_CDI'].cumsum()

In [390]:
df

Unnamed: 0,CDI/Month,Close,Volume,Dividends,Yield,Return_Stock,Return_CDI
2000-12-21,0.11938,1.688832,74224.0,0.00000,0.000000,,0.032707
2000-12-22,0.11938,1.615630,23312.0,0.00000,0.000000,-4.334469,0.065414
2000-12-25,0.11938,1.615630,0.0,0.00000,0.000000,-4.334469,0.098121
2000-12-26,0.11938,1.615159,7851.0,0.00000,0.000000,-4.362358,0.130827
2000-12-27,0.11938,1.669940,26996.0,0.00000,0.000000,-1.118632,0.163534
...,...,...,...,...,...,...,...
2023-12-26,0.09160,33.632519,23135400.0,0.00000,0.000000,1935.962382,149.634937
2023-12-27,0.09160,33.862396,18176500.0,0.00000,0.000000,1949.574009,149.660033
2023-12-28,0.09160,33.952351,12555300.0,0.00000,0.000000,1954.900435,149.685129
2024-01-02,0.09160,33.520000,18948200.0,0.01765,0.000527,1929.331067,149.710225


In [391]:
br_df = pd.read_html("https://www.dadosdemercado.com.br/bolsa/acoes").copy()
tickers = br_df[0]['Ticker']
tickers = [tickers + ".SA" for tickers in tickers]
big_data = yf.Ticker("ITUB4.SA")

stock_df = big_data.history(period='max').copy()

# Transforming data to analysis
stock_df = pd.DataFrame(stock_df[["Close", 'Volume', 'Dividends']])
stock_df['Yield'] = stock_df['Dividends'] / stock_df['Close']
stock_df = stock_df.astype('float')


# today = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')
month_1 = (datetime.now() - timedelta(days=31)).strftime('%Y-%m-%d')
year_1 = (datetime.now() - timedelta(days=366)).strftime('%Y-%m-%d')
year_5 = (datetime.now() - timedelta(days=(365 * 5 + 1))).strftime('%Y-%m-%d')



init_date= stock_df.index.min()
init_date = stock_df.index.max()


all_dfs = []
# Create a df with a web data and get all values from currenty month until january 2020

for i in range(0, 100):
    url = f"https://www.vriconsulting.com.br/indices/cdi.php?pagina={i}"
    df = pd.read_html(url)[0].copy()
    all_dfs.append(df)
    if 'Jan/2000' in df['Mês/Ano'].values:
        break
cdi_df = pd.concat(all_dfs)
cdi_df.rename(columns={"Índice do mês (em %)": "CDI/Month"}, inplace=True)
cdi_df = cdi_df[["CDI/Month", "Mês/Ano"]]

# Avoid lost of data
cdi_df["CDI/Month"][0] = cdi_df["CDI/Month"][1]
cdi_df["CDI/Month"] = cdi_df["CDI/Month"].astype('float')


months = {
    'Jan': '01', 'Fev': '02', 'Mar': '03', 'Abr': '04',
    'Mai': '05', 'Jun': '06', 'Jul': '07', 'Ago': '08',
    'Set': '09', 'Out': '10', 'Nov': '11', 'Dez': '12'
}

cdi_df["Mês/Ano"] = cdi_df["Mês/Ano"].str.capitalize().str.strip()
cdi_df["Mês/Ano"] = cdi_df["Mês/Ano"].str[:3].map(
    months) + cdi_df["Mês/Ano"].str[3:]
cdi_df["Mês/Ano"] = pd.to_datetime(cdi_df['Mês/Ano'])


cdi_df = cdi_df.set_index("Mês/Ano")



cdi_df = cdi_df.resample('D').ffill()

index_list = stock_df.index.strftime('%Y-%m-%d')

cdi_df = cdi_df.asfreq('D', method='ffill', fill_value=None, normalize=True)
cdi_df = cdi_df[cdi_df.index.isin(index_list)]


cdi_df = (cdi_df.astype('float') / 100000)

stock_df.index = stock_df.index.tz_localize(None)
df = pd.merge(cdi_df, stock_df, left_index=True, right_index=True, how='outer')
df['CDI/Month'] = df['CDI/Month'].ffill()

df['Return_Stock'] = (df['Close'] + df['Yield'] - df['Close'].shift(1)
                ) / df['Close'].loc[df.index.min()]
df['Return_CDI'] = ((1 + df['CDI/Month']) ** (1/30) - 1) * \
    df['Close'].loc[df.index.min()] / df['Close'].loc[df.index.min()]

df['Return_CDI'] = df['Return_CDI'].cumsum()
df['Return_Stock'] = df['Return_Stock'].cumsum()



def transform_data(stock_df_name, per=None, start=None, end=None):
    # get all major data to the project
    big_data = yf.Ticker(stock_df_name)
    # start_date = '1990-01-01'
    # end_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d')

    stock_df = big_data.history(period=per, start=start, end=end).copy()

    # Transforming the data to analysis
    stock_df = pd.DataFrame(stock_df[["Close", 'Volume', 'Dividends']])
    stock_df['Yield'] = stock_df['Dividends'] / stock_df['Close']
    stock_df = stock_df.astype('float')

    stock_df.index = stock_df.index.tz_localize(None)
    df = pd.merge(cdi_df, stock_df, left_index=True,
                  right_index=True, how='outer')
    df['CDI/Month'] = df['CDI/Month'].ffill()
    df = df.loc[stock_df.index.min():]
    df['Return_Stock'] = (df['Close'] + df['Yield'] -
                    df['Close'].shift(1)) / df['Close'].loc[df.index.min()]
    df['Return_CDI'] = ((1 + df['CDI/Month']) * df['Close'].loc[df.index.min()] - df['Close'].loc[df.index.min()] )  / 365 / df['Close'].loc[df.index.min()]

    df['Return_CDI'] = df['Return_CDI'].cumsum()
    df['Return_Stock'] = df['Return_Stock'].cumsum()
    df = df.iloc[1:]
    return df



  cdi_df["Mês/Ano"] = pd.to_datetime(cdi_df['Mês/Ano'])


In [392]:
df = transform_data("ITUB4.SA")
df

Unnamed: 0,CDI/Month,Close,Volume,Dividends,Yield,Return_Stock,Return_CDI
2000-12-22,0.11938,1.615630,23312.0,0.00000,0.000000,-0.043344,0.000654
2000-12-25,0.11938,1.615630,0.0,0.00000,0.000000,-0.043344,0.000981
2000-12-26,0.11938,1.615158,7851.0,0.00000,0.000000,-0.043624,0.001308
2000-12-27,0.11938,1.669942,26996.0,0.00000,0.000000,-0.011185,0.001635
2000-12-28,0.11938,1.747393,77425.0,0.00000,0.000000,0.034676,0.001962
...,...,...,...,...,...,...,...
2023-12-26,0.09160,33.632519,23135400.0,0.00000,0.000000,19.359632,1.496349
2023-12-27,0.09160,33.862396,18176500.0,0.00000,0.000000,19.495749,1.496600
2023-12-28,0.09160,33.952351,12555300.0,0.00000,0.000000,19.549013,1.496851
2024-01-02,0.09160,33.520000,18948200.0,0.01765,0.000527,19.293319,1.497102
