In [2]:
!pip install yfinance pandas numpy --quiet


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [47]:
import pickle
from pathlib import Path
from typing import Dict, Optional

import pandas as pd
import yfinance as yf

In [41]:
# Assets to analyse: short ticker code -> symbol handed to yfinance (code + ".SA")
assets = {
    code: f'{code}.SA'
    for code in (
        'VAMO3', 'VBBR3', 'VIVA3', 'VIVT3', 'VLID3',
        'VULC3', 'WEGE3', 'WIZC3', 'YDUQ3', 'ZAMP3',
    )
}

In [42]:
def get_correlations(ticker: str) -> pd.DataFrame:
    stock = yf.Ticker(ticker)

    # Coletando dados financeiros e históricos de dividendos
    dividends = stock.dividends
    financials = stock.financials.transpose()
    balance_sheet = stock.balance_sheet.transpose()
    cash_flow = stock.cashflow.transpose()

    # Agrupar dividendos por ano
    dividends = dividends.to_frame().reset_index()
    grouped_dividends = dividends.groupby(pd.Grouper(key='Date', freq='YE')).sum()
    grouped_dividends.index = grouped_dividends.index.tz_localize(None)

    # Consolidando todos os dados em um único DataFrame
    data_frames = [financials, balance_sheet, cash_flow]
    df = pd.concat(data_frames, axis=1)

    # Converter colunas para tipos numéricos, ignorando erros
    df = df.apply(pd.to_numeric, errors='coerce')

    # Tratar dados ausentes com interpolação
    df.interpolate(method='linear', limit_direction='both', inplace=True)
    df = df.infer_objects(copy=False)

    # Juntar "grouped_dividends" com "df" apenas onde "df" possui a data (index)
    df = df.merge(grouped_dividends, left_index=True, right_index=True)

    # Calculando a correlação
    correlation_matrix = df.corr()
    
    if 'Dividends' not in correlation_matrix:
        return None
    
    # Retornar apenas a coluna 'Dividends' da correlação
    else:
        dividend_corr = correlation_matrix['Dividends'].dropna()
        return dividend_corr

In [43]:
def get_strong_features_for_dividends(tickers: Dict[str, str], threshold: float = 0.7) -> pd.Series:
    """Find the statement items whose average correlation with dividends is strong.

    ``get_correlations`` is evaluated for every ticker; tickers for which it returns
    ``None`` are skipped. The per-ticker correlations are averaged label by label and
    only the labels whose absolute mean lies in ``[threshold, 1.0)`` are kept.

    Args:
        tickers: Mapping whose values are the symbols passed to ``get_correlations``
            (the keys are not used).
        threshold: Minimum absolute mean correlation for a label to be kept.

    Returns:
        A Series of mean correlations indexed by label; empty when no ticker yields
        a result or no label passes the filter.
    """
    # Collect the correlations of every ticker that produced a result
    correlations = [
        corr
        for corr in (get_correlations(ticker) for ticker in tickers.values())
        if corr is not None
    ]
    if not correlations:
        return pd.Series(dtype='float64')

    # One row per ticker, one column per label; the mean ignores missing entries
    mean_correlation = pd.DataFrame(correlations).mean().dropna()

    # The target's correlation with itself is not a feature. Drop it by label: the
    # "< 1.0" filter alone lets it through when rounding leaves it just below 1.
    mean_correlation = mean_correlation.drop(labels='Dividends', errors='ignore')

    # Keep strong correlations, excluding perfect ones
    strength = mean_correlation.abs()
    return mean_correlation[(strength >= threshold) & (strength < 1.0)]

# Testing

In [51]:
# Screen every configured asset (each ticker triggers a yfinance lookup),
# then show the resulting Series as the cell output.
features = get_strong_features_for_dividends(assets)
features

Gains Losses Not Affecting Retained Earnings      0.780313
Other Equity Adjustments                          0.780313
Non Current Deferred Assets                      -0.780946
Restricted Cash                                  -0.738575
Cash Flow From Continuing Investing Activities    0.729320
Operating Gains Losses                           -0.709049
Amortization                                     -0.881914
Investment Properties                            -0.877511
Sale Of Intangibles                               0.717746
dtype: float64

In [52]:
# Persist the selected features as a plain {label: mean correlation} dict
path = Path("data/dividend_features.pkl")
path.parent.mkdir(exist_ok=True)

payload = features.to_dict()
with path.open(mode="wb") as handle:
    pickle.dump(payload, handle)

In [53]:
# Read the file back to check the round trip. Unpickling is acceptable here only
# because this notebook wrote the file itself; never unpickle untrusted data.
with path.open(mode="rb") as handle:
    loaded_features = pickle.load(handle)

print(loaded_features)

{'Gains Losses Not Affecting Retained Earnings': 0.7803131830314887, 'Other Equity Adjustments': 0.7803131830314887, 'Non Current Deferred Assets': -0.78094572622172, 'Restricted Cash': -0.7385747996935287, 'Cash Flow From Continuing Investing Activities': 0.7293202641395593, 'Operating Gains Losses': -0.7090485662485018, 'Amortization': -0.8819141198750613, 'Investment Properties': -0.8775108555742024, 'Sale Of Intangibles': 0.7177459085570809}
