In [4]:
import pandas as pd 
import yfinance as yf
import numpy as np
from scipy.stats import kurtosis, skew


In [5]:
def fetch_yahoo_data(ticker: str, period: str = "1y", interval: str = "1d") -> pd.DataFrame:
    """
    Busca dados históricos de OHLCV para o ticker especificado no Yahoo Finance.

    Parâmetros:
    - ticker: código do ativo (ex: "VALE3.SA" para ações brasileiras)
    - period: período de dados (ex: "180d", "1y", "2y")
    - interval: intervalo de tempo (ex: "1d", "1wk", "1mo")

    Retorna:
    - DataFrame com colunas: Open, High, Low, Close, Adj Close e Volume
    """
    df = yf.download(ticker,
                     period=period,
                     interval=interval,
                     auto_adjust=True)  # ajusta splits/dividends
    return df


In [21]:
weights = {
    "ABEV3.SA": 0.25,
    "B3SA3.SA": 0.25,
    "JNJ": 0.20,
    "V": 0.15,
    "EGIE3.SA": 0.15
}
tickers = weights.keys()
time = "10y"
risk_threshold = -0.05

In [7]:
def fetch_data(ticker):
    df = fetch_yahoo_data(ticker, period=time, interval="1d")
    df = df.xs(ticker, axis=1, level='Ticker')

    # PnL diário (retorno)
    df[f"pnl_{ticker}"] = df['Close'].pct_change()

    # Volume relativo
    df[f"volrel_{ticker}"] = df['Volume'] / df['Volume'].rolling(window=10).mean()

    # Amplitude média (High - Low)
    df[f"amp_{ticker}"] = (df["High"] - df["Low"]).rolling(window=10).mean()

    # Gaps > 1%
    gaps = (df["Open"].shift(-1) - df["Close"]) / df["Close"]
    df[f"gaps_{ticker}"] = gaps.rolling(window=10).apply(lambda x: (np.abs(x) > 0.01).sum(), raw=True)

    # Manter apenas colunas relevantes
    metric_cols = [col for col in df.columns if col.startswith((
        'pnl_', 
        'volrel_', 
        # 'amp_', 
        # 'gaps_'
        ))]
    df = df.dropna(subset=metric_cols)

    return df[metric_cols]


### Métricas

In [8]:
# pnl acumulado em janela de 5 dias
def calculate_metrics(df):
    # label de classificacao de risco
    df["pnl_5d"] = (
        (1 + df["PnL"])[::-1]
        .rolling(window=5)
        .apply(lambda x: np.prod(x) - 1, raw=True)
    )[::-1]
    df['risk'] = df.apply(lambda x: True if x['pnl_5d'] < risk_threshold else False, axis=1)
    df = df.drop(columns=["pnl_5d"]) 

    # pnl acumulado dos últimos 5 dias
    df["pnl_cum"] = df["PnL"].rolling(window=5).apply(np.prod)

    # volatility
    df['volatility'] = df['PnL'].rolling(window=20).std()

    # skewness
    df['skewness'] = df['PnL'].rolling(window=20).apply(lambda x: skew(x), raw=False)

    # kurtosis
    df["kurtosis"] = df["PnL"].rolling(window=20).apply(lambda x: kurtosis(x), raw=False)

    # sharpe ratio
    df["ret_medio"] = df["PnL"].rolling(window=20).mean()
    df['sharpe'] = df["ret_medio"] / df["volatility"]
    df = df.drop(columns=["ret_medio"])
    return df

In [22]:
portfolio = pd.DataFrame()
for ticker in tickers:
    data = fetch_data(ticker)
    portfolio = pd.concat([portfolio, data], axis=1)
    portfolio = portfolio.dropna()
portfolio
pnl_cols = [col for col in portfolio.columns if col.startswith('pnl_')]
portfolio['PnL'] = portfolio[pnl_cols].apply(lambda x: x.mul(weights[x.name.split('_')[1]]), axis=0).sum(axis=1)
portfolio = portfolio.drop(columns=pnl_cols)
portfolio = calculate_metrics(portfolio) 
portfolio = portfolio.dropna()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [23]:
portfolio.to_parquet('portfolio.parquet')

In [24]:
portfolio = pd.read_parquet('portfolio.parquet')
portfolio

Price,volrel_ABEV3.SA,volrel_B3SA3.SA,volrel_JNJ,volrel_V,volrel_EGIE3.SA,PnL,risk,pnl_cum,volatility,skewness,kurtosis,sharpe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2015-07-06,0.783367,0.769414,0.924641,0.894781,1.033602,0.003258,False,4.271942e-13,0.007757,0.020573,-0.319813,0.025208
2015-07-07,1.054004,1.289009,0.939641,1.085389,1.199323,0.007449,False,-2.004355e-13,0.007573,-0.164630,-0.149141,0.136992
2015-07-08,0.923013,0.862644,0.893666,0.963816,1.502258,-0.008829,False,3.123093e-13,0.007885,-0.047346,-0.478693,0.065288
2015-07-10,1.610972,0.787336,0.904515,0.816980,1.230612,0.016583,False,1.285491e-12,0.008022,0.029316,-0.384527,0.072554
2015-07-13,1.026815,0.787240,0.931565,1.095682,1.048735,0.008405,False,-2.986721e-11,0.008203,-0.094315,-0.559886,0.125715
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-19,0.566162,0.332595,1.419414,0.968266,1.017943,0.004219,False,-1.699629e-11,0.007827,0.084810,-0.628933,0.621318
2025-05-20,0.711779,0.389442,0.837707,0.972216,0.542036,0.002881,False,-8.433870e-12,0.007805,0.180403,-0.585884,0.590938
2025-05-21,0.687458,0.639689,0.808889,1.113833,0.931687,-0.014933,False,-1.510900e-11,0.008731,-0.030844,-0.266009,0.373937
2025-05-22,0.850642,0.930971,0.866730,0.884258,0.725002,-0.007917,False,7.854662e-12,0.009066,0.090881,-0.526573,0.288704
