In [20]:
import pandas as pd 
import yfinance as yf
import numpy as np
from scipy.stats import kurtosis, skew


In [21]:
def fetch_yahoo_data(ticker: str, period: str = "1y", interval: str = "1d") -> pd.DataFrame:
    """
    Busca dados históricos de OHLCV para o ticker especificado no Yahoo Finance.

    Parâmetros:
    - ticker: código do ativo (ex: "VALE3.SA" para ações brasileiras)
    - period: período de dados (ex: "180d", "1y", "2y")
    - interval: intervalo de tempo (ex: "1d", "1wk", "1mo")

    Retorna:
    - DataFrame com colunas: Open, High, Low, Close, Adj Close e Volume
    """
    df = yf.download(ticker,
                     period=period,
                     interval=interval,
                     auto_adjust=True)  # ajusta splits/dividends
    return df


In [22]:
weights = {
    "VALE3.SA": 0.25,
    "PETR4.SA": 0.20,
    "ITUB4.SA": 0.15,
    "AAPL": 0.25,
    "MSFT": 0.15
}
tickers = weights.keys()
time = "5y"
risk_threshold = -0.02

In [23]:
def fetch_data(ticker):
    df = fetch_yahoo_data(ticker, period=time, interval="1d")
    df = df.xs(ticker, axis=1, level='Ticker')

    # PnL diário (retorno)
    df[f"pnl_{ticker}"] = df['Close'].pct_change()

    # Volume relativo
    df[f"volrel_{ticker}"] = df['Volume'] / df['Volume'].rolling(window=10).mean()

    # Amplitude média (High - Low)
    df[f"amp_{ticker}"] = (df["High"] - df["Low"]).rolling(window=10).mean()

    # Gaps > 1%
    gaps = (df["Open"].shift(-1) - df["Close"]) / df["Close"]
    df[f"gaps_{ticker}"] = gaps.rolling(window=10).apply(lambda x: (np.abs(x) > 0.01).sum(), raw=True)

    # Manter apenas colunas relevantes
    metric_cols = [col for col in df.columns if col.startswith((
        'pnl_', 
        'volrel_', 
        # 'amp_', 
        # 'gaps_'
        ))]
    df = df.dropna(subset=metric_cols)

    return df[metric_cols]


### Métricas

In [24]:
# pnl acumulado em janela de 5 dias
def calculate_metrics(df):
    # label de classificacao de risco
    df["pnl_5d"] = (
        (1 + df["PnL"])[::-1]
        .rolling(window=5)
        .apply(lambda x: np.prod(x) - 1, raw=True)
    )[::-1]
    df['risk'] = df.apply(lambda x: True if x['pnl_5d'] < risk_threshold else False, axis=1)
    df = df.drop(columns=["pnl_5d"]) 

    # pnl acumulado dos últimos 5 dias
    df["pnl_cum"] = df["PnL"].rolling(window=5).apply(np.prod)

    # volatility
    df['volatility'] = df['PnL'].rolling(window=20).std()

    # skewness
    df['skewness'] = df['PnL'].rolling(window=20).apply(lambda x: skew(x), raw=False)

    # kurtosis
    df["kurtosis"] = df["PnL"].rolling(window=20).apply(lambda x: kurtosis(x), raw=False)

    # sharpe ratio
    df["ret_medio"] = df["PnL"].rolling(window=20).mean()
    df['sharpe'] = df["ret_medio"] / df["volatility"]
    df = df.drop(columns=["ret_medio"])
    return df

In [None]:
portfolio = pd.DataFrame()
for ticker in tickers:
    data = fetch_data(ticker)
    portfolio = pd.concat([portfolio, data], axis=1)
    portfolio = portfolio.dropna()
portfolio
pnl_cols = [col for col in portfolio.columns if col.startswith('pnl_')]
portfolio['PnL'] = portfolio[pnl_cols].apply(lambda x: x.mul(weights[x.name.split('_')[1]]), axis=0).sum(axis=1)
portfolio = portfolio.drop(columns=pnl_cols)
portfolio = calculate_metrics(portfolio) 
portfolio = portfolio.dropna()


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Price,volrel_VALE3.SA,volrel_PETR4.SA,volrel_ITUB4.SA,volrel_AAPL,volrel_MSFT,PnL,risk,pnl_cum,volatility,skewness,kurtosis,sharpe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-06-26,0.908090,0.795075,0.863051,1.234298,1.555614,-0.020773,False,3.635118e-11,0.012983,-0.498450,-0.195786,0.442582
2020-06-29,0.682267,0.909075,0.679300,0.789506,0.773051,0.019781,False,1.608760e-09,0.012979,-0.498997,-0.194635,0.442470
2020-06-30,1.222560,1.034514,0.992723,0.860489,1.017630,0.000372,False,4.365276e-11,0.013014,-0.401529,-0.281472,0.409498
2020-07-01,1.284332,1.226206,0.843458,0.681080,0.933280,-0.000449,False,9.792241e-13,0.012341,-0.348704,-0.092081,0.334923
2020-07-02,1.077230,0.875501,0.921102,0.694052,0.838105,0.011308,False,7.774527e-13,0.012424,-0.379561,-0.141483,0.347995
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-09,1.021116,0.629276,1.997185,0.648717,0.600805,0.011918,False,1.197722e-12,0.021015,1.897643,5.162293,0.270226
2025-05-12,1.734381,1.284682,0.941819,1.086524,0.873386,0.027433,False,-1.940768e-12,0.013627,-0.266832,-0.487776,0.231735
2025-05-13,1.308853,1.261578,0.835118,0.862206,0.874938,0.011481,False,-1.772955e-11,0.012294,-0.276255,-0.241015,0.395740
2025-05-14,0.979684,0.637414,0.619654,0.822228,0.784272,-0.001013,False,-2.457247e-11,0.011681,-0.192614,0.005028,0.318722


In [26]:
portfolio.to_parquet('portfolio.parquet')

In [30]:
portfolio = pd.read_parquet('portfolio.parquet')
portfolio

Price,volrel_VALE3.SA,volrel_PETR4.SA,volrel_ITUB4.SA,volrel_AAPL,volrel_MSFT,PnL,risk,pnl_cum,volatility,skewness,kurtosis,sharpe
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-06-26,0.908090,0.795075,0.863051,1.234298,1.555614,-0.020773,False,3.635118e-11,0.012983,-0.498450,-0.195786,0.442582
2020-06-29,0.682267,0.909075,0.679300,0.789506,0.773051,0.019781,False,1.608760e-09,0.012979,-0.498997,-0.194635,0.442470
2020-06-30,1.222560,1.034514,0.992723,0.860489,1.017630,0.000372,False,4.365276e-11,0.013014,-0.401529,-0.281472,0.409498
2020-07-01,1.284332,1.226206,0.843458,0.681080,0.933280,-0.000449,False,9.792241e-13,0.012341,-0.348704,-0.092081,0.334923
2020-07-02,1.077230,0.875501,0.921102,0.694052,0.838105,0.011308,False,7.774527e-13,0.012424,-0.379561,-0.141483,0.347995
...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-09,1.021116,0.629276,1.997185,0.648717,0.600805,0.011918,False,1.197722e-12,0.021015,1.897643,5.162293,0.270226
2025-05-12,1.734381,1.284682,0.941819,1.086524,0.873386,0.027433,False,-1.940768e-12,0.013627,-0.266832,-0.487776,0.231735
2025-05-13,1.308853,1.261578,0.835118,0.862206,0.874938,0.011481,False,-1.772955e-11,0.012294,-0.276255,-0.241015,0.395740
2025-05-14,0.979684,0.637414,0.619654,0.822228,0.784272,-0.001013,False,-2.457247e-11,0.011681,-0.192614,0.005028,0.318722
