# Análise por Eventos e Categorias

Este notebook calcula preços médios e variações antes, durante e depois de grandes eventos, separados por categorias: Tecnologia, Agronegócio, Fundos Imobiliários, Dólar, Petróleo e Gás.

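Parâmetros padrão: 90 dias antes do evento, uma janela "durante" de 14 dias centrada no evento (7 dias antes e 7 dias depois, incluindo o próprio dia do evento) e 90 dias depois. Observação: as janelas "antes" e "depois" vão até o dia imediatamente anterior/posterior ao evento e, portanto, se sobrepõem parcialmente à janela "durante".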

In [None]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta

# Widen the pandas display so wide result tables are not truncated in the
# notebook output.
pd.options.display.max_columns = 100
pd.options.display.width = 160
print('✅ Imports prontos')

In [None]:
# Categories and their symbols.
# Maps a category label to the list of ticker symbols that are downloaded
# through yfinance and averaged together for that category.
# NOTE(review): the symbols mix different exchanges/instrument types
# (presumably different currencies and price scales) — confirm that averaging
# their raw prices per category is intended.
categories = {
    'Tecnologia': ['AAPL', 'MSFT', 'GOOGL', 'NVDA', 'TSLA'],
    'Agronegocio': ['SLCE3.SA', 'ADM', 'CTVA'],
    'Fundos Imobiliarios': ['HGLG11.SA', 'MXRF11.SA', 'KNRI11.SA'],
    'Dolar': ['DX=F', 'USDBRL=X'],
    'Petroleo e Gas': ['CL=F', 'XOM', 'CVX', 'PETR4.SA']
}

# Echo the category labels as a quick sanity check.
print('📋 Categorias definidas:', list(categories.keys()))

In [None]:
# Events and window parameters.
# Maps an event label to its date as a 'YYYY-MM-DD' string; the string is
# parsed with pd.to_datetime when the analysis runs.
events = {
    'COVID-19 (WHO Pandemic)': '2020-03-11',
    'Ukraine Invasion': '2022-02-24',
    'Brexit Referendum': '2016-06-23',
    'COVID Oil Crash': '2020-04-20'
}

# Window sizes, in calendar days, relative to the event date.
PRE_DAYS = 90
DURING_DAYS = 14  # symmetric window: 7 days before and 7 after
POST_DAYS = 90

print('🗓️ Eventos:', events)

In [None]:
# Funções auxiliares
def fetch_close_series(symbol: str, start: pd.Timestamp, end: pd.Timestamp) -> pd.Series:
    """Download daily closing prices for ``symbol`` as a one-dimensional Series.

    Args:
        symbol: Ticker handed straight to ``yf.download``.
        start: First date requested; sent as a ``YYYY-MM-DD`` string.
        end: End date handed to ``yf.download``; sent as a ``YYYY-MM-DD``
            string. NOTE(review): yfinance documents ``end`` as exclusive, so
            callers that need this day included must pad it themselves.

    Returns:
        The ``Close`` prices with NaN rows dropped. An empty float Series is
        returned when the download raises, when the result is not a DataFrame,
        or when it has no ``Close`` column.
    """
    try:
        # Best-effort download: any failure is reported and turned into an
        # empty result so one bad symbol does not abort the whole analysis.
        df = yf.download(
            symbol,
            start=start.strftime('%Y-%m-%d'),
            end=end.strftime('%Y-%m-%d'),
            interval='1d',
            progress=False,
        )
    except Exception as e:
        print(f'Erro ao baixar {symbol}:', e)
        return pd.Series(dtype=float)

    if not isinstance(df, pd.DataFrame) or 'Close' not in df.columns:
        return pd.Series(dtype=float)

    close = df['Close']
    if isinstance(close, pd.DataFrame):
        # yfinance can return MultiIndex columns (field, ticker) even for a
        # single symbol; selecting 'Close' then yields a one-column DataFrame.
        # Collapse it so callers always receive the Series they expect.
        if close.shape[1] == 0:
            return pd.Series(dtype=float)
        close = close.iloc[:, 0]
    return close.dropna()

def compute_windows(series: pd.Series, event_date: pd.Timestamp, pre_days=90, during_days=14, post_days=90):
    """Average ``series`` over the pre-, during- and post-event windows.

    All bounds are inclusive and compared against ``series.index``:

    * pre:    ``event_date - pre_days``         .. ``event_date - 1 day``
    * during: ``event_date - during_days // 2`` .. ``event_date + during_days // 2``
    * post:   ``event_date + 1 day``            .. ``event_date + post_days``

    Note that the during window overlaps the end of the pre window and the
    start of the post window.

    Args:
        series: Values indexed by timestamps comparable with ``event_date``.
        event_date: Reference date the windows are built around.
        pre_days: Length of the pre-event window in days.
        during_days: Total width of the event window; half (floor) on each side.
        post_days: Length of the post-event window in days.

    Returns:
        Dict with keys ``pre_mean``, ``during_mean`` and ``post_mean``; a value
        is NaN when its window contains no rows.
    """
    one_day = pd.Timedelta(days=1)
    half_span = pd.Timedelta(days=during_days // 2)

    # Inclusive (lower, upper) bounds for each output key.
    bounds = {
        'pre_mean': (event_date - pd.Timedelta(days=pre_days), event_date - one_day),
        'during_mean': (event_date - half_span, event_date + half_span),
        'post_mean': (event_date + one_day, event_date + pd.Timedelta(days=post_days)),
    }

    stamps = series.index
    averages = {}
    for key, (lower, upper) in bounds.items():
        chunk = series.loc[(stamps >= lower) & (stamps <= upper)]
        averages[key] = np.nan if chunk.empty else float(chunk.mean())
    return averages

def _mean_skipping_nan(values: list) -> float:
    """Return the mean of the non-NaN entries of ``values``, or NaN if none."""
    present = [v for v in values if not np.isnan(v)]
    return float(np.mean(present)) if present else np.nan


def analyze_category_event(category: str, symbols: list, event_name: str, event_date_str: str, pre_days=90, during_days=14, post_days=90) -> dict:
    """Summarise how a category's prices moved around one event.

    For every symbol the closing prices are downloaded, averaged over the
    pre/during/post windows by ``compute_windows``, and those per-symbol means
    are then averaged across the category (NaN entries ignored).

    NOTE(review): the category figures average raw price levels, so symbols
    with larger prices weigh more — confirm this is the intended metric.

    Args:
        category: Label copied into the ``Categoria`` field.
        symbols: Ticker symbols belonging to the category.
        event_name: Label copied into the ``Evento`` field.
        event_date_str: Event date, parsed with ``pd.to_datetime``; also
            copied verbatim into ``DataEvento``.
        pre_days: Days before the event to include.
        during_days: Total width of the event window.
        post_days: Days after the event to include.

    Returns:
        Dict with ``Categoria``, ``Evento``, ``DataEvento``, ``PreMean``,
        ``DuringMean``, ``PostMean`` (rounded to 4 places) and the percentage
        changes ``PctPreToDuring`` / ``PctDuringToPost`` (rounded to 3
        places). Numeric fields are NaN when no symbol returned data or when
        the base of a percentage is zero/NaN.
    """
    event_date = pd.to_datetime(event_date_str)
    start = event_date - pd.Timedelta(days=pre_days)
    # yfinance treats `end` as exclusive, while the post window is inclusive
    # of `event_date + post_days`; request one extra day so that last day is
    # actually downloaded. compute_windows still trims to the exact window.
    end = event_date + pd.Timedelta(days=post_days + 1)

    metrics = []
    for sym in symbols:
        series = fetch_close_series(sym, start, end)
        if series.empty:
            # Nothing downloaded for this symbol; leave it out of the average.
            continue
        metrics.append(compute_windows(series, event_date, pre_days, during_days, post_days))

    if not metrics:
        return {
            'Categoria': category, 'Evento': event_name, 'DataEvento': event_date_str,
            'PreMean': np.nan, 'DuringMean': np.nan, 'PostMean': np.nan,
            'PctPreToDuring': np.nan, 'PctDuringToPost': np.nan
        }

    pre_mean = _mean_skipping_nan([m['pre_mean'] for m in metrics])
    during_mean = _mean_skipping_nan([m['during_mean'] for m in metrics])
    post_mean = _mean_skipping_nan([m['post_mean'] for m in metrics])

    # A zero or NaN base makes the percentage undefined, so report NaN.
    pct_pre_during = (during_mean / pre_mean - 1) * 100 if pre_mean and not np.isnan(pre_mean) else np.nan
    pct_during_post = (post_mean / during_mean - 1) * 100 if during_mean and not np.isnan(during_mean) else np.nan

    return {
        'Categoria': category, 'Evento': event_name, 'DataEvento': event_date_str,
        'PreMean': round(pre_mean, 4), 'DuringMean': round(during_mean, 4), 'PostMean': round(post_mean, 4),
        'PctPreToDuring': round(pct_pre_during, 3) if not np.isnan(pct_pre_during) else np.nan,
        'PctDuringToPost': round(pct_during_post, 3) if not np.isnan(pct_during_post) else np.nan
    }


In [None]:
# Run the analysis for every (category, event) pair and collect one result
# row per pair.
results = []
for cat_name, symbols in categories.items():
    for evt_name, evt_date in events.items():
        results.append(
            analyze_category_event(
                cat_name,
                symbols,
                evt_name,
                evt_date,
                pre_days=PRE_DAYS,
                during_days=DURING_DAYS,
                post_days=POST_DAYS,
            )
        )

# Tabulate the rows; the bare expression displays the table in the notebook.
df_results = pd.DataFrame(results)
df_results


In [None]:
# Save the results as CSV (under notebooks/data/raw, relative to the current
# working directory).
import os

out_path = 'notebooks/data/raw/event_category_analysis.csv'
# Make sure the target folder exists before writing.
os.makedirs(os.path.dirname(out_path), exist_ok=True)
df_results.to_csv(out_path, index=False)
print('💾 Resultados salvos em:', out_path)

In [None]:
# Quick visualization of the Dólar category around the COVID-19 event.
evt = 'COVID-19 (WHO Pandemic)'
evt_date = pd.to_datetime(events[evt])
symbols = categories['Dolar']
start = evt_date - pd.Timedelta(days=PRE_DAYS)
# yfinance treats `end` as exclusive; request one extra day so the last day
# of the post-event range (evt_date + POST_DAYS) is included in the plot.
end = evt_date + pd.Timedelta(days=POST_DAYS + 1)

# Collect one closing-price series per symbol, skipping symbols with no data.
data = {}
for s in symbols:
    ser = fetch_close_series(s, start, end)
    if not ser.empty:
        data[s] = ser

if data:
    # One column per symbol, aligned on the date index.
    df_plot = pd.DataFrame(data)
    ax = df_plot.plot(figsize=(12,5), title=f'Dólar - {evt}')
    # Mark the event date with a dashed vertical line.
    ax.axvline(evt_date, color='red', linestyle='--', label='Evento')
    ax.legend()
else:
    print('Sem dados para visualização')
