In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
from xgboost import XGBRegressor

In [None]:
# === Load and prepare the data ===
# NOTE(review): absolute Colab-style path; adjust when running elsewhere.
arquivo = '/content/NVDA Dados Históricos.csv'
df = pd.read_csv(arquivo)

# Strip stray whitespace from the header names, parse the day-first dates and
# put the rows in chronological order (the shift/rolling features below depend
# on that ordering).
df.columns = [col.strip() for col in df.columns]
df['Data'] = pd.to_datetime(df['Data'], dayfirst=True)
df = df.sort_values('Data')

# Turn the closing price text into a float: every '.' is removed and ',' becomes
# the decimal point (presumably pt-BR number formatting -- confirm against the
# CSV). Values that still fail to parse become NaN and are dropped further down.
df['Último'] = df['Último'].astype(str).str.replace('.', '', regex=False).str.replace(',', '.', regex=False)
df['Último'] = pd.to_numeric(df['Último'], errors='coerce')

# Lag features: the closing price observed `lag` rows earlier.
for lag in [1, 2, 3, 5, 10, 20]:
    df[f'lag_{lag}'] = df['Último'].shift(lag)

# Moving averages are built from *previous* closes only. Rolling directly over
# 'Último' would put the current row's close -- the regression target -- inside
# every window and leak the answer into the features.
prev_close = df['Último'].shift(1)
for window in [5, 10, 20]:
    df[f'ma_{window}'] = prev_close.rolling(window).mean()

# Calendar features.
df['month'] = df['Data'].dt.month
df['dayofweek'] = df['Data'].dt.dayofweek

feature_cols = [c for c in df.columns if c.startswith('lag_') or c.startswith('ma_') or c in ['month', 'dayofweek']]

# Drop only the rows that lack a model input or the target; a NaN in a column
# the model never reads should not remove an otherwise usable trading day.
df = df.dropna(subset=feature_cols + ['Último']).reset_index(drop=True)

# Chronological split by date.
# NOTE(review): rows dated 2025-01-01..2025-01-07 fall in neither set.
train = df[(df['Data'] >= '2019-01-08') & (df['Data'] <= '2024-12-31')]
test  = df[(df['Data'] >= '2025-01-08') & (df['Data'] <= '2025-10-08')]

X_train = train[feature_cols].values
y_train = train['Último'].values
X_test = test[feature_cols].values
y_test_real = test['Último'].values
dates_test = test['Data'].values

In [None]:
# === Hyper-parameter optimisation ===
# Search space: three candidate values for each of five parameters.
param_grid = dict(
    n_estimators=[300, 500, 800],
    learning_rate=[0.01, 0.03, 0.05],
    max_depth=[6, 8, 10],
    subsample=[0.8, 0.9, 1.0],
    colsample_bytree=[0.8, 0.9, 1.0],
)

# Cross-validation uses TimeSeriesSplit so folds follow the row order of the
# training data instead of being shuffled.
tscv = TimeSeriesSplit(n_splits=4)
base_regressor = XGBRegressor(random_state=42, objective='reg:squarederror')
grid_search = GridSearchCV(
    estimator=base_regressor,
    param_grid=param_grid,
    scoring='neg_root_mean_squared_error',
    cv=tscv,
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Report the winning combination, one parameter per line.
best_params = grid_search.best_params_
print("\nMelhores hiperparâmetros encontrados:")
for nome, valor in best_params.items():
    print(f"{nome}: {valor}")

In [None]:
# === Train the tuned model ===
# Same seed and objective as the estimator used in the grid search, combined
# with the best parameter set it found.
model = XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    **best_params
)
model.fit(X_train, y_train)

# === Predictions ===
# Raw model output on the test window, before any post-processing.
y_pred_base = model.predict(X_test)

# === Adaptive drift adjustment ===
# Residuals of the base model over the test window.
residuals = y_test_real - y_pred_base

# The correction applied on day t must rely only on residuals from days before
# t. Without the shift, the rolling mean contains the same-day residual, i.e.
# the very value being predicted (with min_periods=1 the first "forecast" would
# equal the actual price exactly), which invalidates every metric computed on
# the adjusted series. The first day has no history, so its correction is 0.
drift_est = (
    pd.Series(residuals)
    .shift(1)
    .rolling(15, min_periods=1)
    .mean()
    .fillna(0.0)
)
y_pred_adjusted = y_pred_base + drift_est.values

# Trailing 5-step mean over the adjusted predictions (current and earlier
# predictions only).
y_pred_final = pd.Series(y_pred_adjusted).rolling(5, min_periods=1).mean().values

In [None]:
# === Evaluation ===
# Signed error of the adjusted forecast (prediction minus actual), reused for
# both the absolute and the percentage error columns.
signed_error = y_pred_final - y_test_real
abs_error = np.abs(signed_error)
diff_percent = (signed_error / y_test_real) * 100

# RMSE for the base and adjusted forecasts; MAPE (as a percentage) and R² for
# the adjusted forecast only.
rmse_base = np.sqrt(mean_squared_error(y_test_real, y_pred_base))
rmse_final = np.sqrt(mean_squared_error(y_test_real, y_pred_final))
mape_final = 100 * mean_absolute_percentage_error(y_test_real, y_pred_final)
r2_final = r2_score(y_test_real, y_pred_final)

# === Results table ===
# One row per test date, written to CSV without the index column.
colunas = {
    'Data': dates_test,
    'Valor Real': y_test_real,
    'Previsto (XGBoost Base)': y_pred_base,
    'Previsto (XGBoost Ajustado)': y_pred_final,
    'Diferença (%)': diff_percent,
    'Erro Absoluto': abs_error,
}
tabela = pd.DataFrame(colunas)
tabela.to_csv('Previsoes_NVDA_XGBoost.csv', index=False)

# Metric summary.
for linha in (
    f"\nRMSE Base: {rmse_base:.4f}",
    f"RMSE Ajustado: {rmse_final:.4f}",
    f"MAPE Ajustado: {mape_final:.2f}%",
    f"R² Ajustado: {r2_final:.4f}",
):
    print(linha)

In [None]:
# === Charts ===
train_end = pd.Timestamp('2024-12-31')

# Text styling shared by the three figures.
title_kw = {'fontsize': 28, 'weight': 'bold'}
axis_kw = {'fontsize': 22}
tick_kw = {'fontsize': 18}

# Rows up to the end of the training period, drawn as the historical curve.
historico = df[df['Data'] <= train_end]

# --- Chart 1: history plus actual vs. forecast over the test window ---
plt.figure(figsize=(14,6))
plt.plot(historico['Data'], historico['Último'], label='Histórico', alpha=0.5)
plt.plot(dates_test, y_test_real, label='Real (2025)', linewidth=2)
plt.plot(dates_test, y_pred_final, label='Previsto (2025, XGBoost)', linewidth=2, linestyle='--')
plt.axvline(train_end, color='gray', linestyle='--', label='Fim do treino (2024-12-31)')

plt.title('NVDA — Treino até 2024 | Teste em 2025 (XGBoost)', **title_kw)
plt.xlabel('Data', **axis_kw)
plt.ylabel('Preço de Fechamento (USD)', **axis_kw)
plt.xticks(**tick_kw)
plt.yticks(**tick_kw)

plt.legend(fontsize=18)
plt.grid(True)
plt.tight_layout()
plt.show()


# --- Chart 2: actual vs. adjusted forecast, test window only ---
# The unadjusted base prediction (y_pred_base) is not drawn on this chart.
plt.figure(figsize=(14,6))
plt.plot(dates_test, y_test_real, label='Real 2025', linewidth=2)
plt.plot(dates_test, y_pred_final, label='Previsto 2025 (XGBoost)', linewidth=2, linestyle='--', alpha=0.9)

plt.title('NVDA — Real vs Previsto (2025, XGBoost)', **title_kw)
plt.xlabel('Data', **axis_kw)
plt.ylabel('Preço de Fechamento (USD)', **axis_kw)
plt.xticks(**tick_kw)
plt.yticks(**tick_kw)

plt.legend(fontsize=18)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()


# --- Chart 3: absolute error of the adjusted forecast over time ---
plt.figure(figsize=(14,6))
plt.plot(dates_test, abs_error, linewidth=1.5)

plt.title('NVDA — Erro Absoluto das Previsões (|Real - Previsto|)', **title_kw)
plt.xlabel('Data', **axis_kw)
plt.ylabel('Erro Absoluto', **axis_kw)
plt.xticks(**tick_kw)
plt.yticks(**tick_kw)

plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

print("\nArquivo 'Previsoes_NVDA_XGBoost.csv' salvo com sucesso.")
