## 1. Importar Bibliotecas

In [None]:
# Importar bibliotecas
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
import yfinance as yf
from datetime import datetime, timedelta
import pickle
import warnings
warnings.filterwarnings('ignore')

# Plot styling: apply the matplotlib 'seaborn-v0_8-whitegrid' style sheet first,
# then select seaborn's "muted" colour palette.
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("muted")

print("‚úÖ Bibliotecas importadas com sucesso!")

## 2. Carregar Dados

In [None]:
# Input and output locations, given relative to the notebook's working directory.
MODEL_DIR = "../models/"
DATA_PATH = "../data/processed/crypto_processed.csv"

print("üìÇ Carregando dados...")
# Read the processed CSV, parsing the 'Date' column as datetimes.
df = pd.read_csv(filepath_or_buffer=DATA_PATH, parse_dates=["Date"])

print(f"‚úÖ Carregado: {len(df):,} registros")
# Last expression of the cell: displays the first rows as the cell output.
df.head()

## 3. Coletar Dados do Índice do Dólar (DXY)

In [None]:
# Fetch the US Dollar Index (DXY) for the period covered by the crypto data.
# On any failure, fall back to reproducible synthetic values so the rest of
# the notebook can still run as a demonstration.
print("üíµ Coletando dados do √çndice do D√≥lar (DXY)...")

# Date window taken from the crypto dataset loaded earlier.
start_date = df['Date'].min()
end_date = df['Date'].max()

try:
    dxy = yf.Ticker("DX-Y.NYB")  # Yahoo Finance ticker for the Dollar Index
    # yfinance treats `end` as exclusive, so extend the window by one day to
    # keep the last date present in the crypto data.
    df_dxy = dxy.history(start=start_date, end=end_date + pd.Timedelta(days=1))
    if df_dxy.empty:
        # A failed download may come back as an empty frame rather than an
        # exception; route it to the synthetic fallback below.
        raise ValueError("empty DXY history")
    df_dxy = df_dxy.reset_index()[['Date', 'Close']]
    df_dxy = df_dxy.rename(columns={'Close': 'DXY'})

    # Recent yfinance versions return timezone-aware timestamps, which cannot
    # be merged on 'Date' with tz-naive crypto dates. Reduce them to the
    # calendar day and give them the same timezone (if any) as df['Date'].
    # NOTE(review): assumes df['Date'] holds midnight timestamps - confirm.
    dxy_dates = pd.to_datetime(df_dxy['Date'])
    if dxy_dates.dt.tz is not None:
        dxy_dates = dxy_dates.dt.tz_localize(None)
    dxy_dates = dxy_dates.dt.normalize()
    crypto_tz = df['Date'].dt.tz
    if crypto_tz is not None:
        dxy_dates = dxy_dates.dt.tz_localize(crypto_tz)
    df_dxy['Date'] = dxy_dates

    print(f"‚úÖ DXY coletado: {len(df_dxy)} registros")
except Exception as exc:  # deliberate best-effort: any failure uses the fallback
    print("‚ö†Ô∏è  N√£o foi poss√≠vel coletar DXY, usando dados sint√©ticos")
    print(f"   Motivo: {type(exc).__name__}: {exc}")
    # Synthetic demonstration data: one value per distinct crypto date.
    # .copy() avoids assigning a column onto a view of `df`; the fixed seed
    # keeps every downstream metric reproducible between runs.
    df_dxy = df[['Date']].drop_duplicates().copy()
    rng = np.random.default_rng(42)
    df_dxy['DXY'] = rng.uniform(95, 105, len(df_dxy))

df_dxy.head()

## 4. Preparar Dados para Regressão

In [None]:
# Build one dataset per asset from the combined crypto frame.
df_btc = df[df['Symbol'] == 'BTC-USD'].copy()
df_eth = df[df['Symbol'] == 'ETH-USD'].copy()

# Attach the DXY value for each date. The left join keeps every crypto row,
# leaving DXY empty on dates that have no DXY record. Rows are sorted
# chronologically first so the forward fill below carries the most recent
# earlier DXY value.
df_btc = df_btc.sort_values('Date', kind='stable').merge(df_dxy, on='Date', how='left')
df_eth = df_eth.sort_values('Date', kind='stable').merge(df_dxy, on='Date', how='left')

# Fill missing DXY values. The result is assigned back to the column:
# `df[col].fillna(..., inplace=True)` is a chained in-place update that does
# not modify the frame under pandas copy-on-write, and `fillna(method=...)`
# is deprecated. The trailing bfill() covers rows before the first DXY value,
# which a forward fill alone leaves as NaN.
df_btc['DXY'] = df_btc['DXY'].ffill().bfill()
df_eth['DXY'] = df_eth['DXY'].ffill().bfill()

print("üìä Dados preparados:")
print(f"   Bitcoin: {len(df_btc)} registros")
print(f"   Ethereum: {len(df_eth)} registros")

df_btc.head()

## 5. Análise de Correlação

In [None]:
# Correlation matrix for Bitcoin: prices, volume, moving averages and DXY.
print("üîç An√°lise de Correla√ß√£o - Bitcoin\n")

btc_features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'MA7', 'MA30', 'MA90',
    'DXY',
]
correlation_btc = df_btc.loc[:, btc_features].corr()

# Annotated heatmap with the colour scale centred on zero.
heatmap_options = dict(
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={"shrink": 0.8},
)
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_btc, **heatmap_options)
plt.title('Matriz de Correla√ß√£o - Bitcoin', fontsize=16, fontweight='bold')
plt.tight_layout()
plt.savefig('../visualizations/btc_correlation_matrix.png', dpi=300, bbox_inches='tight')
plt.show()

# Correlation of every variable with the closing price, strongest positive first.
print("\nüìä Correla√ß√£o do Pre√ßo de Fechamento com outras vari√°veis:")
close_correlations = correlation_btc['Close']
print(close_correlations.sort_values(ascending=False))

In [None]:
# Correlation between the Bitcoin and Ethereum closing prices.
print("\nüîç An√°lise de Correla√ß√£o BTC vs ETH\n")

# Join the two closing-price series on date (pandas' default inner join);
# the suffixes tell the two 'Close' columns apart.
btc_close = df_btc[['Date', 'Close']]
eth_close = df_eth[['Date', 'Close']]
btc_eth_comparison = pd.merge(btc_close, eth_close, on='Date', suffixes=('_BTC', '_ETH'))

# Correlation between the two aligned series (Series.corr default method).
close_btc = btc_eth_comparison['Close_BTC']
close_eth = btc_eth_comparison['Close_ETH']
correlation_btc_eth = close_btc.corr(close_eth)
print(f"üìä Correla√ß√£o BTC vs ETH: {correlation_btc_eth:.4f}")

# Scatter plot of one price against the other.
plt.figure(figsize=(10, 6))
plt.scatter(close_btc, close_eth, alpha=0.5, s=30, color='purple')
plt.xlabel('Bitcoin Price (USD)', fontsize=12)
plt.ylabel('Ethereum Price (USD)', fontsize=12)
plt.title(
    f'Correla√ß√£o Bitcoin vs Ethereum (r={correlation_btc_eth:.4f})',
    fontsize=14,
    fontweight='bold',
)
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.savefig('../visualizations/btc_eth_correlation.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Modelo de Regressão Linear - Bitcoin vs DXY

In [None]:
# Simple linear regression: Bitcoin closing price explained by DXY alone.
import os  # local import: only needed here to create the model directory

print("üîÆ Treinando Regress√£o Linear - Bitcoin vs DXY\n")

# LinearRegression rejects NaN inputs. The left merge with DXY followed by a
# forward fill can still leave rows without a DXY value (for example before
# the first DXY record), so keep only rows where both columns are present.
btc_dxy = df_btc[['DXY', 'Close']].dropna()
if btc_dxy.empty:
    raise ValueError("No rows with both 'DXY' and 'Close' available for the BTC regression")
X_btc = btc_dxy[['DXY']].values
y_btc = btc_dxy['Close'].values

# Hold out the last 20% of rows as the test set. shuffle=False keeps the row
# order, and random_state is omitted because it has no effect without shuffling.
X_train_btc, X_test_btc, y_train_btc, y_test_btc = train_test_split(
    X_btc, y_btc, test_size=0.2, shuffle=False
)

print(f"üìä Dados de treino: {len(X_train_btc)}")
print(f"üìä Dados de teste: {len(X_test_btc)}")

# Fit on the training rows only.
model_btc_dxy = LinearRegression()
model_btc_dxy.fit(X_train_btc, y_train_btc)

# Predict the held-out rows.
y_pred_btc = model_btc_dxy.predict(X_test_btc)

# Evaluation metrics on the test set.
r2_btc = r2_score(y_test_btc, y_pred_btc)
mse_btc = mean_squared_error(y_test_btc, y_pred_btc)
rmse_btc = np.sqrt(mse_btc)
mae_btc = mean_absolute_error(y_test_btc, y_pred_btc)

print("\nüìà M√©tricas do Modelo - Bitcoin vs DXY:")
print(f"   R¬≤ Score: {r2_btc:.4f}")
print(f"   RMSE: ${rmse_btc:,.2f}")
print(f"   MAE: ${mae_btc:,.2f}")
print(f"   Coeficiente (slope): {model_btc_dxy.coef_[0]:,.2f}")
print(f"   Intercepto: ${model_btc_dxy.intercept_:,.2f}")

# Persist the fitted model; create the target directory first so open() does
# not fail when it is missing.
os.makedirs(MODEL_DIR, exist_ok=True)
with open(f"{MODEL_DIR}regression_btc_dxy.pkl", 'wb') as f:
    pickle.dump(model_btc_dxy, f)
print("\nüíæ Modelo salvo!")

## 7. Visualizar Regressão - Bitcoin vs DXY

In [None]:
# Two side-by-side panels for the simple BTC ~ DXY regression.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))
ax_fit, ax_pred = axes

# Left panel: test observations with the fitted regression line.
ax_fit.scatter(X_test_btc, y_test_btc, alpha=0.5, label='Dados Reais', color='blue')
ax_fit.plot(X_test_btc, y_pred_btc, color='red', linewidth=2, label='Regress√£o Linear')
ax_fit.set_xlabel('√çndice do D√≥lar (DXY)', fontsize=12)
ax_fit.set_ylabel('Pre√ßo Bitcoin (USD)', fontsize=12)
ax_fit.set_title(
    f'Regress√£o Linear: BTC vs DXY (R¬≤={r2_btc:.4f})',
    fontsize=13,
    fontweight='bold',
)

# Right panel: predicted vs. actual price; the dashed diagonal marks where
# prediction equals the actual value.
actual_range = [y_test_btc.min(), y_test_btc.max()]
ax_pred.scatter(y_test_btc, y_pred_btc, alpha=0.5, color='green')
ax_pred.plot(actual_range, actual_range, 'r--', linewidth=2, label='Predi√ß√£o Perfeita')
ax_pred.set_xlabel('Pre√ßo Real (USD)', fontsize=12)
ax_pred.set_ylabel('Pre√ßo Previsto (USD)', fontsize=12)
ax_pred.set_title('Predi√ß√£o vs Realidade - Bitcoin', fontsize=13, fontweight='bold')

# Legend and grid are configured the same way on both panels.
for panel in (ax_fit, ax_pred):
    panel.legend()
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../visualizations/btc_regression_dxy.png', dpi=300, bbox_inches='tight')
plt.show()

## 8. Regressão Multivariada - Bitcoin

In [None]:
# Multivariate linear regression for the Bitcoin closing price.
print("üîÆ Treinando Regress√£o Multivariada - Bitcoin\n")

# Predictor columns; rows missing any of them are dropped, and the target is
# aligned to the surviving rows through their index.
features = ['Open', 'High', 'Low', 'Volume', 'MA7', 'MA30', 'DXY']
X_multi_btc = df_btc[features].dropna()
y_multi_btc = df_btc['Close'].loc[X_multi_btc.index]

# Hold out the last 20% of rows (no shuffling).
split_parts = train_test_split(
    X_multi_btc, y_multi_btc, test_size=0.2, random_state=42, shuffle=False
)
X_train_multi, X_test_multi, y_train_multi, y_test_multi = split_parts

# Standardise the features, with statistics learned from the training rows only.
scaler = StandardScaler().fit(X_train_multi)
X_train_scaled = scaler.transform(X_train_multi)
X_test_scaled = scaler.transform(X_test_multi)

# Fit and predict.
model_multi_btc = LinearRegression().fit(X_train_scaled, y_train_multi)
y_pred_multi = model_multi_btc.predict(X_test_scaled)

# Evaluation metrics on the test set.
r2_multi = r2_score(y_test_multi, y_pred_multi)
mse_multi = mean_squared_error(y_test_multi, y_pred_multi)
rmse_multi = np.sqrt(mse_multi)
mae_multi = mean_absolute_error(y_test_multi, y_pred_multi)

print("üìà M√©tricas do Modelo Multivariado - Bitcoin:")
print(f"   R¬≤ Score: {r2_multi:.4f}")
print(f"   RMSE: ${rmse_multi:,.2f}")
print(f"   MAE: ${mae_multi:,.2f}")

# Coefficients of the standardised features, ordered by absolute value.
print("\nüìä Import√¢ncia das Features (coeficientes):")
coefficient_table = pd.DataFrame(
    {'Feature': features, 'Coefficient': model_multi_btc.coef_}
)
feature_importance = coefficient_table.sort_values(
    'Coefficient', key=abs, ascending=False
)
print(feature_importance)

# Persist the model together with the scaler it was trained with.
artifacts = (
    ("regression_multi_btc.pkl", model_multi_btc),
    ("scaler_btc.pkl", scaler),
)
for artifact_name, artifact in artifacts:
    with open(f"{MODEL_DIR}{artifact_name}", 'wb') as f:
        pickle.dump(artifact, f)
print("\nüíæ Modelos salvos!")

## 9. Visualizar Importância das Features

In [None]:
# Horizontal bar chart of the absolute regression coefficients per feature.
coefficient_magnitude = feature_importance['Coefficient'].abs()

plt.figure(figsize=(10, 6))
plt.barh(feature_importance['Feature'], coefficient_magnitude, color='steelblue')
plt.xlabel('Magnitude do Coeficiente (valor absoluto)', fontsize=12)
plt.title(
    'Import√¢ncia das Features na Regress√£o Multivariada - Bitcoin',
    fontsize=14,
    fontweight='bold',
)
plt.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.savefig('../visualizations/btc_feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()

## 10. Comparar Modelos

In [None]:
# Side-by-side comparison of the simple and multivariate Bitcoin models.
comparison = pd.DataFrame({
    'Modelo': ['Regress√£o Simples (DXY)', 'Regress√£o Multivariada'],
    'R¬≤': [r2_btc, r2_multi],
    'RMSE': [rmse_btc, rmse_multi],
    'MAE': [mae_btc, mae_multi]
})

print("\nüìä Compara√ß√£o de Modelos - Bitcoin:\n")
print(comparison.to_string(index=False))

# One bar chart per metric, one bar per model.
metrics = ['R¬≤', 'RMSE', 'MAE']
fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for panel, metric in zip(axes, metrics):
    panel.bar(comparison['Modelo'], comparison[metric], color=['skyblue', 'coral'])
    panel.set_title(metric, fontsize=12, fontweight='bold')
    panel.set_ylabel(metric)
    panel.tick_params(axis='x', rotation=15)
    panel.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.savefig('../visualizations/model_comparison.png', dpi=300, bbox_inches='tight')
plt.show()

## 11. Análise de Resíduos

In [None]:
# Residuals of the multivariate model: actual minus predicted closing price.
residuals = y_test_multi - y_pred_multi

fig, axes = plt.subplots(1, 2, figsize=(15, 5))
ax_hist, ax_scatter = axes

# Left panel: histogram of the residuals with a reference line at zero.
ax_hist.hist(residuals, bins=50, color='skyblue', edgecolor='black', alpha=0.7)
ax_hist.axvline(0, color='red', linestyle='--', linewidth=2)
ax_hist.set_xlabel('Res√≠duos (USD)', fontsize=12)
ax_hist.set_ylabel('Frequ√™ncia', fontsize=12)
ax_hist.set_title('Distribui√ß√£o dos Res√≠duos', fontsize=13, fontweight='bold')

# Right panel: residuals against the predicted values.
ax_scatter.scatter(y_pred_multi, residuals, alpha=0.5, color='purple')
ax_scatter.axhline(0, color='red', linestyle='--', linewidth=2)
ax_scatter.set_xlabel('Valores Previstos (USD)', fontsize=12)
ax_scatter.set_ylabel('Res√≠duos (USD)', fontsize=12)
ax_scatter.set_title('Res√≠duos vs Valores Previstos', fontsize=13, fontweight='bold')

# Same grid settings on both panels.
for panel in (ax_hist, ax_scatter):
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('../visualizations/residuals_analysis.png', dpi=300, bbox_inches='tight')
plt.show()

# Summary statistics of the residuals.
print("\nüìä Estat√≠sticas dos Res√≠duos:")
print(f"   M√©dia: ${residuals.mean():,.2f}")
print(f"   Desvio Padr√£o: ${residuals.std():,.2f}")
print(f"   M√≠nimo: ${residuals.min():,.2f}")
print(f"   M√°ximo: ${residuals.max():,.2f}")

## 12. Resumo da Análise

In [None]:
# Final summary: correlations, metrics of both Bitcoin models, and conclusions.
print("\n" + "="*70)
print("üìà RESUMO DA AN√ÅLISE DE REGRESS√ÉO")
print("="*70)

# Label the BTC/ETH correlation from its actual magnitude instead of always
# printing "strong". The 0.7 / 0.4 cut-offs are a rule of thumb.
abs_correlation = abs(correlation_btc_eth)
if pd.isna(correlation_btc_eth):
    correlation_label = "Correla√ß√£o indefinida"
elif abs_correlation >= 0.7:
    correlation_label = "Correla√ß√£o forte"
elif abs_correlation >= 0.4:
    correlation_label = "Correla√ß√£o moderada"
else:
    correlation_label = "Correla√ß√£o fraca"

print("\nüîç CORRELA√á√ïES:")
print(f"   üìä Bitcoin vs Ethereum: {correlation_btc_eth:.4f} ({correlation_label})")
print(f"   üíµ Bitcoin vs DXY: {df_btc[['Close', 'DXY']].corr().iloc[0, 1]:.4f}")

print("\nüéØ MODELOS DE REGRESS√ÉO - BITCOIN:")
print(f"\n   1Ô∏è‚É£ Regress√£o Simples (Bitcoin vs DXY):")
print(f"      R¬≤ Score: {r2_btc:.4f}")
print(f"      RMSE: ${rmse_btc:,.2f}")
print(f"      MAE: ${mae_btc:,.2f}")

print(f"\n   2Ô∏è‚É£ Regress√£o Multivariada ({len(features)} features):")
print(f"      R¬≤ Score: {r2_multi:.4f}")
print(f"      RMSE: ${rmse_multi:,.2f}")
print(f"      MAE: ${mae_multi:,.2f}")

print("\nüí° CONCLUS√ïES:")
if r2_multi > r2_btc:
    if r2_btc > 0:
        # A relative gain is only meaningful against a positive baseline R².
        improvement = ((r2_multi - r2_btc) / r2_btc) * 100
        print(f"   ‚úÖ O modelo multivariado performou {improvement:.1f}% melhor que o modelo simples")
    else:
        # With a zero or negative baseline R² the percentage would divide by
        # zero or come out with the wrong sign, so report the absolute gain.
        r2_gain = r2_multi - r2_btc
        print(f"   ‚úÖ O modelo multivariado superou o modelo simples em {r2_gain:.4f} pontos de R¬≤")
else:
    print(f"   ‚ö†Ô∏è  O modelo simples teve melhor performance")

if correlation_btc_eth > 0.8:
    print(f"   ‚úÖ Bitcoin e Ethereum possuem correla√ß√£o muito forte ({correlation_btc_eth:.4f})")
    print(f"      Movimento de um tende a acompanhar o outro")

print("\n" + "="*70)
print("‚úÖ An√°lise de Regress√£o finalizada!")
print("üìç Pr√≥ximo passo: 05_visualization.ipynb")
print("="*70)