## 1. Importação de Bibliotecas

In [None]:
# Core libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Configure the plot style.
# The 'seaborn-v0_8-*' style names only exist on matplotlib >= 3.6; earlier
# releases ship the same sheet as 'seaborn-darkgrid'. Use whichever name the
# installed matplotlib knows so this cell does not raise OSError on older
# versions. If neither is present, matplotlib's current style is left as is.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")

print("‚úÖ Bibliotecas importadas com sucesso!")

## 2. Ingestão de Dados

Vamos carregar dados históricos do Bitcoin usando a API do Yahoo Finance.

In [None]:
from src.data_ingestion import CryptoDataIngestion

# Build the project's ingestion helper and request BTC-USD history
# for the "5y" period.
ingestion = CryptoDataIngestion()
df_btc = ingestion.fetch_from_yahoo("BTC-USD", period="5y")

# Print the row count and covered date range from the helper's summary dict.
info = ingestion.get_data_info(df_btc)
date_range = info['date_range']
print(f"\nüìä Informa√ß√µes do Dataset:")
print(f"Registros: {info['rows']}")
print(f"Per√≠odo: {date_range['start']} a {date_range['end']}")

# Kept as the last expression so the notebook renders the preview.
df_btc.head()

## 3. Análise Exploratória de Dados (EDA)

In [None]:
# Summary statistics for the 'Close' and 'Volume' columns.
# The describe() call stays last so the notebook displays its result.
print("üìä Estat√≠sticas Descritivas:")
df_btc.loc[:, ['Close', 'Volume']].describe()

In [None]:
# Line chart of the 'Close' column against 'Date'.
close_prices = df_btc['Close']

fig, ax = plt.subplots(figsize=(14, 6))
ax.plot(df_btc['Date'], close_prices, linewidth=2, color='#2196F3')
ax.set_title('Hist√≥rico de Pre√ßos - Bitcoin (BTC-USD)', fontsize=16, fontweight='bold')
ax.set_xlabel('Data', fontsize=12)
ax.set_ylabel('Pre√ßo (USD)', fontsize=12)
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Headline figures: last, highest and lowest value of the series.
print(f"\nüí∞ Pre√ßo Atual: ${close_prices.iloc[-1]:,.2f}")
print(f"üìà M√°ximo: ${close_prices.max():,.2f}")
print(f"üìâ M√≠nimo: ${close_prices.min():,.2f}")

In [None]:
# Row-over-row percentage change of 'Close', stored as a new column on df_btc.
df_btc['Daily_Return'] = df_btc['Close'].pct_change() * 100

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, box_ax = axes

# pct_change() has no value for the first row, so drop missing entries once
# and feed the same series to both panels.
daily_returns = df_btc['Daily_Return'].dropna()

# Left panel: histogram of the returns.
hist_ax.hist(daily_returns, bins=50, color='skyblue', edgecolor='black')
hist_ax.set_title('Distribui√ß√£o de Retornos Di√°rios', fontsize=14, fontweight='bold')
hist_ax.set_xlabel('Retorno (%)', fontsize=12)
hist_ax.set_ylabel('Frequ√™ncia', fontsize=12)

# Right panel: boxplot of the same values.
box_ax.boxplot(daily_returns)
box_ax.set_title('Boxplot de Retornos Di√°rios', fontsize=14, fontweight='bold')
box_ax.set_ylabel('Retorno (%)', fontsize=12)

# Same light grid on both panels.
for panel in axes:
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Standard deviation of the return column, printed as the volatility figure.
print(f"\nüìä Volatilidade (desvio padr√£o): {df_btc['Daily_Return'].std():.2f}%")

## 4. Processamento com Apache Spark

Utilizando PySpark para calcular médias móveis e indicadores técnicos.

In [None]:
from src.spark_processor import SparkDataProcessor

# Create the project's Spark-backed processor.
processor = SparkDataProcessor()

# Step 1: moving averages for each requested window size.
print("‚ö° Calculando m√©dias m√≥veis com Spark...")
ma_windows = [7, 30, 90, 200]
df_with_ma = processor.calculate_moving_averages(df_btc, windows=ma_windows)

# Step 2: technical indicators on top of the moving-average frame.
print("‚ö° Calculando indicadores t√©cnicos...")
df_processed = processor.calculate_technical_indicators(df_with_ma)

print("\n‚úÖ Processamento conclu√≠do!")
# Columns present after processing that the input frame did not have.
new_columns = [name for name in df_processed.columns if name not in df_btc.columns]
print(f"Colunas adicionadas: {new_columns}")

# Kept as the last expression so the notebook renders the final rows.
df_processed.tail()

In [None]:
# Price series overlaid with the 7-, 30- and 90-window moving averages.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(df_processed['Date'], df_processed['Close'], label='Pre√ßo', linewidth=2, color='#2196F3')

# One dashed line per moving-average column (MA_<window>), each with its own colour.
for window, colour in ((7, '#FFC107'), (30, '#4CAF50'), (90, '#9C27B0')):
    ax.plot(
        df_processed['Date'],
        df_processed[f'MA_{window}'],
        label=f'MA {window}d',
        linewidth=1.5,
        linestyle='--',
        color=colour,
    )

ax.set_title('Pre√ßo com M√©dias M√≥veis', fontsize=16, fontweight='bold')
ax.set_xlabel('Data', fontsize=12)
ax.set_ylabel('Pre√ßo (USD)', fontsize=12)
ax.legend(loc='best')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 5. Modelagem com Prophet (Séries Temporais)

Usando Prophet do Facebook/Meta para prever tendências futuras.

In [None]:
# Train, evaluate and persist the project's Prophet wrapper on the BTC data.
# Everything is skipped (with a hint on how to install) when the wrapper
# module reports that Prophet is not available.
from models.prophet_model import CryptoProphetModel, PROPHET_AVAILABLE

if PROPHET_AVAILABLE:
    # Create the model wrapper
    prophet_model = CryptoProphetModel()

    # Convert the raw frame into whatever layout the wrapper expects
    df_prophet = prophet_model.prepare_data(df_btc)

    # Positional 80/20 split: first 80% of rows for training, the rest for testing
    split_idx = int(len(df_prophet) * 0.8)
    df_train = df_prophet[:split_idx]
    df_test = df_prophet[split_idx:]

    print(f"üìä Treino: {len(df_train)} registros")
    print(f"üìä Teste: {len(df_test)} registros")

    # Fit on the training slice only
    print("\nüîÆ Treinando Prophet...")
    prophet_model.train(df_train, changepoint_prior_scale=0.05, seasonality_mode='multiplicative')

    # Forecast 30 periods; `forecast` is also what the plotting cell checks for
    print("üîÆ Fazendo previs√µes...")
    forecast = prophet_model.predict(periods=30)

    # Evaluate against the held-out slice and print each returned metric
    # NOTE(review): only 30 periods are forecast while df_test holds the last
    # 20% of rows — confirm how evaluate() handles that mismatch.
    metrics = prophet_model.evaluate(df_test)
    print(f"\nüìä M√©tricas de Avalia√ß√£o:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")

    # Trend summary: reads the 'direction' and 'trend_change_pct' entries
    trend_analysis = prophet_model.get_trend_analysis()
    print(f"\nüìà An√°lise de Tend√™ncia:")
    print(f"Dire√ß√£o: {trend_analysis['direction']}")
    print(f"Varia√ß√£o: {trend_analysis['trend_change_pct']:.2f}%")

    # Persist the model (path is relative to the current working directory)
    prophet_model.save_model("models/prophet_btc_model.pkl")
    print("\n‚úÖ Modelo Prophet salvo!")
else:
    print("‚ö†Ô∏è Prophet n√£o est√° dispon√≠vel. Instale com: pip install prophet")

In [None]:
# Plot the Prophet forecast on top of the historical series.
# Skipped when Prophet is unavailable or the training cell never produced `forecast`.
if PROPHET_AVAILABLE and 'forecast' in locals():
    forecast_summary = prophet_model.get_forecast_summary(30)

    fig, ax = plt.subplots(figsize=(14, 6))

    # Historical 'Close' values.
    ax.plot(df_btc['Date'], df_btc['Close'], label='Hist√≥rico', linewidth=2, color='#2196F3')

    # Predicted values, with the band between the lower and upper bounds shaded.
    forecast_dates = forecast_summary['Date']
    ax.plot(
        forecast_dates,
        forecast_summary['Predicted'],
        label='Previs√£o',
        linewidth=2,
        linestyle='--',
        color='#FF5722',
        marker='o',
    )
    ax.fill_between(
        forecast_dates,
        forecast_summary['Lower_Bound'],
        forecast_summary['Upper_Bound'],
        alpha=0.3,
        color='#FF5722',
    )

    ax.set_title('Previs√£o Prophet - Pr√≥ximos 30 Dias', fontsize=16, fontweight='bold')
    ax.set_xlabel('Data', fontsize=12)
    ax.set_ylabel('Pre√ßo (USD)', fontsize=12)
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Text version of the first seven forecast rows.
    print("\nüìä Previs√µes para os pr√≥ximos 7 dias:")
    print(forecast_summary.head(7))

## 6. Modelagem com scikit-learn

Usando Regressão Linear para previsão de preços.

In [None]:
# Train, evaluate and persist the project's scikit-learn wrapper on the BTC data.
from models.sklearn_model import CryptoMLModel, SKLEARN_AVAILABLE

if SKLEARN_AVAILABLE:
    # Import scikit-learn only after the availability check. A module-level
    # import would raise ImportError when the package is missing, so the
    # friendly message in the `else` branch could never be printed.
    from sklearn.model_selection import train_test_split

    # Create the model wrapper
    ml_model = CryptoMLModel(model_type='linear_regression')

    # Build the feature matrix and target from the raw frame
    print("ü§ñ Preparando features...")
    X, y = ml_model.prepare_features(df_btc)

    print(f"Features: {len(ml_model.feature_names)}")
    print(f"Amostras: {len(X)}")

    # shuffle=False keeps row order, so the test set is the last 20% of samples
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    # Fit on the training portion
    print("\nü§ñ Treinando modelo...")
    ml_model.train(X_train, y_train)

    # Evaluate on the held-out portion and print each returned metric
    metrics = ml_model.evaluate(X_test, y_test)
    print(f"\nüìä M√©tricas de Avalia√ß√£o:")
    for metric, value in metrics.items():
        print(f"{metric}: {value}")

    # Forecast the next 7 days from the full frame
    forecast_ml = ml_model.predict_next_days(df_btc, days=7)
    print(f"\nüìà Previs√µes para pr√≥ximos 7 dias:")
    print(forecast_ml)

    # Persist the model (path is relative to the current working directory)
    ml_model.save_model("models/sklearn_btc_model.pkl")
    print("\n‚úÖ Modelo scikit-learn salvo!")
else:
    print("‚ö†Ô∏è scikit-learn n√£o est√° dispon√≠vel")

In [None]:
# Diagnostic plots for the fitted regression model.
# Skipped when scikit-learn is unavailable or the training cell did not create `ml_model`.
if SKLEARN_AVAILABLE and 'ml_model' in locals():
    y_pred = ml_model.predict(X_test)

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))
    scatter_ax, residual_ax = axes

    # Left panel: actual vs predicted, with a dashed y = x reference line
    # spanning the observed range of the test targets.
    scatter_ax.scatter(y_test, y_pred, alpha=0.5)
    lowest, highest = y_test.min(), y_test.max()
    scatter_ax.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
    scatter_ax.set_xlabel('Valores Reais', fontsize=12)
    scatter_ax.set_ylabel('Valores Previstos', fontsize=12)
    scatter_ax.set_title('Reais vs Previstos', fontsize=14, fontweight='bold')

    # Right panel: residuals (actual minus predicted) against predictions,
    # with a dashed zero line.
    residuals = y_test - y_pred
    residual_ax.scatter(y_pred, residuals, alpha=0.5)
    residual_ax.axhline(y=0, color='r', linestyle='--', lw=2)
    residual_ax.set_xlabel('Valores Previstos', fontsize=12)
    residual_ax.set_ylabel('Res√≠duos', fontsize=12)
    residual_ax.set_title('An√°lise de Res√≠duos', fontsize=14, fontweight='bold')

    # Same light grid on both panels.
    for panel in axes:
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

## 7. Visualizações Interativas com Plotly

In [None]:
from visualizations.plotly_charts import CryptoVisualizer, PLOTLY_AVAILABLE

# Interactive price chart with moving averages, shown inline and saved as HTML.
if not PLOTLY_AVAILABLE:
    print("‚ö†Ô∏è Plotly n√£o est√° dispon√≠vel")
else:
    visualizer = CryptoVisualizer()

    # Price history with the 7/30/90 moving averages overlaid.
    fig_price = visualizer.plot_price_history(df_processed, show_ma=True, ma_windows=[7, 30, 90])
    fig_price.show()

    # NOTE(review): the message below names a visualizations/ folder; where
    # save_figure actually writes is not visible from this cell — confirm.
    price_chart_file = 'btc_price_analysis.html'
    visualizer.save_figure(fig_price, price_chart_file)
    print("‚úÖ Gr√°fico salvo em: visualizations/btc_price_analysis.html")

In [None]:
# Full dashboard: reuses the `visualizer` object created in the Plotly
# price-chart cell, so that cell must have run first.
if PLOTLY_AVAILABLE:
    dashboard_file = 'btc_dashboard.html'

    fig_dashboard = visualizer.plot_dashboard(df_processed)
    fig_dashboard.show()
    visualizer.save_figure(fig_dashboard, dashboard_file)

    print("‚úÖ Dashboard salvo em: visualizations/btc_dashboard.html")

## 8. Conclusões e Insights

### Resumo da Análise:

1. **Ingestão de Dados**: ✅ Dados históricos carregados do Yahoo Finance
2. **Processamento Spark**: ✅ Médias móveis e indicadores calculados
3. **Prophet**: ✅ Modelo treinado para previsão de tendências
4. **scikit-learn**: ✅ Regressão Linear para previsão de preços
5. **Visualizações**: ✅ Gráficos interativos com Plotly

### Próximos Passos:

- Integrar dados de outras equipes
- Testar modelos mais avançados (XGBoost, LSTM)
- Implementar sistema de alertas
- Deploy em produção (Databricks)

In [None]:
# Persist the processed dataset as CSV.
output_path = Path("data/processed/btc_processed.csv")
# DataFrame.to_csv does not create missing folders and raises OSError when
# the target directory is absent, so create it first (no-op if it exists).
output_path.parent.mkdir(parents=True, exist_ok=True)
df_processed.to_csv(output_path, index=False)
print(f"\n‚úÖ Dados processados salvos em: {output_path}")

# Stop the Spark processor only if the Spark cell actually created it.
if 'processor' in locals():
    processor.stop()
    print("‚úÖ Sess√£o Spark encerrada")

print("\nüéâ An√°lise conclu√≠da com sucesso!")