## 1. Configuração do Ambiente Databricks

In [None]:
# Install the notebook's third-party dependencies on the Databricks cluster.
# NOTE(review): versions are not pinned, so a re-run may pick up different releases — consider pinning.
# NOTE(review): kaleido is not referenced by any cell visible here; presumably intended for
# static Plotly image export — confirm it is still needed.
%pip install yfinance prophet plotly kaleido

In [None]:
# Import every library used by the notebook in a single cell.
import pandas as pd
import numpy as np
from pyspark.sql import SparkSession
# Spark's `round` is imported under the alias `spark_round` so that it does not
# shadow Python's built-in round().
from pyspark.sql.functions import col, avg, lag, round as spark_round
from pyspark.sql.window import Window
import yfinance as yf
from prophet import Prophet
import plotly.express as px
import plotly.graph_objects as go

# Confirmation message printed once all imports have succeeded.
print("‚úÖ Bibliotecas importadas!")

## 2. Ingestão de Dados

In [None]:
# Fetch five years of BTC-USD price history from Yahoo Finance via yfinance.
ticker = yf.Ticker("BTC-USD")
btc_history = ticker.history(period="5y")

# yfinance typically reports a failed download (network error, rate limit, unknown
# symbol) by returning an empty frame instead of raising. Stop here with a clear
# message rather than letting the later spark.createDataFrame call fail on empty input.
if btc_history.empty:
    raise RuntimeError("yfinance returned no rows for BTC-USD (period='5y')")

# Build df_btc without in-place mutation: the index returned by yfinance becomes a
# regular column and the ticker symbol is attached as a constant 'Symbol' column.
df_btc = btc_history.reset_index().assign(Symbol='BTC-USD')

print(f"üìä Dados carregados: {len(df_btc)} registros")
display(df_btc.head())

## 3. Processamento com Spark (Databricks Cluster)

In [None]:
# Hand the pandas frame over to Spark (uses the `spark` session provided by the notebook runtime).
spark_df = spark.createDataFrame(df_btc)

# Print the column names and types Spark inferred from the pandas frame.
spark_df.printSchema()

# Count the rows first, then report the number.
spark_row_count = spark_df.count()
print(f"\nRegistros no Spark DataFrame: {spark_row_count}")

In [None]:
# Compute trailing moving averages of the closing price with Spark window functions.

# Partition by 'Symbol' before ordering by 'Date'. A window with no partitioning makes
# Spark move every row into a single partition (it warns "No Partition Defined for
# Window operation"), and it would average across tickers if more symbols were ever
# loaded. With the single BTC-USD symbol the computed values are unchanged.
window_spec = Window.partitionBy('Symbol').orderBy('Date')

# Each frame spans the current row plus the N-1 rows before it (N = 7, 30, 90).
# Near the start of the series fewer rows exist, so the average covers only those.
window_7d = window_spec.rowsBetween(-6, 0)
window_30d = window_spec.rowsBetween(-29, 0)
window_90d = window_spec.rowsBetween(-89, 0)

# One withColumn per moving average; values are rounded to 2 decimal places.
for ma_column, ma_window in (
    ('MA_7', window_7d),
    ('MA_30', window_30d),
    ('MA_90', window_90d),
):
    spark_df = spark_df.withColumn(
        ma_column,
        spark_round(avg(col('Close')).over(ma_window), 2),
    )

print("‚úÖ M√©dias m√≥veis calculadas com Spark!")
spark_df.select('Date', 'Close', 'MA_7', 'MA_30', 'MA_90').show(10)

In [None]:
# Collect the Spark result back to the driver as a pandas DataFrame.
# Spark does not guarantee row order when collecting, and the line charts later in
# the notebook draw points in row order, so sort chronologically before converting.
df_processed = spark_df.orderBy('Date').toPandas()

print(f"‚úÖ DataFrame processado: {len(df_processed)} registros")
print(f"Colunas: {df_processed.columns.tolist()}")

## 4. Análise Estatística

In [None]:
# Register the Spark DataFrame as the temporary view "btc_data" so it can be queried with SQL.
spark_df.createOrReplaceTempView("btc_data")

# One-row summary of the closing price: row count, minimum, maximum, mean and
# standard deviation (STDDEV).
# NOTE(review): "volatility" here is the standard deviation of the price level,
# not of returns — confirm that is the intended metric.
stats_query = """
    SELECT 
        COUNT(*) as total_records,
        MIN(Close) as min_price,
        MAX(Close) as max_price,
        AVG(Close) as avg_price,
        STDDEV(Close) as volatility
    FROM btc_data
"""
stats = spark.sql(stats_query)

stats.show()

## 5. Modelagem com Prophet

In [None]:
# Build the two-column training frame in the layout Prophet requires:
# 'ds' holds the timestamps and 'y' the value to forecast (the closing price).
df_prophet = pd.DataFrame({
    'ds': pd.to_datetime(df_processed['Date']),
    'y': df_processed['Close'],
})

# Configure the model: changepoint_prior_scale controls how flexible the trend is,
# and seasonal effects are applied multiplicatively rather than additively.
model = Prophet(
    changepoint_prior_scale=0.05,
    seasonality_mode='multiplicative'
)

print("üîÆ Treinando Prophet...")
model.fit(df_prophet)

# Extend the training dates by 30 future periods; predictions cover history plus horizon.
future = model.make_future_dataframe(periods=30)

# Prophet estimates yhat_lower / yhat_upper by simulation that draws from NumPy's
# global random state. Seed it so the interval bounds are identical on every
# Restart & Run All instead of drifting between runs.
np.random.seed(42)
forecast = model.predict(future)

print("‚úÖ Previs√µes conclu√≠das!")
display(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail(10))

## 6. Visualizações Interativas

In [None]:
# Line chart of the closing price with the 30-day moving average overlaid.

# (column to plot, legend label, line styling) for each series, in drawing order.
price_series = [
    ('Close', 'Pre√ßo', dict(color='#2196F3', width=2)),
    ('MA_30', 'MA 30d', dict(color='#4CAF50', width=1.5, dash='dash')),
]

fig = go.Figure()
for series_column, series_label, series_line in price_series:
    fig.add_trace(go.Scatter(
        x=df_processed['Date'],
        y=df_processed[series_column],
        mode='lines',
        name=series_label,
        line=series_line,
    ))

# Titles, dark theme and a fixed 1200x600 canvas.
price_layout = dict(
    title='Bitcoin - Pre√ßo e M√©dias M√≥veis',
    xaxis_title='Data',
    yaxis_title='Pre√ßo (USD)',
    template='plotly_dark',
    width=1200,
    height=600,
)
fig.update_layout(**price_layout)

fig.show()

In [None]:
# Forecast chart: observed prices, Prophet's point forecast, and a shaded band
# between the lower and upper forecast bounds.

# Observed closing prices used for training.
history_trace = go.Scatter(
    x=df_prophet['ds'],
    y=df_prophet['y'],
    mode='lines',
    name='Hist√≥rico',
    line=dict(color='#2196F3'),
)

# Prophet's point forecast (yhat) over every date in `forecast`.
prediction_trace = go.Scatter(
    x=forecast['ds'],
    y=forecast['yhat'],
    mode='lines',
    name='Previs√£o',
    line=dict(color='#FF5722', dash='dash'),
)

# Upper bound: drawn with zero line width and hidden from the legend; it exists
# only as the edge the next trace fills up to.
upper_bound_trace = go.Scatter(
    x=forecast['ds'],
    y=forecast['yhat_upper'],
    mode='lines',
    line=dict(width=0),
    showlegend=False,
)

# Lower bound: fill='tonexty' shades the area between this trace and the one
# immediately before it, so it must directly follow the upper bound.
lower_bound_trace = go.Scatter(
    x=forecast['ds'],
    y=forecast['yhat_lower'],
    mode='lines',
    fill='tonexty',
    line=dict(width=0),
    fillcolor='rgba(255, 87, 34, 0.2)',
    name='Intervalo de Confian√ßa',
)

# Trace order is significant (see the fill note above).
fig = go.Figure(data=[
    history_trace,
    prediction_trace,
    upper_bound_trace,
    lower_bound_trace,
])

# Titles, dark theme and a fixed 1200x600 canvas.
forecast_layout = dict(
    title='Previs√£o Prophet - Pr√≥ximos 30 Dias',
    xaxis_title='Data',
    yaxis_title='Pre√ßo (USD)',
    template='plotly_dark',
    width=1200,
    height=600,
)
fig.update_layout(**forecast_layout)

fig.show()

## 7. Salvar Resultados no Delta Lake

In [None]:
# Persist the processed Spark DataFrame in Delta format, replacing whatever is
# already stored at the target path.
delta_writer = spark_df.write.format("delta").mode("overwrite")
delta_writer.save("/mnt/delta/btc_processed")

print("‚úÖ Dados salvos no Delta Lake!")

In [None]:
# Register the Delta files written at /mnt/delta/btc_processed as the table
# `btc_analysis`; IF NOT EXISTS makes the statement safe to re-run.
create_table_sql = """
    CREATE TABLE IF NOT EXISTS btc_analysis
    USING DELTA
    LOCATION '/mnt/delta/btc_processed'
"""
spark.sql(create_table_sql)

print("‚úÖ Tabela Delta criada!")

## 8. Conclusão

### Componentes Utilizados:

- ✅ **Databricks Cluster** - Processamento distribuído
- ✅ **Apache Spark** - Cálculo de médias móveis
- ✅ **Prophet** - Previsão de séries temporais
- ✅ **Plotly** - Visualizações interativas
- ✅ **Delta Lake** - Armazenamento otimizado

### Métricas:
- Total de registros processados: ver a contagem impressa na seção 2 (Ingestão de Dados)
- Período analisado: 5 anos
- Previs√£o: 30 dias futuros