In [0]:
# ============================================
# Dependency installation
# ============================================
# Tries to import Prophet. When the import fails, installs the package with pip
# using the interpreter that runs this cell and then restarts Python so the
# freshly installed library can be loaded.

print("=" * 80, "Verifica e instala dependências", "=" * 80, sep="\n")

try:
    from prophet import Prophet
except ImportError:
    print("\n  Prophet não encontrado. Instalando...")

    import subprocess
    import sys

    # sys.executable makes pip target the interpreter executing this cell.
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install", "prophet", "--quiet"]
    )
    print(" Prophet instalado")
    print(" Reiniciando Python para carregar a biblioteca")

    # `dbutils` is not defined in this file; it is expected to be supplied by
    # the notebook runtime.
    dbutils.library.restartPython()
else:
    print("\n Prophet já está instalado!")

In [0]:
# ============================================
# Modelo 4: previsão de fluxo de caixa 
# Prophet - Séries Temporais
# ============================================

import mlflow
import mlflow.prophet
from prophet import Prophet
import pandas as pd
import numpy as np
from pyspark.sql import functions as F
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import timedelta
import warnings
warnings.filterwarnings('ignore')

# Opening banner for the cash-flow forecasting notebook.
print("=" * 80, "Prophet precisão de fluxo de caixa", "=" * 80, sep="\n")

In [0]:
# ============================================
# 1. MLflow configuration
# ============================================
# Selects the MLflow experiment. If setting the primary path raises, the error
# is printed and the experiment name "finance-ml-project" is used instead.

experiment_path = "/Users/andre.bomfim99@gmail.com/finance-ml-experiments"

try:
    mlflow.set_experiment(experiment_path)
except Exception as err:
    print(f"  Erro ao configurar experimento: {err}")
    experiment_path = "finance-ml-project"
    # A failure here is not caught and propagates to the notebook.
    mlflow.set_experiment(experiment_path)
    print(f" Usando experimento alternativo: {experiment_path}")
else:
    print(f"\n Experimento configurado: {experiment_path}")

In [0]:
# ============================================
# 2. Check whether df_classification exists
# ============================================

# Reuse df_classification when an earlier cell already defined it; otherwise
# load the transactions from the source table into pandas.
if 'df_classification' in locals():
    print(f"\n Usando df_classification existente: {len(df_classification):,} transações")
else:
    print("\n df_classification não encontrado")
    print("Carregando dados da tabela")

    SOURCE_TABLE = "finance_silver.transacoes_silver"
    df = spark.read.table(SOURCE_TABLE)

    # Only the columns listed here are converted to a pandas DataFrame.
    df_pandas = df.select(
        "data",
        "descricao",
        "valor",
        "tipo",
        "categoria",
        "ano",
        "mes",
        "dia_semana",
        "trimestre",
    ).toPandas()

    # Work on a copy so df_pandas stays untouched by later cells.
    df_classification = df_pandas.copy()
    print(f" Dados carregados: {len(df_classification):,} transações")

In [0]:
# ============================================
# 3. Prepare data for Prophet
# ============================================
# Builds the daily net cash-flow series used to fit Prophet: one row per day
# with columns `ds` (date) and `y` (sum of the signed transaction values).

print("\n" + "=" * 80)
print("Preparar dados para phophet")
print("=" * 80)

df_daily = df_classification.copy()
# Normalize to midnight so that values carrying a time of day are still
# aggregated per calendar day (no-op when `data` already holds plain dates).
df_daily['data'] = pd.to_datetime(df_daily['data']).dt.normalize()

# Signed flow: rows whose `tipo` is 'entrada' keep their value, every other row
# is negated. Vectorized equivalent of the former row-wise DataFrame.apply.
is_inflow = df_daily['tipo'] == 'entrada'
df_daily['fluxo'] = df_daily['valor'].where(is_inflow, -df_daily['valor'])

# Sum per day, rename to the ds/y column names and order chronologically.
df_prophet = (
    df_daily.groupby('data', as_index=False)['fluxo']
    .sum()
    .rename(columns={'data': 'ds', 'fluxo': 'y'})
    .sort_values('ds')
    .reset_index(drop=True)
)
print(f"\n Dados preparados:")
print(f"   Período: {df_prophet['ds'].min().date()} até {df_prophet['ds'].max().date()}")
print(f"   Total de dias: {len(df_prophet)}")
print(f"   Fluxo médio diário: R$ {df_prophet['y'].mean():,.2f}")
print(f"   Fluxo máximo: R$ {df_prophet['y'].max():,.2f}")
print(f"   Fluxo mínimo: R$ {df_prophet['y'].min():,.2f}")
print("\n Amostra dos dados:")
display(df_prophet.head(10))

In [0]:
# ============================================
# 4. Train the Prophet model
# ============================================

print("\n" + "=" * 80, "Treinando modelo Prophet", "=" * 80, sep="\n")

# The MLflow run named "4_Prophet_Forecasting" is active only for the duration
# of this `with` block; `run` keeps a handle to it for later cells.
with mlflow.start_run(run_name="4_Prophet_Forecasting") as run:
    print("\n Inicializando Prophet")
    model = Prophet(
        interval_width=0.95,
        changepoint_prior_scale=0.05,
        seasonality_prior_scale=10.0,
        daily_seasonality=False,
        weekly_seasonality=True,
        yearly_seasonality=True,
    )

    print(" Prophet configurado", "\n Treinando modelo", sep="\n")
    model.fit(df_prophet)
    print(" Modelo treinado")


In [0]:


    
    # ============================================
    # 5. Forecasts
    # ============================================
    # NOTE(review): this cell is uniformly indented although it is not part of
    # the `with` block of the training cell — confirm the notebook front end
    # accepts an indented cell.

    print("\n Gerando previsões para os próximos 180 dias ")
    future = model.make_future_dataframe(freq='D', periods=180)
    forecast = model.predict(future)
    print(f" Previsões geradas: {len(forecast)} pontos")

    # Rows dated strictly after the last observed day are the future forecasts.
    last_date = df_prophet['ds'].max()
    future_forecast = forecast.loc[forecast['ds'] > last_date].copy()

    print(
        f"\n Previsões futuras:",
        f"   Período: {future_forecast['ds'].min().date()} até {future_forecast['ds'].max().date()}",
        f"   Fluxo médio previsto: R$ {future_forecast['yhat'].mean():,.2f}",
        f"   Fluxo total previsto (6 meses): R$ {future_forecast['yhat'].sum():,.2f}",
        sep="\n",
    )

In [0]:
    # ============================================
    # 6. Log to MLflow
    # ============================================
    # The run opened by the training cell ends when that cell's `with` block
    # exits. Fluent logging calls made with no active run auto-start a new,
    # unrelated run, so the parameters, metrics and model would not land in the
    # run whose ID is printed below. Re-opening the run by ID logs everything to
    # the training run and ends it again when this block exits.

    print("\n Registrando no MLflow")
    with mlflow.start_run(run_id=run.info.run_id):
        # Parameters describing the model configuration and data sizes.
        mlflow.log_params({
            "model_type": "Prophet",
            "training_days": len(df_prophet),
            "forecast_days": 180,
            "yearly_seasonality": True,
            "weekly_seasonality": True,
            "changepoint_prior_scale": 0.05,
        })
        # Summary statistics of the historical series and of the forecast.
        mlflow.log_metrics({
            "avg_daily_flow": df_prophet['y'].mean(),
            "forecast_avg_daily": future_forecast['yhat'].mean(),
            "forecast_total_6m": future_forecast['yhat'].sum(),
            "std_historical": df_prophet['y'].std(),
            "std_forecast": future_forecast['yhat'].std(),
        })
        mlflow.prophet.log_model(model, "prophet_model")

    print(f"Modelo registrado no MLflow")
    print(f"   Run ID: {run.info.run_id}")

In [0]:

    
    # ============================================
    # 7. Charts
    # ============================================
    # Builds four figures; each one is saved under /tmp, attached to the
    # training run and displayed.
    #
    # Artifacts are logged through MlflowClient with an explicit run ID: the run
    # opened by the training cell is already closed at this point, so the fluent
    # `mlflow.log_artifact` would auto-start an unrelated run instead of using
    # the one identified by `run.info.run_id`.

    def _save_log_and_show(figure, path):
        """Save `figure` to `path`, log the file to the training run and show it.

        Args:
            figure: matplotlib Figure to persist.
            path: destination path of the PNG file.
        """
        figure.savefig(path, dpi=300, bbox_inches='tight')
        mlflow.tracking.MlflowClient().log_artifact(run.info.run_id, path)
        plt.show()
        # Release the figure so repeated executions do not pile up open figures.
        plt.close(figure)

    print("\n Gerando visualizações")

    # --- Figure 1: full history plus forecast, with the last observed date marked.
    fig1 = model.plot(forecast, figsize=(14, 6))
    plt.title('Previsão de Fluxo de Caixa - Próximos 6 Meses', fontsize=16, fontweight='bold', pad=20)
    plt.xlabel('Data', fontsize=12)
    plt.ylabel('Fluxo de Caixa (R$)', fontsize=12)
    plt.axvline(x=last_date, color='red', linestyle='--', linewidth=2, label='Hoje')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    _save_log_and_show(fig1, '/tmp/prophet_forecast.png')

    # --- Figure 2: Prophet component plots.
    fig2 = model.plot_components(forecast, figsize=(14, 10))
    plt.tight_layout()
    _save_log_and_show(fig2, '/tmp/prophet_components.png')

    # --- Figure 3: last 30 historical rows followed by the future forecast.
    fig, ax = plt.subplots(figsize=(14, 7))
    last_30_days = df_prophet.tail(30)
    ax.plot(last_30_days['ds'], last_30_days['y'],
            'o-', color='black', linewidth=2, markersize=4, label='Histórico (30 dias)')

    ax.plot(future_forecast['ds'], future_forecast['yhat'],
            '-', color='blue', linewidth=2, label='Previsão')

    ax.fill_between(future_forecast['ds'],
                    future_forecast['yhat_lower'],
                    future_forecast['yhat_upper'],
                    alpha=0.3, color='lightblue', label='Intervalo 95%')

    ax.axvline(x=last_date, color='red', linestyle='--', linewidth=2, label='Hoje')
    ax.axhline(y=0, color='gray', linestyle='-', linewidth=1, alpha=0.5)

    ax.set_xlabel('Data', fontsize=12)
    ax.set_ylabel('Fluxo de Caixa (R$)', fontsize=12)
    ax.set_title('Fluxo de Caixa: Últimos 30 Dias + Próximos 6 Meses',
                 fontsize=16, fontweight='bold', pad=20)
    ax.legend(loc='best')
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    _save_log_and_show(fig, '/tmp/prophet_forecast_detail.png')

    # --- Figure 4: forecast aggregated per calendar month.
    # The `mes` column is added to future_forecast itself (kept from the
    # original behavior).
    future_forecast['mes'] = pd.to_datetime(future_forecast['ds']).dt.to_period('M')
    # NOTE(review): the monthly bounds are plain sums of the daily bounds, which
    # is not the same as a 95% interval for the monthly total — confirm that
    # the 'Intervalo 95%' label is acceptable for this chart.
    monthly_forecast = (
        future_forecast.groupby('mes')[['yhat', 'yhat_lower', 'yhat_upper']]
        .sum()
        .reset_index()
    )

    # Period values are turned into strings so they can be used as tick labels.
    monthly_forecast['mes'] = monthly_forecast['mes'].astype(str)

    fig, ax = plt.subplots(figsize=(12, 6))
    x_pos = range(len(monthly_forecast))

    ax.bar(x_pos, monthly_forecast['yhat'], color='steelblue',
           edgecolor='black', linewidth=1.5, alpha=0.8)
    # Asymmetric error bars: distance from yhat down to the lower bound and up
    # to the upper bound.
    ax.errorbar(x_pos, monthly_forecast['yhat'],
                yerr=[monthly_forecast['yhat'] - monthly_forecast['yhat_lower'],
                      monthly_forecast['yhat_upper'] - monthly_forecast['yhat']],
                fmt='none', color='red', capsize=5, capthick=2, label='Intervalo 95%')

    ax.set_xlabel('Mês', fontsize=12)
    ax.set_ylabel('Fluxo de Caixa Total (R$)', fontsize=12)
    ax.set_title('Previsão de Fluxo de Caixa Mensal - Próximos 6 Meses',
                 fontsize=14, fontweight='bold', pad=20)
    ax.set_xticks(x_pos)
    ax.set_xticklabels(monthly_forecast['mes'], rotation=45, ha='right')
    ax.axhline(y=0, color='black', linestyle='-', linewidth=1)
    ax.grid(True, alpha=0.3, axis='y')
    ax.legend()

    # Value labels: above the bar for non-negative totals, below it otherwise.
    for i, v in enumerate(monthly_forecast['yhat']):
        ax.text(i, v, f'R$ {v:,.0f}', ha='center', va='bottom' if v >= 0 else 'top',
                fontsize=9, fontweight='bold')

    plt.tight_layout()
    _save_log_and_show(fig, '/tmp/prophet_monthly_forecast.png')

In [0]:
# ============================================
# 8. Export forecasts
# ============================================
# Writes the future forecasts to a CSV file and attaches it to the training run.

print("\n Exportan")
forecast_export = future_forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].rename(
    columns={
        'ds': 'data',
        'yhat': 'previsao',
        'yhat_lower': 'limite_inferior',
        'yhat_upper': 'limite_superior',
    }
)
export_path = '/tmp/previsoes_fluxo_caixa.csv'
forecast_export.to_csv(export_path, index=False)
# The training run is no longer active in this cell, so the artifact is logged
# with an explicit run ID; the fluent `mlflow.log_artifact` would auto-start an
# unrelated run instead.
mlflow.tracking.MlflowClient().log_artifact(run.info.run_id, export_path)
print(" Previsões exportadas: previsoes_fluxo_caixa.csv")
print("\n Resumo das Previsões Mensais:")
display(monthly_forecast)


In [0]:
# ============================================
# 9. Summary
# ============================================
# Prints a recap of the historical series, the forecast and the MLflow run ID.

# Header banner.
print("\n" + "=" * 80, " previsão concluída", "=" * 80, sep="\n")

# Historical section.
print(
    f"\n Resumo:",
    f"   Período histórico: {df_prophet['ds'].min().date()} a {df_prophet['ds'].max().date()}",
    f"   Dias de histórico: {len(df_prophet)}",
    f"   Fluxo médio histórico: R$ {df_prophet['y'].mean():,.2f}/dia",
    f"   {'─' * 76}",
    sep="\n",
)

# Forecast section.
print(
    f"\n   Período previsto: {future_forecast['ds'].min().date()} a {future_forecast['ds'].max().date()}",
    f"   Dias previstos: {len(future_forecast)}",
    f"   Fluxo médio previsto: R$ {future_forecast['yhat'].mean():,.2f}/dia",
    f"   Fluxo total 6 meses: R$ {future_forecast['yhat'].sum():,.2f}",
    f"   {'─' * 76}",
    sep="\n",
)

# Artifacts and run identification, followed by the closing rule.
print(
    f"\n  Artefatos salvos: 4 gráficos + 1 CSV",
    f"    MLflow Run ID: {run.info.run_id}",
    sep="\n",
)

print("\n" + "=" * 80)