# In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
from prophet import Prophet
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
import numpy as np
from tqdm import tqdm

# Paths (blank here; fill in before running)
input_dir = r""
output_dir = r""
img_dir = r""
os.makedirs(output_dir, exist_ok=True)
os.makedirs(img_dir, exist_ok=True)

# Cutoff dates: last day of history used for validation and production runs
valid_cutoff = pd.Timestamp("2024-05-09")
prod_cutoff = pd.Timestamp("2025-05-09")

# Validation targets
valid_targets = [
    pd.Timestamp(day)
    for day in ("2024-05-10", "2024-06-09", "2025-05-09")
]

# Production targets, keyed by the horizon label used in the output JSON
prod_targets = {
    label: pd.Timestamp(day)
    for label, day in (
        ("Day", "2025-05-10"),
        ("Week", "2025-05-16"),
        ("Month", "2025-06-09"),
        ("Year", "2026-05-09"),
    )
}

# Load the DataFrame, apply a moving average and keep a trailing look-back window
def prepare_df(data, cutoff, window=5, years=3):
    """Build the modelling frame from raw daily records.

    Args:
        data: Anything ``pd.DataFrame`` accepts that yields ``Date`` and
            ``Close`` columns (the caller passes a list of dicts).
        cutoff: Timestamp the look-back window is measured from.
        window: Number of rows in the rolling mean used to smooth ``Close``.
        years: Length of the look-back window, counted as ``years * 365`` days.

    Returns:
        A DataFrame with columns ``ds`` (timezone-naive date), ``y`` (rolling
        mean of ``Close``) and ``y_raw`` (original ``Close``), sorted by
        ``ds``. Only the lower bound is applied: rows dated after ``cutoff``
        are kept so callers can look up realised prices.
    """
    df = pd.DataFrame(data)
    # Unparseable dates become NaT; timezone info is dropped so that later
    # comparisons against naive Timestamps work.
    df['ds'] = pd.to_datetime(df['Date'], errors='coerce').apply(
        lambda d: d.tz_localize(None) if d.tzinfo else d
    )
    df['y_raw'] = df['Close']
    df = df.sort_values('ds')
    df['y'] = df['y_raw'].rolling(window=window).mean()
    # Drops NaT dates and the first ``window - 1`` rows, which have no full window.
    df = df.dropna(subset=['ds', 'y'])

    # Keep only rows from the last ``years`` years before the cutoff.
    earliest = cutoff - pd.Timedelta(days=years * 365)
    df = df[df['ds'] >= earliest]
    return df[['ds', 'y', 'y_raw']]

# Prophet with a linear-regression fallback
def predict_with_model(df, cutoff_date, target_date, stock_symbol, mode, plot=False):
    """Forecast the price at ``target_date`` using data up to ``cutoff_date``.

    A Prophet model is tried first; if anything in that path raises, a linear
    regression of ``y`` on the day index is used instead.

    Args:
        df: Frame with ``ds``, ``y`` and ``y_raw`` columns.
        cutoff_date: Last date (inclusive) whose rows may be used for training.
        target_date: Date to forecast.
        stock_symbol: Symbol used in messages, plot title and image file name.
        mode: Free-form label used in messages, plot title and image file name.
        plot: When true, save a PNG of the history and the prediction to
            ``img_dir``.

    Returns:
        A dict with ``Date`` (formatted ``YYYY-MM-DD 00:00:00``), ``Close``
        (prediction rounded to 2 decimals) and ``MAE`` (absolute error against
        ``y_raw`` on the predicted date, rounded to 4 decimals, or ``None``
        when ``df`` has no row for that date).

    Raises:
        ValueError: If fewer than 10 rows are available up to ``cutoff_date``.
    """
    df_trainable = df[df['ds'] <= cutoff_date]
    if len(df_trainable) < 10:
        raise ValueError(f"{stock_symbol} - Dados insuficientes para treino até {cutoff_date.date()}")

    # NOTE(review): Prophet is fitted on the oldest 70% of the trainable rows
    # only; the newest 30% are never used by it (the fallback regression uses
    # all rows). Confirm whether this hold-out is intentional.
    split_index = int(len(df_trainable) * 0.7)
    df_train = df_trainable.iloc[:split_index]

    try:
        model = Prophet(
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=True,
            changepoint_prior_scale=0.1,
            seasonality_mode='multiplicative'
        )
        model.fit(df_train[['ds', 'y']])

        # The horizon is measured from the end of the training slice, with 10
        # extra periods of slack so a row at or after the target exists.
        days = (target_date - df_train['ds'].max()).days
        future = model.make_future_dataframe(periods=days + 10)
        forecast = model.predict(future)

        forecast_point = forecast[forecast['ds'] >= target_date]
        if forecast_point.empty:
            raise ValueError(f"{stock_symbol} - Nenhuma previsão disponível após {target_date}")
        forecast_point = forecast_point.iloc[0]
        pred_date = forecast_point['ds']
        pred_value = round(forecast_point['yhat'], 2)

    except Exception as e:
        # Deliberate best-effort: any Prophet failure falls back to a straight
        # line fitted over all trainable rows.
        print(f"[{stock_symbol}] Prophet falhou ({mode}) - tentando regressão linear: {e}")
        df_lin = df_trainable.copy()
        df_lin['days'] = (df_lin['ds'] - df_lin['ds'].min()).dt.days
        X = df_lin[['days']].values
        y = df_lin['y'].values
        model = LinearRegression()
        model.fit(X, y)
        day_index = (target_date - df_lin['ds'].min()).days
        pred_value = round(model.predict([[day_index]])[0], 2)
        pred_date = target_date

    # The realised price is only available when df has a row on the predicted date.
    y_true = df[df['ds'] == pred_date]['y_raw']
    mae = mean_absolute_error([y_true.values[0]], [pred_value]) if not y_true.empty else None
    # Compare against None explicitly: an exact prediction has MAE 0.0, which
    # is falsy and must not be reported as "no MAE".
    has_mae = mae is not None

    if plot:
        title = f"{stock_symbol} - {mode} - {target_date.date()}"
        if has_mae:
            title += f" | MAE: {mae:.2f}"
        plt.figure(figsize=(10, 5))
        try:
            plt.plot(df['ds'], df['y_raw'], label='Histórico (Close)')
            plt.axvline(pred_date, color='r', linestyle='--', label='Previsão')
            plt.scatter([pred_date], [pred_value], label='Previsto', color='green')
            if not y_true.empty:
                plt.scatter([pred_date], [y_true.values[0]], label='Real', color='orange')
            plt.title(title)
            plt.xlabel("Data")
            plt.ylabel("Preço")
            plt.legend()
            plt.savefig(os.path.join(img_dir, f"{stock_symbol}_{mode}_{target_date.date()}.png"))
        finally:
            # Always release the figure, even if drawing or saving fails.
            plt.close()

    return {
        "Date": pred_date.strftime("%Y-%m-%d 00:00:00"),
        "Close": pred_value,
        "MAE": round(mae, 4) if has_mae else None
    }

# Main loop: for every input JSON, run the validation forecasts (printed and
# plotted) and then write the production forecasts to one output JSON.
files = [f for f in os.listdir(input_dir) if f.endswith('.json')]
failed = 0
for filename in tqdm(files, desc="Processando ações"):
    try:
        # JSON is read as UTF-8 explicitly rather than with the platform default.
        with open(os.path.join(input_dir, filename), 'r', encoding='utf-8') as f:
            raw_data = json.load(f)

        # Fall back to the file name without its extension when no symbol is given.
        symbol = raw_data.get("symbol", os.path.splitext(filename)[0])
        daily_data = raw_data.get("daily_last_10y", [])
        if len(daily_data) < 300:
            print(f"[{symbol}] Dados insuficientes, pulando.")
            continue

        df_valid = prepare_df(daily_data, valid_cutoff)
        df_prod = prepare_df(daily_data, prod_cutoff)

        # Non-price fields of every prediction are copied from the last input
        # record. NOTE(review): presumably the records are in chronological
        # order, making this the most recent day; confirm against the data.
        last_row = daily_data[-1]
        base_entry = {
            "Open": last_row['Open'],
            "High": last_row['High'],
            "Low": last_row['Low'],
            "Volume": last_row['Volume'],
            "Dividends": last_row['Dividends'],
            "Stock Splits": last_row['Stock Splits']
        }

        # === VALIDATION ===
        print(f"\n🔍 Testando {symbol} (até 2024-05-09, últimos 3 anos)")
        for target in valid_targets:
            result = predict_with_model(df_valid, valid_cutoff, target, symbol, "teste", plot=True)
            print(f"{symbol} [{target.date()}] -> Previsto: {result['Close']} | MAE: {result['MAE']}")

        # === PRODUCTION ===
        result_json = {"symbol": symbol}
        for label, target_date in prod_targets.items():
            pred = predict_with_model(df_prod, prod_cutoff, target_date, symbol, "prod")
            result_json[f"prediction{label}"] = [{**base_entry, "Date": pred['Date'], "Close": pred['Close']}]

        with open(os.path.join(output_dir, f"{symbol}.json"), "w", encoding='utf-8') as out:
            json.dump(result_json, out, indent=2)

    except Exception as e:
        # Top-level boundary: one bad file must not stop the batch, but it is
        # counted so the final message does not claim success.
        failed += 1
        print(f"[ERRO] {filename}: {e}")

if failed:
    print(f"⚠️ Processo concluído com {failed} erro(s) em {len(files)} arquivo(s).")
else:
    print("✅ Processo concluído com sucesso!")
