# In [None]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
import tempfile
import uuid

def carregar_dados_ticker(ticker, pasta_dados_tratados, tipo_experimento):
    """Load the processed CSV of one ticker and pick feature/target columns.

    The file name is ``{ticker}_MacroData_Tratado.csv`` for the ``"macro"``
    experiment and ``{ticker}_dados_unificados.csv`` for every other
    experiment type. The date index is taken from the ``Data`` column when
    the file has one, otherwise from the first column of the file.

    Args:
        ticker: Ticker symbol, used as the file-name prefix.
        pasta_dados_tratados: Directory that holds the processed CSV files.
        tipo_experimento: Experiment type. ``"macro"`` selects the macro
            file; ``"micro"`` additionally allows ``Close_Feature`` and
            ``Close_Target`` to be derived from a price column when they
            are absent.

    Returns:
        ``(df, feature_cols, target_col)`` on success, with rows lacking a
        target value removed. ``(None, None, None)`` when the file does not
        exist, the required columns are missing, or loading raises.
    """
    import traceback

    sem_resultado = (None, None, None)

    # Only the macro experiment has its own file-name suffix.
    sufixo = "MacroData_Tratado" if tipo_experimento == "macro" else "dados_unificados"
    nome_arquivo = f"{ticker}_{sufixo}.csv"
    caminho_arquivo = os.path.join(pasta_dados_tratados, nome_arquivo)

    if not os.path.exists(caminho_arquivo):
        print(f"Erro: Arquivo n√£o encontrado para {ticker} em {caminho_arquivo}")
        return sem_resultado

    try:
        print(f"Carregando dados tratados de: {caminho_arquivo}")

        # Read just the header first to decide how the date index is built.
        cabecalho = pd.read_csv(caminho_arquivo, nrows=0).columns
        if 'Data' not in cabecalho:
            df = pd.read_csv(caminho_arquivo, index_col=0)
            df.index = pd.to_datetime(df.index)
            df.index.name = 'Data'
        else:
            df = pd.read_csv(caminho_arquivo, parse_dates=['Data'], index_col='Data')

        # Column names that look like a stringified tuple ("('...')") are
        # renamed to 'Original_Close' and later kept out of the features.
        colunas_tupla = [
            nome for nome in df.columns
            if nome.startswith("('") and nome.endswith("')")
        ]
        for nome in colunas_tupla:
            print(f"Renomeando coluna original: '{nome}' para 'Original_Close'")
        if colunas_tupla:
            df = df.rename(columns=dict.fromkeys(colunas_tupla, 'Original_Close'))

        tem_colunas_close = 'Close_Feature' in df.columns and 'Close_Target' in df.columns
        if not tem_colunas_close:
            if tipo_experimento != "micro" or 'Pre√ßo' not in df.columns:
                print(f"Erro: Colunas necess√°rias n√£o encontradas no arquivo {nome_arquivo}")
                return sem_resultado
            # Micro data: today's price is the feature, the next row's price the target.
            df['Close_Feature'] = df['Pre√ßo']
            df['Close_Target'] = df['Pre√ßo'].shift(-1)

        target_col = 'Close_Target'
        colunas_excluidas = (target_col, 'Original_Close')
        feature_cols = [nome for nome in df.columns if nome not in colunas_excluidas]

        print(f"Colunas de Features identificadas para {ticker}: {feature_cols}")
        print(f"Coluna Target identificada para {ticker}: {target_col}")

        # Rows without a target cannot be used for training or evaluation.
        df = df.dropna(subset=[target_col])
        return df, feature_cols, target_col

    except Exception as e:
        print(f"Erro ao carregar ou processar o arquivo {caminho_arquivo}: {e}")
        traceback.print_exc()
        return sem_resultado

def criar_janelas_multivariadas(features_array, target_array, janela):
    """Build sliding windows over a 2-D feature array and their targets.

    Window ``i`` holds rows ``i`` through ``i + janela - 1`` of
    ``features_array`` and is paired with ``target_array[i + janela]``.

    Args:
        features_array: 2-D array indexed as ``[row, feature]``.
        target_array: Sequence of target values indexed by row.
        janela: Number of consecutive rows in each window.

    Returns:
        ``(X, y)`` as NumPy arrays. Both are empty arrays when the number
        of rows is not greater than ``janela``.
    """
    if len(features_array) <= janela:
        print(f"Aviso em criar_janelas: comprimento dos dados ({len(features_array)}) n√£o √© maior que a janela ({janela}). Retornando vazio.")
        return np.array([]), np.array([])

    inicios = range(len(features_array) - janela)
    janelas = [features_array[ini:ini + janela, :] for ini in inicios]
    alvos = [target_array[ini + janela] for ini in inicios]
    return np.array(janelas), np.array(alvos)

def build_model(hp, input_shape):
    """Build and compile an LSTM regression model from tuner hyperparameters.

    The search space is:
      * ``num_lstm_layers``: 1 or 2 stacked LSTM layers, each with
        ``units_lstm_{i}`` in {50, 100, 150} followed by a dropout of
        ``dropout_lstm_{i}`` between 0.1 and 0.3;
      * ``num_dense_layers``: 0 or 1 hidden dense layer with
        ``units_dense_{i}`` in {32, 64}, ReLU activation and a dropout of
        ``dropout_dense_{i}`` between 0.1 and 0.3;
      * ``learning_rate``: 1e-3 or 5e-4 for the Adam optimizer.

    Args:
        hp: Hyperparameter container providing ``Int``, ``Float`` and
            ``Choice`` (as supplied by Keras Tuner).
        input_shape: Shape of one input sample, passed to ``keras.Input``.

    Returns:
        A compiled ``keras.Sequential`` model with a single linear output
        unit, trained with mean squared error.
    """
    model = keras.Sequential()
    # Declare the input once, up front, instead of passing ``input_shape`` to
    # the LSTM layers: handing ``input_shape=None`` to the non-first layers is
    # rejected by some Keras versions.
    model.add(keras.Input(shape=input_shape))

    num_lstm_layers = hp.Int('num_lstm_layers', 1, 2)
    for i in range(num_lstm_layers):
        model.add(layers.LSTM(
            units=hp.Int(f'units_lstm_{i}', min_value=50, max_value=150, step=50),
            # Every LSTM but the last must emit the full sequence so the
            # next LSTM layer has a time dimension to consume.
            return_sequences=i < num_lstm_layers - 1,
        ))
        model.add(layers.Dropout(hp.Float(f'dropout_lstm_{i}', 0.1, 0.3, step=0.1)))

    for i in range(hp.Int('num_dense_layers', 0, 1)):
        model.add(layers.Dense(
            units=hp.Int(f'units_dense_{i}', min_value=32, max_value=64, step=32),
            activation='relu'
        ))
        model.add(layers.Dropout(hp.Float(f'dropout_dense_{i}', 0.1, 0.3, step=0.1)))

    model.add(layers.Dense(1))
    model.compile(
        optimizer=keras.optimizers.Adam(hp.Choice('learning_rate', values=[1e-3, 5e-4])),
        loss='mean_squared_error'
    )
    return model

# Ticker symbols for which experiments are run.
tickers = [
    "BEEF3.SA",
    "PETR4.SA",
    "SOJA3.SA",
    "GGBR3.SA",
    "CSNA3.SA",
    "VALE3.SA",
    "JBSS3.SA",
    "BRFS3.SA",
    "SUZB3.SA",
]

# Root folder that holds the input data folders and receives the results.
base_path = r"C:\Users\leona\OneDrive\√Årea de Trabalho\Machine-Learning---Stock-Prediction\CodigoExperimentos\ExperimentoFeatures"

# Experiment type -> folder with that experiment's processed CSV files.
pastas_dados = {
    tipo: os.path.join(base_path, subpasta)
    for tipo, subpasta in (
        ("macro", "dados_macro_csv_processados_tratados"),
        ("unificados", "dados_unificados"),
        ("micro", "dataframesMicro"),
    )
}

# Date ranges (inclusive) for the train/validation period and the final test period.
start_date_val, end_date_val = "2020-01-01", "2022-12-31"
start_date_test, end_date_test = "2023-01-01", "2023-12-31"

# Range of window sizes (in rows) to evaluate, both ends inclusive.
janela_min, janela_max = 1, 4

# Cross-validation folds, tuner trial budget and epoch limits.
n_splits_cv = 5
max_trials_tuner = 10
epochs_tuner = 50
epochs_final = 100

def _validacao_cruzada_temporal(tuner, best_hps, X, y, scaler_target):
    """Cross-validate the best hyperparameters with ``TimeSeriesSplit``.

    A fresh model is built and trained for every fold. Fold predictions are
    mapped back to the original target scale with ``scaler_target`` before
    the metrics are computed.

    Returns:
        ``(avg_mae, avg_rmse, avg_r2)`` averaged over the evaluated folds,
        or three NaNs when no fold could be evaluated.
    """
    tscv = TimeSeriesSplit(n_splits=n_splits_cv)
    cv_metrics = {'mae': [], 'rmse': [], 'r2': []}

    for fold, (train_index, val_index) in enumerate(tscv.split(X), 1):
        print(f"   Fold {fold}/{n_splits_cv}...", end=" ")
        X_train_fold, X_val_fold = X[train_index], X[val_index]
        y_train_fold, y_val_fold = y[train_index], y[val_index]

        if len(X_train_fold) == 0 or len(X_val_fold) == 0:
            print("Pulando fold (sem dados)")
            continue

        model_cv = tuner.hypermodel.build(best_hps)
        # No validation data is passed to fit(), so early stopping watches
        # the training loss.
        early_stopping_cv = EarlyStopping(monitor='loss', patience=10, verbose=0)
        model_cv.fit(X_train_fold, y_train_fold, epochs=epochs_final, batch_size=32,
                     callbacks=[early_stopping_cv], verbose=0)

        previsoes_val_fold_scaled = model_cv.predict(X_val_fold, verbose=0)
        previsoes_val_fold = scaler_target.inverse_transform(previsoes_val_fold_scaled)
        reais_val_fold = scaler_target.inverse_transform(y_val_fold.reshape(-1, 1))

        mae_fold = mean_absolute_error(reais_val_fold, previsoes_val_fold)
        rmse_fold = np.sqrt(mean_squared_error(reais_val_fold, previsoes_val_fold))

        # An undefined or non-finite R2 is recorded as -1.0 so that it does
        # not turn the fold average into NaN.
        try:
            r2_fold = r2_score(reais_val_fold, previsoes_val_fold)
            r2_fold = r2_fold if np.isfinite(r2_fold) else -1.0
        except ValueError:
            r2_fold = -1.0

        cv_metrics['mae'].append(mae_fold)
        cv_metrics['rmse'].append(rmse_fold)
        cv_metrics['r2'].append(r2_fold)

        print(f"MAE: {mae_fold:.4f}, RMSE: {rmse_fold:.4f}, R¬≤: {r2_fold:.4f}")

    if not cv_metrics['mae']:
        return np.nan, np.nan, np.nan

    avg_cv_mae = np.mean(cv_metrics['mae'])
    avg_cv_rmse = np.mean(cv_metrics['rmse'])
    avg_cv_r2 = np.mean(cv_metrics['r2'])
    print(f"\n‚úì CV M√©dio: MAE={avg_cv_mae:.4f}, RMSE={avg_cv_rmse:.4f}, R¬≤={avg_cv_r2:.4f}")
    return avg_cv_mae, avg_cv_rmse, avg_cv_r2


def _salvar_grafico_teste(previsoes_df, ticker, janela, mae_test, r2_test, grafico_path):
    """Plot real vs. predicted test prices and save the figure to ``grafico_path``."""
    plt.figure(figsize=(14, 7))
    try:
        plt.plot(previsoes_df['Data'], previsoes_df['Pre√ßo Real'],
                 label='Real (2023)', color='blue', linewidth=2)
        plt.plot(previsoes_df['Data'], previsoes_df['Pre√ßo Previsto'],
                 label='Previsto (2023)', color='orange', linestyle='--', linewidth=2)
        plt.title(f'{ticker} - Janela {janela} | Teste 2023\nMAE: {mae_test:.4f} | R¬≤: {r2_test:.4f}',
                  fontsize=14, fontweight='bold')
        plt.xlabel('Data', fontsize=12)
        plt.ylabel('Pre√ßo (R$)', fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig(grafico_path, dpi=300)
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close()


def rodar_experimento_especifico(ticker, janela, df_full, feature_cols, target_col, resultados_base_dir):
    """Run one experiment (one ticker, one window size) and save its results.

    Steps: split ``df_full`` into the train/validation and test periods,
    scale features and target, build sliding windows, search hyperparameters
    with Keras Tuner, cross-validate the best configuration with
    ``TimeSeriesSplit``, retrain on the whole train/validation period and
    evaluate on the test period.

    Reads the module-level settings ``start_date_val``, ``end_date_val``,
    ``start_date_test``, ``end_date_test``, ``n_splits_cv``,
    ``max_trials_tuner``, ``epochs_tuner`` and ``epochs_final``.

    Args:
        ticker: Ticker symbol; names the result folder and files.
        janela: Window size, in rows, of each input sample.
        df_full: Date-indexed DataFrame with feature and target columns.
        feature_cols: Names of the feature columns in ``df_full``.
        target_col: Name of the target column in ``df_full``.
        resultados_base_dir: Root results folder; output goes to
            ``<root>/<ticker>/{Metricas,Hiperparametros,Graficos,Previsoes}``.

    Returns:
        None. The experiment is skipped when its metrics file already
        exists, and abandoned (with a printed message) when the data is
        empty, contains NaNs or is too short.
    """
    print(f"\n{'='*80}")
    print(f"üî¨ EXPERIMENTO: {ticker} | Janela {janela}")
    print(f"{'='*80}")

    ticker_dir = os.path.join(resultados_base_dir, ticker)
    base_filename = f"{ticker}_Janela_{janela}"

    metrics_path = os.path.join(ticker_dir, "Metricas", f"{base_filename}_metrics.csv")
    hiperparametros_path = os.path.join(ticker_dir, "Hiperparametros", f"{base_filename}_hiperparametros.csv")
    grafico_path = os.path.join(ticker_dir, "Graficos", f"{base_filename}_grafico_teste_final.png")
    previsoes_path = os.path.join(ticker_dir, "Previsoes", f"{base_filename}_previsoes_teste_final.csv")

    # The metrics file doubles as the "experiment already done" marker.
    if os.path.exists(metrics_path):
        print(f"‚úì Resultados j√° existem para {ticker}, Janela {janela}. Pulando...")
        return

    try:
        df_val_train = df_full.loc[start_date_val:end_date_val].copy()
        df_test_final = df_full.loc[start_date_test:end_date_test].copy()
    except KeyError as e:
        print(f"‚ùå Erro ao dividir dados por data para {ticker}: {e}")
        return

    if df_val_train.empty or df_test_final.empty:
        print(f"‚ùå Per√≠odo de treino/valida√ß√£o ou teste est√° vazio para {ticker}.")
        return

    if df_val_train[feature_cols].isnull().values.any() or df_val_train[target_col].isnull().values.any():
        print(f"‚ö†Ô∏è NaNs encontrados em treino/valida√ß√£o para {ticker}. Verifique pr√©-processamento.")
        return

    if df_test_final[feature_cols].isnull().values.any() or df_test_final[target_col].isnull().values.any():
        print(f"‚ö†Ô∏è NaNs encontrados em teste para {ticker}. Verifique pr√©-processamento.")
        return

    features_val_train = df_val_train[feature_cols]
    target_val_train = df_val_train[[target_col]]

    # The scalers are fit on the whole train/validation period and only
    # applied (never re-fit) to the test period.
    # NOTE(review): the CV folds below share these scalers, so each fold's
    # validation rows take part in the scaling ranges.
    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    scaled_features_val_train = scaler_features.fit_transform(features_val_train)
    scaled_target_val_train = scaler_target.fit_transform(target_val_train)

    num_features = scaled_features_val_train.shape[1]
    print(f"üìä N√∫mero de features: {num_features}")

    X_val_train_full, y_val_train_full = criar_janelas_multivariadas(
        scaled_features_val_train,
        scaled_target_val_train.flatten(),
        janela
    )

    if X_val_train_full.size == 0 or y_val_train_full.size == 0:
        print(f"‚ùå N√£o foi poss√≠vel criar janelas para {ticker}, Janela {janela}.")
        return

    # Fail fast: TimeSeriesSplit needs more samples than folds, and finding
    # that out only after the hyperparameter search would waste the search.
    if len(X_val_train_full) < n_splits_cv + 1:
        print(f"Janelas insuficientes para {ticker}, Janela {janela}: "
              f"{len(X_val_train_full)} disponiveis, minimo {n_splits_cv + 1} para o TimeSeriesSplit.")
        return

    # Make sure every output folder exists, so the function does not depend
    # on the caller having created them.
    for caminho in (metrics_path, hiperparametros_path, grafico_path, previsoes_path):
        os.makedirs(os.path.dirname(caminho), exist_ok=True)

    print("üîç Iniciando busca de hiperpar√¢metros...")
    input_shape = (janela, num_features)

    temp_dir = tempfile.mkdtemp(prefix=f"tuner_{ticker}_J{janela}_")
    print(f"   üìÅ Diret√≥rio tempor√°rio criado: {temp_dir}")

    # Everything that uses the tuner directory runs inside try/finally so
    # the directory is removed on every exit path, including exceptions.
    try:
        try:
            tuner = kt.RandomSearch(
                lambda hp: build_model(hp, input_shape=input_shape),
                objective='val_loss',
                max_trials=max_trials_tuner,
                executions_per_trial=1,
                directory=temp_dir,
                project_name='tuning',
                overwrite=True
            )
        except Exception as e:
            print(f"‚ùå Erro ao criar tuner: {e}")
            return

        early_stopping_tuner = EarlyStopping(monitor='val_loss', patience=10, verbose=0, restore_best_weights=True)

        # Chronological 80/20 hold-out for the search. int(n * 0.8) < n for
        # every n >= 1, so the validation part is never empty here.
        split_index = int(len(X_val_train_full) * 0.8)
        X_tuner_train, y_tuner_train = X_val_train_full[:split_index], y_val_train_full[:split_index]
        X_tuner_val, y_tuner_val = X_val_train_full[split_index:], y_val_train_full[split_index:]

        tuner.search(X_tuner_train, y_tuner_train, epochs=epochs_tuner,
                     validation_data=(X_tuner_val, y_tuner_val),
                     callbacks=[early_stopping_tuner], verbose=0)

        try:
            best_hps = tuner.get_best_hyperparameters(1)[0]
            print("‚úì Melhores hiperpar√¢metros encontrados")
        except IndexError:
            print(f"‚ùå Keras Tuner n√£o encontrou hiperpar√¢metros v√°lidos.")
            return

        print(f"üìà TimeSeriesSplit CV ({n_splits_cv} folds)...")
        avg_cv_mae, avg_cv_rmse, avg_cv_r2 = _validacao_cruzada_temporal(
            tuner, best_hps, X_val_train_full, y_val_train_full, scaler_target
        )

        print("\nüéØ Treinando modelo final...")
        model_final = tuner.hypermodel.build(best_hps)
        early_stopping_final = EarlyStopping(monitor='loss', patience=15, verbose=0, restore_best_weights=True)
        model_final.fit(X_val_train_full, y_val_train_full, epochs=epochs_final,
                        batch_size=32, callbacks=[early_stopping_final], verbose=0)

        print("üß™ Avaliando no conjunto de teste (2023)...")

        features_test_final = df_test_final[feature_cols]
        target_test_final = df_test_final[[target_col]]

        scaled_features_test_final = scaler_features.transform(features_test_final)
        scaled_target_test_final = scaler_target.transform(target_test_final)

        X_test_final, y_test_final = criar_janelas_multivariadas(
            scaled_features_test_final,
            scaled_target_test_final.flatten(),
            janela
        )

        if X_test_final.size == 0 or y_test_final.size == 0:
            print(f"‚ùå N√£o foi poss√≠vel criar janelas de teste.")
            # Keep the CV results, if any, with the test metrics left as NaN.
            if not np.isnan(avg_cv_mae):
                metrics_df = pd.DataFrame([{
                    'Ticker': ticker, 'Janela': janela, 'Num Features': num_features,
                    'CV Avg MAE': avg_cv_mae, 'CV Avg RMSE': avg_cv_rmse, 'CV Avg R2': avg_cv_r2,
                    'Teste MAE': np.nan, 'Teste RMSE': np.nan, 'Teste R2': np.nan
                }])
                metrics_df.to_csv(metrics_path, index=False)
            return

        previsoes_test_scaled = model_final.predict(X_test_final, verbose=0)
        previsoes_test_final = scaler_target.inverse_transform(previsoes_test_scaled)
        reais_test_final = scaler_target.inverse_transform(y_test_final.reshape(-1, 1))

        mae_test = mean_absolute_error(reais_test_final, previsoes_test_final)
        mse_test = mean_squared_error(reais_test_final, previsoes_test_final)
        rmse_test = np.sqrt(mse_test)
        r2_test = r2_score(reais_test_final, previsoes_test_final)

        print(f"‚úì Teste: MAE={mae_test:.4f}, RMSE={rmse_test:.4f}, R¬≤={r2_test:.4f}")

        print("\nüíæ Salvando resultados...")

        hiperparametros_df = pd.DataFrame([best_hps.values])
        hiperparametros_df.to_csv(hiperparametros_path, index=False)

        # The first `janela` test rows only feed the first window, so
        # predictions start at row `janela` of the test period.
        datas_teste_final = df_test_final.index[janela:]
        if len(datas_teste_final) == len(reais_test_final):
            previsoes_df = pd.DataFrame({
                'Data': datas_teste_final,
                'Pre√ßo Real': reais_test_final.flatten(),
                'Pre√ßo Previsto': previsoes_test_final.flatten()
            })
            previsoes_df.to_csv(previsoes_path, index=False)
            _salvar_grafico_teste(previsoes_df, ticker, janela, mae_test, r2_test, grafico_path)

        # Written last on purpose: the metrics file marks the experiment as
        # done, so it must not exist if any other output failed to save.
        metrics_df = pd.DataFrame([{
            'Ticker': ticker, 'Janela': janela, 'Num Features': num_features,
            'CV Avg MAE': avg_cv_mae, 'CV Avg RMSE': avg_cv_rmse, 'CV Avg R2': avg_cv_r2,
            'Teste MAE': mae_test, 'Teste RMSE': rmse_test, 'Teste R2': r2_test
        }])
        metrics_df.to_csv(metrics_path, index=False)
    finally:
        shutil.rmtree(temp_dir, ignore_errors=True)

    print(f"‚úÖ Experimento conclu√≠do: {ticker}, Janela {janela}\n")

def rodar_experimentos_cv_multiplas_pastas():
    """Run every ticker/window experiment for each configured data source.

    For each entry of ``pastas_dados`` this creates the result folder tree
    ``Resultados_<Tipo>/<ticker>/{Graficos,Metricas,Previsoes,Hiperparametros}``
    under ``base_path``, loads each ticker's data and runs one experiment
    per window size from ``janela_min`` to ``janela_max``. A ticker whose
    data cannot be loaded is skipped; an exception in one experiment is
    printed with its traceback and the remaining experiments continue.
    """
    import traceback

    linha = "=" * 80
    cerca = "#" * 80
    subpastas = ("Graficos", "Metricas", "Previsoes", "Hiperparametros")

    print("\n" + linha)
    print(" "*20 + "üöÄ EXPERIMENTOS LSTM - M√öLTIPLAS FONTES")
    print(linha)

    for tipo_experimento, pasta_dados_tratados in pastas_dados.items():
        print(f"\n{cerca}")
        print(f"üìÇ PROCESSANDO EXPERIMENTO: {tipo_experimento.upper()}")
        print(f"üìÅ Pasta: {pasta_dados_tratados}")
        print(f"{cerca}\n")

        resultados_base_dir = os.path.join(base_path, f"Resultados_{tipo_experimento.capitalize()}")

        # Create the whole output tree up front.
        os.makedirs(resultados_base_dir, exist_ok=True)
        for ticker in tickers:
            ticker_dir = os.path.join(resultados_base_dir, ticker)
            os.makedirs(ticker_dir, exist_ok=True)
            for subpasta in subpastas:
                os.makedirs(os.path.join(ticker_dir, subpasta), exist_ok=True)

        print(f"üìä Tickers: {len(tickers)}")
        print(f"üî¢ Janelas: {janela_min} a {janela_max}")
        print(f"üîÑ Total de experimentos: {len(tickers) * (janela_max - janela_min + 1)}")
        print(linha + "\n")

        for ticker in tickers:
            dados = carregar_dados_ticker(ticker, pasta_dados_tratados, tipo_experimento)
            df_ticker_full, feature_cols, target_col = dados

            if df_ticker_full is None:
                print(f"‚ùå N√£o foi poss√≠vel carregar dados para {ticker}. Pulando...")
                continue

            for janela in range(janela_min, janela_max + 1):
                # One failing experiment must not abort the rest of the batch.
                try:
                    rodar_experimento_especifico(
                        ticker, janela, df_ticker_full,
                        feature_cols, target_col, resultados_base_dir,
                    )
                except Exception as e:
                    print(f"‚ùå ERRO CR√çTICO em {ticker}, Janela {janela}: {e}")
                    traceback.print_exc()

    print("\n" + linha)
    print(" "*20 + "‚úÖ TODOS OS EXPERIMENTOS CONCLU√çDOS")
    print(linha)

# Run the full batch of experiments only when executed as a script, not on import.
if __name__ == "__main__":
    rodar_experimentos_cv_multiplas_pastas()