In [7]:
import traceback
import os
import time
import requests
import numpy as np
import pandas as pd
import pandas_ta
# import yfinance as yf
from datetime import datetime, timedelta
import pywt
import antropy as ant
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, log_loss, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest, mutual_info_classif
# import gc # Unused
import warnings
warnings.filterwarnings("ignore", message="Failed to load image Python extension.*_ZN3c1017RegisterOperatorsD1Ev")

# Foundational Model Imports
from transformers import PatchTSTConfig, PatchTSTForPrediction

# --- Global Flags and Initializations for Optional Libraries ---
optuna_available = False
torch_available = False
dowhy_available = False
CausalModel = None
nx = None
# ONNX related flags and types
onnx_available = False
skl2onnx_available = False
onnxmltools_available = False
FloatTensorType = None
SKIP_CAUSAL_ANALYSIS_FOR_DEBUGGING = False

# Optional: Filter specific torchvision warning if it's problematic and not needed
# import warnings
# warnings.filterwarnings("ignore", message="Failed to load image Python extension.*_ZN3c1017RegisterOperatorsD1Ev")

if hasattr(pd.DataFrame, 'ta') is False and pandas_ta is not None:
    try:
        pd.DataFrame.ta = pandas_ta.Core(df=None)
        print("pandas_ta DataFrame accessor registered globally.")
    except Exception as e:
        print(f"Could not globally register pandas_ta accessor: {e}")

try:
    import optuna
    optuna_available = True
    print("Optuna imported successfully.")
except ImportError:
    print("Optuna not found. LightGBM hyperparameter optimization with Optuna will be skipped.")

try:
    import torch
    # import torch.nn as nn # No longer needed directly if Autoformer is removed
    # import torch.optim as optim # No longer needed directly if Autoformer is removed
    torch_available = True
    print("PyTorch imported successfully.")
    if torch.cuda.is_available():
        print(f"PyTorch CUDA available: True, Version: {torch.version.cuda}")
        print(f"Using PyTorch on GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("PyTorch CUDA available: False.")
except ImportError:
    print("PyTorch not found. Foundation Model features will be SKIPPED.")

try:
    from imblearn.over_sampling import SMOTE
    imblearn_available = True
    print("imblearn (for SMOTE) imported successfully.")
except ImportError:
    imblearn_available = False
    print("imblearn not found. SMOTE oversampling will be skipped.")


try:
    import dowhy
    from dowhy import CausalModel
    import networkx as nx
    dowhy_available = True
    print(f"DoWhy {dowhy.__version__} and NetworkX {nx.__version__} imported successfully.")
except ImportError:
    print("DoWhy or NetworkX not found. Causal Discovery will be skipped.")

try:
    import onnx
    onnx_available = True
    # import onnxruntime as ort
    import skl2onnx
    skl2onnx_available = True
    from skl2onnx.common.data_types import FloatTensorType
    import onnxmltools
    onnxmltools_available = True
    print("ONNX, ONNXRuntime, skl2onnx, and onnxmltools imported successfully.")
    if hasattr(onnxmltools, '__version__'):
          print(f"Onnxmltools version: {onnxmltools.__version__}")
except ImportError as e:
    print(f"One or more ONNX components not found: {e}. ONNX features will be skipped.")

print("\nAll libraries and modules conditional imports attempted.")

# --- Constants ---
TWELVE_DATA_API_KEY = "b6dbb92e551a46f2b20de27540aeef0a" # Replace with your actual key
API_KEY = TWELVE_DATA_API_KEY
DEFAULT_SYMBOL = "MSFT"
START_DATE = (datetime.now() - timedelta(days=3*365)).strftime('%Y-%m-%d')
END_DATE = datetime.now().strftime('%Y-%m-%d')
# AUTOFORMER_MODEL_PATH constant removed

# --- Custom AutoformerPredictor and its helpers REMOVED ---

# --- Data Fetching and Feature Engineering Functions ---
def fetch_twelve_data(symbol: str, api_key: str, start_date_str: str = None, end_date_str: str = None) -> pd.DataFrame | None:
    base_url = "https://api.twelvedata.com/time_series"
    params = {"symbol": symbol, "interval": "1day", "apikey": api_key, "format": "JSON", "outputsize": 5000}
    if start_date_str: params["start_date"] = start_date_str
    if end_date_str: params["end_date"] = end_date_str
    print(f"Fetching data for {symbol} from Twelve Data (interval=1day, from {start_date_str} to {end_date_str})...")
    try:
        response = requests.get(base_url, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e: print(f"Request failed for {symbol}: {e}"); return None
    except ValueError as e: print(f"Failed to parse JSON for {symbol}: {e}. Response: {response.text[:200]}..."); return None

    if isinstance(data, dict) and (data.get("status") == "error" or "values" not in data):
        print(f"API Error for {symbol}: {data.get('message', 'Unknown error')}"); return None
    if not isinstance(data, dict) or "values" not in data or not data["values"]:
        print(f"No data values for {symbol}, or unexpected format."); return None

    df = pd.DataFrame(data["values"]).rename(columns={'datetime': 'date'})
    for col in ['open', 'high', 'low', 'close', 'volume']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').astype('float64')
        else:
            if col in ['open', 'high', 'low', 'close']: print(f"Critical column '{col}' missing."); return None
            df[col] = 0.0 # Default for volume if missing
    if 'date' not in df.columns: print("Critical 'date' column missing."); return None

    df.index = pd.to_datetime(df['date'])
    df.drop(columns=['date'], inplace=True)
    df.sort_index(inplace=True)
    df.dropna(subset=[col for col in ['open', 'high', 'low', 'close'] if col in df.columns], inplace=True)
    if df.empty: print(f"No data remaining for {symbol} after initial processing."); return None
    print(f"Successfully fetched/processed {len(df)} data points for {symbol}.")
    return df

def add_technical_indicators(df: pd.DataFrame) -> pd.DataFrame:
    df_feat = df.copy()
    if not hasattr(df_feat.ta, 'rsi'): print("pandas_ta not registered. Skipping TIs."); return df_feat
    print("Adding technical indicators...")
    try:
        for col in ['open', 'high', 'low', 'close', 'volume']:
            if col in df_feat.columns: df_feat[col] = df_feat[col].astype('float64')
            else:
                if col in ['high', 'low', 'open', 'close'] and 'close' in df_feat: df_feat[col] = df_feat['close']
                elif col == 'volume': df_feat[col] = 0.0

        c, h, l, v = 'close', 'high', 'low', 'volume'
        required_cols_for_all_ta = [c,h,l,v]
        if not all(col in df_feat.columns for col in required_cols_for_all_ta):
            print("Warning: Not all OHLCV columns present, some TIs might fail or be inaccurate.")

        if c in df_feat:
            df_feat.ta.rsi(close=df_feat[c], length=14, append=True)
            df_feat.ta.rsi(close=df_feat[c], length=9, append=True, col_names='RSI_9')
            df_feat.ta.rsi(close=df_feat[c], length=25, append=True, col_names='RSI_25')
            df_feat.ta.macd(close=df_feat[c], fast=12, slow=26, signal=9, append=True)
            df_feat.ta.macd(close=df_feat[c], fast=5, slow=15, signal=9, append=True, col_names=('MACD_5_15_9', 'MACDh_5_15_9', 'MACDs_5_15_9'))
            for p in [10, 20, 50, 100, 200]:
                df_feat.ta.sma(close=df_feat[c], length=p, append=True)
                df_feat.ta.ema(close=df_feat[c], length=p, append=True)
            df_feat.ta.bbands(close=df_feat[c], length=20, std=2, append=True)
        else:
            print(f"Column '{c}' not found, skipping some TIs.")

        if all(x in df_feat.columns for x in [h,l,c]):
            df_feat.ta.atr(high=df_feat[h], low=df_feat[l], close=df_feat[c], length=14, append=True)
            df_feat.ta.adx(high=df_feat[h], low=df_feat[l], close=df_feat[c], length=14, append=True)
            df_feat.ta.stoch(high=df_feat[h], low=df_feat[l], close=df_feat[c], append=True)
            df_feat.ta.willr(high=df_feat[h], low=df_feat[l], close=df_feat[c], append=True)
            df_feat.ta.cci(high=df_feat[h], low=df_feat[l], close=df_feat[c], append=True)
        else:
            print(f"One or more of '{h}', '{l}', '{c}' not found, skipping some TIs.")

        if all(x in df_feat.columns for x in [h,l,c,v]):
            try:
                # Calculate MFI separately, do not append directly
                mfi_series = df_feat.ta.mfi(high=df_feat[h], low=df_feat[l], close=df_feat[c], volume=df_feat[v], append=False)
                if mfi_series is not None:
                    # Ensure the series is float and assign it
                    df_feat[mfi_series.name] = mfi_series.astype('float64')
                else:
                    print(f"MFI calculation returned None for {symbol if 'symbol' in locals() else 'current symbol'}.") # Added a bit more context
            except Exception as e_mfi:
                print(f"Error calculating MFI: {e_mfi}")
        else:
            print(f"One or more of '{h}', '{l}', '{c}', '{v}' not found, skipping MFI.")

        df_feat.columns = df_feat.columns.str.replace('[^A-Za-z0-9_]+', '', regex=True)
    except Exception as e: print(f"Error adding TIs: {e}\n{traceback.format_exc()}"); return df
    return df_feat

def add_optimized_features(df: pd.DataFrame, price_col='close', volume_col='volume') -> pd.DataFrame:
    df_new = df.copy()
    if price_col not in df_new.columns:
        print(f"Price column '{price_col}' not in DataFrame. Skipping optimized features."); return df_new

    df_new['returns'] = df_new[price_col].pct_change()
    safe_price = df_new[price_col].replace(0, np.nan)
    safe_price_shifted = df_new[price_col].shift(1).replace(0, np.nan)
    df_new['log_returns'] = np.log(safe_price / safe_price_shifted)

    for window in [5, 10, 20, 50]:
        df_new[f'volatility_{window}'] = df_new['log_returns'].rolling(window).std()
        df_new[f'skew_{window}'] = df_new['log_returns'].rolling(window).skew()
        df_new[f'kurtosis_{window}'] = df_new['log_returns'].rolling(window).kurt()

    if volume_col in df_new.columns and df_new[volume_col].isnull().sum() < len(df_new):
        rolling_mean_volume = df_new[volume_col].rolling(20).mean().replace(0, np.nan)
        df_new['volume_ratio'] = df_new[volume_col] / rolling_mean_volume
        df_new['price_volume'] = df_new[price_col] * df_new[volume_col]
        df_new['volume_change'] = df_new[volume_col].pct_change()

    if all(col in df_new.columns for col in ['high', 'low', 'close']):
        safe_low = df_new['low'].replace(0, np.nan)
        safe_high = df_new['high'].replace(0, np.nan)
        df_new['high_low_ratio'] = df_new['high'] / safe_low
        df_new['close_to_high_ratio'] = safe_price / safe_high
        df_new['close_to_low_ratio'] = safe_price / safe_low
        df_new['intraday_range_norm'] = (df_new['high'] - df_new['low']) / safe_price
    else:
        print("High, Low, or Close columns missing for some ratio calculations.")

    if 'RSI_14' in df_new.columns:
        df_new['RSI_signal'] = 0
        df_new.loc[df_new['RSI_14'] < 30, 'RSI_signal'] = 1
        df_new.loc[df_new['RSI_14'] > 70, 'RSI_signal'] = -1

    macd_col_name = 'MACD_12_26_9'
    macds_col_name = 'MACDs_12_26_9'
    if macd_col_name in df_new.columns and macds_col_name in df_new.columns:
        df_new['MACD_signal_line_cross'] = (df_new[macd_col_name] > df_new[macds_col_name]).astype(int)
    return df_new

def add_wavelet_features(df: pd.DataFrame, column='close', wavelet='mexh', scales_range=(1, 32), num_scales_to_extract=5) -> pd.DataFrame:
    df_feat = df.copy()
    if pywt is None: print("PyWavelets not available."); return df_feat
    if column not in df_feat.columns: print(f"Column '{column}' not found for wavelet. Skipping."); return df_feat

    signal = df_feat[column].values
    if len(signal) < scales_range[1] + 5:
        print(f"Signal length {len(signal)} too short for CWT with max scale {scales_range[1]}. Skipping."); return df_feat

    actual_max_scale = min(scales_range[1], len(signal) // 2 - 1)
    if actual_max_scale < scales_range[0]:
        print(f"Max scale {actual_max_scale} too small after constraint (min_scale {scales_range[0]}). Skipping CWT."); return df_feat

    scales = np.arange(scales_range[0], actual_max_scale + 1)
    if len(scales) == 0: print("No valid scales for CWT. Skipping."); return df_feat

    try:
        coefficients, _ = pywt.cwt(signal, scales, wavelet)
        coeffs_df = pd.DataFrame(coefficients.T, index=df_feat.index, columns=[f"cwt_scale_{s}" for s in scales])
        df_feat[f'{column}_cwt_mean'] = coeffs_df.mean(axis=1)
        df_feat[f'{column}_cwt_std'] = coeffs_df.std(axis=1)
        s_indices_to_extract = np.linspace(0, len(scales)-1, min(num_scales_to_extract, len(scales)), dtype=int)
        for s_idx in s_indices_to_extract:
            actual_scale_val = scales[s_idx]
            col_name_for_scale = f"cwt_scale_{actual_scale_val}"
            if col_name_for_scale in coeffs_df.columns:
                df_feat[f'{column}_cwt_energy_s{actual_scale_val}'] = coeffs_df[col_name_for_scale]**2
    except Exception as e: print(f"Error adding wavelet features: {e}\n{traceback.format_exc()}"); return df
    return df_feat

def add_entropy_features(df: pd.DataFrame, column='close', window=40) -> pd.DataFrame:
    df_feat = df.copy()
    if ant is None: print("Antropy not available."); return df_feat
    if column not in df_feat.columns: print(f"Column '{column}' not found for entropy. Skipping."); return df_feat

    if len(df_feat) < window + 15:
        print(f"Data length {len(df_feat)} too short for entropy features with window {window}. Skipping."); return df_feat
    try:
        sig = df_feat[column].astype(float)
        df_feat[f'{column}_entropy_sample'] = sig.rolling(window=window, min_periods=window).apply(
            lambda x: ant.sample_entropy(x.dropna()) if x.dropna().shape[0] >= window//2 and x.dropna().std() > 1e-6 else np.nan, raw=False
        )
        df_feat[f'{column}_entropy_spectral'] = sig.rolling(window=window, min_periods=window).apply(
            lambda x: ant.spectral_entropy(x.dropna(), sf=1.0, method='welch',
                                           nperseg=min(x.dropna().shape[0], window // 2 if window // 2 > 0 else 1) if x.dropna().shape[0] > 1 else None
                                          ) if x.dropna().shape[0] == window and x.dropna().std() > 1e-6 else np.nan, raw=False
        )
    except Exception as e: print(f"Error adding entropy features: {e}\n{traceback.format_exc()}"); return df
    return df_feat

def add_advanced_technical_features(df: pd.DataFrame, price_col='close', high_col='high', low_col='low', volume_col='volume') -> pd.DataFrame:
    df_new = df.copy()
    if not hasattr(df_new.ta, 'mom'): print("pandas_ta not registered. Skipping advanced TIs."); return df_new
    try:
        if not all(c in df_new.columns for c in [price_col, high_col, low_col]):
            print(f"Missing one or more of {price_col}, {high_col}, {low_col} for adv TIs. Skipping."); return df_new
        for col in [price_col, high_col, low_col]: df_new[col] = df_new[col].astype(float)
        if volume_col in df_new.columns: df_new[volume_col] = df_new[volume_col].astype(float)

        df_new.ta.mom(close=df_new[price_col], append=True)
        df_new.ta.roc(close=df_new[price_col], append=True)
        df_new.ta.natr(high=df_new[high_col], low=df_new[low_col], close=df_new[price_col], append=True)
        df_new.ta.aroon(high=df_new[high_col], low=df_new[low_col], append=True)
        df_new.ta.stc(close=df_new[price_col], tclength=23, fast=50, slow=100, factor=0.5, append=True, col_names=('STC_23_50_05', 'STCD_23_50_05', 'STCK_23_50_05'))

        if volume_col in df_new.columns and df_new[volume_col].isnull().sum() < len(df_new):
            df_new.ta.pvol(close=df_new[price_col], volume=df_new[volume_col], append=True)
            df_new.ta.cmf(high=df_new[high_col], low=df_new[low_col], close=df_new[price_col], volume=df_new[volume_col], append=True)

        df_new.columns = df_new.columns.str.replace('[^A-Za-z0-9_]+', '', regex=True)
    except Exception as e: print(f"Error adding advanced TIs: {e}\n{traceback.format_exc()}"); return df
    return df_new

def add_transformer_features_conceptual(df: pd.DataFrame, column='close', sequence_length=20) -> pd.DataFrame:
    df_feat = df.copy()
    if column not in df_feat.columns: print(f"Column '{column}' not found for conceptual Transformer features. Skipping."); return df_feat
    if len(df_feat) < sequence_length + 5: print(f"Data too short for conceptual Transformer features. Skipping."); return df_feat

    feature_col_base = f"{column}_trans_seq"
    for col_suffix in ['mean', 'std', 'trend', 'volatility', 'autocorr1']:
        df_feat[f'{feature_col_base}_{col_suffix}'] = np.nan
    try:
        data_series = df_feat[column].values
        windows = np.lib.stride_tricks.sliding_window_view(data_series, sequence_length)
        results = {key: [np.nan] * (sequence_length -1) for key in ['mean', 'std', 'trend', 'volatility', 'autocorr1']}

        for seq in windows:
            if np.isnan(seq).any():
                for key in results: results[key].append(np.nan)
                continue
            mean_val, std_val = np.mean(seq), np.std(seq)
            norm_seq = (seq - mean_val) / std_val if std_val > 1e-8 else np.zeros_like(seq)

            results['mean'].append(np.mean(norm_seq))
            results['std'].append(np.std(norm_seq))
            current_trend, current_vol, current_ac = 0.0, 0.0, 0.0

            if len(norm_seq) > 1:
                try:
                    fit_params = np.polyfit(np.arange(len(norm_seq)), norm_seq, 1)
                    current_trend = fit_params[0] if not np.isnan(fit_params[0]) else 0.0
                except (np.linalg.LinAlgError, ValueError): pass

                diff_norm_seq = np.diff(norm_seq)
                current_vol = np.std(diff_norm_seq) if len(diff_norm_seq) > 0 else 0.0

                if len(norm_seq) >= 2:
                    s1, s2 = norm_seq[:-1], norm_seq[1:]
                    if len(s1) >= 1 and np.std(s1) > 1e-8 and np.std(s2) > 1e-8:
                        try:
                            corr_matrix = np.corrcoef(s1, s2)
                            current_ac = corr_matrix[0, 1] if not np.isnan(corr_matrix[0, 1]) else 0.0
                        except (ValueError, IndexError): pass
            results['trend'].append(current_trend)
            results['volatility'].append(current_vol)
            results['autocorr1'].append(current_ac)

        for key, values in results.items():
            if len(values) == len(df_feat):
                df_feat[f'{feature_col_base}_{key}'] = values
            else:
                padded_values = np.full(len(df_feat), np.nan)
                if len(values) > 0 : padded_values[-len(values):] = values
                df_feat[f'{feature_col_base}_{key}'] = padded_values
    except Exception as e: print(f"Error in conceptual Transformer features: {e}\n{traceback.format_exc()}"); return df
    return df_feat

def detect_regimes_simple(df: pd.DataFrame, column='close', window=20) -> pd.DataFrame:
    df_reg = df.copy()
    # print(f"Detecting regimes (simplified volatility-based) for {column}...") # Less verbose
    if column not in df_reg.columns:
        print(f"'{column}' not found. Skipping simple regimes.")
        df_reg['regime_simple'] = 0
        return df_reg

    if 'log_returns' not in df_reg.columns:
        safe_price = df_reg[column].replace(0, np.nan)
        safe_price_shifted = df_reg[column].shift(1).replace(0, np.nan)
        df_reg['log_returns_temp_for_regime'] = np.log(safe_price / safe_price_shifted)
        returns_col_for_regime = 'log_returns_temp_for_regime'
    else:
        returns_col_for_regime = 'log_returns'

    returns = df_reg[returns_col_for_regime].dropna()
    if returns.empty:
        print("No valid returns for simple regime detection. Skipping.")
        df_reg['regime_simple'] = 0
        if 'log_returns_temp_for_regime' in df_reg.columns: df_reg.drop(columns=['log_returns_temp_for_regime'], inplace=True, errors='ignore')
        return df_reg

    rolling_vol = returns.rolling(window=window, min_periods=window//2 if window//2 > 0 else 1).std()
    df_reg['regime_simple'] = 0 # Default: medium volatility (class 0)

    if not rolling_vol.dropna().empty:
        vol_low_thresh = rolling_vol.quantile(0.33)
        vol_high_thresh = rolling_vol.quantile(0.67)
        df_reg.loc[rolling_vol.index[rolling_vol <= vol_low_thresh], 'regime_simple'] = 1  # Low vol (class 1)
        df_reg.loc[rolling_vol.index[rolling_vol > vol_high_thresh], 'regime_simple'] = 2   # High vol (class 2)
    else:
        print("Not enough data for rolling volatility percentile calculation. Defaulting simple regimes to 0.")

    df_reg['regime_simple'] = df_reg['regime_simple'].bfill().ffill().fillna(0) # CORRECTED LINE
    
    if 'log_returns_temp_for_regime' in df_reg.columns: df_reg.drop(columns=['log_returns_temp_for_regime'], inplace=True, errors='ignore')

    print(f"Simple Regimes (0:Med,1:Low,2:High):\n{df_reg['regime_simple'].value_counts(normalize=True, dropna=False).sort_index()*100} %")
    return df_reg

def balanced_target_definition(df: pd.DataFrame, column='close', periods=5, lower_q_thresh=0.45, upper_q_thresh=0.55) -> pd.DataFrame:
    df_t = df.copy()
    if column not in df_t.columns:
        print(f"'{column}' not found for target. Defaulting target.")
        df_t['target'] = 0
        return df_t

    df_t[column] = pd.to_numeric(df_t[column], errors='coerce').replace(0, np.nan)
    df_t['future_log_return_target'] = np.log(df_t[column].shift(-periods) / df_t[column])
    valid_returns = df_t['future_log_return_target'].dropna()
    df_t['target'] = 0

    if len(valid_returns) > 20:
        lower_q_val = valid_returns.quantile(lower_q_thresh)
        upper_q_val = valid_returns.quantile(upper_q_thresh)
        if lower_q_val >= upper_q_val and upper_q_val > 0 : lower_q_val = upper_q_val * 0.99
        elif lower_q_val >= upper_q_val and upper_q_val < 0 : upper_q_val = lower_q_val * 0.99
        df_t.loc[df_t['future_log_return_target'] < lower_q_val, 'target'] = 0
        df_t.loc[df_t['future_log_return_target'] > upper_q_val, 'target'] = 1
    else:
        print("Not enough valid returns for quantile-based target balancing. Default target (all 0s) used or target may be skewed.")

    df_t.drop(columns=['future_log_return_target'], inplace=True, errors='ignore')
    print(f"Target distribution:\n{df_t['target'].value_counts(normalize=True, dropna=False)*100}")
    return df_t

def discover_causal_structure(df_features: pd.DataFrame, target_col='target', price_c='close', max_feats=10, symbol="") -> tuple[CausalModel | None, list]:
    print(f"\nDiscovering causal structure for {symbol} using DoWhy...")
    graph_feats = [] # Initialize graph_feats
    if not dowhy_available or CausalModel is None:
        print("DoWhy not available.")
        return None, graph_feats

    df_c = df_features.copy()
    if target_col not in df_c.columns or df_c[target_col].isnull().all():
        print(f"Target '{target_col}' missing for causal discovery.")
        return None, graph_feats
    df_c[target_col] = pd.to_numeric(df_c[target_col], errors='coerce')
    cand_cols = [c for c in df_c.columns if pd.api.types.is_numeric_dtype(df_c[c]) and c != target_col and df_c[c].notnull().any() and df_c[c].var() > 1e-6]
    if not cand_cols:
        print("No numeric candidate columns with variance for causal discovery.")
        return None, graph_feats

    df_subset_for_causal = df_c[cand_cols + [target_col]].copy()
    df_subset_for_causal.replace([np.inf, -np.inf], np.nan, inplace=True)
    if cand_cols:
        scaler_causal = StandardScaler()
        df_subset_for_causal[cand_cols] = scaler_causal.fit_transform(df_subset_for_causal[cand_cols])
    df_subset_for_causal.dropna(inplace=True)
    if df_subset_for_causal.empty or target_col not in df_subset_for_causal.columns or df_subset_for_causal[target_col].nunique() < 1:
        print("Not enough data post-cleaning/scaling for causal discovery.")
        return None, graph_feats

    cwt_mean_col = f"{price_c}_cwt_mean" if f"{price_c}_cwt_mean" in df_subset_for_causal.columns else 'close_cwt_mean'
    cwt_std_col = f"{price_c}_cwt_std" if f"{price_c}_cwt_std" in df_subset_for_causal.columns else 'close_cwt_std'
    entropy_sample_col = f"{price_c}_entropy_sample" if f"{price_c}_entropy_sample" in df_subset_for_causal.columns else 'close_entropy_sample'
    potential_causes = ['RSI_14', 'MACDh_12_26_9', 'ADX_14', 'ATR_14', cwt_mean_col, cwt_std_col, entropy_sample_col, 'regime_simple', 'volatility_20', 'log_returns', 'BBP_2020', 'BBB_2020']
    
    # graph_feats is defined here
    graph_feats = [c for c in potential_causes if c in df_subset_for_causal.columns and c != target_col and df_subset_for_causal[c].nunique() > 1]
    
    if not graph_feats:
        print("Predefined causal graph_feats not suitable or not found, selecting top varying features (after scaling).")
        num_to_select = min(max_feats, len(cand_cols))
        if num_to_select > 0:
            graph_feats = df_subset_for_causal[cand_cols].var().nlargest(num_to_select).index.tolist()
        else:
            print("No candidate columns for graph_feats fallback.")
            return None, [] # Return empty list for graph_feats
    if not graph_feats:
        print("No suitable graph features for causal discovery.")
        return None, [] # Return empty list for graph_feats

    final_df_for_causal_model = df_subset_for_causal[graph_feats + [target_col]].copy()
    if final_df_for_causal_model.empty or final_df_for_causal_model.shape[0] < 20 or final_df_for_causal_model[target_col].nunique() < 1:
        print("Final DF for causal model too small or target has no variation.")
        return None, graph_feats # Return potentially non-empty graph_feats even if model fails
        
    print(f"DoWhy using graph features: {graph_feats} for Outcome: {target_col}")
    treatment_var = graph_feats[0] # Simplistic: pick the first as treatment
    graph_str = "digraph { " + "; ".join([f'"{f}" -> "{target_col}"' for f in graph_feats]) + " }"
    # print(f"Generated Causal Graph:\n{graph_str}") # Less verbose
    try:
        model = CausalModel(data=final_df_for_causal_model, treatment=treatment_var, outcome=target_col, graph=graph_str)
        print("DoWhy CausalModel created.")
        return model, graph_feats # MODIFIED: Return model and graph_feats
    except Exception as e:
        print(f"DoWhy CausalModel error: {e}\n{traceback.format_exc()}")
        return None, graph_feats # Return graph_feats even if model creation fails



def causal_feature_ranking_from_graph_feats(discovered_graph_features: list, X_train_columns: pd.Index) -> list:
    """
    Takes the list of features identified for the causal graph and returns them.
    Ensures they are present in the training data columns.
    This is a simple first step; more advanced methods would rank by causal effect strength.
    """
    if not discovered_graph_features:
        print("No graph features provided from causal discovery.")
        return []
    
    # Filter to ensure features are actually in X_train (should be, but good check)
    ranked_causal_features = [(feat, 1.0) for feat in discovered_graph_features if feat in X_train_columns] # Assign dummy score 1.0
    
    if ranked_causal_features:
        print(f"Causal feature ranking (from graph features): {[feat for feat, score in ranked_causal_features]}")
    else:
        print("No causal features from graph were found in X_train columns or none were discovered.")
    return ranked_causal_features



def prepare_ml_data(df: pd.DataFrame, target_col='target', test_split_size=0.15, min_test_samples=50):
    if target_col not in df.columns:
        print(f"Target '{target_col}' missing.")
        return None, None, None, None, None

    cols_to_drop_base = ['open', 'high', 'low', 'close', 'volume', 'returns']
    cols_to_drop_dynamic = [c for c in df.columns if 'target_' in c and c != target_col] + \
                           [c for c in df.columns if 'future_return' in c]

    all_cols_to_drop = list(set(cols_to_drop_base + cols_to_drop_dynamic))
    if target_col in all_cols_to_drop:
        all_cols_to_drop.remove(target_col)

    X = df.drop(columns=[c for c in all_cols_to_drop if c in df.columns] + [target_col], errors='ignore')
    y = df[target_col].copy()

    if y.isnull().all():
        print("Target is all NaN.")
        return None, None, None, None, None

    valid_target_mask = y.notna()
    X = X.loc[valid_target_mask]
    y = y.loc[valid_target_mask]

    if X.empty or y.empty:
        print("X or y empty after target NaN filter.")
        return None, None, None, None, None

    X.replace([np.inf, -np.inf], np.nan, inplace=True)

    if X.isnull().any().any():
        for col in X.columns:
            if X[col].isnull().any():
                if pd.api.types.is_numeric_dtype(X[col]):
                    X[col] = X[col].fillna(X[col].median())
                else:
                    X[col] = X[col].fillna(X[col].mode()[0] if not X[col].mode().empty else "Unknown")

    if X.isnull().any().any():
        print(f"Warning: NaNs still present after imputation. Dropping rows with NaNs in X. Nulls per col:\n{X.isnull().sum()[X.isnull().sum()>0]}")
        X.dropna(axis=0, how='any', inplace=True)
        y = y.loc[X.index]

    if X.empty or y.empty:
        print("X or y empty after internal NaN handling.")
        return None, None, None, None, None

    if len(X) < min_test_samples * 2:
        print(f"Not enough data ({len(X)} rows) for robust train/test split. Min required for split: {min_test_samples*2}.")
        return None, None, None, None, None

    n_samples = len(X)
    test_size_abs = max(min_test_samples, int(n_samples * test_split_size))

    if n_samples - test_size_abs < min_test_samples:
        test_size_abs = n_samples - min_test_samples

    if test_size_abs < 1 and n_samples > 0:
        test_size_abs = 1
    elif test_size_abs < 1:
        print(f"Cannot make meaningful split (test_size_abs < 1).")
        return None, None, None, None, None

    train_size = n_samples - test_size_abs
    if train_size < 1:
        print(f"Train size too small ({train_size}). Cannot split.")
        return None, None, None, None, None

    X_train, X_test = X.iloc[:train_size], X.iloc[train_size:]
    y_train, y_test = y.iloc[:train_size], y.iloc[train_size:]

    if X_train.empty or y_train.empty or X_test.empty or y_test.empty:
        print("Train/Test set empty post-split.")
        return None, None, None, None, None
    print(f"Train shapes: X_train={X_train.shape}, y_train={y_train.shape}; Test shapes: X_test={X_test.shape}, y_test={y_test.shape}")

    numeric_cols_xtrain = X_train.select_dtypes(include=np.number).columns
    scaler = None

    if not numeric_cols_xtrain.empty:
        scaler = StandardScaler()
        X_train_scaled_np = scaler.fit_transform(X_train[numeric_cols_xtrain])
        X_train_scaled_df = pd.DataFrame(X_train_scaled_np, columns=numeric_cols_xtrain, index=X_train.index)

        X_train_final = X_train.copy()
        X_train_final[numeric_cols_xtrain] = X_train_scaled_df

        numeric_cols_xtest = X_test.select_dtypes(include=np.number).columns
        common_numeric_cols = [col for col in numeric_cols_xtrain if col in numeric_cols_xtest]

        X_test_final = X_test.copy()
        if common_numeric_cols:
            X_test_scaled_np = scaler.transform(X_test[common_numeric_cols])
            X_test_scaled_df = pd.DataFrame(X_test_scaled_np, columns=common_numeric_cols, index=X_test[common_numeric_cols].index)
            X_test_final[common_numeric_cols] = X_test_scaled_df
        else:
            print("No common numeric columns to scale in X_test, or X_test has no numeric columns that were scaled in train.")

        return X_train_final, X_test_final, y_train, y_test, scaler
    else:
        print("No numeric columns in X_train for scaling.")
        return X_train, X_test, y_train, y_test, None

def lgbm_objective(trial, X_train, y_train, X_val, y_val, base_params):
    params = {
        **base_params,
        'boosting_type': 'gbdt',
        'n_estimators': trial.suggest_int('n_estimators', 100, 1000, step=50),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 10, 50),
        'max_depth': trial.suggest_int('max_depth', -1, 10),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 50),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.5, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.5, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 0, 7),
        'reg_alpha': trial.suggest_float('reg_alpha', 1e-3, 1.0, log=True),
        'reg_lambda': trial.suggest_float('reg_lambda', 1e-3, 1.0, log=True),
        'min_gain_to_split': trial.suggest_float('min_gain_to_split', 0.0, 0.1)
    }
    if params.get('num_class') is None and 'num_class' in params:
        del params['num_class']

    model = lgb.LGBMClassifier(**params)
    model.fit(X_train, y_train,
              eval_set=[(X_val, y_val)],
              eval_metric=base_params.get('metric', 'logloss'),
              callbacks=[lgb.early_stopping(30, verbose=False)])
    y_proba_val = model.predict_proba(X_val)
    return log_loss(y_val, y_proba_val)

def optimize_lgbm_hyperparameters(X_train: pd.DataFrame, y_train: pd.Series, base_params: dict, n_trials=30, validation_ratio=0.2) -> dict:
    if not optuna_available:
        print("Optuna not available. Using default HPs.")
        return optimized_lightgbm_params()

    print(f"Optimizing LightGBM HPs with Optuna ({n_trials} trials)...")
    if len(X_train) * validation_ratio < 1 or len(X_train) * (1-validation_ratio) < 1:
        print("Too few samples for Optuna validation split. Using default HPs.")
        return optimized_lightgbm_params()

    n_val_samples = int(len(X_train) * validation_ratio)
    if n_val_samples == 0 and len(X_train) > 1: n_val_samples = 1
    elif n_val_samples == 0 :
        print("Cannot create validation set for Optuna (0 samples). Using default HPs.")
        return optimized_lightgbm_params()

    n_train_opt_samples = len(X_train) - n_val_samples
    if n_train_opt_samples == 0:
        print("Train set for Optuna is empty after split. Using default HPs.")
        return optimized_lightgbm_params()

    X_train_opt, X_val_opt = X_train.iloc[:n_train_opt_samples], X_train.iloc[n_train_opt_samples:]
    y_train_opt, y_val_opt = y_train.iloc[:n_train_opt_samples], y_train.iloc[n_train_opt_samples:]

    study = optuna.create_study(direction='minimize')
    study.optimize(lambda trial: lgbm_objective(trial, X_train_opt, y_train_opt, X_val_opt, y_val_opt, base_params),
                   n_trials=n_trials, show_progress_bar=True)

    print(f"Best Optuna trial for LightGBM: Value={study.best_value:.4f}, Params={study.best_params}")
    return study.best_params

def train_lightgbm_model(X_train, y_train, X_test, y_test, optimized_params=None):
    print("Training LightGBM model...")
    if X_train is None or X_train.empty or y_train is None or y_train.empty:
        print("X_train or y_train is empty. Skipping LightGBM training.")
        return None, None

    y_train_squeezed = y_train.squeeze()
    y_test_squeezed = y_test.squeeze() if y_test is not None else pd.Series()
    unique_labels_train = sorted(y_train_squeezed.unique())
    num_classes = len(unique_labels_train)

    if num_classes <= 1:
        print(f"Only {num_classes} class(es) in y_train. Skipping LightGBM training.")
        return None, None

    current_params = optimized_lightgbm_params()
    current_params['objective'] = 'multiclass' if num_classes > 2 else 'binary'
    current_params['metric'] = 'multi_logloss' if num_classes > 2 else 'binary_logloss'
    if num_classes > 2:
        current_params['num_class'] = num_classes
    elif 'num_class' in current_params:
        del current_params['num_class']

    if optimized_params and isinstance(optimized_params, dict):
        print("Using Optuna-optimized parameters.")
        current_params.update(optimized_params)
    else:
        print("Using default (or non-Optuna optimized) LightGBM parameters.")

    label_map = {label: i for i, label in enumerate(unique_labels_train)}
    y_train_mapped = y_train_squeezed.map(label_map)
    model = lgb.LGBMClassifier(**current_params)
    eval_set_data = None
    valid_eval_indices = None
    y_test_mapped_for_eval = None

    if X_test is not None and not X_test.empty and not y_test_squeezed.empty:
        y_test_mapped = y_test_squeezed.map(label_map).fillna(-1).astype(int)
        valid_eval_indices = (y_test_mapped != -1)
        if valid_eval_indices.any():
            y_test_mapped_for_eval = y_test_mapped[valid_eval_indices]
            X_test_eval = X_test[valid_eval_indices][X_train.columns] if all(c in X_test.columns for c in X_train.columns) else X_test[valid_eval_indices]
            eval_set_data = (X_test_eval, y_test_mapped_for_eval)

    if eval_set_data:
        model.fit(X_train, y_train_mapped, eval_set=[eval_set_data],
                  eval_metric=current_params.get('metric'),
                  callbacks=[lgb.early_stopping(30, verbose=False)])
    else:
        print("Warning: No valid eval set. Fitting on full training data without early stopping based on eval set.")
        model.fit(X_train, y_train_mapped)

    feat_imp_df = pd.DataFrame({'Feature': X_train.columns, 'Importance': model.feature_importances_}).sort_values(by='Importance', ascending=False)
    print("\nTop 10 features:\n", feat_imp_df.head(10))

    if eval_set_data and y_test_mapped_for_eval is not None and not y_test_mapped_for_eval.empty:
        X_test_predict = X_test[valid_eval_indices][X_train.columns] if all(c in X_test.columns for c in X_train.columns) else X_test[valid_eval_indices]
        y_pred_mapped_on_valid = model.predict(X_test_predict)
        y_proba_on_valid = model.predict_proba(X_test_predict)
        acc = accuracy_score(y_test_mapped_for_eval, y_pred_mapped_on_valid)
        print(f"\n🎯 Accuracy on mapped test data: {acc:.4f}")

        if current_params['objective'] == 'binary' and y_proba_on_valid.shape[1] == 2:
            try:
                auc = roc_auc_score(y_test_mapped_for_eval, y_proba_on_valid[:, 1])
                print(f"📊 AUC: {auc:.4f}")
            except ValueError as e_auc:
                print(f"AUC Calculation Error: {e_auc}")
        print("\nClassification Report (on mapped and valid test labels):")
        try:
            report_labels = sorted(np.unique(np.concatenate((y_test_mapped_for_eval.unique(), pd.Series(y_pred_mapped_on_valid).unique()))))
            print(classification_report(y_test_mapped_for_eval, y_pred_mapped_on_valid, labels=report_labels, zero_division=0))
        except Exception as e_cr:
            print(f"Classification Report Error: {e_cr}")
    else:
        print("No valid test samples for evaluation after mapping, or X_test/y_test was not provided.")
    return model, feat_imp_df

def plot_feature_importance(feature_importance_df, top_n=20, symbol_for_plot="", min_bar_height=0.05):
    if feature_importance_df is None or feature_importance_df.empty:
        print("No feature importance to plot.")
        return

    plot_data = feature_importance_df.head(top_n).copy()
    if plot_data.empty:
        print("No features in plot_data after head(top_n).")
        return

    max_importance = plot_data['Importance'].max()
    min_threshold = max(max_importance * 0.02, 1e-6)
    plot_data['Plot_Importance'] = np.maximum(plot_data['Importance'], min_threshold)

    plt.figure(figsize=(14, max(8, min(top_n, len(plot_data)) * 0.5)))
    ax = sns.barplot(x='Plot_Importance', y='Feature', hue='Feature', data=plot_data, palette="viridis", orient='h', legend=False) # Added hue and legend=False
    for i, row_data in enumerate(plot_data.itertuples()):
        original_val = row_data.Importance
        plot_val = row_data.Plot_Importance
        ax.text(plot_val + max_importance * 0.01, i, f'{original_val:.0f}', va='center', fontsize=9, fontweight='bold')

    plt.title(f'Top {top_n} Feature Importances for {symbol_for_plot} (LightGBM)', fontsize=16, fontweight='bold', pad=20)
    plt.xlabel('Importance', fontsize=12, fontweight='bold')
    plt.ylabel('Feature', fontsize=12, fontweight='bold')
    plt.grid(axis='x', alpha=0.3)
    plt.tight_layout()
    plot_filename = f"feature_importance_{symbol_for_plot}.png"
    plt.savefig(plot_filename)
    print(f"Feature importance plot saved to {plot_filename}")
    plt.close()

def export_lgbm_to_onnx(lgbm_model, X_sample_df, file_path="lgbm_model.onnx", target_opset=12):
    print(f"\nExporting LGBM model to ONNX: {file_path} (opset={target_opset})")
    if not all([onnx_available, skl2onnx_available, onnxmltools_available, (FloatTensorType is not None)]):
        print("One or more ONNX libraries missing or FloatTensorType not imported. Skipping ONNX export.")
        return None
    if lgbm_model is None or X_sample_df is None or X_sample_df.empty:
        print("Model or sample data empty for ONNX. Skipping.")
        return None
    try:
        initial_type = [('float_input', FloatTensorType([None, X_sample_df.shape[1]]))]
        converted_model = onnxmltools.convert_lightgbm(lgbm_model, initial_types=initial_type, target_opset=target_opset)
        with open(file_path, "wb") as f:
            f.write(converted_model.SerializeToString())
        print(f"Model exported to ONNX: {file_path}")
        onnx.checker.check_model(file_path)
        print("ONNX model check OK.")
        return file_path
    except Exception as e:
        print(f"Error exporting LGBM to ONNX: {e}. Fallback to pickle.")
        try:
            import pickle
            pkl_path = file_path.replace('.onnx', '.pkl')
            with open(pkl_path, 'wb') as pf:
                pickle.dump(lgbm_model, pf)
            print(f"Model saved as pickle: {pkl_path}")
            return pkl_path
        except Exception as ep:
            print(f"Pickle save error: {ep}")
            return None

def simple_feature_selection_fallback(X_train, y_train, max_features=20):
    print("Using simple variance-based feature selection fallback...")
    if X_train.empty: return pd.DataFrame()
    X_train_numeric = X_train.select_dtypes(include=np.number)
    if X_train_numeric.empty:
        print("No numeric features for variance selection. Returning first few columns if available.")
        return pd.DataFrame({'Feature': X_train.columns[:max_features].tolist()})
    variance_scores = X_train_numeric.var().sort_values(ascending=False)
    num_features_to_select = min(max_features, len(variance_scores))
    selected_features = variance_scores.head(num_features_to_select).index.tolist()
    return pd.DataFrame({'Feature': selected_features, 'Score': variance_scores.head(num_features_to_select).values})

def prioritized_feature_selection(X_train, y_train, causal_ranking, max_features=25):
    # print("Prioritized feature selection: Causal Ranking + Mutual Information...") # Less verbose
    if X_train.empty or y_train.empty:
        print("X_train or y_train is empty in prioritized_feature_selection. Returning empty DataFrame.")
        return pd.DataFrame()

    top_causal_features = []
    num_causal_to_select = 0
    if causal_ranking and isinstance(causal_ranking, list) and all(isinstance(item, tuple) and len(item)==2 for item in causal_ranking):
        num_causal_to_select = min(len(causal_ranking), max_features // 2)
        if num_causal_to_select > 0:
            # print(f"Selecting up to {num_causal_to_select} features from causal ranking.") # Less verbose
            for feat, score in causal_ranking[:num_causal_to_select]:
                if feat in X_train.columns:
                    top_causal_features.append(feat)
                # else: print(f"Causal feature '{feat}' not in X_train.columns. Skipping.") # Less verbose
    # else: print("No valid causal ranking provided or num_causal_to_select is 0.") # Less verbose

    remaining_slots = max_features - len(top_causal_features)
    stat_selected_features = []

    if remaining_slots > 0:
        features_for_stat_selection = [f for f in X_train.columns if f not in top_causal_features]
        if features_for_stat_selection:
            X_remaining_for_stat = X_train[features_for_stat_selection]
            y_train_squeezed = y_train.squeeze()

            if y_train_squeezed.nunique() > 1 and not X_remaining_for_stat.empty:
                X_remaining_numeric = X_remaining_for_stat.select_dtypes(include=np.number)
                if not X_remaining_numeric.empty:
                    num_stat_to_select = min(remaining_slots, X_remaining_numeric.shape[1])
                    if num_stat_to_select > 0:
                        try:
                            selector_mi = SelectKBest(mutual_info_classif, k=num_stat_to_select)
                            selector_mi.fit(X_remaining_numeric, y_train_squeezed)
                            stat_selected_features = X_remaining_numeric.columns[selector_mi.get_support()].tolist()
                        except Exception as e_mi:
                            print(f"Error in MI based feature selection: {e_mi}. Proceeding without these stat features.")
    final_selected_features = list(dict.fromkeys(top_causal_features + stat_selected_features))
    if not final_selected_features and not X_train.empty:
        print("No features from prioritized selection, falling back to simple variance-based selection.")
        simple_fallback_df = simple_feature_selection_fallback(X_train, y_train, max_features)
        if simple_fallback_df is not None and 'Feature' in simple_fallback_df.columns:
            final_selected_features = simple_fallback_df['Feature'].tolist()
        else:
            final_selected_features = X_train.columns[:max_features].tolist()
    return pd.DataFrame({'Feature': final_selected_features})

def configure_extended_context(base_context=512, data_length=750):
    max_possible_context = int(data_length * 0.7)
    extended_contexts = {
        'short_term': min(256, max_possible_context, data_length - 60),
        'medium_term': min(512, max_possible_context, data_length - 60),
        'long_term': min(1024, max_possible_context, data_length - 60),
        'adaptive': min(base_context * 2, max_possible_context, data_length - 60)
    }
    extended_contexts['adaptive'] = max(extended_contexts['adaptive'], 64)
    return extended_contexts

def add_multitimeframe_features(df, price_col='close'):
    df_mtf = df.copy()
    if price_col not in df_mtf.columns:
        print(f"Price column '{price_col}' not in DataFrame. Skipping multi-timeframe features.")
        return df_mtf

    df_mtf[f'{price_col}_weekly_mean'] = df_mtf[price_col].rolling(5, min_periods=1).mean()
    df_mtf[f'{price_col}_weekly_std'] = df_mtf[price_col].rolling(5, min_periods=1).std()
    df_mtf[f'{price_col}_weekly_max'] = df_mtf[price_col].rolling(5, min_periods=1).max()
    df_mtf[f'{price_col}_weekly_min'] = df_mtf[price_col].rolling(5, min_periods=1).min()

    df_mtf[f'{price_col}_monthly_mean'] = df_mtf[price_col].rolling(21, min_periods=1).mean()
    df_mtf[f'{price_col}_monthly_std'] = df_mtf[price_col].rolling(21, min_periods=1).std()
    df_mtf[f'{price_col}_monthly_trend'] = df_mtf[price_col].rolling(21, min_periods=2).apply(
        lambda x: np.polyfit(range(len(x)), x, 1)[0] if len(x) >= 2 else np.nan, raw=False
    )
    df_mtf[f'{price_col}_quarterly_mean'] = df_mtf[price_col].rolling(63, min_periods=1).mean()
    df_mtf[f'{price_col}_quarterly_volatility'] = df_mtf[price_col].rolling(63, min_periods=1).std() / df_mtf[f'{price_col}_quarterly_mean'].replace(0, np.nan)

    df_mtf[f'{price_col}_weekly_monthly_ratio'] = df_mtf[f'{price_col}_weekly_mean'] / df_mtf[f'{price_col}_monthly_mean'].replace(0, np.nan)
    df_mtf[f'{price_col}_monthly_quarterly_ratio'] = df_mtf[f'{price_col}_monthly_mean'] / df_mtf[f'{price_col}_quarterly_mean'].replace(0, np.nan)
    return df_mtf

def detect_volatility_regimes(returns, window=21, threshold_multiplier=1.5):
    if returns.empty or len(returns) < window:
        return pd.Series(1, index=returns.index)

    rolling_vol = returns.rolling(window, min_periods=window // 2 if window // 2 > 0 else 1).std()
    if rolling_vol.dropna().empty:
        return pd.Series(1, index=returns.index)

    vol_median = rolling_vol.median()
    if pd.isna(vol_median) or vol_median == 0:
        vol_median = rolling_vol.mean()
        if pd.isna(vol_median) or vol_median == 0:
            return pd.Series(1, index=returns.index)

    high_vol_threshold = vol_median * threshold_multiplier
    low_vol_threshold = vol_median / threshold_multiplier
    regimes = pd.Series(1, index=returns.index, dtype=int)
    regimes[rolling_vol >= high_vol_threshold] = 2
    regimes[rolling_vol <= low_vol_threshold] = 0
    regimes = regimes.ffill().fillna(1) # Updated fillna
    return regimes

def add_regime_features(df, returns_col='log_returns', price_col='close'):
    df_rf = df.copy()
    if returns_col not in df_rf.columns:
        print(f"Returns column '{returns_col}' not found. Skipping regime features.")
        return df_rf
    if price_col not in df_rf.columns:
        print(f"Price column '{price_col}' not found for regime-adjusted MAs. Skipping those.")

    regimes = detect_volatility_regimes(df_rf[returns_col])
    df_rf['volatility_regime'] = regimes
    df_rf['regime_0'] = (regimes == 0).astype(int)
    df_rf['regime_1'] = (regimes == 1).astype(int)
    df_rf['regime_2'] = (regimes == 2).astype(int)
    if price_col in df_rf.columns:
        for window in [10, 20, 50]:
            df_rf[f'sma_{window}_regime_adj'] = df_rf[price_col].rolling(window, min_periods=1).mean() * (1 + 0.1 * regimes)
    return df_rf

def optimized_lightgbm_params():
    return {
        'boosting_type': 'gbdt', 'num_leaves': 31, 'learning_rate': 0.05,
        'n_estimators': 200, 'feature_fraction': 0.8, 'bagging_fraction': 0.8,
        'bagging_freq': 5, 'min_child_samples': 20, 'reg_alpha': 0.01,
        'reg_lambda': 0.01, 'random_state': 42, 'verbose': -1, 'n_jobs': -1,
        'class_weight': 'balanced', 'min_gain_to_split': 0.0
    }

def enhanced_patchtst_finetune(log_returns, context_length=512, prediction_length=5, min_sequences=20):
    if not isinstance(log_returns, np.ndarray): log_returns = np.array(log_returns)
    if log_returns.ndim > 1 : log_returns = log_returns.squeeze()

    sequence_length_needed = context_length + prediction_length
    padded_returns = log_returns.copy()

    if len(log_returns) < sequence_length_needed:
        mean_return = np.mean(log_returns) if len(log_returns) > 0 else 0
        std_return = np.std(log_returns) if len(log_returns) > 1 else 0.01
        std_return = max(std_return, 1e-6)
        padding_length = sequence_length_needed - len(log_returns)
        synthetic_padding = np.random.normal(mean_return, std_return, padding_length)
        padded_returns = np.concatenate([synthetic_padding, log_returns])

    X_sequences, y_sequences = [], []
    if len(padded_returns) >= sequence_length_needed:
        for i in range(len(padded_returns) - sequence_length_needed + 1):
            seq = padded_returns[i : i + context_length]
            target = padded_returns[i + context_length : i + sequence_length_needed]
            X_sequences.append(seq)
            y_sequences.append(target)

    if len(X_sequences) >= min_sequences:
        return np.array(X_sequences), np.array(y_sequences), True
    else:
        return None, None, False

def run_patchtst_foundation_forecast(
    symbol_name: str,
    historical_data_df: pd.DataFrame,
    prediction_length: int = 5,
    model_checkpoint: str = "ibm-research/patchtst-etth1-pretrain",
    fine_tune_epochs: int = 10,
    enable_fine_tuning: bool = True,
    configured_context_length: int | None = None
):
    print(f"\n--- Forecasting for {symbol_name} using PatchTST ({model_checkpoint}) ---")
    if not torch_available or 'PatchTSTForPrediction' not in globals():
        print("Torch or PatchTST not available. Skipping.")
        return {"status": "failed", "reason": "Dependencies missing", "forecast": None}
    if 'close' not in historical_data_df.columns or historical_data_df['close'].isnull().all():
        print(f"'close' column missing or all NaN for {symbol_name}. Skipping.")
        return {"status": "failed", "reason": "'close' missing or all NaN", "forecast": None}

    df_clean = historical_data_df.dropna(subset=['close'])
    if len(df_clean) < 2:
        print(f"Not enough non-NaN close prices ({len(df_clean)}) for {symbol_name}. Skipping.")
        return {"status": "failed", "reason": "Insufficient non-NaN close prices", "forecast": None}

    try:
        base_config = PatchTSTConfig.from_pretrained(model_checkpoint)
        effective_context_length = configured_context_length if configured_context_length is not None else base_config.context_length
        min_data_for_one_sequence = effective_context_length + prediction_length + 1
        if len(df_clean) < min_data_for_one_sequence:
            print(f"Data length {len(df_clean)} too short for context {effective_context_length} + pred {prediction_length}. Adjusting context or skipping.")
            effective_context_length = max(10, len(df_clean) - prediction_length - 5)
            if effective_context_length < 10 :
                return {"status": "failed", "reason": f"Cannot determine valid context with data {len(df_clean)}", "forecast": None}

        financial_config = PatchTSTConfig(
            context_length=effective_context_length, prediction_length=prediction_length,
            patch_length=min(16, effective_context_length // 2 if effective_context_length > 32 else 8),
            patch_stride=min(8, effective_context_length // 4 if effective_context_length > 32 else 4),
            num_input_channels=1, d_model=base_config.d_model,
            num_attention_heads=base_config.num_attention_heads, num_hidden_layers=base_config.num_hidden_layers,
            ffn_dim=base_config.ffn_dim, dropout=0.1, head_dropout=0.1, scaling="std", loss="mse"
        )
        model = PatchTSTForPrediction(financial_config)

        try:
            pretrained_model = PatchTSTForPrediction.from_pretrained(model_checkpoint, local_files_only=False, trust_remote_code=True)
            pretrained_dict = pretrained_model.state_dict()
            model_dict = model.state_dict()
            compatible_weights = {}
            for k, v in pretrained_dict.items():
                if k in model_dict and v.size() == model_dict[k].size():
                    if not any(skip_layer in k for skip_layer in ['input_embedding', 'projection', 'head', 'value_embedding', 'patch_embedding.weight', 'patch_embedding.bias']):
                        compatible_weights[k] = v
            model.load_state_dict(compatible_weights, strict=False)
        except Exception as e_load:
            print(f"Weight transfer warning/error: {e_load}. Model may use more random init for some layers.")

        device = "cuda" if torch.cuda.is_available() else "cpu"; model.to(device)
        close_prices = df_clean['close'].values.astype(np.float32)
        log_prices = np.log(np.maximum(close_prices, 1e-6))
        log_returns = np.diff(log_prices)
        if len(log_returns) < financial_config.context_length + financial_config.prediction_length:
            print(f"Insufficient log_returns ({len(log_returns)}) after diff for context/pred. Skipping fine-tune/forecast.")
            return {"status": "failed", "reason": "insufficient log_returns data", "forecast": None}

        finetuned_this_run = False
        if enable_fine_tuning and fine_tune_epochs > 0:
            X_seqs, y_seqs, finetune_data_ok = enhanced_patchtst_finetune(
                log_returns, financial_config.context_length, financial_config.prediction_length)
            if finetune_data_ok and X_seqs is not None and len(X_seqs) > 0:
                finetuned_this_run = True
                train_inputs = torch.tensor(X_seqs, dtype=torch.float32).unsqueeze(-1).to(device)
                train_targets = torch.tensor(y_seqs, dtype=torch.float32).unsqueeze(-1).to(device)
                dataset = torch.utils.data.TensorDataset(train_inputs, train_targets)
                batch_size = min(16, len(X_seqs) // 2 if len(X_seqs) >= 4 else 1)
                if batch_size == 0 and len(X_seqs) > 0: batch_size = 1

                if batch_size > 0:
                    train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True,
                                                               drop_last=True if len(X_seqs) > batch_size else False)
                    model.train()
                    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay=0.01) # Using AdamW from torch.optim
                    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=fine_tune_epochs) # Using CosineAnnealingLR from torch.optim
                    best_loss = float('inf'); patience, patience_counter = 3, 0

                    for epoch in range(fine_tune_epochs):
                        epoch_loss, num_batches = 0, 0
                        if not train_loader: break
                        for batch_inputs_data, batch_targets_data in train_loader:
                            optimizer.zero_grad()
                            outputs = model(past_values=batch_inputs_data, future_values=batch_targets_data)
                            loss = outputs.loss
                            loss.backward(); torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0); optimizer.step()
                            epoch_loss += loss.item(); num_batches += 1
                        if num_batches > 0:
                            avg_loss = epoch_loss / num_batches; scheduler.step()
                            print(f"Epoch {epoch+1}/{fine_tune_epochs} | Loss: {avg_loss:.6f} | LR: {scheduler.get_last_lr()[0]:.6f}")
                            if avg_loss < best_loss: best_loss = avg_loss; patience_counter = 0
                            else:
                                patience_counter += 1
                                if patience_counter >= patience: print(f"Early stopping at epoch {epoch+1}"); break
                        else: print(f"Epoch {epoch+1}/{fine_tune_epochs} | No batches. Stopping fine-tuning."); break
                    loss_display = f"{best_loss:.6f}" if best_loss != float('inf') else "N/A"
                    if num_batches > 0: print(f"Fine-tuning completed. Best loss: {loss_display}")
                else: print(f"Fine-tuning skipped for {symbol_name}: Not enough sequences or batch_size issue."); finetuned_this_run = False

        model.eval()
        current_model_context_length = model.config.context_length
        if len(log_returns) < current_model_context_length:
            mean_lr = np.mean(log_returns) if len(log_returns) > 0 else 0
            padding_needed = current_model_context_length - len(log_returns)
            past_returns_for_forecast = np.concatenate([np.full(padding_needed, mean_lr), log_returns])
        else:
            past_returns_for_forecast = log_returns[-current_model_context_length:]

        returns_mean = np.mean(past_returns_for_forecast); returns_std = max(np.std(past_returns_for_forecast), 1e-8)
        norm_returns_forecast_input = (past_returns_for_forecast - returns_mean) / returns_std
        past_tensor = torch.tensor(norm_returns_forecast_input, dtype=torch.float32).view(1, current_model_context_length, 1).to(device)

        with torch.no_grad(): outputs = model(past_values=past_tensor)
        fc_returns_norm = outputs.prediction_outputs.cpu().numpy().squeeze()
        if fc_returns_norm.ndim == 0: fc_returns_norm = np.array([fc_returns_norm])
        elif fc_returns_norm.ndim > 1: fc_returns_norm = fc_returns_norm.flatten()

        target_pred_len = model.config.prediction_length
        if len(fc_returns_norm) < target_pred_len:
            last_val = fc_returns_norm[-1] if len(fc_returns_norm) > 0 else 0
            fc_returns_norm = np.concatenate([fc_returns_norm, np.full(target_pred_len - len(fc_returns_norm), last_val)])
        forecast_log_returns = (fc_returns_norm[:target_pred_len] * returns_std) + returns_mean

        last_log_price = log_prices[-1]
        forecast_log_prices = last_log_price + np.cumsum(forecast_log_returns)
        forecast_prices = np.exp(forecast_log_prices)
        forecast_prices = np.maximum(forecast_prices, 0.01).tolist()

        last_actual_price = close_prices[-1]
        price_change = forecast_prices[-1] - last_actual_price
        magnitude_pct = (price_change / last_actual_price) * 100 if last_actual_price != 0 else 0
        direction = "📈 UP" if price_change > 0.001 * last_actual_price else "📉 DOWN" if price_change < -0.001 * last_actual_price else "횡보 HOLD"
        atr_val = np.nan
        if 'ATR_14' in df_clean.columns and not df_clean['ATR_14'].empty: atr_val = df_clean['ATR_14'].iloc[-1]

        return {
            "status": "success", "forecast": forecast_prices, "last_price": float(last_actual_price),
            "direction": direction, "magnitude": float(magnitude_pct), "confidence": "🟡 Medium",
            "atr_threshold": float(atr_val) if pd.notna(atr_val) else None,
            "method": f"PatchTST {'Fine-tuned' if finetuned_this_run else 'Pre-trained'} ({fine_tune_epochs} epochs attempted)",
            "model_info": {"context_length": model.config.context_length, "prediction_length": model.config.prediction_length, "fine_tuned_actually": finetuned_this_run}
        }
    except Exception as e:
        print(f"Error in PatchTST forecasting for {symbol_name}: {e}")
        traceback.print_exc()
        return {"status": "failed", "reason": f"PatchTST error: {str(e)}", "forecast": None}

def simple_ma_fallback(prices, length=5):
    if not isinstance(prices, np.ndarray): prices = np.array(prices)
    if len(prices) == 0: return np.full(length, np.nan)
    if len(prices) < 5: return np.full(length, prices[-1])
    ma_val = np.mean(prices[-5:])
    return np.full(length, ma_val)



def discover_and_rank_causal_features(
    df_features: pd.DataFrame, 
    target_col='target', 
    price_c='close', # Used to help select initial potential causes
    max_features_to_analyze=15, # Max features to select for iterative causal analysis
    symbol=""
) -> list:
    print(f"\n--- Causal Feature Discovery and Effect Estimation for {symbol} (Subset Analysis) ---")
    if not dowhy_available or CausalModel is None:
        print("DoWhy not available. Skipping causal analysis.")
        return []

    df_c = df_features.copy()
    if target_col not in df_c.columns or df_c[target_col].isnull().all():
        print(f"Target '{target_col}' missing or all NaN. Skipping causal analysis.")
        return []
    
    df_c[target_col] = pd.to_numeric(df_c[target_col], errors='coerce')

    # Identify candidate numeric features
    all_numeric_cols = [
        col for col in df_c.columns 
        if pd.api.types.is_numeric_dtype(df_c[col]) and 
           col != target_col and 
           df_c[col].notnull().any() and
           df_c[col].nunique() > 1 # Ensure feature has some variance
    ]
    if not all_numeric_cols:
        print("No suitable numeric features for causal analysis.")
        return []

    # Prepare data: Scale numeric features and handle NaNs
    df_subset_for_analysis = df_c[all_numeric_cols + [target_col]].copy()
    df_subset_for_analysis.replace([np.inf, -np.inf], np.nan, inplace=True)
    
    scaler_causal = StandardScaler()
    df_subset_for_analysis[all_numeric_cols] = scaler_causal.fit_transform(df_subset_for_analysis[all_numeric_cols])
    df_subset_for_analysis.dropna(inplace=True) # Crucial for DoWhy

    if df_subset_for_analysis.empty or df_subset_for_analysis.shape[0] < 20 or df_subset_for_analysis[target_col].nunique() < 1:
        print("Not enough data or target variation after cleaning for causal analysis.")
        return []

    # 1. Select a subset of features for deeper causal analysis (graph_feats)
    #    Using a predefined list and a fallback, similar to the old discover_causal_structure
    cwt_mean_col = f"{price_c}_cwt_mean" if f"{price_c}_cwt_mean" in df_subset_for_analysis.columns else f"close_cwt_mean"
    cwt_std_col = f"{price_c}_cwt_std" if f"{price_c}_cwt_std" in df_subset_for_analysis.columns else f"close_cwt_std"
    entropy_sample_col = f"{price_c}_entropy_sample" if f"{price_c}_entropy_sample" in df_subset_for_analysis.columns else f"close_entropy_sample"
    
    # Curated list of potentially interesting features
    potential_causes_list = [
        'RSI_14', 'MACDh_12_26_9', 'ADX_14', 'ATR_14', 
        cwt_mean_col, cwt_std_col, entropy_sample_col, 
        'regime_simple', 'volatility_20', 'log_returns', 
        'BBP_2020', 'BBB_2020', # Check actual sanitized names
        'close_trans_seq_volatility', 'close_trans_seq_autocorr1' # From conceptual transformer
    ]
    
    # Filter this list to only include features present in the (cleaned, scaled) df_subset_for_analysis
    graph_feats_subset = [
        c for c in potential_causes_list 
        if c in df_subset_for_analysis.columns and 
           c != target_col and 
           df_subset_for_analysis[c].nunique() > 1
    ]

    if not graph_feats_subset:
        print("Predefined potential causes not found or lack variance. Falling back to top varying features.")
        # Fallback: select top N varying features from all_numeric_cols present in df_subset_for_analysis
        available_features_for_fallback = [c for c in all_numeric_cols if c in df_subset_for_analysis.columns]
        if available_features_for_fallback:
            num_to_select = min(max_features_to_analyze, len(available_features_for_fallback))
            graph_feats_subset = df_subset_for_analysis[available_features_for_fallback].var().nlargest(num_to_select).index.tolist()
        else:
            print("No features available for fallback selection.")
            return []
    else:
        # If predefined list yields too many, cap it
        if len(graph_feats_subset) > max_features_to_analyze:
            print(f"Capping predefined graph features from {len(graph_feats_subset)} to {max_features_to_analyze}.")
            # This selection could be smarter (e.g. based on some preliminary importance)
            # For now, just take the first `max_features_to_analyze`
            graph_feats_subset = graph_feats_subset[:max_features_to_analyze]


    if not graph_feats_subset:
        print("No features selected for iterative causal effect estimation.")
        return []

    print(f"Iteratively estimating causal effects for {len(graph_feats_subset)} selected features: {graph_feats_subset}")

    feature_effects = {}
    # Data to be used for all models will be the subset containing only the graph_feats_subset and the target
    data_for_iterative_models = df_subset_for_analysis[graph_feats_subset + [target_col]].copy()


    for treatment_var in graph_feats_subset:
        # Define common causes: all other features in graph_feats_subset *excluding* the current treatment_var
        common_causes = [f for f in graph_feats_subset if f != treatment_var]
        
        # Construct a graph string for this specific treatment
        # Assumes common causes can affect treatment, and both common causes and treatment can affect outcome
        graph_dot_str = "digraph { "
        graph_dot_str += f'"{treatment_var}" -> "{target_col}"; ' # Treatment -> Outcome
        for cc in common_causes:
            graph_dot_str += f'"{cc}" -> "{treatment_var}"; '    # Common Cause -> Treatment
            graph_dot_str += f'"{cc}" -> "{target_col}"; '       # Common Cause -> Outcome
        graph_dot_str += "}"
        
        # print(f"  Testing {treatment_var} -> {target_col} with graph: {graph_dot_str}") # Can be verbose

        try:
            model = CausalModel(
                data=data_for_iterative_models, # Use the consistent, cleaned, scaled subset
                treatment=treatment_var,
                outcome=target_col,
                graph=graph_dot_str
            )
            identified_estimand = model.identify_effect(proceed_when_unidentifiable=True)
            estimate = model.estimate_effect(
                identified_estimand,
                method_name="backdoor.linear_regression", # Simple and relatively fast
                test_significance=False,
                # No need for force_univariate if graph is well-defined for the backdoor
            )
            if estimate is not None and hasattr(estimate, 'value') and not np.isnan(estimate.value):
                feature_effects[treatment_var] = abs(estimate.value)
                # print(f"    Estimated effect for {treatment_var}: {estimate.value:.4f}") # Verbose
            else:
                # print(f"    Could not estimate effect for {treatment_var} or effect was NaN.") # Verbose
                pass
        except Exception as e:
            # print(f"    Error estimating effect for {treatment_var}: {e}") # Verbose
            pass # Continue to the next feature

    if not feature_effects:
        print("Causal effect estimation did not yield any valid effects for the selected feature subset.")
        return []
    
    # Sort features by the absolute estimated effect strength
    sorted_features_by_effect = sorted(feature_effects.items(), key=lambda x: x[1], reverse=True)
    
    print(f"Causal features ranked by absolute effect strength (top {min(5, len(sorted_features_by_effect))} of {len(sorted_features_by_effect)}):")
    for feat, val in sorted_features_by_effect[:min(5, len(sorted_features_by_effect))]:
        print(f"  {feat}: {val:.4f}")
        
    return sorted_features_by_effect # Returns list of (feature_name, abs_effect_value)




# --- MAIN WORKFLOW FUNCTION ---
def run_full_workflow(symbol=DEFAULT_SYMBOL, start_date_str=START_DATE, end_date_str=END_DATE,
                      api_key_val=API_KEY, # force_train_autoformer is removed
                      run_optuna_lgbm=False, use_foundation_model=True,
                      enable_smote=True): # Added enable_smote flag
    print(f"\n{'='*40}\n🚀 ENHANCED WORKFLOW FOR: {symbol}\n{'='*40}")
    default_return = {
        "symbol": symbol, "status": "Workflow Started", "raw_data_shape": (0,0),
        "featured_data_shape": (0,0), "X_train_shape": (0,0), "X_test_shape": (0,0),
        "selected_features_count": 0, "selected_feature_names": [], "scaler_object": None,
        "ml_model_object": None, "lgbm_feature_importance": None,
        "causal_model_object": None, "causal_feature_ranking_list": [], # Changed name for clarity
        "onnx_model_path": None,
        "forecasting_results": {
            "configured_context_length": 512,
            "patchtst_forecast": None,
        },
        "lgbm_optimized_params": None
    }
    price_c, target_col = 'close', 'target'

    df_raw = fetch_twelve_data(symbol, api_key_val, start_date_str=start_date_str, end_date_str=end_date_str)
    if df_raw is None or df_raw.empty:
        default_return["status"] = "Data Fetching Failed"
        print(f"Workflow aborted for {symbol}: Data Fetching Failed.")
        return default_return
    default_return["raw_data_shape"] = df_raw.shape

    dynamic_context_length = 512
    if not df_raw.empty:
        context_configs = configure_extended_context(data_length=len(df_raw))
        dynamic_context_length = context_configs.get('adaptive', 512)
    default_return["forecasting_results"]["configured_context_length"] = dynamic_context_length

    print(f"\n--- 🔧 Feature Engineering: {symbol} ---")
    df_f = df_raw.copy()
    # ... (all your add_feature functions calls)
    df_f = add_technical_indicators(df_f)
    df_f = add_optimized_features(df_f, price_col=price_c, volume_col='volume')
    df_f = add_wavelet_features(df_f, column=price_c)
    df_f = add_entropy_features(df_f, column=price_c, window=40)
    df_f = add_advanced_technical_features(df_f, price_col=price_c, high_col='high', low_col='low', volume_col='volume')
    df_f = add_transformer_features_conceptual(df_f, column=price_c, sequence_length=20)
    df_f = add_multitimeframe_features(df_f, price_col=price_c)
    if 'log_returns' in df_f.columns:
        df_f = add_regime_features(df_f, returns_col='log_returns', price_col=price_c)
    else:
        print(f"Skipping regime features for {symbol} as 'log_returns' column is missing.")
    if 'RSI_14' in df_f.columns and 'ADX_14' in df_f.columns:
        df_f['RSI_ADX_interaction'] = df_f['RSI_14'] * df_f['ADX_14'] / 100.0
    if 'ATR_14' in df_f.columns and 'volatility_20' in df_f.columns:
        volatility_safe = df_f['volatility_20'].replace(0, np.nan)
        df_f['ATR_vol_ratio'] = df_f['ATR_14'] / volatility_safe
    default_return["featured_data_shape"] = df_f.shape

    print(f"\n--- Data Cleaning (Inf/NaN Handling & Imputation): {symbol} ---")
    df_f.replace([np.inf, -np.inf], np.nan, inplace=True)
    numeric_cols_to_impute = df_f.select_dtypes(include=np.number).columns
    if not numeric_cols_to_impute.empty:
        nan_counts_before = df_f[numeric_cols_to_impute].isnull().sum().sum()
        if nan_counts_before > 0:
            print(f"NaNs before imputation: {nan_counts_before}")
            df_f[numeric_cols_to_impute] = df_f[numeric_cols_to_impute].interpolate(method='linear', limit_direction='both', axis=0)
            df_f[numeric_cols_to_impute] = df_f[numeric_cols_to_impute].bfill().ffill().fillna(0)
            print(f"NaNs after imputation: {df_f[numeric_cols_to_impute].isnull().sum().sum()}")
    
    print(f"\n--- 📊 Regime Detection (Simplified): {symbol} ---")
    df_f = detect_regimes_simple(df_f, column=price_c)
    print(f"\n--- 🎯 Target Definition: {symbol} ---")
    df_f = balanced_target_definition(df_f, column=price_c, periods=5)

    # --- Causal Discovery (Conditional) ---
    #causal_model_obj = None
    # This will store the list of (feature, score) tuples from causal analysis
    # For now, score will be a dummy 1.0 if feature is in the causal graph
    causal_feature_ranking_for_selection = [] 
    # discovered_causal_graph_features = []

    if not SKIP_CAUSAL_ANALYSIS_FOR_DEBUGGING:
        if df_f is not None and not df_f.empty and target_col in df_f.columns and df_f[target_col].nunique(dropna=True) > 1:
            # Call the new unified function
            causal_feature_ranking_for_selection = discover_and_rank_causal_features(
                df_features=df_f.copy(), # Pass the full featured dataframe
                target_col=target_col,
                price_c=price_c,
                max_features_to_analyze=15, # Adjust this to control runtime vs. depth
                symbol=symbol
            )
        else:
            print(f"Skipping Causal Discovery for {symbol} due to data/target issues.")
    else:
        print(f"\n--- SKIPPING Causal Discovery & Ranking for {symbol} (DEBUG MODE) ---")
    
    # Store the ranked list (which might be empty if causal analysis was skipped or failed)
    default_return["causal_feature_ranking_list"] = causal_feature_ranking_for_selection


    print(f"\n--- ML Preparation & Feature Selection: {symbol} ---")
    X_tr, X_te, y_tr, y_te, scaler_obj = None, None, None, None, None
    sel_feat_names = []
    ml_model = None
    lgbm_feat_imp_df = None
    onnx_file_path = None
    current_status_ml = "ML Prep Incomplete"

    if df_f is None or df_f.empty or target_col not in df_f.columns or df_f[target_col].isnull().all():
        current_status_ml = "ML Prep Failed - DataFrame empty, target missing, or target all NaN"
        print(f"{current_status_ml} for {symbol}.")
    elif df_f[target_col].nunique(dropna=True) <= 1:
        unique_vals_count = df_f[target_col].nunique(dropna=True)
        current_status_ml = f"ML Prep Skipped - Target has {unique_vals_count} unique non-NaN value(s). Training not meaningful."
        print(f"{current_status_ml} for {symbol}.")
    else:
        ml_data_prep_output = prepare_ml_data(df_f.copy(), target_col=target_col, test_split_size=0.15, min_test_samples=30)

        if ml_data_prep_output is None or not all(item is not None for item in ml_data_prep_output[:4]): # Check X/y sets
            current_status_ml = "ML Data Preparation Failed or returned insufficient data."
            print(f"{current_status_ml} for {symbol}. Skipping subsequent ML steps.")
        else:
            X_tr, X_te, y_tr, y_te, scaler_obj = ml_data_prep_output
            default_return["scaler_object"] = scaler_obj
            default_return["X_train_shape"] = X_tr.shape if X_tr is not None else (0,0)
            default_return["X_test_shape"] = X_te.shape if X_te is not None else (0,0)

            if X_tr is None or X_tr.empty or y_tr is None or y_tr.empty:
                current_status_ml = "ML Training Data (X_tr or y_tr) is empty after preparation. Skipping training."
                print(f"{current_status_ml} for {symbol}.")
            else:
                # Pass the causal_feature_ranking_for_selection to prioritized_feature_selection
                # If causal discovery was skipped, this list will be empty, and MI will be used.
                selected_features_df = prioritized_feature_selection(
                    X_tr.copy(), 
                    y_tr.copy(), 
                    causal_feature_ranking_for_selection, # MODIFIED: Pass the causal ranking
                    max_features=35
                )
                # ... (rest of feature selection fallback logic remains the same) ...
                if selected_features_df is not None and 'Feature' in selected_features_df.columns and not selected_features_df.empty:
                    sel_feat_names = selected_features_df['Feature'].tolist()
                    # ... (fallback logic as before) ...
                else: # Fallback if prioritized selection itself fails
                    sel_feat_df_fallback = simple_feature_selection_fallback(X_tr.copy(), y_tr.copy(), max_features=20)
                    if sel_feat_df_fallback is not None and 'Feature' in sel_feat_df_fallback.columns:
                        sel_feat_names = sel_feat_df_fallback['Feature'].tolist()
                    else: sel_feat_names = X_tr.columns[:20].tolist() if not X_tr.empty else []
                
                default_return["selected_feature_names"] = sel_feat_names
                default_return["selected_features_count"] = len(sel_feat_names)
                print(f"Selected {len(sel_feat_names)} features: {sel_feat_names[:10]}...")

                if not sel_feat_names:
                    current_status_ml = "No features selected. Skipping LightGBM training."
                    print(current_status_ml)
                else:
                    X_tr_selected = X_tr[sel_feat_names].copy()
                    X_te_selected = X_te[sel_feat_names].copy() if X_te is not None and not X_te.empty and all(f in X_te.columns for f in sel_feat_names) else pd.DataFrame()

                    # --- SMOTE Integration ---
                    X_train_for_model = X_tr_selected
                    y_train_for_model = y_tr

                    if enable_smote and imblearn_available and y_tr.nunique() == 2: # SMOTE for binary classification
                        print(f"Class distribution before SMOTE: \n{y_tr.value_counts(normalize=True)}")
                        try:
                            smote = SMOTE(random_state=42)
                            X_train_for_model, y_train_for_model = smote.fit_resample(X_tr_selected, y_tr)
                            print(f"Class distribution after SMOTE: \n{pd.Series(y_train_for_model).value_counts(normalize=True)}")
                            print(f"Shape of X_train after SMOTE: {X_train_for_model.shape}")
                        except Exception as e_smote:
                            print(f"Error during SMOTE: {e_smote}. Using original data.")
                            # Fallback to original data if SMOTE fails
                            X_train_for_model = X_tr_selected
                            y_train_for_model = y_tr
                    elif enable_smote and not imblearn_available:
                        print("SMOTE enabled but imblearn not available. Using original data.")
                    elif enable_smote and y_tr.nunique() != 2:
                        print("SMOTE enabled but target is not binary. Using original data.")
                    # --- End SMOTE Integration ---

                    num_classes_for_optuna = y_train_for_model.nunique() # Use y_train_for_model
                    optuna_base_params = optimized_lightgbm_params() 
                    optuna_base_params['objective'] = 'multiclass' if num_classes_for_optuna > 2 else 'binary'
                    optuna_base_params['metric'] = 'multi_logloss' if num_classes_for_optuna > 2 else 'binary_logloss'
                    if num_classes_for_optuna > 2:
                        optuna_base_params['num_class'] = num_classes_for_optuna
                    elif 'num_class' in optuna_base_params:
                        del optuna_base_params['num_class']
                    
                    lgbm_final_params = optuna_base_params.copy()

                    if run_optuna_lgbm and optuna_available:
                        print(f"\n--- Hyperparameter Optimization (Optuna for LightGBM): {symbol} ---")
                        tuned_params_from_optuna = optimize_lgbm_hyperparameters(
                            X_train_for_model.copy(), # Use SMOTE'd or original data
                            y_train_for_model.copy(), 
                            optuna_base_params, 
                            n_trials=50
                        )
                        if tuned_params_from_optuna:
                           lgbm_final_params.update(tuned_params_from_optuna)
                        default_return["lgbm_optimized_params"] = lgbm_final_params
                    else:
                         print("Optuna HPO for LightGBM skipped or Optuna not available.")
                         default_return["lgbm_optimized_params"] = lgbm_final_params

                    print(f"\n--- LightGBM Model Training: {symbol} ---")
                    ml_model, lgbm_feat_imp_df = train_lightgbm_model(
                        X_train_for_model, # Use SMOTE'd or original data
                        y_train_for_model, 
                        X_te_selected, 
                        y_te, 
                        optimized_params=lgbm_final_params
                    )
                    # ... (rest of ML model handling)
                    default_return["ml_model_object"] = ml_model
                    default_return["lgbm_feature_importance"] = lgbm_feat_imp_df
                    if ml_model:
                        current_status_ml = "LightGBM Model Trained"
                        plot_feature_importance(lgbm_feat_imp_df, top_n=20, symbol_for_plot=symbol)
                        if not X_train_for_model.empty: # Use X_train_for_model for sample
                             onnx_file_path = export_lgbm_to_onnx(ml_model, X_train_for_model.head(1), file_path=f"lgbm_model_{symbol}.onnx")
                             default_return["onnx_model_path"] = onnx_file_path
                        else:
                            print("X_train_for_model is empty. Skipping ONNX export.")
                    else:
                        current_status_ml = "LightGBM Model Training Failed."
                        print(current_status_ml)
    default_return["status"] = current_status_ml
    
    # ... (PatchTST forecasting section remains the same) ...
    if use_foundation_model and torch_available and 'PatchTSTForPrediction' in globals():
        print(f"\n--- Foundation Model Forecasting (PatchTST): {symbol} ---")
        # ... (PatchTST call) ...
    else:
        default_return["forecasting_results"]["patchtst_forecast"] = {"status": "skipped", "reason": "Disabled or dependencies missing"}

    print(f"\n🏁 Workflow completed for {symbol}. Final Status: {default_return['status']}")
    return default_return

# --- Example Usage and Main Execution ---
if __name__ == "__main__":
    print("DEBUG: Script execution started, entering __main__ block.")
    start_time = time.time()
    
    current_api_key = API_KEY 

    if current_api_key == "YOUR_API_KEY_HERE" or not current_api_key:
        print("🛑 CRITICAL: TWELVE_DATA_API_KEY is not set. Update 'YOUR_API_KEY_HERE' in the script constants or set API_KEY directly.")
        print("🛑 CRITICAL: Workflow cannot proceed without a valid API key.")
    else:
        print(f"DEBUG: API_KEY appears to be set. Proceeding. Key ends with '...{current_api_key[-4:] if len(current_api_key)>4 else current_api_key}'")

    symbols_to_run = ["AAPL", "GOOGL"]
    print(f"DEBUG: Symbols to process: {symbols_to_run}")
    all_results_dict = {}

    if current_api_key == "YOUR_API_KEY_HERE" or not current_api_key:
        print("DEBUG: Halting before loop due to missing API Key.")
    else:
        for sym_item in symbols_to_run:
            print(f"DEBUG: Starting main loop for symbol: {sym_item}")
            try:
                workflow_output = run_full_workflow(
                    symbol=sym_item,
                    api_key_val=current_api_key, 
                    run_optuna_lgbm=True, 
                    use_foundation_model=True,
                    enable_smote=True # SMOTE is now enabled by default
                )
                # ... (rest of the main loop for printing results) ...
                all_results_dict[sym_item] = workflow_output
                print(f"\n--- Results Summary for {sym_item} ---")
                if workflow_output:
                    print(f"  Overall Status: {workflow_output.get('status')}")
                    # ... (other print statements for summary) ...
                    lgbm_feat_imp = workflow_output.get("lgbm_feature_importance")
                    if lgbm_feat_imp is not None and not lgbm_feat_imp.empty:
                        print(f"  Top LGBM Features: {lgbm_feat_imp['Feature'].head(3).tolist()}")
                    forecast_summary = workflow_output.get("forecasting_results", {})
                    patchtst_info = forecast_summary.get("patchtst_forecast")
                    if patchtst_info and isinstance(patchtst_info, dict) and patchtst_info.get("status") == "success":
                        print(f"  PatchTST Forecast ({patchtst_info.get('method', 'N/A')}):")
                        print(f"    Values: {patchtst_info.get('forecast')}")
                        print(f"    Direction: {patchtst_info.get('direction')}, Magnitude: {patchtst_info.get('magnitude', 0):.2f}%")
                    elif patchtst_info and isinstance(patchtst_info, dict):
                        print(f"  PatchTST Status: {patchtst_info.get('status')}, Reason: {patchtst_info.get('reason')}")
                    print(f"  ONNX Model Path: {workflow_output.get('onnx_model_path')}")
                print("-" * 40)

            except Exception as e_main_loop:
                print(f"🛑 ERROR: Unhandled exception in main loop for symbol {sym_item}: {e_main_loop}")
                traceback.print_exc()
                all_results_dict[sym_item] = {"status": f"Error: {e_main_loop}", "symbol": sym_item}

    end_time = time.time()
    print(f"\nTotal execution time for {len(symbols_to_run)} symbol(s): {(end_time - start_time):.2f} seconds.")
    print("DEBUG: Script __main__ block finished.")


# --- Example Usage and Main Execution ---
if __name__ == "__main__":
    print("DEBUG: Script execution started, entering __main__ block.")
    start_time = time.time()
    
    current_api_key = API_KEY 

    if current_api_key == "YOUR_API_KEY_HERE" or not current_api_key:
        print("🛑 CRITICAL: TWELVE_DATA_API_KEY is not set. Update 'YOUR_API_KEY_HERE' in the script constants or set API_KEY directly.")
        print("🛑 CRITICAL: Workflow cannot proceed without a valid API key.")
    else:
        print(f"DEBUG: API_KEY appears to be set. Proceeding. Key ends with '...{current_api_key[-4:] if len(current_api_key)>4 else current_api_key}'")

    symbols_to_run = ["AAPL", "GOOGL"]
    print(f"DEBUG: Symbols to process: {symbols_to_run}")
    all_results_dict = {}

    if current_api_key == "YOUR_API_KEY_HERE" or not current_api_key:
        print("DEBUG: Halting before loop due to missing API Key.")
    else:
        for sym_item in symbols_to_run:
            print(f"DEBUG: Starting main loop for symbol: {sym_item}")
            try:
                workflow_output = run_full_workflow(
                    symbol=sym_item,
                    api_key_val=current_api_key, 
                    run_optuna_lgbm=True, 
                    # force_train_autoformer flag removed
                    use_foundation_model=True 
                )
                all_results_dict[sym_item] = workflow_output
                print(f"\n--- Results Summary for {sym_item} ---")
                if workflow_output:
                    print(f"  Overall Status: {workflow_output.get('status')}")
                    print(f"  Raw Data Shape: {workflow_output.get('raw_data_shape')}")
                    print(f"  Featured Data Shape: {workflow_output.get('featured_data_shape')}")
                    print(f"  Selected Features Count: {workflow_output.get('selected_features_count')}")
                    
                    lgbm_feat_imp = workflow_output.get("lgbm_feature_importance")
                    if lgbm_feat_imp is not None and not lgbm_feat_imp.empty:
                        print(f"  Top LGBM Features: {lgbm_feat_imp['Feature'].head(3).tolist()}")
                    
                    forecast_summary = workflow_output.get("forecasting_results", {})
                    patchtst_info = forecast_summary.get("patchtst_forecast")
                    # autoformer_info retrieval removed
                    
                    if patchtst_info and isinstance(patchtst_info, dict) and patchtst_info.get("status") == "success":
                        print(f"  PatchTST Forecast ({patchtst_info.get('method', 'N/A')}):")
                        print(f"    Values: {patchtst_info.get('forecast')}")
                        print(f"    Direction: {patchtst_info.get('direction')}, Magnitude: {patchtst_info.get('magnitude', 0):.2f}%")
                    elif patchtst_info and isinstance(patchtst_info, dict):
                        print(f"  PatchTST Status: {patchtst_info.get('status')}, Reason: {patchtst_info.get('reason')}")
                    
                    # Autoformer forecast printing removed
                    
                    print(f"  ONNX Model Path: {workflow_output.get('onnx_model_path')}")
                print("-" * 40)

            except Exception as e_main_loop:
                print(f"🛑 ERROR: Unhandled exception in main loop for symbol {sym_item}: {e_main_loop}")
                traceback.print_exc()
                all_results_dict[sym_item] = {"status": f"Error: {e_main_loop}", "symbol": sym_item}

    end_time = time.time()
    print(f"\nTotal execution time for {len(symbols_to_run)} symbol(s): {(end_time - start_time):.2f} seconds.")
    print("DEBUG: Script __main__ block finished.")



Optuna imported successfully.
PyTorch imported successfully.
PyTorch CUDA available: True, Version: 12.1
Using PyTorch on GPU: NVIDIA GeForce RTX 4070 Laptop GPU
imblearn (for SMOTE) imported successfully.
DoWhy 0.10 and NetworkX 3.1 imported successfully.
ONNX, ONNXRuntime, skl2onnx, and onnxmltools imported successfully.
Onnxmltools version: 1.11.1

All libraries and modules conditional imports attempted.
DEBUG: Script execution started, entering __main__ block.
DEBUG: API_KEY appears to be set. Proceeding. Key ends with '...ef0a'
DEBUG: Symbols to process: ['AAPL', 'GOOGL']
DEBUG: Starting main loop for symbol: AAPL

🚀 ENHANCED WORKFLOW FOR: AAPL
Fetching data for AAPL from Twelve Data (interval=1day, from 2022-06-04 to 2025-06-03)...
Successfully fetched/processed 750 data points for AAPL.

--- 🔧 Feature Engineering: AAPL ---
Adding technical indicators...


 1.09688179e+10 9.95967086e+09 9.95940605e+09 1.25755687e+10
 9.97233035e+09 9.79635737e+09 1.02719769e+10 1.05694694e+10
 9.63284651e+09 9.45818898e+09 1.13631385e+10 1.14780874e+10
 1.14194116e+10 1.24235717e+10 9.87757346e+09 1.00419950e+10
 1.03075722e+10 1.22197531e+10 1.27330542e+10 1.64772630e+10
 1.09881365e+10 1.35716587e+10 9.19816034e+09 9.98346202e+09
 1.18232614e+10 9.67097850e+09 1.16499329e+10 9.33857157e+09
 9.74196305e+09 1.38739808e+10 8.68182651e+09 1.20859660e+10
 1.35908341e+10 1.06564850e+10 1.70375068e+10 1.24421969e+10
 1.68017624e+10 1.41309629e+10 1.28473936e+10 1.61259518e+10
 1.27816217e+10 1.15707123e+10 9.99105288e+09 1.58872146e+10
 1.24359106e+10 1.20936924e+10 1.42446076e+10 9.28966227e+09
 1.26294602e+10 1.12878711e+10 1.13142214e+10 2.53217851e+10
 1.25398382e+10 1.71641023e+10 1.39115016e+10 1.09080380e+10
 1.35458486e+10 1.20141956e+10 1.13232148e+10 7.72796261e+09
 8.78874185e+09 1.62356225e+10 1.05462467e+10 1.01707065e+10
 8.84851958e+09 1.087248


--- Data Cleaning (Inf/NaN Handling & Imputation): AAPL ---
NaNs before imputation: 1858
NaNs after imputation: 0

--- 📊 Regime Detection (Simplified): AAPL ---
Simple Regimes (0:Med,1:Low,2:High):
regime_simple
0    34.666667
1    32.666667
2    32.666667
Name: proportion, dtype: float64 %

--- 🎯 Target Definition: AAPL ---
Target distribution:
target
0    55.333333
1    44.666667
Name: proportion, dtype: float64

--- Causal Feature Discovery and Effect Estimation for AAPL (Subset Analysis) ---
Iteratively estimating causal effects for 11 selected features: ['RSI_14', 'MACDh_12_26_9', 'ADX_14', 'close_cwt_mean', 'close_cwt_std', 'close_entropy_sample', 'regime_simple', 'volatility_20', 'log_returns', 'close_trans_seq_volatility', 'close_trans_seq_autocorr1']


  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
[I 2025-06-03 16:35:50,170] A new study created in memory with name: no-name-546191a1-224b-4f65-a453-95eaf9771b8c


Causal features ranked by absolute effect strength (top 5 of 11):
  close_trans_seq_volatility: 0.1427
  close_trans_seq_autocorr1: 0.0892
  close_cwt_std: 0.0830
  close_entropy_sample: 0.0773
  close_cwt_mean: 0.0707

--- ML Preparation & Feature Selection: AAPL ---
Train shapes: X_train=(638, 100), y_train=(638,); Test shapes: X_test=(112, 100), y_test=(112,)
Selected 35 features: ['close_trans_seq_volatility', 'close_trans_seq_autocorr1', 'close_cwt_std', 'close_entropy_sample', 'close_cwt_mean', 'RSI_14', 'regime_simple', 'MACDh_12_26_9', 'log_returns', 'volatility_20']...
Class distribution before SMOTE: 
target
0    0.532915
1    0.467085
Name: proportion, dtype: float64
Class distribution after SMOTE: 
target
0    0.5
1    0.5
Name: proportion, dtype: float64
Shape of X_train after SMOTE: (680, 35)

--- Hyperparameter Optimization (Optuna for LightGBM): AAPL ---
Optimizing LightGBM HPs with Optuna (50 trials)...


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-06-03 16:35:50,232] Trial 0 finished with value: 0.6805048667101096 and parameters: {'n_estimators': 950, 'learning_rate': 0.09396999915221718, 'num_leaves': 48, 'max_depth': 5, 'min_child_samples': 44, 'feature_fraction': 0.6223679220139575, 'bagging_fraction': 0.6473601264979502, 'bagging_freq': 4, 'reg_alpha': 0.015703651952627976, 'reg_lambda': 0.021634232531815352, 'min_gain_to_split': 0.06724190361724207}. Best is trial 0 with value: 0.6805048667101096.
[I 2025-06-03 16:35:50,391] Trial 1 finished with value: 0.6552185630366003 and parameters: {'n_estimators': 100, 'learning_rate': 0.024630503408160198, 'num_leaves': 36, 'max_depth': -1, 'min_child_samples': 12, 'feature_fraction': 0.8502137177199772, 'bagging_fraction': 0.8595930713409534, 'bagging_freq': 5, 'reg_alpha': 0.35320092522598445, 'reg_lambda': 0.021427036605378105, 'min_gain_to_split': 0.05055630606496985}. Best is trial 1 with value: 0.6552185630366003.
[I 2025-06-03 16:35:50,452] Trial 2 finished with value

The maximum opset needed by this model is only 9.


Feature importance plot saved to feature_importance_AAPL.png

Exporting LGBM model to ONNX: lgbm_model_AAPL.onnx (opset=12)
Model exported to ONNX: lgbm_model_AAPL.onnx
ONNX model check OK.

--- Foundation Model Forecasting (PatchTST): AAPL ---

🏁 Workflow completed for AAPL. Final Status: LightGBM Model Trained

--- Results Summary for AAPL ---
  Overall Status: LightGBM Model Trained
  Top LGBM Features: ['ADX_14', 'MACDh_12_26_9', 'close_trans_seq_volatility']
  ONNX Model Path: lgbm_model_AAPL.onnx
----------------------------------------
DEBUG: Starting main loop for symbol: GOOGL

🚀 ENHANCED WORKFLOW FOR: GOOGL
Fetching data for GOOGL from Twelve Data (interval=1day, from 2022-06-04 to 2025-06-03)...
Successfully fetched/processed 750 data points for GOOGL.

--- 🔧 Feature Engineering: GOOGL ---
Adding technical indicators...


 3.43865086e+09 3.17167519e+09 4.78883455e+09 4.24405295e+09
 4.35871985e+09 4.26272497e+09 4.81415547e+09 4.53681124e+09
 5.23540262e+09 4.10838675e+09 4.05583490e+09 6.68380011e+09
 3.71943344e+09 4.29421548e+09 3.34559263e+09 2.54813406e+09
 2.26042461e+09 3.70164717e+09 2.30950652e+09 2.37180236e+09
 1.96404962e+09 3.09653760e+09 2.52002904e+09 2.55685047e+09
 2.54479800e+09 2.36472752e+09 3.22347074e+09 2.74643330e+09
 2.92220136e+09 2.26916019e+09 2.13695967e+09 3.37931161e+09
 2.48702991e+09 2.83936227e+09 2.43983518e+09 2.54650099e+09
 2.76519870e+09 4.22385634e+09 4.15080653e+09 4.37850685e+09
 3.00166951e+09 2.70625383e+09 4.81684360e+09 3.16852568e+09
 4.09870394e+09 2.87297429e+09 1.85102348e+09 4.31310047e+09
 2.90208266e+09 3.92662859e+09 2.21159676e+09 2.03844918e+09
 2.05264799e+09 2.51952407e+09 3.58487934e+09 2.57100238e+09
 2.43665204e+09 2.75096559e+09 2.40859522e+09 2.66476038e+09
 3.41994701e+09 6.13669823e+09 3.96490547e+09 2.91573786e+09
 3.35410020e+09 2.932705


--- Data Cleaning (Inf/NaN Handling & Imputation): GOOGL ---
NaNs before imputation: 1879
NaNs after imputation: 0

--- 📊 Regime Detection (Simplified): GOOGL ---
Simple Regimes (0:Med,1:Low,2:High):
regime_simple
0    34.666667
1    32.666667
2    32.666667
Name: proportion, dtype: float64 %

--- 🎯 Target Definition: GOOGL ---
Target distribution:
target
0    55.333333
1    44.666667
Name: proportion, dtype: float64

--- Causal Feature Discovery and Effect Estimation for GOOGL (Subset Analysis) ---
Iteratively estimating causal effects for 11 selected features: ['RSI_14', 'MACDh_12_26_9', 'ADX_14', 'close_cwt_mean', 'close_cwt_std', 'close_entropy_sample', 'regime_simple', 'volatility_20', 'log_returns', 'close_trans_seq_volatility', 'close_trans_seq_autocorr1']


  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
[I 2025-06-03 16:36:01,791] A new study created in memory with name: no-name-fcc34a5c-7051-4142-af33-91f37187bd8e


Causal features ranked by absolute effect strength (top 5 of 11):
  close_trans_seq_volatility: 0.2783
  close_trans_seq_autocorr1: 0.2377
  RSI_14: 0.1078
  volatility_20: 0.0659
  MACDh_12_26_9: 0.0634

--- ML Preparation & Feature Selection: GOOGL ---
Train shapes: X_train=(638, 100), y_train=(638,); Test shapes: X_test=(112, 100), y_test=(112,)
Selected 35 features: ['close_trans_seq_volatility', 'close_trans_seq_autocorr1', 'RSI_14', 'volatility_20', 'MACDh_12_26_9', 'close_cwt_std', 'close_cwt_mean', 'ADX_14', 'close_entropy_sample', 'log_returns']...
Class distribution before SMOTE: 
target
0    0.543887
1    0.456113
Name: proportion, dtype: float64
Class distribution after SMOTE: 
target
0    0.5
1    0.5
Name: proportion, dtype: float64
Shape of X_train after SMOTE: (694, 35)

--- Hyperparameter Optimization (Optuna for LightGBM): GOOGL ---
Optimizing LightGBM HPs with Optuna (50 trials)...


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-06-03 16:36:02,175] Trial 0 finished with value: 0.5240301272528531 and parameters: {'n_estimators': 100, 'learning_rate': 0.014323961151037607, 'num_leaves': 45, 'max_depth': 0, 'min_child_samples': 9, 'feature_fraction': 0.5986152731636356, 'bagging_fraction': 0.8190686123306552, 'bagging_freq': 7, 'reg_alpha': 0.10854349078811594, 'reg_lambda': 0.032934774375577236, 'min_gain_to_split': 0.050811038618215426}. Best is trial 0 with value: 0.5240301272528531.
[I 2025-06-03 16:36:02,357] Trial 1 finished with value: 0.5831725429650833 and parameters: {'n_estimators': 350, 'learning_rate': 0.02070684280786433, 'num_leaves': 34, 'max_depth': 10, 'min_child_samples': 37, 'feature_fraction': 0.8381192594249507, 'bagging_fraction': 0.8861022617514336, 'bagging_freq': 4, 'reg_alpha': 0.14801071727502124, 'reg_lambda': 0.019915331065390486, 'min_gain_to_split': 0.018588434556399393}. Best is trial 0 with value: 0.5240301272528531.
[I 2025-06-03 16:36:02,430] Trial 2 finished with value

The maximum opset needed by this model is only 9.


Feature importance plot saved to feature_importance_GOOGL.png

Exporting LGBM model to ONNX: lgbm_model_GOOGL.onnx (opset=12)
Model exported to ONNX: lgbm_model_GOOGL.onnx
ONNX model check OK.

--- Foundation Model Forecasting (PatchTST): GOOGL ---

🏁 Workflow completed for GOOGL. Final Status: LightGBM Model Trained

--- Results Summary for GOOGL ---
  Overall Status: LightGBM Model Trained
  Top LGBM Features: ['RSI_14', 'close_cwt_std', 'sma_10_regime_adj']
  ONNX Model Path: lgbm_model_GOOGL.onnx
----------------------------------------

Total execution time for 2 symbol(s): 32.81 seconds.
DEBUG: Script __main__ block finished.
DEBUG: Script execution started, entering __main__ block.
DEBUG: API_KEY appears to be set. Proceeding. Key ends with '...ef0a'
DEBUG: Symbols to process: ['AAPL', 'GOOGL']
DEBUG: Starting main loop for symbol: AAPL

🚀 ENHANCED WORKFLOW FOR: AAPL
Fetching data for AAPL from Twelve Data (interval=1day, from 2022-06-04 to 2025-06-03)...
Successfully fetched/pr

 1.09688179e+10 9.95967086e+09 9.95940605e+09 1.25755687e+10
 9.97233035e+09 9.79635737e+09 1.02719769e+10 1.05694694e+10
 9.63284651e+09 9.45818898e+09 1.13631385e+10 1.14780874e+10
 1.14194116e+10 1.24235717e+10 9.87757346e+09 1.00419950e+10
 1.03075722e+10 1.22197531e+10 1.27330542e+10 1.64772630e+10
 1.09881365e+10 1.35716587e+10 9.19816034e+09 9.98346202e+09
 1.18232614e+10 9.67097850e+09 1.16499329e+10 9.33857157e+09
 9.74196305e+09 1.38739808e+10 8.68182651e+09 1.20859660e+10
 1.35908341e+10 1.06564850e+10 1.70375068e+10 1.24421969e+10
 1.68017624e+10 1.41309629e+10 1.28473936e+10 1.61259518e+10
 1.27816217e+10 1.15707123e+10 9.99105288e+09 1.58872146e+10
 1.24359106e+10 1.20936924e+10 1.42446076e+10 9.28966227e+09
 1.26294602e+10 1.12878711e+10 1.13142214e+10 2.53217851e+10
 1.25398382e+10 1.71641023e+10 1.39115016e+10 1.09080380e+10
 1.35458486e+10 1.20141956e+10 1.13232148e+10 7.72796261e+09
 8.78874185e+09 1.62356225e+10 1.05462467e+10 1.01707065e+10
 8.84851958e+09 1.087248


--- Data Cleaning (Inf/NaN Handling & Imputation): AAPL ---
NaNs before imputation: 1858
NaNs after imputation: 0

--- 📊 Regime Detection (Simplified): AAPL ---
Simple Regimes (0:Med,1:Low,2:High):
regime_simple
0    34.666667
1    32.666667
2    32.666667
Name: proportion, dtype: float64 %

--- 🎯 Target Definition: AAPL ---
Target distribution:
target
0    55.333333
1    44.666667
Name: proportion, dtype: float64

--- Causal Feature Discovery and Effect Estimation for AAPL (Subset Analysis) ---
Iteratively estimating causal effects for 11 selected features: ['RSI_14', 'MACDh_12_26_9', 'ADX_14', 'close_cwt_mean', 'close_cwt_std', 'close_entropy_sample', 'regime_simple', 'volatility_20', 'log_returns', 'close_trans_seq_volatility', 'close_trans_seq_autocorr1']


  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
[I 2025-06-03 16:36:22,963] A new study created in memory with name: no-name-1bf5c7d3-70c4-44f8-bc9b-036edd5a1d85


Causal features ranked by absolute effect strength (top 5 of 11):
  close_trans_seq_volatility: 0.1427
  close_trans_seq_autocorr1: 0.0892
  close_cwt_std: 0.0830
  close_entropy_sample: 0.0773
  close_cwt_mean: 0.0707

--- ML Preparation & Feature Selection: AAPL ---
Train shapes: X_train=(638, 100), y_train=(638,); Test shapes: X_test=(112, 100), y_test=(112,)
Selected 35 features: ['close_trans_seq_volatility', 'close_trans_seq_autocorr1', 'close_cwt_std', 'close_entropy_sample', 'close_cwt_mean', 'RSI_14', 'regime_simple', 'MACDh_12_26_9', 'log_returns', 'volatility_20']...
Class distribution before SMOTE: 
target
0    0.532915
1    0.467085
Name: proportion, dtype: float64
Class distribution after SMOTE: 
target
0    0.5
1    0.5
Name: proportion, dtype: float64
Shape of X_train after SMOTE: (680, 35)

--- Hyperparameter Optimization (Optuna for LightGBM): AAPL ---
Optimizing LightGBM HPs with Optuna (50 trials)...


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-06-03 16:36:23,196] Trial 0 finished with value: 0.6644854373311424 and parameters: {'n_estimators': 100, 'learning_rate': 0.01120862874245066, 'num_leaves': 40, 'max_depth': 5, 'min_child_samples': 9, 'feature_fraction': 0.7222061197670793, 'bagging_fraction': 0.9344899633417024, 'bagging_freq': 3, 'reg_alpha': 0.00492746658910613, 'reg_lambda': 0.07482502956214401, 'min_gain_to_split': 0.008930268274096343}. Best is trial 0 with value: 0.6644854373311424.
[I 2025-06-03 16:36:23,240] Trial 1 finished with value: 0.6833815749830187 and parameters: {'n_estimators': 650, 'learning_rate': 0.06521597429388001, 'num_leaves': 21, 'max_depth': 8, 'min_child_samples': 41, 'feature_fraction': 0.6871956600713562, 'bagging_fraction': 0.607555288933014, 'bagging_freq': 6, 'reg_alpha': 0.2807813964093168, 'reg_lambda': 0.07797322002335917, 'min_gain_to_split': 0.07023224000864338}. Best is trial 0 with value: 0.6644854373311424.
[I 2025-06-03 16:36:23,298] Trial 2 finished with value: 0.674

The maximum opset needed by this model is only 9.


Feature importance plot saved to feature_importance_AAPL.png

Exporting LGBM model to ONNX: lgbm_model_AAPL.onnx (opset=12)
Model exported to ONNX: lgbm_model_AAPL.onnx
ONNX model check OK.

--- Foundation Model Forecasting (PatchTST): AAPL ---

🏁 Workflow completed for AAPL. Final Status: LightGBM Model Trained

--- Results Summary for AAPL ---
  Overall Status: LightGBM Model Trained
  Raw Data Shape: (750, 5)
  Featured Data Shape: (750, 105)
  Selected Features Count: 35
  Top LGBM Features: ['RSI_14', 'SMA_10', 'close_quarterly_mean']
  ONNX Model Path: lgbm_model_AAPL.onnx
----------------------------------------
DEBUG: Starting main loop for symbol: GOOGL

🚀 ENHANCED WORKFLOW FOR: GOOGL
Fetching data for GOOGL from Twelve Data (interval=1day, from 2022-06-04 to 2025-06-03)...
Successfully fetched/processed 750 data points for GOOGL.

--- 🔧 Feature Engineering: GOOGL ---
Adding technical indicators...


 3.43865086e+09 3.17167519e+09 4.78883455e+09 4.24405295e+09
 4.35871985e+09 4.26272497e+09 4.81415547e+09 4.53681124e+09
 5.23540262e+09 4.10838675e+09 4.05583490e+09 6.68380011e+09
 3.71943344e+09 4.29421548e+09 3.34559263e+09 2.54813406e+09
 2.26042461e+09 3.70164717e+09 2.30950652e+09 2.37180236e+09
 1.96404962e+09 3.09653760e+09 2.52002904e+09 2.55685047e+09
 2.54479800e+09 2.36472752e+09 3.22347074e+09 2.74643330e+09
 2.92220136e+09 2.26916019e+09 2.13695967e+09 3.37931161e+09
 2.48702991e+09 2.83936227e+09 2.43983518e+09 2.54650099e+09
 2.76519870e+09 4.22385634e+09 4.15080653e+09 4.37850685e+09
 3.00166951e+09 2.70625383e+09 4.81684360e+09 3.16852568e+09
 4.09870394e+09 2.87297429e+09 1.85102348e+09 4.31310047e+09
 2.90208266e+09 3.92662859e+09 2.21159676e+09 2.03844918e+09
 2.05264799e+09 2.51952407e+09 3.58487934e+09 2.57100238e+09
 2.43665204e+09 2.75096559e+09 2.40859522e+09 2.66476038e+09
 3.41994701e+09 6.13669823e+09 3.96490547e+09 2.91573786e+09
 3.35410020e+09 2.932705


--- Data Cleaning (Inf/NaN Handling & Imputation): GOOGL ---
NaNs before imputation: 1879
NaNs after imputation: 0

--- 📊 Regime Detection (Simplified): GOOGL ---
Simple Regimes (0:Med,1:Low,2:High):
regime_simple
0    34.666667
1    32.666667
2    32.666667
Name: proportion, dtype: float64 %

--- 🎯 Target Definition: GOOGL ---
Target distribution:
target
0    55.333333
1    44.666667
Name: proportion, dtype: float64

--- Causal Feature Discovery and Effect Estimation for GOOGL (Subset Analysis) ---
Iteratively estimating causal effects for 11 selected features: ['RSI_14', 'MACDh_12_26_9', 'ADX_14', 'close_cwt_mean', 'close_cwt_std', 'close_entropy_sample', 'regime_simple', 'volatility_20', 'log_returns', 'close_trans_seq_volatility', 'close_trans_seq_autocorr1']


  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
  intercept_parameter = self.model.params[0]
[I 2025-06-03 16:36:32,421] A new study created in memory with name: no-name-8fc7b438-93bf-4a22-b992-864636e794ee


Causal features ranked by absolute effect strength (top 5 of 11):
  close_trans_seq_volatility: 0.2783
  close_trans_seq_autocorr1: 0.2377
  RSI_14: 0.1078
  volatility_20: 0.0659
  MACDh_12_26_9: 0.0634

--- ML Preparation & Feature Selection: GOOGL ---
Train shapes: X_train=(638, 100), y_train=(638,); Test shapes: X_test=(112, 100), y_test=(112,)
Selected 35 features: ['close_trans_seq_volatility', 'close_trans_seq_autocorr1', 'RSI_14', 'volatility_20', 'MACDh_12_26_9', 'close_cwt_std', 'close_cwt_mean', 'ADX_14', 'close_entropy_sample', 'log_returns']...
Class distribution before SMOTE: 
target
0    0.543887
1    0.456113
Name: proportion, dtype: float64
Class distribution after SMOTE: 
target
0    0.5
1    0.5
Name: proportion, dtype: float64
Shape of X_train after SMOTE: (694, 35)

--- Hyperparameter Optimization (Optuna for LightGBM): GOOGL ---
Optimizing LightGBM HPs with Optuna (50 trials)...


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-06-03 16:36:32,884] Trial 0 finished with value: 0.5389316759051009 and parameters: {'n_estimators': 550, 'learning_rate': 0.0402528584268907, 'num_leaves': 26, 'max_depth': 7, 'min_child_samples': 6, 'feature_fraction': 0.5865292609891545, 'bagging_fraction': 0.8552710719727215, 'bagging_freq': 6, 'reg_alpha': 0.0017747821361299824, 'reg_lambda': 0.025614252806587353, 'min_gain_to_split': 0.022601228791095107}. Best is trial 0 with value: 0.5389316759051009.
[I 2025-06-03 16:36:33,047] Trial 1 finished with value: 0.5625755063633294 and parameters: {'n_estimators': 950, 'learning_rate': 0.025348086254079954, 'num_leaves': 46, 'max_depth': 9, 'min_child_samples': 33, 'feature_fraction': 0.9753334361941939, 'bagging_fraction': 0.6530802394644688, 'bagging_freq': 3, 'reg_alpha': 0.010366330382332629, 'reg_lambda': 0.06230439324230827, 'min_gain_to_split': 0.08677302832297547}. Best is trial 0 with value: 0.5389316759051009.
[I 2025-06-03 16:36:33,137] Trial 2 finished with value:

The maximum opset needed by this model is only 9.


Feature importance plot saved to feature_importance_GOOGL.png

Exporting LGBM model to ONNX: lgbm_model_GOOGL.onnx (opset=12)
Model exported to ONNX: lgbm_model_GOOGL.onnx
ONNX model check OK.

--- Foundation Model Forecasting (PatchTST): GOOGL ---

🏁 Workflow completed for GOOGL. Final Status: LightGBM Model Trained

--- Results Summary for GOOGL ---
  Overall Status: LightGBM Model Trained
  Raw Data Shape: (750, 5)
  Featured Data Shape: (750, 105)
  Selected Features Count: 35
  Top LGBM Features: ['kurtosis_20', 'close_cwt_std', 'ADX_14']
  ONNX Model Path: lgbm_model_GOOGL.onnx
----------------------------------------

Total execution time for 2 symbol(s): 24.49 seconds.
DEBUG: Script __main__ block finished.
