In [1]:
# ========================================
# STEP 1: CROSS-PLATFORM DEPENDENCY MANAGEMENT
# ========================================

print("🔧 Setting up dependencies...")

# Import the heavy dependencies; if any is missing, install the full set once
# with pip (targeting the running kernel's interpreter) and import again.
try:
    import pandas, numpy, sklearn, xgboost, matplotlib, seaborn, joblib, tqdm
    import lightgbm as lgb
    import optuna
    _deps_status = "✅ Core dependencies already available"
except ImportError as e:
    print(f"Installing missing dependencies: {e}")
    import importlib, sys, subprocess
    pkgs = [
        'pandas', 'numpy', 'scikit-learn', 'lightgbm', 'xgboost',
        'matplotlib', 'seaborn', 'joblib', 'tqdm', 'pyarrow', 'optuna'
    ]
    subprocess.check_call([sys.executable, '-m', 'pip', 'install'] + pkgs)
    # Packages installed while the interpreter is running are only guaranteed
    # to be discoverable after the import-system caches are invalidated.
    importlib.invalidate_caches()
    # Re-bind BOTH aliases used by later cells. The ImportError may have been
    # raised before `import lightgbm as lgb` ran, which would otherwise leave
    # `lgb` undefined for the LightGBM objective.
    import lightgbm as lgb
    import optuna
    _deps_status = "✅ Dependencies installed"

# Optional Optuna helpers: they live in extras that may be absent (or fail to
# import for other reasons), so fall back to None and let callers check.
try:
    from optuna.integration import LightGBMPruningCallback
except Exception:
    LightGBMPruningCallback = None
try:
    from optuna.importance import get_param_importances
except Exception:
    get_param_importances = None
print(_deps_status)

# Google Drive (Colab) support ------------------------------------------------
# Decide where artefacts are persisted: Google Drive when running inside Colab,
# otherwise a folder next to the notebook. An ImportError raised by either the
# import or the mount call selects the local fallback.
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
except ImportError:
    IS_COLAB = False
else:
    IS_COLAB = True

if IS_COLAB:
    BASE_DIR = '/content/drive/MyDrive/spy_prediction_models'
    print("✅ Google Drive mounted → remote persistence enabled")
else:
    BASE_DIR = './spy_prediction_models'
    print("✅ Local environment detected – saving locally")

# Core imports ---------------------------------------------------------------
import os, warnings, collections, json, itertools, random, time
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight
import joblib

# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')

# Paths ----------------------------------------------------------------------
# Everything lives under BASE_DIR: input CSVs, saved models, and the results log.
DATA_DIR, MODEL_DIR, TXT_RESULTS_PATH = (
    os.path.join(BASE_DIR, _leaf)
    for _leaf in ('spy_data_export', 'blud', 'brothaman.txt')
)

# Create the output folders up front (DATA_DIR is expected to exist already).
for _dir in (MODEL_DIR, '/tmp/training_chunks'):
    os.makedirs(_dir, exist_ok=True)

print(f"📁 MODEL_DIR  : {MODEL_DIR}")
print(f"📄 RESULTS TXT: {TXT_RESULTS_PATH}")

🔧 Setting up dependencies...
Installing missing dependencies: No module named 'optuna'
✅ Dependencies installed
Mounted at /content/drive
✅ Google Drive mounted → remote persistence enabled
📁 MODEL_DIR  : /content/drive/MyDrive/spy_prediction_models/blud
📄 RESULTS TXT: /content/drive/MyDrive/spy_prediction_models/brothaman.txt


In [2]:
# ========================================
# STEP 2: UTILITY FUNCTIONS
# ========================================
print("🛠️  Defining utility helpers …")

def left_asof(df, ts_col, target):
    """Return the last row of ``df`` whose ``ts_col`` value is <= ``target``.

    ``df[ts_col]`` must already be sorted ascending (binary search is used).
    Returns ``None`` when every value in the column is greater than ``target``.
    """
    insert_at = df[ts_col].searchsorted(target, side='right')
    if insert_at < 1:
        return None
    return df.iloc[insert_at - 1]

def build_feature_vector(raw_ohlcv, iso_ohlc, tf, tf_list):
    """Assemble the flat feature vector for one candle.

    Layout of the returned 1-D array:
      * the 5 values of ``raw_ohlcv`` (unpacked as open, high, low, close, volume)
      * the 4 values of ``iso_ohlc``
      * a one-hot encoding of ``tf`` over ``tf_list``
      * 5 derived values: (high-low)/close, (close-open)/open, (high-close)/close,
        (close-low)/close, volume/1e6. A ratio whose denominator is falsy
        (zero) is replaced by 0.
    """
    open_, high, low, close, volume = raw_ohlcv

    timeframe_one_hot = [1 if tf == candidate else 0 for candidate in tf_list]
    derived = [
        (high - low) / close if close else 0,
        (close - open_) / open_ if open_ else 0,
        (high - close) / close if close else 0,
        (close - low) / close if close else 0,
        volume / 1_000_000,
    ]

    return np.array([*raw_ohlcv, *iso_ohlc, *timeframe_one_hot, *derived])

def parse_vector_column(col):
    """Convert one cell of a vector-valued column into a NumPy array.

    Accepted inputs:
      * ``None`` or a scalar missing value (NaN/NaT/pd.NA) -> ``None``
      * a string such as ``"[1.0, 2.0, 3.0]"`` (optionally wrapped in double
        quotes) -> float array, or ``None`` if any element is not a number
      * a list / tuple / ndarray / Series -> ``np.array(col)``

    The missing-value test is applied to scalars only: ``pd.isna`` on a
    sequence returns an element-wise array, and using that array in a boolean
    context raises ``ValueError`` for any sequence longer than one element.
    """
    if col is None:
        return None
    if isinstance(col, str):
        text = col.strip('[]"')
        try:
            return np.array([float(part.strip()) for part in text.split(',')])
        except ValueError:
            return None
    if isinstance(col, (list, tuple, np.ndarray, pd.Series)):
        return np.array(col)
    if pd.isna(col):
        return None
    return np.array(col)

def timestamp_generator(raw_data, stop_ts, minutes=60):
    """Yield consecutive ``(window_start, window_end)`` pairs up to ``stop_ts``.

    The first window starts at the earliest ``'timestamp'`` found across all
    frames in ``raw_data`` (a mapping of name -> DataFrame). Each window is
    ``minutes`` long; the final one is truncated so it never ends after
    ``stop_ts``. Nothing is yielded when the earliest timestamp is not
    before ``stop_ts``.
    """
    window_start = min(frame['timestamp'].min() for frame in raw_data.values())
    step = pd.Timedelta(minutes=minutes)
    while window_start < stop_ts:
        window_end = window_start + step
        yield window_start, (stop_ts if stop_ts < window_end else window_end)
        window_start = window_end

class SampleBalancer:
    """Counts how many samples have been recorded per timeframe.

    NOTE: balancing is currently a no-op. ``should_add`` accepts every sample
    and ``max_ratio`` is stored but not consulted anywhere in this class.
    """

    def __init__(self, max_ratio=3):
        # Kept for interface compatibility; not used by should_add.
        self.max_ratio = max_ratio
        # timeframe -> number of samples recorded through add()
        self.tf_seen   = collections.Counter()

    def should_add(self, tf):
        """Return True for every timeframe (no sample is ever rejected)."""
        # The previous implementation special-cased '4h' but returned True on
        # both paths, so the branch carried no behaviour.
        return True

    def add(self, tf):
        """Record one more sample for timeframe ``tf``."""
        self.tf_seen[tf] += 1

print("✅ Utility functions ready")

🛠️  Defining utility helpers …
✅ Utility functions ready


In [3]:
# ========================================
# STEP 3: LOAD DATA FOR 1D & 4H
# ========================================
print("📊 Loading 1D and 4H CSV files …")
TIMEFRAMES_ORDERED = ['1d', '4h']
csv_files = {'1d': 'spy_1d.csv', '4h': 'spy_4h.csv'}

# raw_data[tf]  -> candles for that timeframe, sorted by timestamp
# date_info[tf] -> first/last timestamp and number of candles
raw_data = {}
date_info = {}
for tf in TIMEFRAMES_ORDERED:
    fp = os.path.join(DATA_DIR, csv_files[tf])
    if not os.path.exists(fp):
        print(f"❌ {fp} missing – abort")
        raise FileNotFoundError(fp)

    # Parse the timestamp column, then order chronologically with a fresh index.
    df = (
        pd.read_csv(fp)
        .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
        .sort_values('timestamp')
        .reset_index(drop=True)
    )
    raw_data[tf] = df

    timestamps = df['timestamp']
    date_info[tf] = dict(start=timestamps.min(), end=timestamps.max(), count=len(df))
    print(f"✅ {tf}: {len(df):,} candles ({date_info[tf]['start']} → {date_info[tf]['end']})")

# Overlap of all timeframes: from the latest first candle to the earliest last candle.
latest_start = max(info['start'] for info in date_info.values())
earliest_end = min(info['end'] for info in date_info.values())


def _dates_in_overlap(frame):
    """Calendar dates of ``frame`` whose timestamp lies in [latest_start, earliest_end]."""
    ts = frame['timestamp']
    inside = (ts >= latest_start) & (ts <= earliest_end)
    return set(frame[inside]['timestamp'].dt.date.unique())


# Only days present in BOTH timeframes can be used for testing.
common_dates = _dates_in_overlap(raw_data['1d'])
common_dates &= _dates_in_overlap(raw_data['4h'])
all_days = sorted(common_dates)

# Hold out the most recent days (at most 35) as the test period.
TEST_DAYS = min(35, len(all_days))
selected_days = all_days[-TEST_DAYS:]

# Expand the first/last held-out day to full-day UTC boundaries.
_first_day, _last_day = selected_days[0], selected_days[-1]
test_start = pd.Timestamp.combine(_first_day, pd.Timestamp.min.time()).tz_localize('UTC')
test_end   = pd.Timestamp.combine(_last_day, pd.Timestamp.max.time()).tz_localize('UTC')

print(f"🎯 Test period: {test_start.date()} → {test_end.date()}  ({TEST_DAYS} trading days)")

📊 Loading 1D and 4H CSV files …
✅ 1d: 2,547 candles (2014-12-23 14:30:00+00:00 → 2025-02-07 14:30:00+00:00)
✅ 4h: 3,058 candles (2019-01-07 14:30:00+00:00 → 2025-02-10 14:30:00+00:00)
🎯 Test period: 2024-12-17 → 2025-02-07  (35 trading days)


In [4]:
# ========================================
# STEP 4: OPTIMIZER CLASS
# ========================================
print("🤖 Building optimizer …")
from lightgbm import LGBMClassifier
class Optimizer:
    """State container for the adaptive hyperparameter search.

    Keeps, per ``(model_name, timeframe)`` key: the best parameters seen so
    far, a bounded history of evaluated parameter sets, per-parameter mutation
    weights/jitters, and the exploration schedule. Also builds and caches the
    per-timeframe train/validation/test matrices and the Optuna studies.

    Depends on notebook globals defined in other cells: ``raw_data``,
    ``test_start``, ``test_end``, ``TIMEFRAMES_ORDERED``, ``TXT_RESULTS_PATH``,
    ``SampleBalancer``, ``timestamp_generator``, ``parse_vector_column``,
    ``build_feature_vector``, ``optuna``, ``lgb`` and ``LightGBMPruningCallback``.
    """

    def __init__(self):
        self.scaler = StandardScaler()
        self.balancer = SampleBalancer()
        self.best = {}  # key = (model, tf)
        self._init_results_file()
        # Direction-aware tuning state -----------------------------------
        # history[(model, tf)] = deque of dicts: {params, acc, changed_params}
        self.history = collections.defaultdict(lambda: collections.deque(maxlen=300))
        # param_stats[(model, tf)][param] = {weight, jitter}
        self.param_stats = {}
        # search_state[(model, tf)] = {last_params, last_acc, stagnation_count, exploration, max_changes}
        self.search_state = {}
        # Per-timeframe scalers, training splits, and eval cache ----------
        self.scalers_tf = {}
        self.train_data = {}
        self.eval_cache = {}
        # Bayes/Optuna studies per model-tf
        self.studies = {}

    # ------------------------------------------------------------------
    def _init_results_file(self):
        """Append a session header to the results text file."""
        # UTF-8 is forced because log_best writes an emoji; the platform
        # default encoding (e.g. cp1252 on Windows) cannot encode it.
        with open(TXT_RESULTS_PATH, 'a', encoding='utf-8') as f:
            f.write('\n'+'='*90+'\n')
            f.write(f"NEW SESSION {datetime.now():%Y-%m-%d %H:%M:%S}\n")
            f.write('='*90+'\n')

    def log_best(self, model_name, tf, params, acc):
        """Record ``params``/``acc`` as the new best for ``(model_name, tf)`` if it improves.

        An entry without a stored ``'acc'`` (e.g. a seeded configuration) is
        always overwritten. On update the result is appended to the results
        file and printed.
        """
        key = (model_name, tf)
        current_acc = self.best[key].get('acc', None) if key in self.best else None
        if (current_acc is None) or (acc > current_acc):
            self.best[key] = {'params': params, 'acc': acc, 'time': datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
            with open(TXT_RESULTS_PATH, 'a', encoding='utf-8') as f:
                f.write(f"🏆 {model_name} {tf} {acc:.4f} {params}\n")
            print(f"🏆 NEW BEST {model_name} {tf}: {acc:.4f}")

    # ------------------------------------------------------------------
    def _init_param_stats_if_needed(self, model_name, tf, grid, default_jitter):
        """Create default weight/jitter stats and search state for a key if absent."""
        key = (model_name, tf)
        stats = self.param_stats.setdefault(key, {})
        for p in grid.keys():
            if p not in stats:
                stats[p] = {
                    'weight': 1.0,
                    'jitter': default_jitter
                }
        if key not in self.search_state:
            self.search_state[key] = {
                'last_params': None,
                'last_acc': None,
                'stagnation_count': 0,
                'exploration': 0.2,   # overwritten by current_search_hyperparams on first use
                'max_changes': 1      # overwritten by current_search_hyperparams on first use
            }

    def get_param_weights(self, model_name, tf, grid, default_jitter):
        """Return ``{param: weight}`` for every parameter in ``grid``."""
        self._init_param_stats_if_needed(model_name, tf, grid, default_jitter)
        key = (model_name, tf)
        return {p: self.param_stats[key][p]['weight'] for p in grid.keys()}

    def get_param_jitters(self, model_name, tf, grid, default_jitter):
        """Return ``{param: jitter}`` for every parameter in ``grid``."""
        self._init_param_stats_if_needed(model_name, tf, grid, default_jitter)
        key = (model_name, tf)
        return {p: self.param_stats[key][p]['jitter'] for p in grid.keys()}

    def _nearly_equal(self, a, b, tol=1e-9):
        """Compare as floats within ``tol``; fall back to ``==`` if not convertible."""
        try:
            return abs(float(a) - float(b)) <= tol
        except Exception:
            return a == b

    def _changed_params(self, prev_params, curr_params):
        """List the keys of ``curr_params`` whose value differs from ``prev_params``.

        With no previous parameters every key counts as changed. Float values
        are compared with a tolerance of 1e-8.
        """
        if prev_params is None:
            return list(curr_params.keys())
        changed = []
        for k, v in curr_params.items():
            pv = prev_params.get(k, None)
            if isinstance(v, float) or isinstance(pv, float):
                if not self._nearly_equal(pv, v, tol=1e-8):
                    changed.append(k)
            else:
                if pv != v:
                    changed.append(k)
        return changed

    def record_result(self, model_name, tf, grid, params, acc, base_exploration, base_max_changes, default_jitter):
        """Feed one evaluated ``params``/``acc`` pair back into the search state.

        Relative to the previously recorded result for the same key, the
        accuracy delta is credited to every parameter that changed: its weight
        moves by ``0.5 * delta`` (clamped to [0.1, 5.0]) and its jitter is
        scaled by ``1 + 0.5 * delta`` (clamped to [0.02, 0.3]). Five
        consecutive non-improving updates raise exploration and the allowed
        number of simultaneous changes; otherwise both decay toward the base
        values. The result is always appended to the history.
        """
        key = (model_name, tf)
        self._init_param_stats_if_needed(model_name, tf, grid, default_jitter)
        state = self.search_state[key]
        last_acc = state['last_acc']
        last_params = state['last_params']

        changed = self._changed_params(last_params, params)
        delta = None if last_acc is None else (acc - last_acc)

        # Update param weights/jitters based on delta
        if delta is not None and changed:
            for p in changed:
                ps = self.param_stats[key][p]
                # Update weight with bounded additive rule
                ps['weight'] += (0.5 * delta)
                ps['weight'] = max(0.1, min(ps['weight'], 5.0))
                # Update jitter slightly toward success direction
                ps['jitter'] *= (1.0 + (0.5 * delta))
                ps['jitter'] = max(0.02, min(ps['jitter'], 0.3))

            # Stagnation detection and exploration scheduling
            # NOTE(review): this only runs when at least one parameter changed;
            # re-evaluating identical params does not count toward stagnation.
            if delta <= 1e-5:
                state['stagnation_count'] += 1
            else:
                state['stagnation_count'] = 0

            if state['stagnation_count'] >= 5:
                # Temporarily increase exploration and allow more changes
                state['exploration'] = min(0.5, base_exploration + 0.15)
                state['max_changes'] = min(base_max_changes + 1, max(1, len(grid)//2))
            else:
                # Anneal exploration back toward base
                state['exploration'] = max(base_exploration, state['exploration'] * 0.9)
                state['max_changes'] = max(base_max_changes, int(round(state['max_changes'] * 0.9)))

        # Append to history
        self.history[key].append({
            'params': dict(params),
            'acc': acc,
            'changed_params': changed
        })

        # Update last state
        state['last_params'] = dict(params)
        state['last_acc'] = acc

    def current_search_hyperparams(self, model_name, tf, base_exploration, base_max_changes, grid, default_jitter):
        """Return ``(exploration, max_changes)`` for the key, seeding them from the base values before the first result."""
        key = (model_name, tf)
        self._init_param_stats_if_needed(model_name, tf, grid, default_jitter)
        state = self.search_state[key]
        # Initialize with base if first time
        if state['last_acc'] is None:
            state['exploration'] = base_exploration
            state['max_changes'] = base_max_changes
        return state['exploration'], state['max_changes']

    # ------------------- dataset preparation ---------------------------
    def _extract_rows(self, df, tf):
        """Run ``extract`` on every row of ``df``; return ``(features, labels)`` lists of the usable rows."""
        feats, labels = [], []
        for _, row in df.iterrows():
            fv, lbl = self.extract(row, tf)
            if fv is not None:
                feats.append(fv)
                labels.append(lbl)
        return feats, labels

    def prepare_datasets(self):
        """Build per-timeframe training/validation sets and evaluation caches.

        For each timeframe, rows before ``test_start`` are split chronologically
        80/20 into train/validation; the scaler is fit on the training part
        only. Rows inside ``[test_start, test_end]`` are scaled with the same
        scaler and stored in ``eval_cache``. Timeframes without usable
        training rows are skipped.
        """
        self.scalers_tf = {}
        self.train_data = {}
        self.eval_cache = {}

        for tf in TIMEFRAMES_ORDERED:
            df_tf = raw_data[tf]
            train_df = df_tf[df_tf['timestamp'] < test_start].copy()
            if len(train_df) == 0:
                continue

            X_list, y_list = self._extract_rows(train_df, tf)
            if not X_list:
                continue

            X = np.array(X_list)
            y = np.array(y_list)
            split = int(len(X) * 0.8)
            if split == 0:
                # A single usable row leaves nothing to fit the scaler on.
                continue
            scaler = StandardScaler()
            scaler.fit(X[:split])
            X_scaled = scaler.transform(X)
            X_tr, X_val = X_scaled[:split], X_scaled[split:]
            y_tr, y_val = y[:split], y[split:]
            sample_weight_tr = compute_sample_weight('balanced', y_tr)

            self.scalers_tf[tf] = scaler
            self.train_data[tf] = {
                'X_tr': X_tr, 'X_val': X_val, 'y_tr': y_tr, 'y_val': y_val,
                'sample_weight_tr': sample_weight_tr
            }

            # Build evaluation cache for this timeframe
            in_test = (df_tf['timestamp'] >= test_start) & (df_tf['timestamp'] <= test_end)
            X_eval, y_eval = self._extract_rows(df_tf[in_test], tf)
            if X_eval:
                self.eval_cache[tf] = {
                    # One batched transform instead of one call per row.
                    'X': scaler.transform(np.array(X_eval)),
                    'y': np.array(y_eval)
                }

    # ------------------- time-series CV utils ----------------------------
    def ts_cv_splits(self, X, n_splits=4, purge_frac=0.0):
        """Yield expanding-window ``(train_idx, val_idx)`` index arrays.

        The data is cut into ``n_splits + 1`` equal folds (any remainder at
        the end is unused). Split ``i`` trains on folds ``0..i-1`` minus the
        last ``int(fold * purge_frac)`` rows and validates on fold ``i``.
        Splits whose training or validation part would be empty (too few
        rows, or a purge that removes the whole first fold) are skipped
        instead of being yielded as empty arrays.
        """
        n = len(X)
        fold = n // (n_splits + 1)
        purge = int(fold * purge_frac)
        for i in range(1, n_splits + 1):
            train_end = fold * i
            val_end = fold * (i + 1)
            train_idx_end = max(0, train_end - purge)
            if train_idx_end == 0 or val_end <= train_end:
                continue
            yield np.arange(0, train_idx_end), np.arange(train_end, val_end)

    # ------------------- Optuna search wrappers --------------------------
    def get_study(self, name):
        """Return the Optuna study registered under ``name``, creating it on first use.

        New studies maximise the objective with a multivariate, grouped TPE
        sampler (8 startup trials) and a median pruner (5 startup trials).
        """
        if name not in self.studies:
            self.studies[name] = optuna.create_study(
                direction='maximize',
                sampler=optuna.samplers.TPESampler(multivariate=True, group=True, n_startup_trials=8),
                pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=0)
            )
        return self.studies[name]

    @staticmethod
    def _suggest_from_grid(trial, grid, k):
        """Ask ``trial`` for a value of parameter ``k`` bounded by ``grid[k]``.

        Grids whose first entry is a float become a continuous range
        ``[min, max]`` (log-scaled when ``max/min > 20`` and ``min > 0``);
        all other grids are treated as categorical choices.
        """
        vals = grid[k]
        if isinstance(vals[0], float):
            lo, hi = min(vals), max(vals)
            log = (hi/lo) > 20 if lo > 0 else False
            return trial.suggest_float(k, lo, hi, log=log)
        return trial.suggest_categorical(k, vals)

    def _stacked_train_val(self, tf):
        """Return ``(X, y)`` with train and validation parts re-joined, or ``None`` if ``tf`` has no data."""
        td = self.train_data.get(tf)
        if td is None:
            return None
        X = np.vstack([td['X_tr'], td['X_val']])
        y = np.hstack([td['y_tr'], td['y_val']])
        return X, y

    def _mean_cv_accuracy(self, X, y, fit_predict):
        """Average validation accuracy of ``fit_predict(X_tr, y_tr, X_va, y_va)`` over the CV splits.

        Returns 0.0 when the data is too small to form any split.
        """
        accs = []
        for tr_idx, va_idx in self.ts_cv_splits(X, n_splits=4, purge_frac=0.1):
            pred = fit_predict(X[tr_idx], y[tr_idx], X[va_idx], y[va_idx])
            accs.append(accuracy_score(y[va_idx], pred))
        return float(np.mean(accs)) if accs else 0.0

    def optuna_objective_gb(self, tf, grid, base_params):
        """Build an Optuna objective for ``GradientBoostingClassifier`` on timeframe ``tf``.

        Each trial starts from ``base_params``, overrides every key of ``grid``
        with a suggested value, and is scored by mean time-series CV accuracy.
        Without data for ``tf`` the objective constantly returns 0.0.
        """
        data = self._stacked_train_val(tf)
        if data is None:
            return lambda trial: 0.0
        X, y = data
        keys = list(grid.keys())

        def fit_predict(params):
            def run(X_tr, y_tr, X_va, y_va):
                sw = compute_sample_weight('balanced', y_tr)
                model = GradientBoostingClassifier(random_state=42, **params)
                model.fit(X_tr, y_tr, sample_weight=sw)
                return model.predict(X_va)
            return run

        def objective(trial):
            # Start from base and perturb bounded by grid
            params = dict(base_params) if base_params else {}
            for k in keys:
                params[k] = self._suggest_from_grid(trial, grid, k)
            return self._mean_cv_accuracy(X, y, fit_predict(params))
        return objective

    def optuna_objective_lgb(self, tf, grid, base_params):
        """Build an Optuna objective for ``lgb.LGBMClassifier`` on timeframe ``tf``.

        Same scheme as ``optuna_objective_gb``; each fold trains with early
        stopping (50 rounds) on the fold's validation AUC and, when the
        pruning callback is available, reports that AUC to the trial.
        """
        data = self._stacked_train_val(tf)
        if data is None:
            return lambda trial: 0.0
        X, y = data
        keys = list(grid.keys())

        def fit_predict(trial, params):
            def run(X_tr, y_tr, X_va, y_va):
                model = lgb.LGBMClassifier(
                    objective='binary', boosting_type='gbdt',
                    class_weight='balanced', random_state=42, verbose=-1,
                    device_type='gpu', gpu_device_id=0,
                    **params
                )
                callbacks = [lgb.early_stopping(50), lgb.log_evaluation(0)]
                if LightGBMPruningCallback is not None:
                    # NOTE(review): the same trial is reported to from every
                    # fold, so boosting-round steps repeat across folds —
                    # confirm this is the intended pruning behaviour.
                    callbacks.append(LightGBMPruningCallback(trial, 'auc'))
                model.fit(
                    X_tr, y_tr,
                    eval_set=[(X_va, y_va)],
                    eval_metric='auc',
                    callbacks=callbacks
                )
                return model.predict(X_va)
            return run

        def objective(trial):
            params = dict(base_params) if base_params else {}
            for k in keys:
                params[k] = self._suggest_from_grid(trial, grid, k)
            return self._mean_cv_accuracy(X, y, fit_predict(trial, params))
        return objective

    # ------------------------------------------------------------------
    def feature_columns(self):
        """Names of the feature-vector positions, in the order produced by ``build_feature_vector``."""
        return (
            ['raw_o','raw_h','raw_l','raw_c','raw_v']+
            ['iso_0','iso_1','iso_2','iso_3']+
            [f'tf_{t}' for t in TIMEFRAMES_ORDERED]+[
            'hl_range','price_change','upper_shadow','lower_shadow','volume_m']
        )

    # ------------------- data collection -----------------------------
    def collect_training(self):
        """Collect features of all timeframes before ``test_start`` into one scaled matrix.

        Rows are gathered in 60-minute windows, timeframe by timeframe within
        each window. ``self.scaler`` is fit on the first 80% of rows and then
        applied to all of them. Returns ``(X_scaled, y, split_index)``.
        """
        X, y = [], []
        for s, e in timestamp_generator(raw_data, test_start, 60):
            for tf in TIMEFRAMES_ORDERED:
                df = raw_data[tf]
                chunk = df[(df['timestamp'] >= s) & (df['timestamp'] < e)]
                feats, labels = self._extract_rows(chunk, tf)
                X.extend(feats)
                y.extend(labels)
        X = np.array(X); y = np.array(y)
        split = int(len(X)*0.8)
        self.scaler.fit(X[:split])
        X = self.scaler.transform(X)
        return X, y, split

    def extract(self, row, tf):
        """Turn one candle row into ``(feature_vector, label)``.

        Returns ``(None, None)`` when the row lacks a parsable 5-value
        ``raw_ohlcv_vec``, a parsable 4-value ``iso_ohlc``, or a ``future`` label.
        """
        raw = parse_vector_column(row.get('raw_ohlcv_vec'))
        iso = parse_vector_column(row.get('iso_ohlc'))
        fut = row.get('future')
        if raw is None or iso is None or pd.isna(fut):
            return None, None
        if len(raw) != 5 or len(iso) != 4:
            return None, None
        return build_feature_vector(raw, iso, tf, TIMEFRAMES_ORDERED), int(fut)

optimizer = Optimizer()

# Pre-seed optimizer with parameter configurations ONLY. No 'acc' is stored, so
# the first log_best() call for each key always records the recomputed baseline.
_SEED_PARAMS = {
    ('GradientBoosting', '1d'): dict(
        n_estimators=150,
        max_depth=5,
        learning_rate=0.1625,
        subsample=0.8043,
        min_samples_split=2,
        min_samples_leaf=1,
    ),
    ('GradientBoosting', '4h'): dict(
        n_estimators=75,
        max_depth=9,
        learning_rate=0.12,
        subsample=0.85,
        min_samples_split=12,
        min_samples_leaf=1,
    ),
    ('LightGBM_Financial', '4h'): dict(
        num_leaves=60,
        max_depth=6,
        learning_rate=0.1,
        n_estimators=300,
        reg_alpha=0.1,
        reg_lambda=0.1,
        min_child_samples=20,
        subsample=0.8,
        colsample_bytree=0.85,
        subsample_freq=2,
        feature_fraction_bynode=0.9,
        extra_trees=True,
    ),
}
optimizer.best = {
    model_tf: {'params': dict(seed)} for model_tf, seed in _SEED_PARAMS.items()
}
print("✅ Optimizer ready (seeded with params; accuracies will be computed on first run)")

🤖 Building optimizer …
✅ Optimizer ready (seeded with params; accuracies will be computed on first run)


In [5]:
# ========================================
# STEP 5: ADAPTIVE HYPERPARAMETER SEARCH
# ========================================
print("🚀 Starting adaptive optimization loop (Ctrl+C to stop) …")

# Focused hyperparameter ranges per model/timeframe --------
# Keep 1D GB conservative (fewer knobs) and 4H GB richer (more knobs)
# Candidate values per hyperparameter. Float lists also define the [min, max]
# range used when a value is jittered or sampled continuously.

# GradientBoosting, daily candles: shallower trees, lower learning rates.
GB_1D_GRID = {
    'n_estimators':      [100, 120, 150, 180, 200],
    'max_depth':         [3, 4, 5, 6],
    'learning_rate':     [0.08, 0.10, 0.12, 0.14, 0.16, 0.18],
    'subsample':         [0.75, 0.8, 0.85, 0.9],
    'min_samples_split': [2, 5, 8, 10, 12],
    'min_samples_leaf':  [1, 2, 3],
}

# GradientBoosting, 4-hour candles: deeper trees, fewer estimators.
GB_4H_GRID = {
    'n_estimators':      [40, 50, 60, 75, 100, 150],
    'max_depth':         [6, 7, 8, 9, 10],
    'learning_rate':     [0.12, 0.15, 0.18, 0.2, 0.22],
    'subsample':         [0.7, 0.8, 0.85, 0.9],
    'min_samples_split': [2, 5, 8, 10, 12],
    'min_samples_leaf':  [1, 2, 3],
}

# LightGBM "Financial" configuration.
LGB_FIN_GRID = {
    'num_leaves':              [31, 40, 50, 60, 70],
    'max_depth':               [6, 8, 10],
    'learning_rate':           [0.03, 0.04, 0.05, 0.07, 0.1],
    'n_estimators':            [150, 200, 250, 300, 400],
    'reg_alpha':               [0.0, 0.001, 0.01, 0.1],
    'reg_lambda':              [0.0, 0.001, 0.01, 0.1],
    'min_child_samples':       [10, 15, 20, 30],
    'subsample':               [0.8, 0.85, 0.9],
    'colsample_bytree':        [0.8, 0.85, 0.9],
    'subsample_freq':          [1, 2],
    'feature_fraction_bynode': [0.7, 0.8, 0.9],
    'extra_trees':             [False, True],
}

def _jitter_value(base_val, choices, jitter_frac):
    # For float-like grids, apply multiplicative jitter then clamp to range
    lo, hi = min(choices), max(choices)
    val = base_val * (1 + random.uniform(-jitter_frac, jitter_frac))
    return float(min(max(val, lo), hi))

def propose_params(
    grid,
    base=None,
    max_param_changes=1,
    exploration_prob=0.2,
    jitter_frac=0.10,
    param_weights=None,
    param_jitters=None,
):
    """Produce the next parameter set to evaluate.

    * Without ``base``, or with probability ``exploration_prob``, every grid
      key gets an independent random grid value (keys of ``base`` that are
      not in ``grid`` are dropped in that case).
    * Otherwise a copy of ``base`` is mutated in between 1 and
      ``max_param_changes`` grid keys. Keys are drawn without replacement,
      weighted by ``param_weights`` when given (floor 0.001, default 1.0),
      uniformly otherwise.
    * A float parameter is jittered via ``_jitter_value`` using its entry in
      ``param_jitters`` or, failing that, ``jitter_frac``.
    * A discrete parameter steps to an adjacent grid entry; if the current
      value is not in the grid, or has no neighbour, a random entry is used.
    """
    if base is None or random.random() < exploration_prob:
        return {name: random.choice(options) for name, options in grid.items()}

    candidate = dict(base)
    names = list(grid.keys())
    n_mutations = max(1, min(max_param_changes, len(names)))

    # Select the parameters to mutate.
    if param_weights:
        floored = [max(0.001, float(param_weights.get(name, 1.0))) for name in names]
        probs = np.array(floored, dtype=float)
        probs = probs / probs.sum()
        to_mutate = list(np.random.choice(names, size=n_mutations, replace=False, p=probs))
    else:
        to_mutate = random.sample(names, n_mutations)

    for name in to_mutate:
        options = grid[name]
        # The fallback draw is evaluated eagerly, i.e. even when `base` has the key.
        current = base.get(name, random.choice(options))

        if isinstance(options[0], float) or isinstance(current, float):
            width = jitter_frac
            if param_jitters and name in param_jitters:
                try:
                    width = float(param_jitters[name])
                except Exception:
                    width = jitter_frac
            candidate[name] = _jitter_value(float(current), options, width)
            continue

        # Discrete/int parameter.
        if current not in options:
            candidate[name] = random.choice(options)
            continue
        pos = options.index(current)
        neighbours = [j for j in (pos - 1, pos + 1) if 0 <= j < len(options)]
        if neighbours:
            candidate[name] = options[random.choice(neighbours)]
        else:
            candidate[name] = random.choice(options)
    return candidate

# Prepare per-timeframe datasets and eval caches
optimizer.prepare_datasets()

# Save base models after initial dataset prep ---------------------------------
# Best effort: each seeded configuration is fit on the training split and
# dumped with joblib; any failure is reported but does not stop the notebook.
try:
    base_dir = os.path.join(MODEL_DIR, 'base models')
    os.makedirs(base_dir, exist_ok=True)
    # Train and save GB 1d
    if '1d' in optimizer.train_data:
        td1 = optimizer.train_data['1d']
        gb1d_params = optimizer.best.get(('GradientBoosting','1d'),{}).get('params', {})
        gb1d = GradientBoostingClassifier(random_state=42, **gb1d_params)
        gb1d.fit(td1['X_tr'], td1['y_tr'], sample_weight=td1['sample_weight_tr'])
        joblib.dump(gb1d, os.path.join(base_dir, 'gb_1d_base.joblib'))
    # Train and save GB 4h
    if '4h' in optimizer.train_data:
        td4 = optimizer.train_data['4h']
        gb4_params = optimizer.best.get(('GradientBoosting','4h'),{}).get('params', {})
        gb4 = GradientBoostingClassifier(random_state=42, **gb4_params)
        gb4.fit(td4['X_tr'], td4['y_tr'], sample_weight=td4['sample_weight_tr'])
        joblib.dump(gb4, os.path.join(base_dir, 'gb_4h_base.joblib'))
        # Train and save LGBM Financial 4h
        lgb_params = optimizer.best.get(('LightGBM_Financial','4h'),{}).get('params', {})
        lgb_fin_base = LGBMClassifier(
            objective='binary', boosting_type='gbdt',
            class_weight='balanced', random_state=42, verbose=-1,
            device_type='gpu', gpu_device_id=0,
            **lgb_params
        )
        # Train base LightGBM fully for the configured n_estimators (no early stopping).
        # class_weight='balanced' already re-weights the classes; LightGBM
        # multiplies class weights with any sample_weight passed to fit(), so
        # also passing the balanced sample weights would balance twice. This
        # matches how the same model is fit in the baseline check and in the
        # Optuna objective.
        lgb_fin_base.fit(td4['X_tr'], td4['y_tr'])
        joblib.dump(lgb_fin_base, os.path.join(base_dir, 'lgb_fin_4h_base.joblib'))
    print("✅ Base models saved in 'base models' folder")
except Exception as e:
    print(f"⚠️ Could not save base models: {e}")

iteration=0
tested_baselines=False
try:
    while True:
        iteration+=1
        print(f"\n🔄 ITERATION {iteration}")
        # First pass: verify and log exact seeded bests so we reach them immediately
        if not tested_baselines:
            print("🧪 Baseline verification pass – testing seeded best configurations exactly…")
            # ---------- GradientBoosting 1D (exact, with threshold calibration) ---------
            gb_base = optimizer.best.get(('GradientBoosting','1d'),{}).get('params')
            if gb_base and '1d' in optimizer.train_data:
                td = optimizer.train_data['1d']
                gb = GradientBoostingClassifier(random_state=42, **gb_base)
                gb.fit(td['X_tr'], td['y_tr'], sample_weight=td['sample_weight_tr'])
                if '1d' in optimizer.eval_cache:
                    X_eval = optimizer.eval_cache['1d']['X']
                    y_eval = optimizer.eval_cache['1d']['y']
                    # Calibrate threshold on validation
                    if len(td['X_val']) > 0:
                        val_proba = gb.predict_proba(td['X_val'])[:, 1]
                        best_thr = 0.5
                        best_val_acc = accuracy_score(td['y_val'], (val_proba >= 0.5).astype(int))
                        for thr in np.linspace(0.30, 0.70, 41):
                            acc_thr = accuracy_score(td['y_val'], (val_proba >= thr).astype(int))
                            if acc_thr > best_val_acc:
                                best_val_acc = acc_thr
                                best_thr = float(thr)
                    else:
                        best_thr = 0.5
                        best_val_acc = float('nan')

                    # REFIT on FULL in-sample (train + val) like the single script
                    X_full = np.vstack([td['X_tr'], td['X_val']])
                    y_full = np.hstack([td['y_tr'], td['y_val']])
                    sw_full = compute_sample_weight('balanced', y_full)

                    gb_full = GradientBoostingClassifier(random_state=42, **gb_base)
                    gb_full.fit(X_full, y_full, sample_weight=sw_full)

                    # Test @ best_thr and @0.50 for reference using the refitted model
                    test_proba = gb_full.predict_proba(X_eval)[:, 1]
                    test_acc_best = accuracy_score(y_eval, (test_proba >= best_thr).astype(int))
                    test_acc_050  = accuracy_score(y_eval, (test_proba >= 0.50).astype(int))
                    print(f"   → GB 1d baseline (REFIT full): val_acc@best_thr={best_val_acc:.4f}, thr={best_thr:.2f}, test_acc@best_thr={test_acc_best:.4f}, test_acc@0.50={test_acc_050:.4f}")
                    optimizer.log_best('GradientBoosting','1d',gb_base,test_acc_best)

            # ---------- GradientBoosting 4H (exact) -------------------------
            # Re-evaluate the currently stored best GradientBoosting/4h parameters
            # without any perturbation. Skipped when no parameters are stored or
            # when there is no 4h training data.
            gb4_base = optimizer.best.get(('GradientBoosting','4h'),{}).get('params')
            if gb4_base and '4h' in optimizer.train_data:
                td4 = optimizer.train_data['4h']
                # Fit on the training split only, with the stored per-sample weights.
                # Unlike the 1d baseline there is no train+val refit and no
                # threshold calibration here.
                gb4 = GradientBoostingClassifier(random_state=42, **gb4_base)
                gb4.fit(td4['X_tr'], td4['y_tr'], sample_weight=td4['sample_weight_tr'])
                if '4h' in optimizer.eval_cache:
                    X_eval4 = optimizer.eval_cache['4h']['X']
                    y_eval4 = optimizer.eval_cache['4h']['y']
                    # Accuracy of the default `predict` labels on the cached evaluation set.
                    pred = gb4.predict(X_eval4)
                    acc=accuracy_score(y_eval4,pred)
                    # Report the baseline score so later trials are compared against it.
                    optimizer.log_best('GradientBoosting','4h',gb4_base,acc)

            # ---------- LightGBM_Financial 4H (exact, with threshold calibration) --------
            # Re-evaluate the currently stored best LightGBM_Financial/4h parameters
            # without any perturbation. Skipped when no parameters are stored or
            # when there is no 4h training data.
            lgb_base = optimizer.best.get(('LightGBM_Financial','4h'),{}).get('params')
            if lgb_base and '4h' in optimizer.train_data:
                td4 = optimizer.train_data['4h']
                # NOTE(review): device_type='gpu' is hard-coded; presumably this needs a
                # GPU-enabled LightGBM build — confirm a CPU fallback exists elsewhere.
                lgb_fin = LGBMClassifier(
                    objective='binary', boosting_type='gbdt',
                    class_weight='balanced', random_state=42, verbose=-1,
                    device_type='gpu', gpu_device_id=0,
                    **lgb_base
                )
                # Plain fit on the training split: no eval_set and no early stopping,
                # unlike the LightGBM trial loop further down.
                lgb_fin.fit(td4['X_tr'], td4['y_tr'])
                if '4h' in optimizer.eval_cache:
                    X_eval4 = optimizer.eval_cache['4h']['X']
                    y_eval4 = optimizer.eval_cache['4h']['y']
                    # Calibrate threshold on validation
                    if len(td4['X_val']) > 0:
                        val_proba = lgb_fin.predict_proba(td4['X_val'])[:, 1]
                        # Start from 0.5 and only move when a threshold is strictly
                        # better, so ties keep 0.5 or the lowest better threshold.
                        best_thr = 0.5
                        best_val_acc = accuracy_score(td4['y_val'], (val_proba >= 0.5).astype(int))
                        # 41 evenly spaced thresholds from 0.30 to 0.70 (step 0.01).
                        for thr in np.linspace(0.30, 0.70, 41):
                            acc_thr = accuracy_score(td4['y_val'], (val_proba >= thr).astype(int))
                            if acc_thr > best_val_acc:
                                best_val_acc = acc_thr
                                best_thr = float(thr)
                    else:
                        # No validation rows: keep the default threshold; NaN marks
                        # the validation accuracy as unavailable in the log line.
                        best_thr = 0.5
                        best_val_acc = float('nan')
                    # Test @ best_thr and @0.50 for reference
                    test_proba = lgb_fin.predict_proba(X_eval4)[:, 1]
                    test_acc_best = accuracy_score(y_eval4, (test_proba >= best_thr).astype(int))
                    test_acc_050 = accuracy_score(y_eval4, (test_proba >= 0.50).astype(int))
                    print(f"   → LGB 4h baseline: val_acc@best_thr={best_val_acc:.4f}, thr={best_thr:.2f}, test_acc@best_thr={test_acc_best:.4f}, test_acc@0.50={test_acc_050:.4f}")
                    # The accuracy at the calibrated threshold is the score that gets logged.
                    optimizer.log_best('LightGBM_Financial','4h',lgb_base,test_acc_best)

            tested_baselines=True
            # Proceed to next iteration after baseline verification
            continue
        # ---------- GradientBoosting 1D (with threshold calibration) -------
        # Each trial perturbs the current best GradientBoosting/1d parameters,
        # calibrates a decision threshold on the validation split, refits on
        # train+val and scores the refit model on the cached evaluation set.
        trials_1d = 2

        def _changed_keys(base, curr):
            """Return the keys of ``curr`` whose value differs from ``base``.

            Debug helper for the per-trial log lines; it is also reused by the 4h
            sections below, so it is defined once per iteration rather than once
            per trial.

            Args:
                base: reference parameter dict, or a falsy value when there is none.
                curr: proposed parameter dict.

            Returns:
                List of keys from ``curr``. Every key is returned when ``base`` is
                falsy. Float values are compared through ``optimizer._nearly_equal``
                (tol=1e-8); everything else with ``!=``.
            """
            if not base:
                return list(curr.keys())
            keys = []
            for k, v in curr.items():
                pv = base.get(k, None)
                if pv is None or v is None:
                    # A missing/None side cannot go through the numeric tolerance
                    # comparison; it counts as changed unless both sides are None.
                    if pv is not v:
                        keys.append(k)
                elif isinstance(v, float) or isinstance(pv, float):
                    if not optimizer._nearly_equal(pv, v, tol=1e-8):
                        keys.append(k)
                elif pv != v:
                    keys.append(k)
            return keys

        # On every 20th iteration exploration_prob is forced to 1.0 for every
        # proposal made in that iteration (loop-invariant, so computed once).
        force_random = (iteration % 20 == 0)

        for _ in range(trials_1d):
            # Re-read the best parameters each trial: a previous trial may have improved them.
            gb_base = optimizer.best.get(('GradientBoosting','1d'),{}).get('params')
            gb1d_weights = optimizer.get_param_weights('GradientBoosting','1d', GB_1D_GRID, default_jitter=0.08)
            gb1d_jitters = optimizer.get_param_jitters('GradientBoosting','1d', GB_1D_GRID, default_jitter=0.08)
            gb1d_explore, gb1d_changes = optimizer.current_search_hyperparams('GradientBoosting','1d', base_exploration=0.15, base_max_changes=1, grid=GB_1D_GRID, default_jitter=0.08)
            gb_params = propose_params(
                GB_1D_GRID, gb_base,
                max_param_changes=gb1d_changes,
                exploration_prob=(1.0 if force_random else gb1d_explore),
                jitter_frac=0.08,
                param_weights=gb1d_weights,
                param_jitters=gb1d_jitters,
            )
            # Debug: what changed this trial
            gb1d_changed = _changed_keys(gb_base, gb_params)
            if '1d' not in optimizer.train_data:
                break
            td1 = optimizer.train_data['1d']
            # Train-only model: used solely to calibrate the threshold on validation.
            gb = GradientBoostingClassifier(random_state=42, **gb_params)
            gb.fit(td1['X_tr'], td1['y_tr'], sample_weight=td1['sample_weight_tr'])
            if '1d' in optimizer.eval_cache:
                X_eval = optimizer.eval_cache['1d']['X']
                y_eval = optimizer.eval_cache['1d']['y']
                # Calibrate threshold on validation
                if len(td1['X_val']) > 0:
                    val_proba = gb.predict_proba(td1['X_val'])[:, 1]
                    # Start from 0.5; only a strictly better threshold replaces it.
                    best_thr = 0.5
                    best_val_acc = accuracy_score(td1['y_val'], (val_proba >= 0.5).astype(int))
                    for thr in np.linspace(0.30, 0.70, 41):
                        acc_thr = accuracy_score(td1['y_val'], (val_proba >= thr).astype(int))
                        if acc_thr > best_val_acc:
                            best_val_acc = acc_thr
                            best_thr = float(thr)
                else:
                    # No validation rows: default threshold, NaN as "not available".
                    best_thr = 0.5
                    best_val_acc = float('nan')
                # REFIT on FULL in-sample (train + val) like the single script
                X_full = np.vstack([td1['X_tr'], td1['X_val']])
                y_full = np.hstack([td1['y_tr'], td1['y_val']])
                sw_full = compute_sample_weight('balanced', y_full)

                gb_full = GradientBoostingClassifier(random_state=42, **gb_params)
                gb_full.fit(X_full, y_full, sample_weight=sw_full)

                # Test @ best_thr and @0.50 for reference using the refitted model
                # NOTE(review): best_thr was calibrated on the train-only model but is
                # applied to the refit model, and the evaluation-set accuracy below is
                # what drives log_best/record_result — i.e. the search is steered by
                # the same data it reports as "test". Confirm this is intended.
                test_proba = gb_full.predict_proba(X_eval)[:, 1]
                test_acc_best = accuracy_score(y_eval, (test_proba >= best_thr).astype(int))
                test_acc_050  = accuracy_score(y_eval, (test_proba >= 0.50).astype(int))
                print(f"   → GB 1d iter {iteration} (REFIT full): val_acc@best_thr={best_val_acc:.4f}, thr={best_thr:.2f}, test_acc@best_thr={test_acc_best:.4f}, test_acc@0.50={test_acc_050:.4f}, explore={gb1d_explore:.2f}, max_changes={gb1d_changes}, changed={gb1d_changed}")
                optimizer.log_best('GradientBoosting','1d',gb_params,test_acc_best)
                optimizer.record_result('GradientBoosting','1d', GB_1D_GRID, gb_params, test_acc_best, base_exploration=0.15, base_max_changes=1, default_jitter=0.08)

        # ---------- GradientBoosting 4H -----------------------------------
        # Each trial perturbs the current best GradientBoosting/4h parameters,
        # fits on the training split and scores the default `predict` labels on
        # the cached evaluation set (no threshold calibration, no train+val refit).
        trials_4h = 3
        for _ in range(trials_4h):
            # Re-read the best parameters each trial: a previous trial may have improved them.
            gb4_base = optimizer.best.get(('GradientBoosting','4h'),{}).get('params')
            # On every 20th iteration exploration_prob is forced to 1.0 below.
            force_random = (iteration % 20 == 0)
            gb4_weights = optimizer.get_param_weights('GradientBoosting','4h', GB_4H_GRID, default_jitter=0.10)
            gb4_jitters = optimizer.get_param_jitters('GradientBoosting','4h', GB_4H_GRID, default_jitter=0.10)
            gb4_explore, gb4_changes = optimizer.current_search_hyperparams('GradientBoosting','4h', base_exploration=0.20, base_max_changes=2, grid=GB_4H_GRID, default_jitter=0.10)
            gb4_params = propose_params(
                GB_4H_GRID, gb4_base,
                max_param_changes=gb4_changes,
                exploration_prob=(1.0 if force_random else gb4_explore),
                jitter_frac=0.10,
                param_weights=gb4_weights,
                param_jitters=gb4_jitters,
            )
            # Debug: what changed this trial
            # (_changed_keys is the helper defined in the GradientBoosting 1D section above)
            gb4_changed = _changed_keys(gb4_base, gb4_params)
            # Without 4h training data there is nothing to fit: stop this trial loop.
            if '4h' not in optimizer.train_data:
                break
            td4 = optimizer.train_data['4h']
            gb4 = GradientBoostingClassifier(random_state=42, **gb4_params)
            gb4.fit(td4['X_tr'], td4['y_tr'], sample_weight=td4['sample_weight_tr'])
            if '4h' in optimizer.eval_cache:
                X_eval4 = optimizer.eval_cache['4h']['X']
                y_eval4 = optimizer.eval_cache['4h']['y']
                pred = gb4.predict(X_eval4)
                acc_4h_gb=accuracy_score(y_eval4,pred)
                print(f"   → GB 4h iter {iteration}: acc={acc_4h_gb:.4f}, explore={gb4_explore:.2f}, max_changes={gb4_changes}, changed={gb4_changed}")
                # The evaluation-set accuracy is both the candidate's score for
                # log_best and the feedback passed to record_result.
                optimizer.log_best('GradientBoosting','4h',gb4_params,acc_4h_gb)
                optimizer.record_result('GradientBoosting','4h', GB_4H_GRID, gb4_params, acc_4h_gb, base_exploration=0.20, base_max_changes=2, default_jitter=0.10)

        # ---------- LightGBM_Financial 4H (with threshold calibration) ------
        # Each trial perturbs the current best LightGBM_Financial/4h parameters,
        # fits with early stopping on the validation split (when there is one),
        # calibrates a decision threshold on validation and scores the model on
        # the cached evaluation set.
        trials_lgb = 3
        for _ in range(trials_lgb):
            # Re-read the best parameters each trial: a previous trial may have improved them.
            lgb_base = optimizer.best.get(('LightGBM_Financial','4h'),{}).get('params')
            # On every 20th iteration exploration_prob is forced to 1.0 below.
            force_random = (iteration % 20 == 0)
            lgb_weights = optimizer.get_param_weights('LightGBM_Financial','4h', LGB_FIN_GRID, default_jitter=0.12)
            lgb_jitters = optimizer.get_param_jitters('LightGBM_Financial','4h', LGB_FIN_GRID, default_jitter=0.12)
            lgb_explore, lgb_changes = optimizer.current_search_hyperparams('LightGBM_Financial','4h', base_exploration=0.25, base_max_changes=2, grid=LGB_FIN_GRID, default_jitter=0.12)
            lgb_params = propose_params(
                LGB_FIN_GRID, lgb_base,
                max_param_changes=lgb_changes,
                exploration_prob=(1.0 if force_random else lgb_explore),
                jitter_frac=0.12,
                param_weights=lgb_weights,
                param_jitters=lgb_jitters,
            )
            # NOTE(review): device_type='gpu' is hard-coded; presumably this needs a
            # GPU-enabled LightGBM build — confirm a CPU fallback exists elsewhere.
            lgb_fin = LGBMClassifier(
                objective='binary', boosting_type='gbdt',
                class_weight='balanced', random_state=42, verbose=-1,
                device_type='gpu', gpu_device_id=0,
                **lgb_params
            )
            td4 = optimizer.train_data['4h'] if '4h' in optimizer.train_data else None
            if td4 is None:
                break
            # Early stopping needs a non-empty validation set. The calibration code
            # below already supports an empty validation split, so the fit must not
            # fail before reaching it: fall back to a plain fit in that case.
            has_val = len(td4['X_val']) > 0
            if has_val:
                lgb_fin.fit(
                    td4['X_tr'], td4['y_tr'],
                    eval_set=[(td4['X_val'], td4['y_val'])],
                    eval_metric='auc',
                    callbacks=[lgb.early_stopping(100), lgb.log_evaluation(0)]
                )
            else:
                lgb_fin.fit(td4['X_tr'], td4['y_tr'])
            if '4h' in optimizer.eval_cache:
                X_eval4 = optimizer.eval_cache['4h']['X']
                y_eval4 = optimizer.eval_cache['4h']['y']
                # Calibrate threshold on validation
                if has_val:
                    val_proba = lgb_fin.predict_proba(td4['X_val'])[:, 1]
                    # Start from 0.5; only a strictly better threshold replaces it.
                    best_thr = 0.5
                    best_val_acc = accuracy_score(td4['y_val'], (val_proba >= 0.5).astype(int))
                    for thr in np.linspace(0.30, 0.70, 41):
                        acc_thr = accuracy_score(td4['y_val'], (val_proba >= thr).astype(int))
                        if acc_thr > best_val_acc:
                            best_val_acc = acc_thr
                            best_thr = float(thr)
                else:
                    # No validation rows: default threshold, NaN as "not available".
                    best_thr = 0.5
                    best_val_acc = float('nan')
                # Test @ best_thr and @0.50 for reference
                test_proba = lgb_fin.predict_proba(X_eval4)[:, 1]
                test_acc_best = accuracy_score(y_eval4, (test_proba >= best_thr).astype(int))
                test_acc_050 = accuracy_score(y_eval4, (test_proba >= 0.50).astype(int))
                # Debug: what changed this trial (reuse helper)
                lgb_changed = _changed_keys(lgb_base, lgb_params)
                print(f"   → LGB 4h iter {iteration}: val_acc@best_thr={best_val_acc:.4f}, thr={best_thr:.2f}, test_acc@best_thr={test_acc_best:.4f}, test_acc@0.50={test_acc_050:.4f}, explore={lgb_explore:.2f}, max_changes={lgb_changes}, changed={lgb_changed}")
                # The evaluation-set accuracy at the calibrated threshold is both the
                # candidate's score for log_best and the feedback for record_result.
                optimizer.log_best('LightGBM_Financial','4h',lgb_params,test_acc_best)
                optimizer.record_result('LightGBM_Financial','4h', LGB_FIN_GRID, lgb_params, test_acc_best, base_exploration=0.25, base_max_changes=2, default_jitter=0.12)

        # Periodic Optuna kicks to refine local maxima using TS-CV
        # Every 30th iteration each model/timeframe gets an Optuna study seeded with
        # its current best parameters. The study's best parameters are then scored
        # on the cached evaluation set and reported to optimizer.log_best, and the
        # study's parameter importances are fed back into the search weights.
        # Each study runs in its own try block so one failure does not skip the others.
        if iteration % 30 == 0:
            def _feed_importances(study, stats_key):
                """Copy Optuna parameter importances into ``optimizer.param_stats`` weights.

                Best-effort: any failure is reported and otherwise ignored. Only
                parameters already present under ``stats_key`` are updated; each weight
                becomes ``1 + 3 * importance`` clamped to ``[0.1, 5.0]``. Does nothing
                when ``get_param_importances`` could not be imported.
                """
                if get_param_importances is None:
                    return
                try:
                    stats = optimizer.param_stats.get(stats_key, {})
                    for p, imp in get_param_importances(study).items():
                        if p in stats:
                            stats[p]['weight'] = max(0.1, min(5.0, 1.0 + 3.0*imp))
                except Exception as imp_err:
                    print(f"⚠️ Optuna importance warning {stats_key}: {imp_err}")

            # GB 1d
            try:
                # Require the evaluation cache up front: without it the study result
                # could not be scored, so the (expensive) study is not run at all.
                if '1d' in optimizer.train_data and '1d' in optimizer.eval_cache:
                    study = optimizer.get_study('GB_1d')
                    base = optimizer.best.get(('GradientBoosting','1d'),{}).get('params')
                    study.optimize(optimizer.optuna_objective_gb('1d', GB_1D_GRID, base), n_trials=25, n_jobs=1)
                    best_params = study.best_params
                    # Evaluate on test cache and log
                    td1 = optimizer.train_data['1d']

                    # REFIT on FULL in-sample (train + val) like the single script
                    X_full = np.vstack([td1['X_tr'], td1['X_val']])
                    y_full = np.hstack([td1['y_tr'], td1['y_val']])
                    sw_full = compute_sample_weight('balanced', y_full)

                    gb_full = GradientBoostingClassifier(random_state=42, **best_params)
                    gb_full.fit(X_full, y_full, sample_weight=sw_full)

                    X_eval = optimizer.eval_cache['1d']['X']
                    y_eval = optimizer.eval_cache['1d']['y']
                    # NOTE(review): scored with the default `predict` labels, whereas the
                    # GB 1d trial loop logs accuracy at a validation-calibrated threshold;
                    # the two scores compete in the same log_best entry.
                    acc = accuracy_score(y_eval, gb_full.predict(X_eval))
                    optimizer.log_best('GradientBoosting','1d', best_params, acc)
                    # Feed Optuna importance back to direction-aware weights
                    _feed_importances(study, ('GradientBoosting','1d'))
            except Exception as e:
                print(f"⚠️ Optuna phase warning (GB 1d): {e}")

            # GB 4h
            try:
                if '4h' in optimizer.train_data and '4h' in optimizer.eval_cache:
                    study = optimizer.get_study('GB_4h')
                    base = optimizer.best.get(('GradientBoosting','4h'),{}).get('params')
                    study.optimize(optimizer.optuna_objective_gb('4h', GB_4H_GRID, base), n_trials=30, n_jobs=1)
                    best_params = study.best_params
                    td4 = optimizer.train_data['4h']
                    # Train-only fit, matching how the GB 4h trial loop scores candidates.
                    gb4 = GradientBoostingClassifier(random_state=42, **best_params)
                    gb4.fit(td4['X_tr'], td4['y_tr'], sample_weight=td4['sample_weight_tr'])
                    X_eval4 = optimizer.eval_cache['4h']['X']
                    y_eval4 = optimizer.eval_cache['4h']['y']
                    acc = accuracy_score(y_eval4, gb4.predict(X_eval4))
                    optimizer.log_best('GradientBoosting','4h', best_params, acc)
                    _feed_importances(study, ('GradientBoosting','4h'))
            except Exception as e:
                print(f"⚠️ Optuna phase warning (GB 4h): {e}")

            # LGB 4h
            try:
                if '4h' in optimizer.train_data and '4h' in optimizer.eval_cache:
                    study = optimizer.get_study('LGB_4h')
                    base = optimizer.best.get(('LightGBM_Financial','4h'),{}).get('params')
                    study.optimize(optimizer.optuna_objective_lgb('4h', LGB_FIN_GRID, base if base else {}), n_trials=35, n_jobs=1)
                    best_params = study.best_params
                    td4 = optimizer.train_data['4h']
                    lgb_fin = lgb.LGBMClassifier(
                        objective='binary', boosting_type='gbdt',
                        class_weight='balanced', random_state=42, verbose=-1,
                        device_type='gpu', gpu_device_id=0,
                        **best_params
                    )
                    lgb_fin.fit(
                        td4['X_tr'], td4['y_tr'],
                        eval_set=[(td4['X_val'], td4['y_val'])],
                        eval_metric='auc',
                        callbacks=[lgb.early_stopping(50), lgb.log_evaluation(0)]
                    )
                    X_eval4 = optimizer.eval_cache['4h']['X']
                    y_eval4 = optimizer.eval_cache['4h']['y']
                    # NOTE(review): default `predict` labels here vs. calibrated-threshold
                    # accuracy in the LGB 4h trial loop — same log_best entry.
                    acc = accuracy_score(y_eval4, lgb_fin.predict(X_eval4))
                    optimizer.log_best('LightGBM_Financial','4h', best_params, acc)
                    _feed_importances(study, ('LightGBM_Financial','4h'))
            except Exception as e:
                print(f"⚠️ Optuna phase warning (LGB 4h): {e}")

        # Print status every iteration for the first 5, then every 5 thereafter
        if iteration <= 5 or iteration % 5 == 0:
            print("📈 Current bests:")
            for key,val in optimizer.best.items():
                print(f" {key}: {val['acc']:.4f}")
            # Brief direction-aware diagnostics
            # Only GradientBoosting is reported: for each timeframe show the three
            # parameters that currently carry the largest search weight. Any
            # timeframe other than '1d' uses the 4h grid and its default jitter.
            model_name = 'GradientBoosting'
            for tf in TIMEFRAMES_ORDERED:
                if tf == '1d':
                    grid, dj = GB_1D_GRID, 0.08
                else:
                    grid, dj = GB_4H_GRID, 0.10
                w = optimizer.get_param_weights(model_name, tf, grid, dj)
                top = sorted(w.items(), key=lambda x: x[1], reverse=True)[:3]
                print(f"   {model_name} {tf} focus → {top}")
        # Pause between iterations of the optimization loop.
        time.sleep(1)

except KeyboardInterrupt:
    print("⏹️  Optimization halted by user")
    print("🏁 Final best configurations:")
    for key,val in optimizer.best.items():
        print(f" {key}: {val['acc']:.4f}  {val['params']}")
    print(f"Results written to {TXT_RESULTS_PATH}")

🚀 Starting adaptive optimization loop (Ctrl+C to stop) …
✅ Base models saved in 'base models' folder

🔄 ITERATION 1
🧪 Baseline verification pass – testing seeded best configurations exactly…
   → GB 1d baseline (REFIT full): val_acc@best_thr=0.5328, thr=0.31, test_acc@best_thr=0.6857, test_acc@0.50=0.7429
🏆 NEW BEST GradientBoosting 1d: 0.6857
🏆 NEW BEST GradientBoosting 4h: 0.6957
   → LGB 4h baseline: val_acc@best_thr=0.5502, thr=0.30, test_acc@best_thr=0.6232, test_acc@0.50=0.5362
🏆 NEW BEST LightGBM_Financial 4h: 0.6232

🔄 ITERATION 2
   → GB 1d iter 2 (REFIT full): val_acc@best_thr=0.5328, thr=0.30, test_acc@best_thr=0.5714, test_acc@0.50=0.5714, explore=0.15, max_changes=1, changed=['learning_rate']
   → GB 1d iter 2 (REFIT full): val_acc@best_thr=0.5288, thr=0.30, test_acc@best_thr=0.5714, test_acc@0.50=0.6571, explore=0.15, max_changes=1, changed=['n_estimators', 'max_depth', 'learning_rate', 'subsample', 'min_samples_split', 'min_samples_leaf']
   → GB 4h iter 2: acc=0.5072, e

[I 2025-08-18 23:24:11,113] A new study created in memory with name: no-name-800c4cee-dc9e-4253-aa92-60ef53478d13


Early stopping, best iteration is:
[2]	valid_0's auc: 0.547186	valid_0's binary_logloss: 0.692798
   → LGB 4h iter 30: val_acc@best_thr=0.5669, thr=0.30, test_acc@best_thr=0.5797, test_acc@0.50=0.5217, explore=0.40, max_changes=3, changed=['num_leaves', 'learning_rate', 'min_child_samples', 'subsample', 'feature_fraction_bynode', 'extra_trees']


[I 2025-08-18 23:24:14,989] Trial 0 finished with value: 0.48306772908366535 and parameters: {'n_estimators': 150, 'max_depth': 3, 'learning_rate': 0.14112577868437953, 'subsample': 0.7656912871376256, 'min_samples_split': 2, 'min_samples_leaf': 2}. Best is trial 0 with value: 0.48306772908366535.
[I 2025-08-18 23:24:21,768] Trial 1 finished with value: 0.4910358565737052 and parameters: {'n_estimators': 150, 'max_depth': 5, 'learning_rate': 0.17293834278633796, 'subsample': 0.8682312478605234, 'min_samples_split': 12, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.4910358565737052.
[I 2025-08-18 23:24:26,205] Trial 2 finished with value: 0.48854581673306774 and parameters: {'n_estimators': 120, 'max_depth': 4, 'learning_rate': 0.1315349384693784, 'subsample': 0.8602018611505047, 'min_samples_split': 8, 'min_samples_leaf': 1}. Best is trial 1 with value: 0.4910358565737052.
[I 2025-08-18 23:24:33,368] Trial 3 finished with value: 0.4750996015936255 and parameters: {'n_estimators

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's auc: 0.509133	valid_0's binary_logloss: 0.692614
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.523695	valid_0's binary_logloss: 0.693089
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.488359	valid_0's binary_logloss: 0.693642
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:22,330] Trial 0 finished with value: 0.49288107202680065 and parameters: {'num_leaves': 70, 'max_depth': 10, 'learning_rate': 0.08787987383472719, 'n_estimators': 200, 'reg_alpha': 0.05500927252675789, 'reg_lambda': 0.0865481793659917, 'min_child_samples': 20, 'subsample': 0.859810141092431, 'colsample_bytree': 0.8043904312642145, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7662476277918044, 'extra_trees': True}. Best is trial 0 with value: 0.49288107202680065.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.472946	valid_0's binary_logloss: 0.693869
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.532568	valid_0's binary_logloss: 0.69244
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.475911	valid_0's binary_logloss: 0.693945
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.548096	valid_0's binary_logloss: 0.691412
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:23,653] Trial 1 finished with value: 0.4882747068676717 and parameters: {'num_leaves': 31, 'max_depth': 8, 'learning_rate': 0.07642031559123366, 'n_estimators': 300, 'reg_alpha': 0.02416836699544821, 'reg_lambda': 0.049816022085536565, 'min_child_samples': 20, 'subsample': 0.84085323676676, 'colsample_bytree': 0.8830392277513134, 'subsample_freq': 2, 'feature_fraction_bynode': 0.7316328096926676, 'extra_trees': True}. Best is trial 0 with value: 0.49288107202680065.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.474062	valid_0's binary_logloss: 0.693918
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.531182	valid_0's binary_logloss: 0.692393
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.468625	valid_0's binary_logloss: 0.693346
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.509587	valid_0's binary_logloss: 0.693126


[I 2025-08-18 23:30:24,513] Trial 2 finished with value: 0.4979061976549414 and parameters: {'num_leaves': 40, 'max_depth': 10, 'learning_rate': 0.04022869417694978, 'n_estimators': 400, 'reg_alpha': 0.030958905994726494, 'reg_lambda': 0.038545197977416935, 'min_child_samples': 30, 'subsample': 0.8749983693902456, 'colsample_bytree': 0.8420172531383056, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7462802192250104, 'extra_trees': True}. Best is trial 2 with value: 0.4979061976549414.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.481334	valid_0's binary_logloss: 0.693541
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.496831	valid_0's binary_logloss: 0.694557
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.527248	valid_0's binary_logloss: 0.691006
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.5212	valid_0's binary_logloss: 0.693365
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:26,085] Trial 3 finished with value: 0.4966499162479062 and parameters: {'num_leaves': 60, 'max_depth': 10, 'learning_rate': 0.0898995039537879, 'n_estimators': 300, 'reg_alpha': 0.061499694104693615, 'reg_lambda': 0.00637990870661298, 'min_child_samples': 10, 'subsample': 0.8998778488317151, 'colsample_bytree': 0.8501778856552263, 'subsample_freq': 2, 'feature_fraction_bynode': 0.8183065464393258, 'extra_trees': False}. Best is trial 2 with value: 0.4979061976549414.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.510729	valid_0's binary_logloss: 0.693184
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	valid_0's auc: 0.528008	valid_0's binary_logloss: 0.690329
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.518143	valid_0's binary_logloss: 0.693013
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's auc: 0.533581	valid_0's binary_logloss: 0.691746
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:27,036] Trial 4 finished with value: 0.5033500837520939 and parameters: {'num_leaves': 50, 'max_depth': 8, 'learning_rate': 0.06915301459734725, 'n_estimators': 400, 'reg_alpha': 0.05085339991770395, 'reg_lambda': 0.08960377248404115, 'min_child_samples': 20, 'subsample': 0.8988703785247361, 'colsample_bytree': 0.8570496941582748, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7974088803128709, 'extra_trees': True}. Best is trial 4 with value: 0.5033500837520939.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.479755	valid_0's binary_logloss: 0.693999
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	valid_0's auc: 0.538383	valid_0's binary_logloss: 0.689575
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.482848	valid_0's binary_logloss: 0.693717
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's auc: 0.518157	valid_0's binary_logloss: 0.69267
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:27,924] Trial 5 finished with value: 0.5309882747068677 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.031219240342276012, 'n_estimators': 400, 'reg_alpha': 0.06610606611474791, 'reg_lambda': 0.034069414791970766, 'min_child_samples': 20, 'subsample': 0.8427646303166338, 'colsample_bytree': 0.846238223626753, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7114316534607749, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.526471	valid_0's binary_logloss: 0.692877
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[15]	valid_0's auc: 0.546026	valid_0's binary_logloss: 0.689299
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.483271	valid_0's binary_logloss: 0.69378
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[10]	valid_0's auc: 0.526509	valid_0's binary_logloss: 0.6924
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:29,003] Trial 6 finished with value: 0.5062814070351758 and parameters: {'num_leaves': 60, 'max_depth': 10, 'learning_rate': 0.03956298802267477, 'n_estimators': 200, 'reg_alpha': 0.03494567114900831, 'reg_lambda': 0.016826555755230257, 'min_child_samples': 20, 'subsample': 0.8952367138300603, 'colsample_bytree': 0.8198836782656741, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8929902055222158, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.503084	valid_0's binary_logloss: 0.693433
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.537054	valid_0's binary_logloss: 0.692499
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.470922	valid_0's binary_logloss: 0.693471
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[24]	valid_0's auc: 0.529916	valid_0's binary_logloss: 0.691786
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:30,352] Trial 7 finished with value: 0.48199329983249584 and parameters: {'num_leaves': 40, 'max_depth': 10, 'learning_rate': 0.046693378153172774, 'n_estimators': 300, 'reg_alpha': 0.0032389376229049363, 'reg_lambda': 0.040117624171044114, 'min_child_samples': 30, 'subsample': 0.8841921668643528, 'colsample_bytree': 0.830001963021936, 'subsample_freq': 2, 'feature_fraction_bynode': 0.709077331144497, 'extra_trees': True}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.48237	valid_0's binary_logloss: 0.693489
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.542995	valid_0's binary_logloss: 0.6912
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.476761	valid_0's binary_logloss: 0.693569
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's auc: 0.515851	valid_0's binary_logloss: 0.692643
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:31,228] Trial 8 finished with value: 0.5180067001675042 and parameters: {'num_leaves': 31, 'max_depth': 6, 'learning_rate': 0.04055509436745158, 'n_estimators': 400, 'reg_alpha': 0.08586158697721219, 'reg_lambda': 0.016988564917582986, 'min_child_samples': 20, 'subsample': 0.8113176312182179, 'colsample_bytree': 0.8316068777802513, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7275789722273335, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.527203	valid_0's binary_logloss: 0.692941
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.53369	valid_0's binary_logloss: 0.690983
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.482494	valid_0's binary_logloss: 0.693583
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[50]	valid_0's auc: 0.563638	valid_0's binary_logloss: 0.68799
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:32,158] Trial 9 finished with value: 0.5020938023450586 and parameters: {'num_leaves': 40, 'max_depth': 6, 'learning_rate': 0.04696247961492109, 'n_estimators': 150, 'reg_alpha': 0.09373510187876119, 'reg_lambda': 0.07438561765517267, 'min_child_samples': 20, 'subsample': 0.8558347302158373, 'colsample_bytree': 0.8298997384121773, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7286943580039691, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.519186	valid_0's binary_logloss: 0.692925
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.511008	valid_0's binary_logloss: 0.692168
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.512118	valid_0's binary_logloss: 0.692411
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's auc: 0.542085	valid_0's binary_logloss: 0.691553


[I 2025-08-18 23:30:33,022] Trial 10 finished with value: 0.509212730318258 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.06328796675315475, 'n_estimators': 400, 'reg_alpha': 0.04424454343174263, 'reg_lambda': 0.013888784431023971, 'min_child_samples': 20, 'subsample': 0.8623488538807971, 'colsample_bytree': 0.8563769046683118, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7894607302434304, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.504721	valid_0's binary_logloss: 0.693401
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's auc: 0.502214	valid_0's binary_logloss: 0.693335
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.468597	valid_0's binary_logloss: 0.693549
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[10]	valid_0's auc: 0.510816	valid_0's binary_logloss: 0.692299


[I 2025-08-18 23:30:33,999] Trial 11 finished with value: 0.48785594639866 and parameters: {'num_leaves': 31, 'max_depth': 6, 'learning_rate': 0.031019195478800894, 'n_estimators': 400, 'reg_alpha': 0.055486963328635244, 'reg_lambda': 0.022029076062211, 'min_child_samples': 10, 'subsample': 0.8462667036027802, 'colsample_bytree': 0.8062013438993813, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7295959844280425, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.512434	valid_0's binary_logloss: 0.693087
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's auc: 0.54533	valid_0's binary_logloss: 0.691894
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.529326	valid_0's binary_logloss: 0.692471
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.487231	valid_0's binary_logloss: 0.693492


[I 2025-08-18 23:30:34,897] Trial 12 finished with value: 0.4853433835845896 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.0519431260529759, 'n_estimators': 200, 'reg_alpha': 0.08803296344662753, 'reg_lambda': 0.01753376146508496, 'min_child_samples': 15, 'subsample': 0.8104048071367898, 'colsample_bytree': 0.8648557687204638, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7110232142806271, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.505201	valid_0's binary_logloss: 0.695152
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.501311	valid_0's binary_logloss: 0.69279
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.50509	valid_0's binary_logloss: 0.693012
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[5]	valid_0's auc: 0.506174	valid_0's binary_logloss: 0.693246
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:35,698] Trial 13 finished with value: 0.48785594639866 and parameters: {'num_leaves': 31, 'max_depth': 6, 'learning_rate': 0.04876494458861015, 'n_estimators': 400, 'reg_alpha': 0.07218153792037653, 'reg_lambda': 0.011143777177353882, 'min_child_samples': 30, 'subsample': 0.8203341269880846, 'colsample_bytree': 0.8345372430243541, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8010575112564584, 'extra_trees': True}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.478811	valid_0's binary_logloss: 0.693428
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	valid_0's auc: 0.516143	valid_0's binary_logloss: 0.692645
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.473022	valid_0's binary_logloss: 0.693793
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[22]	valid_0's auc: 0.541164	valid_0's binary_logloss: 0.690554
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:36,583] Trial 14 finished with value: 0.4958123953098828 and parameters: {'num_leaves': 31, 'max_depth': 6, 'learning_rate': 0.03603448271329002, 'n_estimators': 400, 'reg_alpha': 0.08909354267414324, 'reg_lambda': 0.044554694896450274, 'min_child_samples': 20, 'subsample': 0.8172092180953764, 'colsample_bytree': 0.8514862874986054, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7832694905484314, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.509653	valid_0's binary_logloss: 0.693087
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[25]	valid_0's auc: 0.540194	valid_0's binary_logloss: 0.691031
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.474914	valid_0's binary_logloss: 0.693607
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[13]	valid_0's auc: 0.530029	valid_0's binary_logloss: 0.69171
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:37,601] Trial 15 finished with value: 0.5067001675041876 and parameters: {'num_leaves': 70, 'max_depth': 8, 'learning_rate': 0.030638444063243314, 'n_estimators': 400, 'reg_alpha': 0.0429218839421883, 'reg_lambda': 0.05930554065686727, 'min_child_samples': 20, 'subsample': 0.8736711157387036, 'colsample_bytree': 0.8531122636265961, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7711102273215523, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.493603	valid_0's binary_logloss: 0.693597
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[14]	valid_0's auc: 0.54192	valid_0's binary_logloss: 0.689266
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.467645	valid_0's binary_logloss: 0.693942
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's auc: 0.517647	valid_0's binary_logloss: 0.692529
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:39,031] Trial 16 finished with value: 0.49288107202680065 and parameters: {'num_leaves': 31, 'max_depth': 10, 'learning_rate': 0.03322426218176225, 'n_estimators': 300, 'reg_alpha': 0.09932798044254834, 'reg_lambda': 0.004596203606467534, 'min_child_samples': 20, 'subsample': 0.8063579577936659, 'colsample_bytree': 0.846075900370727, 'subsample_freq': 2, 'feature_fraction_bynode': 0.7946388648048729, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.50139	valid_0's binary_logloss: 0.693704
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's auc: 0.520549	valid_0's binary_logloss: 0.691184
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.488372	valid_0's binary_logloss: 0.693752
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[37]	valid_0's auc: 0.546878	valid_0's binary_logloss: 0.689087
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:39,934] Trial 17 finished with value: 0.5041876046901173 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.05384025457107687, 'n_estimators': 400, 'reg_alpha': 0.06590494614139292, 'reg_lambda': 0.01610647775652615, 'min_child_samples': 20, 'subsample': 0.8067823298006164, 'colsample_bytree': 0.8097203486379777, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7202188501675123, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.544026	valid_0's binary_logloss: 0.693291
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[9]	valid_0's auc: 0.508989	valid_0's binary_logloss: 0.692542
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.492291	valid_0's binary_logloss: 0.693513
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[20]	valid_0's auc: 0.541529	valid_0's binary_logloss: 0.690761
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:40,827] Trial 18 finished with value: 0.5050251256281406 and parameters: {'num_leaves': 50, 'max_depth': 6, 'learning_rate': 0.034421617119116905, 'n_estimators': 300, 'reg_alpha': 0.03997381919559376, 'reg_lambda': 0.05913414605890031, 'min_child_samples': 20, 'subsample': 0.8475635570541451, 'colsample_bytree': 0.8363712462387841, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7198310338357672, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.518494	valid_0's binary_logloss: 0.693045
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[17]	valid_0's auc: 0.522654	valid_0's binary_logloss: 0.692215
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.450347	valid_0's binary_logloss: 0.69433
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[38]	valid_0's auc: 0.53384	valid_0's binary_logloss: 0.692422
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:42,274] Trial 19 finished with value: 0.4974874371859297 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.04064545405708929, 'n_estimators': 250, 'reg_alpha': 0.07288332672072659, 'reg_lambda': 0.050871951350315, 'min_child_samples': 10, 'subsample': 0.8327384046674857, 'colsample_bytree': 0.869091457435611, 'subsample_freq': 2, 'feature_fraction_bynode': 0.7379071546111137, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.506552	valid_0's binary_logloss: 0.692875
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's auc: 0.548746	valid_0's binary_logloss: 0.689295
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.529534	valid_0's binary_logloss: 0.692356
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.518932	valid_0's binary_logloss: 0.69281
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:43,518] Trial 20 finished with value: 0.5071189279731994 and parameters: {'num_leaves': 31, 'max_depth': 6, 'learning_rate': 0.06088348537730802, 'n_estimators': 400, 'reg_alpha': 0.0970183320664931, 'reg_lambda': 0.0497214006743233, 'min_child_samples': 20, 'subsample': 0.8441465867188852, 'colsample_bytree': 0.8142051704116481, 'subsample_freq': 2, 'feature_fraction_bynode': 0.7021815562396795, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.529046	valid_0's binary_logloss: 0.692845
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's auc: 0.534984	valid_0's binary_logloss: 0.69082
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.511887	valid_0's binary_logloss: 0.692978
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[40]	valid_0's auc: 0.546053	valid_0's binary_logloss: 0.692585


[I 2025-08-18 23:30:44,404] Trial 21 finished with value: 0.51214405360134 and parameters: {'num_leaves': 40, 'max_depth': 6, 'learning_rate': 0.0619826725978373, 'n_estimators': 400, 'reg_alpha': 0.04241762372189638, 'reg_lambda': 0.02713091529609943, 'min_child_samples': 30, 'subsample': 0.8480181161440115, 'colsample_bytree': 0.8671117180476416, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7903838861429379, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.501545	valid_0's binary_logloss: 0.693558
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's auc: 0.553692	valid_0's binary_logloss: 0.689611
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.514995	valid_0's binary_logloss: 0.692963
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.490615	valid_0's binary_logloss: 0.693768
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:45,238] Trial 22 finished with value: 0.5184254606365158 and parameters: {'num_leaves': 40, 'max_depth': 6, 'learning_rate': 0.05313051575024476, 'n_estimators': 150, 'reg_alpha': 0.019749663094366977, 'reg_lambda': 0.04968609596613849, 'min_child_samples': 30, 'subsample': 0.8324761675257799, 'colsample_bytree': 0.865849252217179, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8149855003517668, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.522814	valid_0's binary_logloss: 0.693052
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[16]	valid_0's auc: 0.530107	valid_0's binary_logloss: 0.690311
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.501746	valid_0's binary_logloss: 0.693212
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[28]	valid_0's auc: 0.526678	valid_0's binary_logloss: 0.693245


[I 2025-08-18 23:30:46,101] Trial 23 finished with value: 0.5071189279731994 and parameters: {'num_leaves': 50, 'max_depth': 6, 'learning_rate': 0.03664909948739446, 'n_estimators': 150, 'reg_alpha': 0.014202891069442503, 'reg_lambda': 0.06815958280613245, 'min_child_samples': 30, 'subsample': 0.8475864506582509, 'colsample_bytree': 0.876636447819442, 'subsample_freq': 1, 'feature_fraction_bynode': 0.818141493078464, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.505184	valid_0's binary_logloss: 0.693299
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's auc: 0.55356	valid_0's binary_logloss: 0.688985
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.475731	valid_0's binary_logloss: 0.694003
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[20]	valid_0's auc: 0.532807	valid_0's binary_logloss: 0.692783
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:47,035] Trial 24 finished with value: 0.5121440536013401 and parameters: {'num_leaves': 40, 'max_depth': 8, 'learning_rate': 0.05565856082890649, 'n_estimators': 150, 'reg_alpha': 0.01345162663615877, 'reg_lambda': 0.04226623917549094, 'min_child_samples': 30, 'subsample': 0.8311817172709478, 'colsample_bytree': 0.8523089452082806, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7756767433663221, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.518465	valid_0's binary_logloss: 0.693886
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.528019	valid_0's binary_logloss: 0.691386
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.481024	valid_0's binary_logloss: 0.693727
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's auc: 0.526953	valid_0's binary_logloss: 0.692941
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:48,139] Trial 25 finished with value: 0.5171691792294808 and parameters: {'num_leaves': 70, 'max_depth': 10, 'learning_rate': 0.03253540016499753, 'n_estimators': 400, 'reg_alpha': 0.07309755856718786, 'reg_lambda': 0.07388142813420877, 'min_child_samples': 20, 'subsample': 0.8266912561824097, 'colsample_bytree': 0.8409240128827264, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7036283278157529, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.56149	valid_0's binary_logloss: 0.692217
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.505245	valid_0's binary_logloss: 0.692468
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.497596	valid_0's binary_logloss: 0.693462
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.510698	valid_0's binary_logloss: 0.693147
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:49,032] Trial 26 finished with value: 0.5020938023450586 and parameters: {'num_leaves': 40, 'max_depth': 6, 'learning_rate': 0.03590413279346513, 'n_estimators': 150, 'reg_alpha': 0.04262207380950385, 'reg_lambda': 0.042028838961512785, 'min_child_samples': 15, 'subsample': 0.829743453905228, 'colsample_bytree': 0.8832691884059064, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8180249490324628, 'extra_trees': True}. Best is trial 5 with value: 0.5309882747068677.


Early stopping, best iteration is:
[2]	valid_0's auc: 0.501373	valid_0's binary_logloss: 0.693312
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.5036	valid_0's binary_logloss: 0.693109
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.486638	valid_0's binary_logloss: 0.693326
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's auc: 0.524242	valid_0's binary_logloss: 0.692676


[I 2025-08-18 23:30:49,871] Trial 27 finished with value: 0.5041876046901173 and parameters: {'num_leaves': 70, 'max_depth': 6, 'learning_rate': 0.03895708287894024, 'n_estimators': 400, 'reg_alpha': 0.08297904601370687, 'reg_lambda': 0.03705454554203391, 'min_child_samples': 20, 'subsample': 0.8487796619960403, 'colsample_bytree': 0.8395840970138536, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7370786715157444, 'extra_trees': True}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.497047	valid_0's binary_logloss: 0.693169
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.571066	valid_0's binary_logloss: 0.689834
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.466474	valid_0's binary_logloss: 0.693962
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's auc: 0.531611	valid_0's binary_logloss: 0.691537


[I 2025-08-18 23:30:50,829] Trial 28 finished with value: 0.5087939698492462 and parameters: {'num_leaves': 31, 'max_depth': 8, 'learning_rate': 0.03811064238397015, 'n_estimators': 400, 'reg_alpha': 0.05777508712225899, 'reg_lambda': 0.02311686864200534, 'min_child_samples': 20, 'subsample': 0.8445446483860275, 'colsample_bytree': 0.8545402968449083, 'subsample_freq': 1, 'feature_fraction_bynode': 0.7072219523254054, 'extra_trees': False}. Best is trial 5 with value: 0.5309882747068677.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.524954	valid_0's binary_logloss: 0.69308
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's auc: 0.541046	valid_0's binary_logloss: 0.688542
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.524517	valid_0's binary_logloss: 0.69199
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[19]	valid_0's auc: 0.549937	valid_0's binary_logloss: 0.689689
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:51,661] Trial 29 finished with value: 0.5314070351758794 and parameters: {'num_leaves': 60, 'max_depth': 6, 'learning_rate': 0.09156340532307691, 'n_estimators': 150, 'reg_alpha': 0.01935482713522382, 'reg_lambda': 0.07621033765236265, 'min_child_samples': 30, 'subsample': 0.8368215657952556, 'colsample_bytree': 0.8717360338196559, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8180984318738471, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.513682	valid_0's binary_logloss: 0.693281
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[8]	valid_0's auc: 0.558477	valid_0's binary_logloss: 0.686796
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.514995	valid_0's binary_logloss: 0.693035
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.490817	valid_0's binary_logloss: 0.69448
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:52,475] Trial 30 finished with value: 0.5184254606365158 and parameters: {'num_leaves': 50, 'max_depth': 6, 'learning_rate': 0.09127480945063715, 'n_estimators': 150, 'reg_alpha': 0.031215967908596542, 'reg_lambda': 0.09572579470527834, 'min_child_samples': 30, 'subsample': 0.8326910060792904, 'colsample_bytree': 0.8817309779394455, 'subsample_freq': 1, 'feature_fraction_bynode': 0.834373400548257, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.522814	valid_0's binary_logloss: 0.693043
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's auc: 0.531292	valid_0's binary_logloss: 0.6903
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[3]	valid_0's auc: 0.521876	valid_0's binary_logloss: 0.693058
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[16]	valid_0's auc: 0.537168	valid_0's binary_logloss: 0.693072
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:53,300] Trial 31 finished with value: 0.5217755443886096 and parameters: {'num_leaves': 60, 'max_depth': 6, 'learning_rate': 0.09452687334890864, 'n_estimators': 150, 'reg_alpha': 0.02023689685320731, 'reg_lambda': 0.07838858810125399, 'min_child_samples': 30, 'subsample': 0.8483546518364633, 'colsample_bytree': 0.872490843060787, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8507136130028808, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.505184	valid_0's binary_logloss: 0.693631
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[4]	valid_0's auc: 0.503928	valid_0's binary_logloss: 0.693808
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.482522	valid_0's binary_logloss: 0.695365
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[12]	valid_0's auc: 0.561303	valid_0's binary_logloss: 0.686701


[I 2025-08-18 23:30:54,150] Trial 32 finished with value: 0.5129815745393634 and parameters: {'num_leaves': 60, 'max_depth': 6, 'learning_rate': 0.0968947063506886, 'n_estimators': 150, 'reg_alpha': 6.43125799854663e-05, 'reg_lambda': 0.08360396084827772, 'min_child_samples': 20, 'subsample': 0.8297792856985271, 'colsample_bytree': 0.869891681591795, 'subsample_freq': 1, 'feature_fraction_bynode': 0.8476208378075203, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.518294	valid_0's binary_logloss: 0.692983
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's auc: 0.52232	valid_0's binary_logloss: 0.690918
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.489881	valid_0's binary_logloss: 0.693785
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[6]	valid_0's auc: 0.517181	valid_0's binary_logloss: 0.692878
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:54,986] Trial 33 finished with value: 0.49497487437185933 and parameters: {'num_leaves': 60, 'max_depth': 6, 'learning_rate': 0.08955195354758684, 'n_estimators': 150, 'reg_alpha': 0.020332620242151454, 'reg_lambda': 0.044929354811389856, 'min_child_samples': 30, 'subsample': 0.8498135413134204, 'colsample_bytree': 0.8523445866563245, 'subsample_freq': 1, 'feature_fraction_bynode': 0.734253349421656, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.501545	valid_0's binary_logloss: 0.69375
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[7]	valid_0's auc: 0.53775	valid_0's binary_logloss: 0.690717
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[2]	valid_0's auc: 0.541832	valid_0's binary_logloss: 0.691534
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[11]	valid_0's auc: 0.515312	valid_0's binary_logloss: 0.692539
Training until validation scores don't improve for 50 rounds


[I 2025-08-18 23:30:56,248] Trial 34 finished with value: 0.501675041876047 and parameters: {'num_leaves': 60, 'max_depth': 6, 'learning_rate': 0.07311868167236793, 'n_estimators': 150, 'reg_alpha': 0.03480066854094044, 'reg_lambda': 0.06604116209273553, 'min_child_samples': 30, 'subsample': 0.8437959932037414, 'colsample_bytree': 0.8806025316741275, 'subsample_freq': 2, 'feature_fraction_bynode': 0.8405768461167173, 'extra_trees': False}. Best is trial 29 with value: 0.5314070351758794.


Early stopping, best iteration is:
[1]	valid_0's auc: 0.505493	valid_0's binary_logloss: 0.693402
Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[1]	valid_0's auc: 0.51963	valid_0's binary_logloss: 0.693449
📈 Current bests:
 ('GradientBoosting', '1d'): 0.7143
 ('GradientBoosting', '4h'): 0.6957
 ('LightGBM_Financial', '4h'): 0.6232
   GradientBoosting 1d focus → [('learning_rate', np.float64(2.0865728504018346)), ('max_depth', np.float64(1.8975992264059516)), ('n_estimators', np.float64(1.46703513798778))]
   GradientBoosting 4h focus → [('subsample', np.float64(2.556020717249001)), ('n_estimators', np.float64(1.503834735756718)), ('max_depth', np.float64(1.4418142284137467))]

🔄 ITERATION 31
   → GB 1d iter 31 (REFIT full): val_acc@best_thr=0.4891, thr=0.30, test_acc@best_thr=0.5714, test_acc@0.50=0.4857, explore=0.15, max_changes=1, changed=['learning_rate']
   → GB 1d iter 31 (REFIT full): val_acc@best_thr=0.5249, thr=0.30, test_acc@b