In [1]:
# ========================================
# STEP 1: CROSS-PLATFORM DEPENDENCY MANAGEMENT
# ========================================
print("🔧 Setting up dependencies...")

# Cross-platform dependency installation.
# Every required package is probed first; pip is only invoked when one of the
# imports in the `try` body raises ImportError.
try:
    import pandas, numpy, sklearn, xgboost, matplotlib, seaborn, joblib, tqdm
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import accuracy_score, roc_auc_score
    from sklearn.utils.class_weight import compute_sample_weight
    from sklearn.ensemble import GradientBoostingClassifier
    import joblib
    import lightgbm as lgb  # not used, keeps env parity
    print("✅ Core dependencies already available")
except ImportError as e:
    # Fallback path: install the whole (unpinned) package list with the
    # interpreter that runs this kernel (sys.executable), then import the
    # scikit-learn / joblib names that the later cells use directly.
    print(f"Installing missing dependencies: {e}")
    import sys, subprocess
    pkgs = [
        'pandas','numpy','scikit-learn','xgboost','lightgbm',
        'matplotlib','seaborn','joblib','tqdm','pyarrow'
    ]
    # check_call raises CalledProcessError when pip exits with a non-zero status,
    # so a failed install stops the cell here.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install'] + pkgs)
    from sklearn.preprocessing import StandardScaler
    from sklearn.metrics import accuracy_score, roc_auc_score
    from sklearn.utils.class_weight import compute_sample_weight
    from sklearn.ensemble import GradientBoostingClassifier
    import joblib
    print("✅ Dependencies installed")

# Try to mount Google Drive if available (Colab environment).
# Sets IS_COLAB and BASE_DIR, which the path setup below builds on.
# Only ImportError is handled: any other exception raised while importing or
# mounting propagates and stops the cell.
try:
    from google.colab import drive
    drive.mount('/content/drive')
    IS_COLAB = True
    BASE_DIR = '/content/drive/MyDrive/daygent_v1_models'  # same base folder as your other notebooks
    print("✅ Google Drive mounted (Colab environment)")
except ImportError:
    # google.colab is not importable -> fall back to a folder relative to the
    # current working directory.
    IS_COLAB = False
    BASE_DIR = './daygent_v1_models'
    print("✅ Local environment detected")

# Core imports
import os, warnings
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm
warnings.filterwarnings('ignore')

# Paths: input CSVs are read from DATA_DIR; everything this notebook writes goes to MODEL_DIR.
DATA_DIR, MODEL_DIR = (
    os.path.join(BASE_DIR, subfolder) for subfolder in ('spy_data_export', 'gb_4h')
)
# Only the output folder is created here; the data folder is expected to exist already.
os.makedirs(MODEL_DIR, exist_ok=True)

print(
    f"✅ Model directory: {MODEL_DIR}",
    f"✅ Data directory:  {DATA_DIR}",
    sep="\n",
)

# Determinism: seed NumPy's global RNG (the estimator carries its own random_state).
np.random.seed(42)


🔧 Setting up dependencies...
✅ Core dependencies already available
Mounted at /content/drive
✅ Google Drive mounted (Colab environment)
✅ Model directory: /content/drive/MyDrive/daygent_v1_models/gb_4h
✅ Data directory:  /content/drive/MyDrive/daygent_v1_models/spy_data_export


In [2]:
# ========================================
# STEP 2: LOAD 1D AND 4H DATA (FOR OVERLAP TEST PERIOD)
# ========================================
print("\n📊 Loading 1D and 4H timeframe data...")

# The order of this list is also the one-hot order passed to the feature builder.
TIMEFRAMES_ORDERED = ['1d', '4h']
# Maps timeframe label -> DataFrame sorted ascending by 'timestamp'.
raw_data = {}

for tf in TIMEFRAMES_ORDERED:
    csv_file = os.path.join(DATA_DIR, f'spy_{tf}.csv')
    if not os.path.exists(csv_file):
        raise FileNotFoundError(f"❌ {csv_file} not found!")
    df = pd.read_csv(csv_file)
    # utc=True guarantees a tz-aware UTC datetime64 column for every input form:
    # naive strings are interpreted as UTC and mixed offsets are converted instead of
    # producing an object column. The test-window bounds are localized to UTC later,
    # so a naive column would make the train/test comparisons raise.
    df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)
    # Chronological order is required by the positional 80/20 split done later.
    df = df.sort_values('timestamp').reset_index(drop=True)
    raw_data[tf] = df
    print(f"✅ Loaded {tf} data: {len(df):,} candles")
    print(f"📅 {tf} range: {df['timestamp'].min()} to {df['timestamp'].max()}")



📊 Loading 1D and 4H timeframe data...
✅ Loaded 1d data: 2,547 candles
📅 1d range: 2014-12-23 14:30:00+00:00 to 2025-02-07 14:30:00+00:00
✅ Loaded 4h data: 3,058 candles
📅 4h range: 2019-01-07 14:30:00+00:00 to 2025-02-10 14:30:00+00:00


In [3]:
# ========================================
# STEP 3: DEFINE TEST PERIOD (LAST 35 COMMON TRADING DAYS)
# ========================================
# Time span covered by BOTH timeframes.
latest_start = max(raw_data['1d']['timestamp'].min(), raw_data['4h']['timestamp'].min())
earliest_end  = min(raw_data['1d']['timestamp'].max(), raw_data['4h']['timestamp'].max())

# common trading days across 1d/4h (calendar dates that appear in both frames
# inside the shared span)
common_dates = set(
    raw_data['1d'][(raw_data['1d']['timestamp'] >= latest_start) &
                   (raw_data['1d']['timestamp'] <= earliest_end)]['timestamp'].dt.date.unique()
)
common_dates &= set(
    raw_data['4h'][(raw_data['4h']['timestamp'] >= latest_start) &
                   (raw_data['4h']['timestamp'] <= earliest_end)]['timestamp'].dt.date.unique()
)

all_days = sorted(common_dates)
if not all_days:
    # Fail with an explicit message instead of an IndexError on selected_days[0].
    raise ValueError("No common trading days between the 1d and 4h data; cannot define a test period.")
TEST_DAYS = min(35, len(all_days))
selected_days = all_days[-TEST_DAYS:]

# Window bounds: 00:00:00 of the first selected day through 23:59:59.999999 of the last.
# pd.Timestamp.min / pd.Timestamp.max are the limits of the representable range
# (1677-09-21 00:12:43.145224193 and 2262-04-11 23:47:16.854775807), so their
# .time() is NOT midnight / end-of-day and must not be used to build day bounds.
test_start = pd.Timestamp(selected_days[0]).tz_localize('UTC')
test_end   = (
    pd.Timestamp(selected_days[-1]).tz_localize('UTC')
    + pd.Timedelta(days=1)
    - pd.Timedelta(microseconds=1)
)

print(f"\n🎯 Test period: {test_start.date()} → {test_end.date()} ({TEST_DAYS} trading days)")



🎯 Test period: 2024-12-17 → 2025-02-07 (35 trading days)


In [4]:
# ========================================
# STEP 4: FEATURE EXTRACTION (16-FEATURE CONTRACT)
# ========================================
def parse_vector_column(vector_str):
    """Parse a vector cell into a NumPy array.

    Args:
        vector_str: One cell value. Supported forms:
            * ``None`` or a scalar missing value (NaN / NaT / ``pd.NA``) -> ``None``
            * ``str`` such as ``"[1.0, 2.0, 3.0]"``: surrounding ``[``, ``]`` and
              ``"`` characters are stripped and the rest is split on commas
            * ``list`` / ``tuple`` / ``np.ndarray`` / ``pd.Series``: converted as-is
            * any other scalar: wrapped with ``np.array`` (0-d array)

    Returns:
        ``np.ndarray`` on success, or ``None`` when the value is missing or a
        string element cannot be converted to ``float``.
    """
    if vector_str is None:
        return None
    if isinstance(vector_str, str):
        s = vector_str.strip('[]"')
        try:
            return np.array([float(x.strip()) for x in s.split(',')])
        except ValueError:
            return None
    # Sequences must be handled BEFORE pd.isna: on a sequence pd.isna returns an
    # element-wise array, and using that array in a boolean test raises
    # "truth value of an array ... is ambiguous".
    if isinstance(vector_str, (list, tuple, np.ndarray, pd.Series)):
        return np.array(vector_str)
    if pd.isna(vector_str):
        return None
    return np.array(vector_str)

# Names of the 16 model inputs, listed in the order the feature vector is laid out.
FEATURE_NAMES = (
    [f'raw_{field}' for field in 'ohlcv']        # 5 raw values: o, h, l, c, v
    + [f'iso_{slot}' for slot in range(4)]       # 4 iso values: iso_0 .. iso_3
    + ['tf_1d', 'tf_4h']                         # timeframe one-hot pair
    + ['hl_range', 'price_change', 'upper_shadow', 'lower_shadow', 'volume_m']  # 5 engineered
)

def build_feature_vector(raw_ohlcv, iso_ohlc, tf, tf_list):
    """Assemble the flat feature vector for one candle.

    Layout: the 5 raw OHLCV values, the iso values, one 0/1 flag per entry of
    ``tf_list`` (1 where the entry equals ``tf``), then 5 engineered values.

    Args:
        raw_ohlcv: Sequence of exactly five numbers ``(open, high, low, close, volume)``.
        iso_ohlc: Sequence of iso values, appended unchanged.
        tf: Timeframe label of this candle.
        tf_list: Ordered timeframe labels defining the one-hot positions.

    Returns:
        1-D ``np.ndarray`` of dtype float.
    """
    open_, high, low, close, volume = raw_ohlcv

    one_hot = [int(tf == candidate) for candidate in tf_list]

    # Close-relative ratios fall back to 0 when close is zero (falsy).
    if close:
        hl_range = (high - low) / close
        upper_shadow = (high - close) / close
        lower_shadow = (close - low) / close
    else:
        hl_range = upper_shadow = lower_shadow = 0

    # Open-relative change falls back to 0 when open is zero (falsy).
    if open_:
        price_change = (close - open_) / open_
    else:
        price_change = 0

    volume_m = volume / 1_000_000  # volume expressed in millions

    return np.array(
        [
            *raw_ohlcv,
            *iso_ohlc,
            *one_hot,
            hl_range,
            price_change,
            upper_shadow,
            lower_shadow,
            volume_m,
        ],
        dtype=float,
    )

def extract_features_4h(row):
    """Turn one 4h row into ``(feature_vector, label)``.

    Reads the ``raw_ohlcv_vec``, ``iso_ohlc`` and ``future`` fields via ``row.get``.

    Returns:
        ``(np.ndarray, int)`` for a usable row, or ``(None, None)`` when a vector
        cannot be parsed, ``future`` is missing, or the vectors do not have
        5 and 4 elements respectively.
    """
    ohlcv_vec = parse_vector_column(row.get('raw_ohlcv_vec'))
    iso_vec = parse_vector_column(row.get('iso_ohlc'))
    label = row.get('future')

    # Reject rows with any missing ingredient.
    is_complete = ohlcv_vec is not None and iso_vec is not None and not pd.isna(label)
    if not is_complete:
        return None, None

    # Reject rows whose vectors have the wrong length.
    if len(ohlcv_vec) != 5 or len(iso_vec) != 4:
        return None, None

    features = build_feature_vector(ohlcv_vec, iso_vec, '4h', TIMEFRAMES_ORDERED)
    return features, int(label)


In [5]:
# ========================================
# STEP 5: EXTRACT TRAIN/TEST FEATURES (W2-STYLE) & SCALE
# ========================================
print("\n🔄 Extracting features from 4h data...")

# Chronological partition of the 4h candles: strictly before the test window is
# the training pool; inside the window (both bounds inclusive) is the test set.
df_4h = raw_data['4h']
train_df = df_4h.loc[df_4h['timestamp'] < test_start].copy()
test_df = df_4h.loc[
    (df_4h['timestamp'] >= test_start) & (df_4h['timestamp'] <= test_end)
].copy()

print(f"📊 Train samples (pre-test period): {len(train_df):,}")
print(f"📊 Test samples (overlap window):  {len(test_df):,}")

# Train features: rows the extractor rejects (returns None) are skipped.
X_train, y_train = [], []
train_rows = tqdm(train_df.iterrows(), total=len(train_df), desc="Extracting 4h train features")
for _, candle in train_rows:
    vec, label = extract_features_4h(candle)
    if vec is None:
        continue
    X_train.append(vec)
    y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)
print(f"\n✅ Training features extracted: {X_train.shape}")
if len(y_train) > 0:
    print(f"📊 Class distribution (train): {np.bincount(y_train)}")

# Test features, plus the raw per-row values needed by the detailed report.
X_test, y_test, test_timestamps = [], [], []
test_rows_info = []
test_rows = tqdm(test_df.iterrows(), total=len(test_df), desc="Extracting 4h test features")
for _, candle in test_rows:
    vec, label = extract_features_4h(candle)
    if vec is None:
        continue
    X_test.append(vec)
    y_test.append(label)
    test_timestamps.append(candle['timestamp'])
    test_rows_info.append({
        'timestamp': candle['timestamp'],
        'raw_ohlcv': parse_vector_column(candle['raw_ohlcv_vec']),
        'iso_ohlc':  parse_vector_column(candle['iso_ohlc']),
        'future': int(candle['future']),
        'feature_vector': vec
    })

X_test = np.array(X_test)
y_test = np.array(y_test)
print(f"📊 Test features extracted: {X_test.shape}")

# Scaling statistics come from the FIRST 80% of the pre-test rows only; the
# remaining 20% and the test set are transformed with those same statistics.
split_ix = int(len(X_train) * 0.8)
scaler = StandardScaler().fit(X_train[:split_ix])

X_scaled = scaler.transform(X_train)
X_tr, X_val = X_scaled[:split_ix], X_scaled[split_ix:]  # X_val: sanity metrics only
y_tr, y_val = y_train[:split_ix], y_train[split_ix:]

print(f"📊 Train slice used for fitting GB: {X_tr.shape}")
print(f"📊 Held-out in-sample val slice : {X_val.shape}")



🔄 Extracting features from 4h data...
📊 Train samples (pre-test period): 2,988
📊 Test samples (overlap window):  69


Extracting 4h train features: 100%|██████████| 2988/2988 [00:00<00:00, 14147.92it/s]



✅ Training features extracted: (2988, 16)
📊 Class distribution (train): [1363 1625]


Extracting 4h test features: 100%|██████████| 69/69 [00:00<00:00, 9655.59it/s]

📊 Test features extracted: (69, 16)
📊 Train slice used for fitting GB: (2390, 16)
📊 Held-out in-sample val slice : (598, 16)





In [6]:
# ========================================
# STEP 6: TRAIN GRADIENT BOOSTING ON TRAIN SLICE ONLY (W2-STYLE)
# ========================================
print("\n🚀 Training GradientBoosting (4h) — train slice only, no threshold calibration, no refit")

# Fixed hyper-parameters; this dict is also written to the results JSON later.
gb_params = dict(
    n_estimators=75,
    max_depth=9,
    learning_rate=0.12,
    subsample=0.85,
    min_samples_split=12,
    min_samples_leaf=1,
    random_state=42,
)

gb = GradientBoostingClassifier(**gb_params)

# Class-balanced sample weights, computed on the train slice only.
sw_tr = None
if len(y_tr):
    sw_tr = compute_sample_weight('balanced', y_tr)
gb.fit(X_tr, y_tr, sample_weight=sw_tr)

# Sanity metrics on the held-back 20% slice at the default 0.50 cut-off.
# Both stay NaN when the slice is empty; AUC stays NaN unless both classes occur.
val_acc = val_auc = float('nan')
if len(X_val):
    val_pred = gb.predict(X_val)
    val_proba = gb.predict_proba(X_val)[:, 1]
    val_acc = accuracy_score(y_val, val_pred)
    if len(np.unique(y_val)) == 2:
        val_auc = roc_auc_score(y_val, val_proba)

print(f"✅ In-sample val accuracy (0.50): {val_acc:.4f}")
print(f"✅ In-sample val AUC          : {val_auc:.4f}")



🚀 Training GradientBoosting (4h) — train slice only, no threshold calibration, no refit
✅ In-sample val accuracy (0.50): 0.5050
✅ In-sample val AUC          : 0.5093


In [7]:
# ========================================
# STEP 7: TEST + DETAILED DAY-BY-DAY / PREDICTION-BY-PREDICTION ANALYSIS
# ========================================
print(f"\n🧪 Testing on isolated {len(selected_days)}-day overlap period (4h)...")

# Transform the test set with the scaler fitted on the first 80% of pre-test data;
# an empty test set becomes an empty (0, n_features) matrix.
if len(X_test):
    X_test_scaled = scaler.transform(X_test)
else:
    X_test_scaled = np.empty((0, X_tr.shape[1]))

# Hard labels come from .predict; probabilities are the second predict_proba column.
if len(X_test_scaled):
    test_pred = gb.predict(X_test_scaled)
    test_proba = gb.predict_proba(X_test_scaled)[:, 1]
else:
    test_pred = np.array([])
    test_proba = np.array([])

# Metrics default to NaN when they cannot be computed.
test_acc = float('nan')
if len(test_pred):
    test_acc = accuracy_score(y_test, test_pred)

test_auc = float('nan')
if len(test_proba) and len(np.unique(y_test)) == 2:
    test_auc = roc_auc_score(y_test, test_proba)

print("\n🎯 TEST RESULTS (4h, w2-style):")
print(f"✅ Test Accuracy (0.50): {test_acc:.4f}")
print(f"✅ Test AUC            : {test_auc:.4f}")
if len(test_pred):
    print(f"📊 Test predictions: {np.bincount(test_pred)}")
    print(f"📊 Actual labels :  {np.bincount(y_test)}")

# Build detailed per-prediction table: one record per test candle, in extraction order.
FEATURE_INDEX = dict(zip(FEATURE_NAMES, range(len(FEATURE_NAMES))))
THRESHOLD_USED = 0.50

RAW_COLUMNS = ('raw_o', 'raw_h', 'raw_l', 'raw_c', 'raw_v')
ISO_COLUMNS = ('iso_0', 'iso_1', 'iso_2', 'iso_3')
ENGINEERED_COLUMNS = (
    'tf_1d', 'tf_4h', 'hl_range', 'price_change',
    'upper_shadow', 'lower_shadow', 'volume_m',
)

records = []
for position, info in enumerate(test_rows_info):
    ts = info['timestamp']
    fv = info['feature_vector']
    true = info['future']
    proba = float(test_proba[position])
    pred = int(test_pred[position])

    rec = {
        'candle_index_in_test': position + 1,
        'timestamp_utc': ts,
        'date_utc': ts.date(),
        'pred_prob_up': proba,
        'pred_label': int(pred),      # 1=up, 0=down
        'true_label': int(true),
        'correct': bool(pred == true),
        'threshold_used': THRESHOLD_USED,
        'decision_margin': proba - THRESHOLD_USED,
    }
    # Raw 4h OHLCV & ISO values, then the engineered columns read back from the
    # feature vector by name. Insertion order fixes the CSV column order.
    rec.update(zip(RAW_COLUMNS, info['raw_ohlcv']))
    rec.update(zip(ISO_COLUMNS, info['iso_ohlc']))
    rec.update({name: fv[FEATURE_INDEX[name]] for name in ENGINEERED_COLUMNS})
    records.append(rec)

pred_df = pd.DataFrame.from_records(records)
pred_df = pred_df.sort_values(['date_utc', 'timestamp_utc'])
pred_df = pred_df.reset_index(drop=True)

# Save machine-friendly CSV
pred_csv_path = os.path.join(MODEL_DIR, 'test_predictions_4h.csv')
pred_df.to_csv(pred_csv_path, index=False)

# Human-readable TXT report grouped by day.
# Layout: a header with the overall metrics, then one section per calendar day
# containing two lines per candle (prediction line + inputs line).
txt_lines = []
txt_lines.append("="*90)
txt_lines.append("GRADIENT BOOSTING 4H — DETAILED DAY-BY-DAY / PREDICTION-BY-PREDICTION REPORT")
txt_lines.append("="*90)
txt_lines.append(f"Test period: {test_start.date()} → {test_end.date()}")
txt_lines.append(f"Total test candles: {len(pred_df)}")
txt_lines.append(f"Threshold used: {THRESHOLD_USED:.2f}")
txt_lines.append(f"Overall Test Accuracy: {test_acc:.4f}")
txt_lines.append(f"Overall Test AUC: {test_auc:.4f}")
txt_lines.append("")

for day in pred_df['date_utc'].unique():
    day_block = pred_df[pred_df['date_utc'] == day]
    correct_n = int(day_block['correct'].sum())
    total_n   = len(day_block)
    txt_lines.append("-"*90)
    txt_lines.append(f"{day}  —  Day accuracy: {correct_n}/{total_n}  ({correct_n/total_n:.3f})")
    txt_lines.append("-"*90)
    for _, r in day_block.iterrows():
        dir_word   = "UP" if r['pred_label'] == 1 else "DOWN"
        truth_word = "UP" if r['true_label'] == 1 else "DOWN"
        right_wrong = "✅ CORRECT" if r['correct'] else "❌ WRONG"
        txt_lines.append(
            f"[{int(r['candle_index_in_test']):02d}] {r['timestamp_utc']}  "
            f"pred={dir_word}  p_up={r['pred_prob_up']:.4f}  thr={r['threshold_used']:.2f}  "
            f"margin={r['decision_margin']:.4f}  truth={truth_word}  → {right_wrong}"
        )
        txt_lines.append(
            f"    OHLCV: O={r['raw_o']:.4f}, H={r['raw_h']:.4f}, L={r['raw_l']:.4f}, C={r['raw_c']:.4f}, V={r['raw_v']:.0f} | "
            f"ISO: [{r['iso_0']:.4f}, {r['iso_1']:.4f}, {r['iso_2']:.4f}, {r['iso_3']:.4f}] | "
            f"feats: hl={r['hl_range']:.4f}, dC={r['price_change']:.4f}, upSh={r['upper_shadow']:.4f}, "
            f"loSh={r['lower_shadow']:.4f}, vol_m={r['volume_m']:.4f}"
        )
    txt_lines.append("")

report_path = os.path.join(MODEL_DIR, 'gb_4h_day_by_day.txt')
# The report contains non-ASCII characters (→, —, ✅, ❌). Without an explicit
# encoding, open() uses the platform default, which on some systems (e.g. Windows
# cp1252) cannot encode them and raises UnicodeEncodeError.
with open(report_path, 'w', encoding='utf-8') as f:
    f.write("\n".join(txt_lines))

print(f"\n📝 Saved detailed TXT report to: {report_path}")
print(f"🧾 Saved machine-readable predictions to: {pred_csv_path}")



🧪 Testing on isolated 35-day overlap period (4h)...

🎯 TEST RESULTS (4h, w2-style):
✅ Test Accuracy (0.50): 0.6957
✅ Test AUC            : 0.7052
📊 Test predictions: [26 43]
📊 Actual labels :  [29 40]

📝 Saved detailed TXT report to: /content/drive/MyDrive/daygent_v1_models/gb_4h/gb_4h_day_by_day.txt
🧾 Saved machine-readable predictions to: /content/drive/MyDrive/daygent_v1_models/gb_4h/test_predictions_4h.csv


In [8]:
# ========================================
# STEP 8: SAVE MODEL, SCALER, AND RESULTS
# ========================================
print("\n💾 Saving model and results...")

# The fitted estimator and the scaler are stored side by side in MODEL_DIR.
model_path, scaler_path = (
    os.path.join(MODEL_DIR, filename)
    for filename in ('gb_4h_w2_style.joblib', 'scaler_4h_w2_style.joblib')
)
joblib.dump(gb, model_path)
joblib.dump(scaler, scaler_path)

def _to_py(v):
    """Convert a NumPy integer/floating scalar to built-in int/float; return anything else unchanged."""
    # np.integer / np.floating are the abstract bases of every sized NumPy scalar
    # (int32, int64, float32, ...), so no per-width checks are needed.
    if isinstance(v, np.integer):
        return int(v)
    if isinstance(v, np.floating):
        return float(v)
    return v

# Summary of this run. Every metric is stored as a float, or as None when it could
# not be computed (NaN): json.dump would otherwise emit the bare token NaN, which
# is not valid JSON and is rejected by strict parsers.
results = {
    'pipeline': 'w2-style: train on first 80% slice only, default threshold 0.50, no refit',
    'test_accuracy': float(test_acc) if not np.isnan(test_acc) else None,
    'test_auc': float(test_auc) if not np.isnan(test_auc) else None,
    'val_accuracy_0p50': float(val_acc) if not np.isnan(val_acc) else None,
    'val_auc': float(val_auc) if not np.isnan(val_auc) else None,
    'train_samples_used_for_fit': int(len(X_tr)),
    'heldout_in_sample_val_samples': int(len(X_val)),
    'test_samples': int(len(X_test)),
    'feature_count': int(X_train.shape[1]) if X_train.ndim == 2 else 0,
    'model_params': {k: _to_py(v) for k, v in gb_params.items()},
    'feature_names': FEATURE_NAMES,
    # Artifact references are stored as bare file names (relative to MODEL_DIR).
    'report_txt': os.path.basename(report_path),
    'predictions_csv': os.path.basename(pred_csv_path),
    'model_path': os.path.basename(model_path),
    'scaler_path': os.path.basename(scaler_path),
    # Same constant the per-prediction table was built with.
    'threshold_used': THRESHOLD_USED,
    'test_period': f"{test_start.date()} to {test_end.date()}"
}

import json
with open(os.path.join(MODEL_DIR, 'results_gb_4h_w2_style.json'), 'w') as f:
    json.dump(results, f, indent=2)

print(f"✅ Model saved to: {model_path}")
print(f"✅ Scaler saved to: {scaler_path}")
print("✅ Results JSON saved as: results_gb_4h_w2_style.json")



💾 Saving model and results...
✅ Model saved to: /content/drive/MyDrive/daygent_v1_models/gb_4h/gb_4h_w2_style.joblib
✅ Scaler saved to: /content/drive/MyDrive/daygent_v1_models/gb_4h/scaler_4h_w2_style.joblib
✅ Results JSON saved as: results_gb_4h_w2_style.json


In [9]:
# ========================================
# STEP 9: SAVE DEPLOYMENT ARTIFACTS (for your site)
# ========================================
import json
from textwrap import dedent
import os

# Deployment manifest describing how to run the saved model. Values that were
# already decided earlier in the notebook (threshold, artifact file names,
# hyper-parameters) are read from those objects instead of being retyped, so the
# manifest cannot drift from what was actually saved.
deployment_config = {
    "model_type": "GradientBoostingClassifier",
    "timeframe": "4h",
    "feature_contract_version": "v1",
    "feature_names": FEATURE_NAMES,
    "threshold_used": results["threshold_used"],
    "artifact_paths": {
        # Bare file names, relative to the folder holding this config.
        "model_joblib": os.path.basename(model_path),
        "scaler_joblib": os.path.basename(scaler_path)
    },
    "inference_notes": {
        "scaling": "StandardScaler fitted on first 80% of pre-test 4h training data",
        "one_hot": {"tf_1d": 0, "tf_4h": 1},
        "expected_columns_in_csv": ["timestamp", "raw_ohlcv_vec", "iso_ohlc", "future"]
    },
    "gb_params": results["model_params"]
}

config_path = os.path.join(MODEL_DIR, "deployment_config_4h.json")
with open(config_path, "w") as f:
    json.dump(deployment_config, f, indent=2)

# Description of the input columns and derived features, written next to the model.
feature_schema = dict(
    raw_ohlcv_vec=dict(
        desc="Stringified list of [open, high, low, close, volume]",
        len=5,
        dtype="float",
    ),
    iso_ohlc=dict(
        desc="Stringified list of 4 ISO-normalized OHLC values",
        len=4,
        dtype="float",
    ),
    # Formulas of the five engineered features, in feature-vector order.
    engineered=[
        "hl_range=(H-L)/C",
        "price_change=(C-O)/O",
        "upper_shadow=(H-C)/C",
        "lower_shadow=(C-L)/C",
        "volume_m=V/1e6",
    ],
    tf_one_hot=dict(tf_1d=0, tf_4h=1),
)

schema_path = os.path.join(MODEL_DIR, "feature_schema_4h.json")
with open(schema_path, "w") as f:
    f.write(json.dumps(feature_schema, indent=2))

# Plain-text deployment notes. The f-string is interpolated first and then
# dedented, so the interpolated values must stay single-line.
readme_text = dedent(f"""
    ============================================
    GradientBoosting 4H Inference — Deployment Notes
    ============================================

    Artifacts:
    - Model:       {os.path.basename(model_path)}
    - Scaler:      {os.path.basename(scaler_path)}
    - Config:      {os.path.basename(config_path)}
    - Feature schema: feature_schema_4h.json
    - Predictions: test_predictions_4h.csv
    - Report:      gb_4h_day_by_day.txt

    Pipeline (w2-style):
    • Train on first 80% slice of pre-test 4h data (after scaling with that same slice).
    • No threshold calibration; default 0.50 is used.
    • No refit on train+val.
    • Inference uses scaler.transform then model.predict_proba(...)[:, 1] and 0.50 cutoff.

    Feature order (must match EXACTLY):
    {FEATURE_NAMES}

    Inference steps for your site:
    1) Parse inputs:
       - 'raw_ohlcv_vec' -> [o,h,l,c,v]
       - 'iso_ohlc'      -> [iso_0..iso_3]
       - one-hot: tf_1d=0, tf_4h=1
       - engineered: hl_range, price_change, upper_shadow, lower_shadow, volume_m
       - concatenate into a 16-length vector in the EXACT order above
    2) Load scaler (joblib) and transform the vector.
    3) Load model (joblib) and compute P(up) = predict_proba(...)[0,1].
    4) Predict UP if P(up) >= 0.50 else DOWN.

    Keep the feature order + scaling identical for consistent results.
""").strip()

readme_path = os.path.join(MODEL_DIR, "README_DEPLOY_4H.txt")
# The text contains non-ASCII characters (—, •); write it as UTF-8 explicitly so
# the result does not depend on the platform's default encoding.
with open(readme_path, "w", encoding="utf-8") as f:
    f.write(readme_text)

print("📦 Deployment artifacts saved:")
print(" -", config_path)
print(" -", schema_path)
print(" -", readme_path)


📦 Deployment artifacts saved:
 - /content/drive/MyDrive/daygent_v1_models/gb_4h/deployment_config_4h.json
 - /content/drive/MyDrive/daygent_v1_models/gb_4h/feature_schema_4h.json
 - /content/drive/MyDrive/daygent_v1_models/gb_4h/README_DEPLOY_4H.txt


In [10]:
# ========================================
# FINAL SUMMARY
# ========================================
# Recap of the run: output folder, test window, headline metrics and saved files.
summary_lines = [
    "\n" + "="*70,
    "🏆 GRADIENT BOOSTING 4H — W2-STYLE COMPLETE (with deep analysis + deploy artifacts)",
    "="*70,
    f" • Model dir:    {MODEL_DIR}",
    f" • Test window:  {test_start.date()} → {test_end.date()}",
    f" • Test candles: {len(X_test)}",
    f" • Test Acc/AUC: {test_acc:.4f} / {test_auc:.4f}",
    " • Threshold:    0.50",
    " • Saved files:  gb_4h_w2_style.joblib, scaler_4h_w2_style.joblib,",
    "                 deployment_config_4h.json, feature_schema_4h.json, README_DEPLOY_4H.txt,",
    "                 test_predictions_4h.csv, gb_4h_day_by_day.txt, results_gb_4h_w2_style.json",
    "="*70,
]
print("\n".join(summary_lines))



🏆 GRADIENT BOOSTING 4H — W2-STYLE COMPLETE (with deep analysis + deploy artifacts)
 • Model dir:    /content/drive/MyDrive/daygent_v1_models/gb_4h
 • Test window:  2024-12-17 → 2025-02-07
 • Test candles: 69
 • Test Acc/AUC: 0.6957 / 0.7052
 • Threshold:    0.50
 • Saved files:  gb_4h_w2_style.joblib, scaler_4h_w2_style.joblib,
                 deployment_config_4h.json, feature_schema_4h.json, README_DEPLOY_4H.txt,
                 test_predictions_4h.csv, gb_4h_day_by_day.txt, results_gb_4h_w2_style.json
