In [None]:
# ========================================
# STEP 1: CROSS-PLATFORM DEPENDENCY MANAGEMENT
# ========================================
print("🔧 Setting up dependencies...")

# Probe for every package this setup cell expects. Any ImportError falls
# through to a pip install run with the interpreter backing this kernel.
try:
    import pandas, numpy, sklearn, xgboost, matplotlib, seaborn, joblib, tqdm
    import lightgbm as lgb
    print("✅ Core dependencies already available")
except ImportError as e:
    print(f"Installing missing dependencies: {e}")
    import sys, subprocess
    # Keep this list in sync with the probe above. xgboost is probed, so it has
    # to be installed here too; otherwise a machine without it would re-run the
    # whole pip install on every execution and never satisfy the probe.
    pkgs = ['pandas', 'numpy', 'scikit-learn', 'xgboost', 'lightgbm',
            'matplotlib', 'seaborn', 'joblib', 'tqdm', 'pyarrow']
    subprocess.check_call([sys.executable, '-m', 'pip', 'install'] + pkgs)
    import lightgbm as lgb
    print("✅ Dependencies installed")

# Choose the storage root: Google Drive when running inside Colab,
# a folder next to the notebook otherwise.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    # google.colab is not importable -> treat as a local environment
    IS_COLAB, BASE_DIR = False, './daygent_v1_models'
    print("✅ Local environment detected")
else:
    IS_COLAB, BASE_DIR = True, '/content/drive/MyDrive/daygent_v1_models'  # <— your new base folder
    print("✅ Google Drive mounted (Colab environment)")

# Core imports
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from tqdm import tqdm
import warnings
import collections
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.utils.class_weight import compute_sample_weight
import joblib

warnings.filterwarnings('ignore')

# Paths: both directories live under BASE_DIR; only the model directory is
# created here, the data directory is expected to exist already.
DATA_DIR, MODEL_DIR = (
    os.path.join(BASE_DIR, subdir) for subdir in ('spy_data_export', 'lgbm_4h')
)
os.makedirs(MODEL_DIR, exist_ok=True)

print(f"✅ Model directory: {MODEL_DIR}\n✅ Data directory: {DATA_DIR}")


🔧 Setting up dependencies...
✅ Core dependencies already available
Mounted at /content/drive
✅ Google Drive mounted (Colab environment)
✅ Model directory: /content/drive/MyDrive/daygent_v1_models/lgbm_4h
✅ Data directory: /content/drive/MyDrive/daygent_v1_models/spy_data_export


In [None]:
# ========================================
# STEP 2: LOAD ONLY 4H DATA
# ========================================
print("\n📊 Loading ONLY 4H timeframe data...")

csv_file = os.path.join(DATA_DIR, 'spy_4h.csv')
if not os.path.exists(csv_file):
    raise FileNotFoundError(f"❌ {csv_file} not found!")

df_4h = pd.read_csv(csv_file)
# Parse explicitly as timezone-aware UTC. Later steps compare these timestamps
# against UTC-localized window bounds, which fails for naive values, and
# strings with mixed offsets would otherwise not parse into a datetime column.
df_4h['timestamp'] = pd.to_datetime(df_4h['timestamp'], utc=True)
# Chronological order is required by the time-based train/validation/test splits.
df_4h = df_4h.sort_values('timestamp').reset_index(drop=True)

print(f"✅ Loaded 4h data: {len(df_4h):,} candles")
print(f"📅 Date range: {df_4h['timestamp'].min()} to {df_4h['timestamp'].max()}")



📊 Loading ONLY 4H timeframe data...
✅ Loaded 4h data: 3,058 candles
📅 Date range: 2019-01-07 14:30:00+00:00 to 2025-02-10 14:30:00+00:00


In [None]:
# ========================================
# STEP 3: DEFINE TEST PERIOD (LAST 35 TRADING DAYS)
# ========================================
# Compute last 35 trading days common to 1d and 4h (matching your w2 style)
csv_1d = os.path.join(DATA_DIR, 'spy_1d.csv')
if not os.path.exists(csv_1d):
    raise FileNotFoundError(f"❌ {csv_1d} not found!")

df_1d = pd.read_csv(csv_1d)
# Timezone-aware UTC so the values are comparable with the UTC bounds built below.
df_1d['timestamp'] = pd.to_datetime(df_1d['timestamp'], utc=True)
df_1d = df_1d.sort_values('timestamp').reset_index(drop=True)

# Overlap window covered by BOTH timeframes
latest_start = max(df_1d['timestamp'].min(), df_4h['timestamp'].min())
earliest_end = min(df_1d['timestamp'].max(), df_4h['timestamp'].max())

# Calendar dates (UTC) that have at least one candle in each timeframe
one_d_dates = set(df_1d[(df_1d['timestamp'] >= latest_start) & (df_1d['timestamp'] <= earliest_end)]['timestamp'].dt.date.unique())
four_h_dates = set(df_4h[(df_4h['timestamp'] >= latest_start) & (df_4h['timestamp'] <= earliest_end)]['timestamp'].dt.date.unique())
common_dates = sorted(list(one_d_dates & four_h_dates))

TEST_DAYS = 35
if len(common_dates) == 0:
    raise RuntimeError("❌ No common trading days between 1d and 4h in overlap window")

selected_days = common_dates[-min(TEST_DAYS, len(common_dates)) :]

# Window bounds: midnight of the first day through the last microsecond of the
# last day (UTC). Do NOT derive the times from pd.Timestamp.min/.max: those are
# the representable datetime limits, whose time-of-day parts are
# 00:12:43.145224 and 23:47:16.854775, not the start/end of a day. Using them
# would push early candles of the first test day into the training set and
# drop late candles of the last test day.
test_start = pd.Timestamp(selected_days[0]).tz_localize('UTC')
test_end = (
    pd.Timestamp(selected_days[-1]).tz_localize('UTC')
    + pd.Timedelta(days=1)
    - pd.Timedelta(microseconds=1)
)

print(f"\n📅 Total overlapping trading days: {len(common_dates)}")
print(f"🎯 Test period: {test_start.date()} to {test_end.date()} ({len(selected_days)} days)")



📅 Total overlapping trading days: 1532
🎯 Test period: 2024-12-17 to 2025-02-07 (35 days)


In [None]:
# ========================================
# STEP 4: FEATURE EXTRACTION (MATCHING W2)
# ========================================
def parse_vector_column(vector_str):
    """Convert a stored vector cell into a numpy array.

    Accepts either the stringified form found in the CSV (e.g. "[1.0, 2.0]",
    optionally wrapped in double quotes) or an already-materialised sequence
    (list / tuple / ndarray / Series).

    Returns:
        np.ndarray with the parsed values, or None when the cell is missing
        (None / NaN) or the string contains a token that is not a float.
    """
    if vector_str is None:
        return None
    if isinstance(vector_str, str):
        # Drop surrounding brackets/quotes, then parse comma-separated floats
        cleaned = vector_str.strip('[]"')
        try:
            return np.array([float(token.strip()) for token in cleaned.split(',')])
        except ValueError:
            return None
    # Sequences must be handled before pd.isna(): on a list/array pd.isna()
    # returns an element-wise array whose truth value is ambiguous and raises.
    if isinstance(vector_str, (list, tuple, np.ndarray, pd.Series)):
        return np.array(vector_str)
    if pd.isna(vector_str):
        return None
    return np.array(vector_str)

# Timeframes in the order used for the tf_* one-hot columns
TIMEFRAMES_ORDERED = ['1d', '4h']

# Column names of the 16-value feature vector, in the exact order in which
# build_feature_vector_4h emits them.
FEATURE_NAMES = (
    ['raw_o', 'raw_h', 'raw_l', 'raw_c', 'raw_v']
    + ['iso_0', 'iso_1', 'iso_2', 'iso_3']
    + ['tf_1d', 'tf_4h']
    + ['hl_range', 'price_change', 'upper_shadow', 'lower_shadow', 'volume_m']
)

def build_feature_vector_4h(raw_ohlcv, iso_ohlc):
    """Assemble the model input for a single 4h candle.

    Layout: the five raw OHLCV values, the iso_ohlc values, the timeframe
    one-hot (tf_1d=0, tf_4h=1), then five engineered values
    (hl_range, price_change, upper_shadow, lower_shadow, volume_m).
    A ratio whose denominator is zero is emitted as 0.

    Args:
        raw_ohlcv: sequence of exactly five values (open, high, low, close, volume).
        iso_ohlc: sequence of values appended verbatim after the raw block.

    Returns:
        1-D numpy array of dtype float.
    """
    open_px, high_px, low_px, close_px, volume = raw_ohlcv

    # The three close-relative ratios share one zero guard
    if close_px != 0:
        hl_range = (high_px - low_px) / close_px
        upper_shadow = (high_px - close_px) / close_px
        lower_shadow = (close_px - low_px) / close_px
    else:
        hl_range = upper_shadow = lower_shadow = 0

    price_change = (close_px - open_px) / open_px if open_px != 0 else 0

    return np.array(
        [
            *raw_ohlcv,            # 5 raw values
            *iso_ohlc,             # iso block
            0, 1,                  # one-hot for ['1d', '4h'] -> this is 4h
            hl_range,
            price_change,
            upper_shadow,
            lower_shadow,
            volume / 1_000_000,    # volume in millions
        ],
        dtype=float,
    )

def extract_features_4h_only(row):
    """Turn one 4h data row into a (feature_vector, label) pair.

    Reads 'raw_ohlcv_vec', 'iso_ohlc' and 'future' from the row via .get().
    Returns (None, None) when either vector is missing/unparseable, the label
    is missing, or the vectors do not have 5 and 4 entries respectively.
    Otherwise returns the vector from build_feature_vector_4h and the label
    cast to int.
    """
    ohlcv = parse_vector_column(row.get('raw_ohlcv_vec'))
    iso = parse_vector_column(row.get('iso_ohlc'))
    label = row.get('future')

    incomplete = ohlcv is None or iso is None or pd.isna(label)
    if incomplete or len(ohlcv) != 5 or len(iso) != 4:
        return None, None

    return build_feature_vector_4h(ohlcv, iso), int(label)


In [None]:
# ========================================
# STEP 5: EXTRACT TRAIN/TEST FEATURES
# ========================================
print("\n🔄 Extracting features from 4h data...")

# Chronological split: everything before the test window trains,
# the window itself (both ends inclusive) is held out.
candle_ts = df_4h['timestamp']
train_df = df_4h.loc[candle_ts < test_start].copy()
test_df = df_4h.loc[(candle_ts >= test_start) & (candle_ts <= test_end)].copy()

print(f"📊 Train samples: {len(train_df):,}")
print(f"📊 Test samples: {len(test_df):,}")

# Training set: run the extractor over every row, keep only rows that yielded a vector
train_pairs = [
    extract_features_4h_only(candle)
    for _, candle in tqdm(train_df.iterrows(), total=len(train_df), desc="Extracting train features")
]
usable_train = [(vec, target) for vec, target in train_pairs if vec is not None]

X_train = np.array([vec for vec, _ in usable_train])
y_train = np.array([target for _, target in usable_train])
print(f"\n✅ Training features extracted: {X_train.shape}")
print(f"📊 Class distribution: {np.bincount(y_train)}")

# Test set: same extraction, but also retain the raw fields of every kept row
# so the later report can show them next to each prediction.
X_test, y_test, test_timestamps, test_rows_info = [], [], [], []

for _, candle in tqdm(test_df.iterrows(), total=len(test_df), desc="Extracting test features"):
    vec, target = extract_features_4h_only(candle)
    if vec is None:
        continue
    stamp = candle['timestamp']
    X_test.append(vec)
    y_test.append(target)
    test_timestamps.append(stamp)
    test_rows_info.append({
        'timestamp': stamp,
        'raw_ohlcv': parse_vector_column(candle['raw_ohlcv_vec']),
        'iso_ohlc': parse_vector_column(candle['iso_ohlc']),
        'future': int(candle['future']),
        'feature_vector': vec
    })

X_test = np.array(X_test)
y_test = np.array(y_test)

print(f"📊 Test features extracted: {X_test.shape}")



🔄 Extracting features from 4h data...
📊 Train samples: 2,988
📊 Test samples: 69


Extracting train features: 100%|██████████| 2988/2988 [00:00<00:00, 15524.57it/s]



✅ Training features extracted: (2988, 16)
📊 Class distribution: [1363 1625]


Extracting test features: 100%|██████████| 69/69 [00:00<00:00, 10138.27it/s]

📊 Test features extracted: (69, 16)





In [None]:
# ========================================
# STEP 6: SCALE AND SPLIT (MATCHING W2)
# ========================================
# First 80% of the (chronologically ordered) training rows form the fit slice,
# the remaining 20% the validation slice.
split_idx = int(len(X_train) * 0.8)
print(f"\n🔧 Fitting scaler on first {split_idx:,} training samples...")

# The scaler only ever sees the fit slice, so validation rows do not
# influence the scaling statistics.
scaler = StandardScaler().fit(X_train[:split_idx])
X_train_scaled = scaler.transform(X_train)

X_tr, X_val = X_train_scaled[:split_idx], X_train_scaled[split_idx:]
y_tr, y_val = y_train[:split_idx], y_train[split_idx:]

print(f"📊 Training set: {X_tr.shape}")
print(f"📊 Validation set: {X_val.shape}")



🔧 Fitting scaler on first 2,390 training samples...
📊 Training set: (2390, 16)
📊 Validation set: (598, 16)


In [None]:
# ========================================
# STEP 7: TRAIN LIGHTGBM_FINANCIAL (EXACT PARAMS) + CALIBRATION + REFIT
# ========================================
print("\n🚀 Training LightGBM_Financial with exact params...")

lgb_params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'num_leaves': 60,
    'max_depth': 6,
    'learning_rate': 0.1,
    'n_estimators': 300,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'min_child_samples': 20,
    'subsample': 0.8,
    'colsample_bytree': 0.85,
    'subsample_freq': 2,
    'feature_fraction_bynode': 0.9,
    'extra_trees': True,
    'class_weight': 'balanced',
    'random_state': 42,
    'verbose': -1
}

# Optional GPU acceleration if on Colab
if IS_COLAB:
    lgb_params['device_type'] = 'gpu'
    lgb_params['gpu_device_id'] = 0
    print("✅ GPU acceleration enabled")

# Fit on train slice only (no early stopping)
model = lgb.LGBMClassifier(**lgb_params)
print("🔄 Training LightGBM (no early stopping - full n_estimators)...")
try:
    model.fit(X_tr, y_tr)
except lgb.basic.LightGBMError as gpu_err:
    # Being on Colab does not guarantee a usable GPU (CPU-only runtime, or a
    # LightGBM build without GPU support). In that case drop the GPU settings
    # and retry on CPU; lgb_params is updated in place so the refit below and
    # the saved configs describe the device that was actually used.
    if lgb_params.get('device_type') != 'gpu':
        raise
    print(f"⚠️ GPU training failed ({gpu_err}); retrying on CPU")
    lgb_params.pop('device_type', None)
    lgb_params.pop('gpu_device_id', None)
    model = lgb.LGBMClassifier(**lgb_params)
    model.fit(X_tr, y_tr)

# Validate
val_pred = model.predict(X_val)
val_pred_proba = model.predict_proba(X_val)[:, 1]
val_acc = accuracy_score(y_val, val_pred)
val_auc = roc_auc_score(y_val, val_pred_proba)
print(f"\n✅ Validation Accuracy (t=0.50): {val_acc:.4f}")
print(f"✅ Validation AUC: {val_auc:.4f}")

# Threshold calibration on validation: scan 0.30..0.70 in steps of 0.01 and
# keep the threshold with the highest validation accuracy. The default 0.50
# is the baseline and is only replaced by a strictly better candidate.
thresholds = np.linspace(0.3, 0.7, 41)
best_thr = 0.5
best_val_acc = val_acc

for thr in thresholds:
    preds_thr = (val_pred_proba >= thr).astype(int)
    acc_thr = accuracy_score(y_val, preds_thr)
    if acc_thr > best_val_acc:
        best_val_acc = acc_thr
        best_thr = float(thr)

print(f"✅ Calibrated decision threshold on validation: {best_thr:.2f} (Acc={best_val_acc:.4f})")

# Refit on all in-sample (train + val) with the same parameters;
# this refit model is the one used for testing and saved for deployment.
X_full = X_train_scaled
y_full = y_train
model_full = lgb.LGBMClassifier(**lgb_params)
model_full.fit(X_full, y_full)



🚀 Training LightGBM_Financial with exact params...
✅ GPU acceleration enabled
🔄 Training LightGBM (no early stopping - full n_estimators)...

✅ Validation Accuracy (t=0.50): 0.4749
✅ Validation AUC: 0.5119
✅ Calibrated decision threshold on validation: 0.30 (Acc=0.5502)


In [None]:
# ========================================
# STEP 8: TEST + DETAILED DAY-BY-DAY / PRED-BY-PRED ANALYSIS
# ========================================
print(f"\n🧪 Testing on isolated {len(selected_days)}-day period...")

# Scale test data with the SAME scaler
X_test_scaled = scaler.transform(X_test)

# Predict using refit model; labels come from the calibrated threshold
test_pred_proba = model_full.predict_proba(X_test_scaled)[:, 1]
test_pred = (test_pred_proba >= best_thr).astype(int)

# Metrics
test_acc = accuracy_score(y_test, test_pred)
test_auc = roc_auc_score(y_test, test_pred_proba)

print(f"\n🎯 TEST RESULTS:")
print(f"✅ Test Accuracy: {test_acc:.4f}")
print(f"✅ Test AUC: {test_auc:.4f}")
print(f"📊 Test predictions: {np.bincount(test_pred)}")
print(f"📊 Actual labels: {np.bincount(y_test)}")

# Build a detailed per-prediction table (DataFrame).
# test_rows_info is index-aligned with X_test, so position i in the
# prediction arrays belongs to test_rows_info[i].
records = []
for i, info in enumerate(test_rows_info):
    ts = info['timestamp']
    fv = info['feature_vector']
    raw = info['raw_ohlcv']
    iso = info['iso_ohlc']
    true = info['future']
    proba = float(test_pred_proba[i])
    pred  = int(test_pred[i])
    correct = bool(pred == true)
    margin = proba - best_thr  # signed distance from the decision threshold

    rec = {
        'candle_index_in_test': i + 1,
        'timestamp_utc': ts,
        'date_utc': ts.date(),
        'pred_prob_up': proba,
        'pred_label': int(pred),     # 1 = up, 0 = down (per your 'future' field)
        'true_label': int(true),
        'correct': correct,
        'threshold_used': best_thr,
        'decision_margin': margin,
        # Raw OHLCV & ISO for context
        'raw_o': raw[0], 'raw_h': raw[1], 'raw_l': raw[2], 'raw_c': raw[3], 'raw_v': raw[4],
        'iso_0': iso[0], 'iso_1': iso[1], 'iso_2': iso[2], 'iso_3': iso[3],
        # Engineered features re-extracted from the feature vector (positions per FEATURE_NAMES)
        'tf_1d': fv[FEATURE_NAMES.index('tf_1d')],
        'tf_4h': fv[FEATURE_NAMES.index('tf_4h')],
        'hl_range': fv[FEATURE_NAMES.index('hl_range')],
        'price_change': fv[FEATURE_NAMES.index('price_change')],
        'upper_shadow': fv[FEATURE_NAMES.index('upper_shadow')],
        'lower_shadow': fv[FEATURE_NAMES.index('lower_shadow')],
        'volume_m': fv[FEATURE_NAMES.index('volume_m')],
    }
    records.append(rec)

pred_df = pd.DataFrame.from_records(records).sort_values(['date_utc','timestamp_utc']).reset_index(drop=True)

# Save a machine-friendly CSV for your site
pred_csv_path = os.path.join(MODEL_DIR, 'test_predictions.csv')
pred_df.to_csv(pred_csv_path, index=False)

# Build a human-readable TXT report grouped by day
txt_lines = []
txt_lines.append("="*90)
txt_lines.append("LIGHTGBM 4H — DETAILED DAY-BY-DAY / PREDICTION-BY-PREDICTION REPORT")
txt_lines.append("="*90)
txt_lines.append(f"Test period: {test_start.date()} → {test_end.date()}")
txt_lines.append(f"Total test candles: {len(pred_df)}")
txt_lines.append(f"Calibrated threshold: {best_thr:.2f}")
txt_lines.append(f"Overall Test Accuracy: {test_acc:.4f}")
txt_lines.append(f"Overall Test AUC: {test_auc:.4f}")
txt_lines.append("")

for day in pred_df['date_utc'].unique():
    day_block = pred_df[pred_df['date_utc'] == day]
    correct_n = int(day_block['correct'].sum())
    total_n   = len(day_block)
    txt_lines.append("-"*90)
    txt_lines.append(f"{day}  —  Day accuracy: {correct_n}/{total_n}  ({correct_n/total_n:.3f})")
    txt_lines.append("-"*90)
    for _, r in day_block.iterrows():
        dir_word = "UP" if r['pred_label'] == 1 else "DOWN"
        truth_word = "UP" if r['true_label'] == 1 else "DOWN"
        right_wrong = "✅ CORRECT" if r['correct'] else "❌ WRONG"
        txt_lines.append(
            f"[{int(r['candle_index_in_test']):02d}] {r['timestamp_utc']}  "
            f"pred={dir_word}  p_up={r['pred_prob_up']:.4f}  thr={r['threshold_used']:.2f}  "
            f"margin={r['decision_margin']:.4f}  truth={truth_word}  → {right_wrong}"
        )
        # Include the contextual stats under each prediction
        txt_lines.append(
            f"    OHLCV: O={r['raw_o']:.4f}, H={r['raw_h']:.4f}, L={r['raw_l']:.4f}, C={r['raw_c']:.4f}, V={r['raw_v']:.0f} | "
            f"ISO: [{r['iso_0']:.4f}, {r['iso_1']:.4f}, {r['iso_2']:.4f}, {r['iso_3']:.4f}] | "
            f"feats: hl={r['hl_range']:.4f}, dC={r['price_change']:.4f}, upSh={r['upper_shadow']:.4f}, "
            f"loSh={r['lower_shadow']:.4f}, vol_m={r['volume_m']:.4f}"
        )
    txt_lines.append("")  # blank line between days

report_path = os.path.join(MODEL_DIR, 'lgbm_4h_day_by_day.txt')
# The report contains non-ASCII characters (✅, ❌, →, —). Pin UTF-8 so the
# write does not depend on the platform's locale encoding (e.g. cp1252 on
# Windows cannot encode the emoji and would raise UnicodeEncodeError).
with open(report_path, 'w', encoding='utf-8') as f:
    f.write("\n".join(txt_lines))

print(f"\n📝 Saved detailed TXT report to: {report_path}")
print(f"🧾 Saved machine-readable predictions to: {pred_csv_path}")



🧪 Testing on isolated 35-day period...

🎯 TEST RESULTS:
✅ Test Accuracy: 0.6377
✅ Test AUC: 0.6474
📊 Test predictions: [ 6 63]
📊 Actual labels: [29 40]

📝 Saved detailed TXT report to: /content/drive/MyDrive/daygent_v1_models/lgbm_4h/lgbm_4h_day_by_day.txt
🧾 Saved machine-readable predictions to: /content/drive/MyDrive/daygent_v1_models/lgbm_4h/test_predictions.csv


In [None]:
# ========================================
# STEP 9: SAVE MODEL AND RESULTS
# ========================================
print("\n💾 Saving model and results...")

# Persist the refit model (trained on train + validation) ...
model_path = os.path.join(MODEL_DIR, 'lightgbm_financial_4h_only.joblib')
joblib.dump(model_full, model_path)

# ... and the scaler it expects its inputs to be transformed with
scaler_path = os.path.join(MODEL_DIR, 'scaler_4h_only.joblib')
joblib.dump(scaler, scaler_path)

def _to_py(v):
    try:
        if isinstance(v, (np.integer, np.int64, np.int32)):
            return int(v)
        if isinstance(v, (np.floating,)):
            return float(v)
        return v
    except Exception:
        return v

import json

# Run summary stored next to the artifacts. Artifact entries are file names
# only (no directories), i.e. relative to MODEL_DIR.
test_period_label = f"{test_start.date()} to {test_end.date()}"
serialisable_params = {name: _to_py(value) for name, value in lgb_params.items()}

results = {
    'test_accuracy': float(test_acc),
    'test_auc': float(test_auc),
    'validation_accuracy': float(best_val_acc),
    'validation_auc': float(val_auc),
    'train_samples': int(len(X_tr)),
    'val_samples': int(len(X_val)),
    'test_samples': int(len(X_test)),
    'feature_count': int(X_train.shape[1]),
    'chosen_threshold': float(best_thr),
    'model_params': serialisable_params,
    'feature_names': FEATURE_NAMES,
    'report_txt': os.path.basename(report_path),
    'predictions_csv': os.path.basename(pred_csv_path),
    'model_path': os.path.basename(model_path),
    'scaler_path': os.path.basename(scaler_path),
    'test_period': test_period_label
}

results_json_path = os.path.join(MODEL_DIR, 'results_4h_only.json')
with open(results_json_path, 'w') as results_file:
    results_file.write(json.dumps(results, indent=2))

print(f"✅ Model saved to: {model_path}")
print(f"✅ Scaler saved to: {scaler_path}")
print("✅ Results JSON saved as: results_4h_only.json")



💾 Saving model and results...
✅ Model saved to: /content/drive/MyDrive/daygent_v1_models/lgbm_4h/lightgbm_financial_4h_only.joblib
✅ Scaler saved to: /content/drive/MyDrive/daygent_v1_models/lgbm_4h/scaler_4h_only.joblib
✅ Results JSON saved as: results_4h_only.json


In [None]:
# ========================================
# STEP 10: SAVE DEPLOYMENT ARTIFACTS (for your site)
# ========================================
import json
from textwrap import dedent

# Machine-readable description of the trained model and how to feed it.
# Artifact paths are file names relative to MODEL_DIR.
deployment_config = {
    "model_type": "LightGBMClassifier",
    "timeframe": "4h",
    "feature_contract_version": "v1",
    "feature_names": FEATURE_NAMES,
    "calibrated_threshold": float(best_thr),
    "artifact_paths": {
        "model_joblib": "lightgbm_financial_4h_only.joblib",
        "scaler_joblib": "scaler_4h_only.joblib"
    },
    "inference_notes": {
        "scaling": "StandardScaler fitted on first 80% of pre-test training data",
        "one_hot": {"tf_1d": 0, "tf_4h": 1},
        "expected_columns_in_csv": ["timestamp", "raw_ohlcv_vec", "iso_ohlc", "future"]
    },
    "lgbm_params": {k: _to_py(v) for k, v in lgb_params.items()}
}

config_path = os.path.join(MODEL_DIR, "deployment_config.json")
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(deployment_config, f, indent=2)

# Description of the raw input columns and the engineered feature formulas
feature_schema = {
    "raw_ohlcv_vec": {
        "desc": "Stringified list of [open, high, low, close, volume]",
        "len": 5,
        "dtype": "float"
    },
    "iso_ohlc": {
        "desc": "Stringified list of 4 ISO-normalized OHLC values",
        "len": 4,
        "dtype": "float"
    },
    "engineered": [
        "hl_range=(H-L)/C",
        "price_change=(C-O)/O",
        "upper_shadow=(H-C)/C",
        "lower_shadow=(C-L)/C",
        "volume_m=V/1e6"
    ],
    "tf_one_hot": {"tf_1d": 0, "tf_4h": 1}
}

schema_path = os.path.join(MODEL_DIR, "feature_schema.json")
with open(schema_path, "w", encoding="utf-8") as f:
    json.dump(feature_schema, f, indent=2)

# Human-readable deployment notes. The f-string is interpolated first and
# dedented afterwards, so interpolated values must stay single-line.
readme_text = dedent(f"""
    ============================================
    LightGBM 4H Inference — Deployment Notes
    ============================================

    Artifacts:
    - Model:       {os.path.basename(model_path)}
    - Scaler:      {os.path.basename(scaler_path)}
    - Config:      {os.path.basename(config_path)}
    - Feature schema: feature_schema.json
    - Threshold:   {best_thr:.2f}
    - Predictions: test_predictions.csv
    - Report:      lgbm_4h_day_by_day.txt

    Feature order (must match EXACTLY):
    {FEATURE_NAMES}

    Inference pipeline for your site:
    1) Parse raw input row:
       - Parse 'raw_ohlcv_vec' -> [o,h,l,c,v]
       - Parse 'iso_ohlc'      -> [iso_0..iso_3]
       - Add one-hot: tf_1d=0, tf_4h=1
       - Compute engineered features as in feature_schema.json
       - Concatenate into a single 16-length vector in the listed order.

    2) Load scaler with joblib and call scaler.transform([vector]).
    3) Load model with joblib and call model.predict_proba(scaled)[0,1].
    4) If prob >= {best_thr:.2f} => predict UP (1); else DOWN (0).

    Notes:
    - This model was trained with class_weight='balanced'.
    - Scaler was fit on the first 80% of pre-test (4h) training data.
    - Keep feature order and scaling identical for consistent results.
""").strip()

readme_path = os.path.join(MODEL_DIR, "README_DEPLOY.txt")
# The README contains a non-ASCII dash ("—"). Pin UTF-8 so the bytes on disk
# are the same on every platform instead of following the locale encoding.
with open(readme_path, "w", encoding="utf-8") as f:
    f.write(readme_text)

print("📦 Deployment artifacts saved:")
print(" -", config_path)
print(" -", schema_path)
print(" -", readme_path)


📦 Deployment artifacts saved:
 - /content/drive/MyDrive/daygent_v1_models/lgbm_4h/deployment_config.json
 - /content/drive/MyDrive/daygent_v1_models/lgbm_4h/feature_schema.json
 - /content/drive/MyDrive/daygent_v1_models/lgbm_4h/README_DEPLOY.txt


In [None]:
# ========================================
# FINAL SUMMARY
# ========================================
# Assemble the whole banner first, then emit it with a single print call.
banner_rule = "=" * 70
summary_lines = [
    "\n" + banner_rule,
    "🏆 LIGHTGBM_FINANCIAL 4H-ONLY — COMPLETE",
    banner_rule,
    f" • Model dir:    {MODEL_DIR}",
    f" • Test window:  {test_start.date()} → {test_end.date()}",
    f" • Test candles: {len(X_test)}",
    f" • Test Acc/AUC: {test_acc:.4f} / {test_auc:.4f}",
    f" • Threshold:    {best_thr:.2f}",
    " • Saved files:  lightgbm_financial_4h_only.joblib, scaler_4h_only.joblib,",
    "                 deployment_config.json, feature_schema.json, README_DEPLOY.txt,",
    "                 test_predictions.csv, lgbm_4h_day_by_day.txt, results_4h_only.json",
    banner_rule,
]
print("\n".join(summary_lines))



🏆 LIGHTGBM_FINANCIAL 4H-ONLY — COMPLETE
 • Model dir:    /content/drive/MyDrive/daygent_v1_models/lgbm_4h
 • Test window:  2024-12-17 → 2025-02-07
 • Test candles: 69
 • Test Acc/AUC: 0.6377 / 0.6474
 • Threshold:    0.30
 • Saved files:  lightgbm_financial_4h_only.joblib, scaler_4h_only.joblib,
                 deployment_config.json, feature_schema.json, README_DEPLOY.txt,
                 test_predictions.csv, lgbm_4h_day_by_day.txt, results_4h_only.json
