# 🧠 NeuroNautilus AI Trader - Ultimate Pipeline

**Version:** 2.4 (Validation Fix Edition)
**Updated:** January 2026

---

## 🚀 Overview
This notebook is the **gold-standard** pipeline for training NeuroNautilus.
It reads your training data directly from Google Drive and pulls the latest "Clean Architecture" code from the repository.

### ✨ Highlights
- **Smart Path Detection:** Auto-finds data in Drive (no more "File Not Found").
- **Auto-Recovery:** Fixes "ModuleNotFoundError" automatically.
- **Clean Output:** Saves models and logs directly to the `models/` and `logs/` folders of your Drive workspace.



## 1️⃣ Environment Setup


In [None]:
# @title 🔗 Connect to Google Drive (Running this is Mandatory!)
# Mounts Google Drive when running inside Colab and defines the workspace
# paths (WORKSPACE, DATA_DIR, MODELS_DIR, LOGS_DIR) used by the later cells.
from pathlib import Path
import os

# google.colab only exists inside Colab. Import it defensively so the cell
# still defines the workspace paths when the notebook is run locally.
try:
    from google.colab import drive
except ImportError:
    drive = None

# 1. Mount Drive
if os.path.exists('/content/drive'):
    print("✅ Drive already mounted")
elif drive is None:
    print("⚠️ Failed to mount drive. Ignore if local.")
else:
    try:
        drive.mount('/content/drive')
        print("✅ Google Drive Mounted")
    except Exception as e:
        # Best effort: keep going, but show why the mount failed instead of
        # hiding it (and never swallow KeyboardInterrupt/SystemExit).
        print(f"⚠️ Failed to mount drive. Ignore if local. ({e})")

# 2. Define Workspace (Where Data Lives)
WORKSPACE = Path('/content/drive/MyDrive/NeuroTrader_Workspace')
DATA_DIR = WORKSPACE / 'data'
MODELS_DIR = WORKSPACE / 'models'
LOGS_DIR = WORKSPACE / 'logs'

# Ensure the directories exist. A failure here (e.g. Drive not mounted) is
# reported but not fatal, so the remaining cells can still be inspected.
try:
    for d in [DATA_DIR, MODELS_DIR, LOGS_DIR]:
        d.mkdir(parents=True, exist_ok=True)
    print(f"\n📂 Workspace: {WORKSPACE}")
except OSError as e:
    print(f"⚠️ Could not create workspace directories under {WORKSPACE}: {e}")



In [None]:
# @title 🛠️ Install & Update Code (Auto-Fix)
# Force Install
!pip install -q stable-baselines3[extra] gymnasium pandas numpy ta
!pip install -q nautilus_trader

import sys
import os
import shutil

# --- AUTO-RECOVERY VS CLEANUP ---
REPO_PATH = '/content/NeuroTrader'

# If we are missing critical new structure, force re-clone
if os.path.exists(REPO_PATH):
    # Check if we have the new clean structure (e.g. no 'skills' folder)
    if os.path.exists(f'{REPO_PATH}/skills'): 
        print("⚠️ Legacy structure detected! Cleaning up...")
        shutil.rmtree(REPO_PATH)
        
    # Check if imports failed previously
    elif not os.path.exists(f'{REPO_PATH}/src/brain/data_discovery.py'):
        print("⚠️ Corrupt install detected! Re-cloning...")
        shutil.rmtree(REPO_PATH)

# Clone if missing
if not os.path.exists(REPO_PATH):
    print("⬇️ Cloning Repository...")
    !git clone https://github.com/MaDoHee33/NeuroTrader.git {REPO_PATH}
else:
    print("🔄 Updating Repository...")
    !cd {REPO_PATH} && git pull

# Add to Python Path
if REPO_PATH not in sys.path:
    sys.path.insert(0, REPO_PATH)

import warnings
warnings.filterwarnings('ignore')
print("✅ Code Update Complete!")



## 2️⃣ Smart Data Configuration


In [None]:
# @title 🛠️ Debug & Fix Import Paths (Run this if you see ModuleNotFoundError)
import sys
import os

# 1. Verify Repo Path
REPO_PATH = '/content/NeuroTrader'
if os.path.exists(REPO_PATH):
    print(f"✅ Repo found at: {REPO_PATH}")
    if REPO_PATH not in sys.path:
        sys.path.insert(0, REPO_PATH)
        print("✅ Added Repo to System Path")
else:
    print("❌ Repo NOT found! Please run the Setup cell above.")

# 2. Check Critical File
TARGET_FILE = f"{REPO_PATH}/src/brain/data_discovery.py"
if os.path.exists(TARGET_FILE):
    print(f"✅ Critical file found: {TARGET_FILE}")
else:
    print(f"❌ Custom module MISSING at: {TARGET_FILE}")
    print("   👉 NUCLEAR OPTION: Forcing Hard Reset...")
    
    # FORCE RESET (Fixes 'Already up to date' lie)
    !cd {REPO_PATH} && git fetch --all
    !cd {REPO_PATH} && git reset --hard origin/neuronautilus-v1
    !cd {REPO_PATH} && git pull
    
# 3. Test Import
try:
    from src.brain.data_discovery import auto_configure_training
    print("✅ Import SUCCESS! You can proceed.")
except ImportError as e:
    print(f"❌ Import FAILED: {e}")
    # Force fix for loose 'src' folders
    if os.path.exists('/content/src'):
        print("⚠️ Found loose 'src' folder in root. Adding /content to path...")
        sys.path.insert(0, '/content')



In [None]:
# @title 🤖 Auto-Discover Best Data
# Asks the repository's data-discovery helper for the bar type and the
# train/validation/test date ranges; if anything goes wrong, falls back to a
# fixed manual configuration so the later cells always find these names.
from src.brain.data_discovery import auto_configure_training
import pandas as pd
from datetime import datetime

try:
    # Absolute catalog location inside the Drive workspace.
    CATALOG_PATH = DATA_DIR / 'nautilus_catalog'
    print(f"📂 Reading Catalog from: {CATALOG_PATH}")

    # workspace=None because the full catalog path is supplied explicitly.
    config = auto_configure_training(
        catalog_path=str(CATALOG_PATH),
        workspace=None,
    )

    # Pull every setting out of the returned mapping in one step.
    (
        BAR_TYPE,
        TRAIN_START,
        TRAIN_END,
        VAL_START,
        VAL_END,
        TEST_START,
        TEST_END,
    ) = [
        config[key]
        for key in (
            'bar_type',
            'train_start',
            'train_end',
            'val_start',
            'val_end',
            'test_start',
            'test_end',
        )
    ]

    print(f"\n✅ Selected Bar Type: {BAR_TYPE}")
    print(f"📅 Training Range: {TRAIN_START} to {TRAIN_END}")

except Exception as e:
    print(f"⚠️ Error: {e}")
    print("👉 Falling back to MANUAL configuration...")

    # Manual defaults: bar type plus three back-to-back date windows, so the
    # downstream cells never hit a NameError.
    BAR_TYPE = "XAUUSD.SIM-15-MINUTE-LAST-EXTERNAL"
    TRAIN_START, TRAIN_END = pd.to_datetime("2020-01-01"), pd.to_datetime("2023-01-01")
    VAL_START, VAL_END = pd.to_datetime("2023-01-01"), pd.to_datetime("2023-06-01")
    TEST_START, TEST_END = pd.to_datetime("2023-06-01"), pd.to_datetime("2024-01-01")

    print(f"✅ Selected (Fallback): {BAR_TYPE}")
    print(f"📅 Manual Training Range: {TRAIN_START.date()} to {TRAIN_END.date()}")



## 3️⃣ Model Training


In [None]:
# @title 🧠 Train PPO Model (5M Steps)
# Loads the bars for BAR_TYPE, builds features, trains a PPO agent on the
# training window and saves the result into MODELS_DIR. Periodic checkpoints
# are written during training and removed again after a successful run.
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import CheckpointCallback
from src.brain.env.trading_env import TradingEnv
from src.brain.train import load_data, add_features
import time
import pandas as pd

# --- CONFIG ---
TOTAL_TIMESTEPS = 5_000_000
MODEL_NAME = 'ppo_neurotrader_v2'
# --------------

print(f"🚀 Starting Training: {MODEL_NAME}")

# 1. Load Data
df = load_data(str(DATA_DIR / 'nautilus_catalog'), BAR_TYPE)

# FIX: Ensure timestamps are index for slicing
if 'timestamp' in df.columns:
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df.set_index('timestamp')

df = add_features(df)
# NOTE(review): both bounds are inclusive, so a bar stamped exactly at
# TRAIN_END is also the first validation bar when VAL_START == TRAIN_END
# (as in the manual fallback dates) — confirm this overlap is acceptable.
train_df = df[(df.index >= TRAIN_START) & (df.index <= TRAIN_END)]
print(f"📊 Training Data: {len(train_df):,} bars")

# Fail early with an actionable message instead of an obscure error deep
# inside the environment or the learner.
if len(train_df) == 0:
    raise ValueError(
        f"No training data for {BAR_TYPE} between {TRAIN_START} and {TRAIN_END}. "
        "Check the catalog path and the configured date range."
    )

# 2. Setup Env
env = TradingEnv(train_df)
vec_env = DummyVecEnv([lambda: env])

# 3. Setup Model
model = PPO(
    "MlpPolicy",
    vec_env,
    learning_rate=3e-4,
    n_steps=2048,
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,
    ent_coef=0.01,
    verbose=0,
    tensorboard_log=str(LOGS_DIR)
)

# 4. Train with Auto-Cleanup
checkpoint_callback = CheckpointCallback(
    save_freq=1_000_000,
    save_path=str(MODELS_DIR / 'checkpoints'),
    name_prefix='ppo_v2',
    verbose=1
)

start_time = time.time()
try:
    model.learn(total_timesteps=TOTAL_TIMESTEPS, callback=checkpoint_callback, progress_bar=True)

    # Save Final
    final_path = MODELS_DIR / f"{MODEL_NAME}.zip"
    model.save(str(final_path))
    print(f"\n💾 Model Saved: {final_path}")

    # Cleanup: the final model is saved, so the intermediate checkpoints of
    # this prefix are no longer needed.
    print("🧹 Cleaning up old checkpoints...")
    for f in (MODELS_DIR / 'checkpoints').glob('ppo_v2_*.zip'):
        f.unlink()
    print("✨ Cleanup Done")

except KeyboardInterrupt:
    # Manual stop: keep what has been learned so far (checkpoints are kept too).
    print("⚠️ Interrupted! Saving...")
    model.save(str(MODELS_DIR / f"{MODEL_NAME}_stopped.zip"))

finally:
    # Report the wall-clock duration for completed and interrupted runs alike.
    elapsed_minutes = (time.time() - start_time) / 60
    print(f"⏱️ Training time: {elapsed_minutes:.1f} min")



## 4️⃣ Validation & Test


In [None]:
# @title 📉 Smart Backtest & Validation
# Picks the model to evaluate, backtests it on the validation and test
# windows, plots the test equity curve and applies a Sharpe-ratio gate.
from src.brain.model_discovery import find_best_model
from src.neuro_nautilus.runner import simple_backtest, analyze_results
import matplotlib.pyplot as plt
import pandas as pd

# 1. Get Model
best_model = find_best_model(workspace=WORKSPACE)
if not best_model:
    print("⚠️ No model found, using most recent file in models dir")
    # Fallback: newest *.zip (by modification time) directly in MODELS_DIR.
    best_model = max(
        MODELS_DIR.glob('*.zip'),
        key=lambda p: p.stat().st_mtime,
        default=None,
    )

if best_model is None:
    print(f"❌ No model files found in {MODELS_DIR}. Train a model first.")
else:
    print(f"🏆 Best Model: {best_model.name}")

    # 2. Validation Run
    print(f"\n📊 Validating ({VAL_START} to {VAL_END})...")
    res = simple_backtest(
        data_path=str(DATA_DIR / 'nautilus_catalog'),
        model_path=str(best_model),
        bar_type=BAR_TYPE,
        start_date=VAL_START,
        end_date=VAL_END
    )

    m = analyze_results(res)
    print(f"   Sharpe: {m['sharpe_ratio']:.2f} | Return: {m['total_return']:.2%}")

    # 3. Test Run
    print(f"\n🧪 Testing ({TEST_START} to {TEST_END})...")
    test_res = simple_backtest(
        data_path=str(DATA_DIR / 'nautilus_catalog'),
        model_path=str(best_model),
        bar_type=BAR_TYPE,
        start_date=TEST_START,
        end_date=TEST_END
    )

    tm = analyze_results(test_res)
    print(f"   Sharpe: {tm['sharpe_ratio']:.2f} | Return: {tm['total_return']:.2%}")

    # 4. Plot Test (only when the backtest result carries an equity curve)
    if 'equity_curve' in test_res:
        plt.figure(figsize=(10,4))
        plt.plot(pd.to_datetime(test_res['equity_curve']['timestamp']), test_res['equity_curve']['balance'])
        plt.title(f"Test Results (Sharpe: {tm['sharpe_ratio']:.2f})")
        plt.grid(True, alpha=0.3)
        plt.show()

    # Gate: the decision is based on the test-window Sharpe ratio only.
    if tm['sharpe_ratio'] > 0.5:
        print("\n✅ PASSED: Ready for Paper Trading")
    else:
        print("\n❌ FAILED: Needs Retraining")

