# Algorithmic Trading Pipeline
# Complete Workflow: Data → Model → Strategy → Backtest → Analysis

This notebook demonstrates the full pipeline for:
1. Loading/creating data catalog
2. Model hyperparameter optimization
3. Strategy hyperparameter optimization
4. Final backtest execution
5. Performance analysis and visualization

---
## Stage 0: Setup & Configuration
Load dependencies and configuration

In [None]:
# Debugging aid: automatically reload edited modules before each cell runs
%load_ext autoreload
%autoreload 2

# Core imports
from pathlib import Path
import yaml
import logging
import pandas as pd
import mlflow
from mlflow import MlflowClient
import tqdm as notebook_tqdm
from datetime import datetime
from IPython.display import HTML, display

# Nautilus Trader
from nautilus_trader.model.objects import Currency
from nautilus_trader.core.nautilus_pyo3 import CurrencyType
from nautilus_trader.persistence.catalog import ParquetDataCatalog
from nautilus_trader.model.data import TradeTick

# Project modules
from engine.logging_config import setup_logging_and_mlflow
from engine.ModelGenerator import ModelGenerator
from engine.databento_loader import DatabentoTickLoader
from engine.hparam_tuner import OptunaHparamsTuner


In [None]:
# Timestamped optimization ID shared by every stage of this run (opt_YYYYMMDD_HHMMSS)
optid = "opt_" + datetime.now().strftime("%Y%m%d_%H%M%S")

In [None]:
# Read the top-level Sapiens configuration (YAML) from disk
sapiens_config_path = Path("configs/sapiens_config.yaml")
with sapiens_config_path.open(encoding="utf-8") as sapiens_config_file:
    sapiens_config = yaml.safe_load(sapiens_config_file)

# Configure logging and MLflow for this optimization run; the two values
# returned by the helper are kept as logs_dir and runs_dir for later cells
logs_dir, runs_dir = setup_logging_and_mlflow(
    log_dir=sapiens_config["logs_dir"],
    run_dir=sapiens_config["runs_dir"],
    level=sapiens_config["logs_level"],
    optimization_id=optid,
)

# Notebook-wide logger and MLflow client used by the following stages
logger = logging.getLogger("run_backtest")
client = MlflowClient()


In [None]:
# Model selection: the model name comes from the SAPIENS_MODEL config section
model_section = sapiens_config["SAPIENS_MODEL"]
model_name = model_section["model_name"]

# Optionally generate a new model via DeepCode before loading its config
if model_section["generate_model"]:
    banner = "=" * 70
    logger.info(banner)
    logger.info("MODEL GENERATION VIA DEEPCODE")
    logger.info(banner)

    gen_cfg = model_section["generation"]
    generator = ModelGenerator(gen_cfg)
    model_dir = generator.generate_model(
        source_type=gen_cfg["source_type"],
        source_path=gen_cfg["source_path"],
        model_name=model_name,
    )

    print(f"Model generated: {model_dir}")
    print("Review the generated code before continuing!")

# Load the MODEL section of the selected model's YAML config
model_config_path = Path(f"models/{model_name}/model_config.yaml")
with model_config_path.open(encoding="utf-8") as model_config_file:
    model_config = yaml.safe_load(model_config_file)["MODEL"]


In [None]:
# Load the STRATEGY section of the config for the strategy named in the Sapiens config
strategy_name = sapiens_config["SAPIENS_STRATEGY"]["strategy_name"]
strategy_config_path = Path(f"strategies/{strategy_name}/strategy_config.yaml")
with strategy_config_path.open(encoding="utf-8") as strategy_config_file:
    strategy_config = yaml.safe_load(strategy_config_file)["STRATEGY"]


In [None]:
# Summarize the loaded configuration: model, strategy and backtest window
print(
    "Configuration loaded successfully",
    f"Model: {model_config['PARAMS']['model_name']}",
    f"Strategy: {strategy_config['PARAMS']['strategy_name']}",
    f"Backtest period: {sapiens_config['backtest_start']} to {sapiens_config['backtest_end']}",
    sep="\n",
)

---
## Stage 1: Data Catalog and Hyperparameter Tuner Setup
Load or create the Nautilus Trader data catalog from Databento tick data, then initialize the hyperparameter tuner

In [None]:
# Catalog options
FORCE_RELOAD_CATALOG = False  # True rebuilds the catalog even if one already exists
CATALOG_PATH = None  # custom catalog location; None falls back to the loader's path

# Build the Databento loader from the strategy parameters
logger.info("Initializing Databento loader...")
strategy_params = strategy_config["PARAMS"]
loader = DatabentoTickLoader(
    cfg=strategy_params,
    venue_name=strategy_params["venue_name"],
    data_dir=sapiens_config["data_dir"],
    # universe_subset=["TSLA"],  # NVIDA undone
)

# A custom path wins when set; otherwise use the loader's own catalog path
if CATALOG_PATH:
    catalog_path = Path(CATALOG_PATH)
else:
    catalog_path = loader.catalog_path

# Rebuild when forced or when no catalog exists yet; otherwise reuse it.
# catalog_exists is only consulted when a rebuild is not forced.
needs_build = FORCE_RELOAD_CATALOG or not loader.catalog_exists(catalog_path)
if needs_build:
    logger.info(f"Loading Databento ticks to catalog at: {catalog_path}")
    if FORCE_RELOAD_CATALOG:
        logger.info("Force reload enabled - rebuilding catalog")
    catalog = loader.load_to_catalog(catalog_path=catalog_path)
else:
    logger.info(f" Reusing existing catalog at: {catalog_path}")
    catalog = ParquetDataCatalog(path=str(catalog_path))

# Record the catalog location in the strategy parameters
strategy_config["PARAMS"]["catalog_path"] = str(catalog_path)

# Verify the catalog by listing its instrument IDs.
# (Original note: catalog.instruments(instrument_type=TradeTick) takes too long
# on a laptop; the loader class's instruments property is the suggested alternative.)
instruments = {inst.id.value for inst in catalog.instruments()}
print(f"\nCatalog ready: {catalog.list_data_types()} data loaded")
print(f"Universe: {list(map(str, instruments))}")


In [None]:
# Create the Optuna-based tuner shared by the model and strategy stages
tuner = OptunaHparamsTuner(
    sapiens_config=sapiens_config,
    catalog=catalog,
    model_config=model_config,
    strategy_config=strategy_config,
    run_dir=runs_dir,
    log_dir=logs_dir,
    optimization_id=optid,
)

print("Hyper-parameter tuner initialized")

# Report the configured trial count per stage, or DISABLED when tuning is off
for stage_label, section_key in (("Model", "SAPIENS_MODEL"), ("Strategy", "SAPIENS_STRATEGY")):
    optimization_cfg = sapiens_config[section_key]["optimization"]
    trials_text = optimization_cfg["n_trials"] if optimization_cfg["tune_hparams"] else "DISABLED"
    print(f"{stage_label} trials: {trials_text}")

---
## Stage 2: Model Hyperparameter Optimization
Optimize model hyperparameters using Optuna

In [None]:
# Stage banner in the log
stage_rule = "=" * 70
logger.info("\n" + stage_rule)
logger.info("STAGE 2: MODEL HYPERPARAMETER OPTIMIZATION")
logger.info(stage_rule + "\n")

# Run the model hyperparameter search through the tuner
model_results = tuner.optimize_model()

# Report the outcome, then show the parameter-importance figure
print(
    "\nModel optimization complete!",
    f"Best model path: {model_results['model_path']}",
    f"MLflow run ID: {model_results['mlflow_run_id']}",
    sep="\n",
)
model_results["param_importance_fig"].show()

---
## Stage 3: Strategy Hyperparameter Optimization
Optimize strategy hyperparameters using best model from Stage 2

In [None]:
# Use to debugging
%load_ext autoreload
%autoreload 2


# Run strategy hyperparameter optimization
logger.info("="*70)
logger.info("STAGE 3: STRATEGY HYPERPARAMETER OPTIMIZATION")
logger.info("="*70 + "\n")

model_name = model_config['PARAMS']['model_name']
strategy_results = tuner.optimize_strategy(model_name=model_name)
strategy_results['param_importance_fig'].show()

print("\nStrategy optimization complete!")
print(f"Best hyperparameters: {strategy_results['hparams']}")
print(f"\nBest metrics:")
for metric, value in strategy_results['metrics'].items():
    print(f"  {metric}: {value:.4f}")
print(f"\nMLflow run ID: {strategy_results['mlflow_run_id']}")
print(f"\nInspect results at: {strategy_results["results_path"]}")

---
## Stage 4: Final Backtest
Run final backtest on full period with optimized hyperparameters

In [None]:
# NOTE(review): this cell is disabled. Its whole body is a single string
# literal, so none of the statements inside it are executed.
# Before re-enabling: `sapiens_cfg` is not assigned in any visible cell (the
# config is loaded as `sapiens_config`), so the two backtest_* lines would
# raise NameError as written.
"""
# Get optimization context
run = client.get_run(strategy_results["mlflow_run_id"])
optimization_id = run.data.tags.get("optimization_id", "")

# Define backtest period
backtest_start = sapiens_cfg["backtest_start"]
backtest_end = sapiens_cfg["backtest_end"]

print(f"Running final backtest: {backtest_start} to {backtest_end}")
"""

In [None]:
# NOTE(review): this cell is disabled. Its whole body is a single string
# literal, so the final backtest is never run by this notebook as written.
# The code inside reads backtest_start, backtest_end and optimization_id, which
# are only assigned inside the other disabled (string-literal) Stage 4 cell.
"""
# Execute final backtest
logger.info("\n" + "="*70)
logger.info("STAGE 4: FINAL BACKTEST")
logger.info("="*70 + "\n")

results_path, final_metrics = tuner.run_final_backtest(
    backtest_start=backtest_start,
    backtest_end=backtest_end,
    strategy_hpo_run_id=strategy_results["mlflow_run_id"],
    optimization_id=optimization_id
)

print("\nFinal backtest complete!")
print("\nFinal Performance Metrics:")
print("="*50)
for metric, value in sorted(final_metrics.items()):
    print(f"{metric:.<40} {value:>10.4f}")
"""    

---
## Stage 5: Performance Analysis
Detailed analysis and visualization of backtest results

In [None]:
# Load the HTML report logged by the strategy optimization run in MLflow
run = client.get_run(strategy_results['mlflow_run_id'])
run_id = run.info.run_id

# download_artifacts returns the local filesystem path of the downloaded
# artifact, not its markup, so despite its name results_html holds a path.
results_html = client.download_artifacts(run_id=run_id, path="results.html")

# Pass the path explicitly as a filename so the report file is always read;
# a missing file then raises instead of the path being rendered as HTML text.
display(HTML(filename=results_html))

---
## Comparison Matrix: All Models × Strategies

In [None]:
# Strategy HPO results matrix, using total_pnl_pct as the metric
hpo_matrix = tuner.get_strategy_hpo_matrix(metric="total_pnl_pct")
print("\nStrategy HPO Results Matrix (total_pnl_pct):", hpo_matrix, sep="\n")

# Final backtest results matrix, using sharpe_ratio as the metric
backtest_matrix = tuner.get_final_backtest_matrix(metric="sharpe_ratio")
print("\nFinal Backtest Results Matrix (sharpe_ratio):", backtest_matrix, sep="\n")

---
## Pipeline Complete 

**Next Steps:**
- Review MLflow UI: `mlflow ui --backend-store-uri file:./logs/mlflow --host 0.0.0.0 --port 5000`
- Explore experiment tracking and compare runs
- Adjust general settings in `configs/sapiens_config.yaml` and rerun
- Export results for production deployment