# In [1]:
"""
Complete pipeline orchestrator
Runs all steps in sequence: prepare -> select -> tune -> train
"""

import time
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

from config import OUTPUT_DIR
import prepare_data
import feature_selection
import hyperparameter_tuning
import train_final_model


def print_header(title):
    """Print *title* as a banner framed by two 70-character '=' rules.

    A blank line is emitted before the top rule, and the title is indented
    by two spaces.
    """
    rule = "=" * 70
    # The leading empty string yields the blank line before the top rule.
    print("", rule, f"  {title}", rule, sep="\n")


def print_step(step_num, title):
    """Print a numbered step heading framed by two 70-character '-' rules.

    The heading reads ``STEP <step_num>: <title>`` and is preceded by a
    blank line.
    """
    divider = "-" * 70
    heading = f"STEP {step_num}: {title}"
    # Joining with a leading empty entry reproduces the blank line on top.
    print("\n".join(("", divider, heading, divider)))


def _run_step(step_num, title, step_fn):
    """Run one pipeline stage and report its duration.

    Prints the step heading via ``print_step``, calls ``step_fn`` with no
    arguments, then prints the elapsed time in minutes. Any exception raised
    by ``step_fn`` propagates to the caller unchanged.
    """
    print_step(step_num, title)
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or go negative) if the system clock is adjusted while the step runs.
    step_start = time.perf_counter()

    step_fn()

    step_time = time.perf_counter() - step_start
    print(f"\n✓ Step {step_num} completed in {step_time/60:.1f} minutes")


def main():
    """Run the complete XGBoost pipeline.

    Calls, in order, ``prepare_data.main``, ``feature_selection.main``,
    ``hyperparameter_tuning.main`` and ``train_final_model.main``, timing
    each stage, then prints the total runtime and a summary of output files.

    The stages are invoked without arguments and their return values are
    ignored. An exception in any stage aborts the remaining stages.
    """
    start_time = time.perf_counter()

    print_header("XGBOOST COMPLETE PIPELINE")
    print(f"Output Directory: {OUTPUT_DIR}")
    print(f"Start Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

    # Ordered stage table: position in the tuple determines the step number.
    steps = (
        ("DATA PREPARATION", prepare_data.main),
        ("FEATURE SELECTION", feature_selection.main),
        ("HYPERPARAMETER TUNING", hyperparameter_tuning.main),
        ("FINAL MODEL TRAINING", train_final_model.main),
    )
    for step_num, (title, step_fn) in enumerate(steps, start=1):
        _run_step(step_num, title, step_fn)

    # ========================================================================
    # PIPELINE COMPLETE
    # ========================================================================
    total_time = time.perf_counter() - start_time

    print_header("PIPELINE COMPLETE!")
    print(f"Total Runtime: {total_time/60:.1f} minutes ({total_time/3600:.2f} hours)")
    print(f"End Time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"\nAll outputs saved to: {OUTPUT_DIR}")
    # NOTE: this is a fixed summary list; it is not checked against the
    # actual contents of OUTPUT_DIR.
    print("\nGenerated files:")
    print("  - processed_features.pkl (prepared data)")
    print("  - selected_features.pkl (feature list)")
    print("  - scaler.pkl (normalization)")
    print("  - xgb_best_model.json (trained model)")
    print("  - xgb_training_history.json (full history)")
    print("  - xgb_test_predictions.csv (predictions for agent)")
    print("  - feature_importance.csv (importance scores)")
    print("\n" + "="*70)


# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

# Cell output: ModuleNotFoundError: No module named 'config'
# (the `config` module was not importable when this cell was run)