In [None]:
# Wine Quality Model Training
# This notebook trains multiple models and logs them to MLflow

import sys
sys.path.append('..')

import mlflow
from src.data_loader import load_wine_data, prepare_classification_data, split_data
from src.model_trainer import (
    train_random_forest, 
    train_xgboost, 
    train_lightgbm, 
    train_catboost
)

# Select the MLflow experiment for this notebook. This runs before any
# trainer is called; whether the trainers log into the active experiment
# is decided in src.model_trainer (not visible here).
mlflow.set_experiment("wine_quality_comparison")

# Banner. The emoji was previously mojibake (UTF-8 bytes decoded with the
# wrong codec); restored to the intended character.
print("🍷 Wine Quality Model Training Pipeline")
print("=" * 60)

# Step 1: load the raw data, derive the classification target, and split it
# into train / validation / test partitions.
print("\n1️⃣ Loading and preparing data...")
data = load_wine_data()
# NOTE(review): quality_threshold=7 presumably marks wines at or above 7 as
# the positive class -- confirm the exact rule in src.data_loader.
data = prepare_classification_data(data, quality_threshold=7)
# split_data returns six objects in the order unpacked here.
X_train, X_val, X_test, y_train, y_val, y_test = split_data(data)
print(f"✅ Data ready: {len(X_train)} train, {len(X_val)} val, {len(X_test)} test")

# Step 2: train each candidate model. Every trainer is called with the same
# four positional arguments and returns a (model, metrics) pair; the
# per-model names are kept as globals so later cells can use them.
#
# NOTE(review): X_val / y_val from the split above are never passed to a
# trainer in this cell; the third and fourth arguments are the *test* split.
# If the metrics returned here are used to choose between models, the test
# set is no longer an unbiased hold-out. Consider evaluating on the
# validation split and reserving the test split for the final model --
# confirm against the trainer signatures in src.model_trainer first.
print("\n2️⃣ Training models and logging to MLflow...")
print("-" * 60)

# Model 1: Random Forest
print("\n🌲 Training Random Forest...")
rf_model, rf_metrics = train_random_forest(X_train, y_train, X_test, y_test)

# Model 2: XGBoost
print("\n🚀 Training XGBoost...")
xgb_model, xgb_metrics = train_xgboost(X_train, y_train, X_test, y_test)

# Model 3: LightGBM
print("\n💡 Training LightGBM...")
lgb_model, lgb_metrics = train_lightgbm(X_train, y_train, X_test, y_test)

# Model 4: CatBoost
print("\n🐱 Training CatBoost...")
cat_model, cat_metrics = train_catboost(X_train, y_train, X_test, y_test)

# Step 3: print a side-by-side AUC summary and point to the follow-up steps.
print("\n" + "=" * 60)
print("✅ All models trained successfully!")
print("\n📊 Quick Comparison:")

# Each metrics object is indexed by the 'auc' key. Headings are left-padded
# to the width of the longest one ("Random Forest:") so the AUC column
# lines up exactly as in the original hand-aligned output.
summary_rows = (
    ("Random Forest", rf_metrics),
    ("XGBoost", xgb_metrics),
    ("LightGBM", lgb_metrics),
    ("CatBoost", cat_metrics),
)
for model_label, model_metrics in summary_rows:
    heading = f"{model_label}:"
    print(f"   {heading:<14} AUC = {model_metrics['auc']:.4f}")

print("\n🎯 Next steps:")
print("   1. Open MLflow UI: mlflow ui --port=5001")
print("   2. View experiments at: http://localhost:5001")
print("   3. Run 03_model_comparison.ipynb for detailed comparison")