In [None]:
"""
Test LightningAutoML with Time Optimization Features
=====================================================
Comprehensive testing with automatic time budget calculation
"""

import numpy as np
import pandas as pd
from sklearn.datasets import (
    load_iris, load_wine, load_breast_cancer,  # Classification
    load_diabetes, fetch_california_housing,    # Regression
    make_blobs, make_moons                      # Clustering
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, silhouette_score
import time
import warnings
warnings.filterwarnings('ignore')

# Import your AutoML module
from new import LightningAutoML, calculate_optimal_time_budget, create_automl_with_auto_budget


def test_classification_with_time_modes():
    """Compare the 'basic', 'medium' and 'full' budget modes on one dataset.

    The breast-cancer data is split 80/20 once.  For each mode an AutoML
    object is built with ``create_automl_with_auto_budget`` and fitted; the
    best model's name and score, the wall-clock time (construction plus
    fit) and the number of entries in ``automl.results`` are recorded and
    finally printed as a per-mode table.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🧪 TEST 1: Classification with Time Optimization Modes")
    print(rule)

    # One fixed split so every mode sees exactly the same data.
    features, target = load_breast_cancer(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    results = {}
    for mode in ('basic', 'medium', 'full'):
        mode_label = mode.upper()
        print(f"\n{rule}")
        print(f"Testing Mode: {mode_label}")
        print(f"{rule}")

        # The timer deliberately spans both construction and fitting.
        started = time.time()
        automl = create_automl_with_auto_budget(
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            ml_type='supervised',
            method='classification',
            mode=mode
        )
        automl.fit()
        elapsed = time.time() - started

        summary = {
            'best_model': automl.best_model.model_name,
            'best_score': automl.best_model.score,
            'total_time': elapsed,
            'models_trained': len(automl.results)
        }
        results[mode] = summary

        print(f"\n✅ {mode_label} Mode Complete:")
        print(f"   Best Model: {summary['best_model']}")
        print(f"   Best Score: {summary['best_score']:.4f}")
        print(f"   Total Time: {summary['total_time']:.2f}s")

    # Modes become rows and the recorded metrics become columns.
    print(f"\n{rule}")
    print("📊 Mode Comparison Summary")
    print(f"{rule}")
    print(pd.DataFrame(results).transpose().to_string())
    print()


def test_regression_with_custom_budget():
    """Fit a regression AutoML run whose budget is computed explicitly.

    The diabetes data is split 80/20, ``calculate_optimal_time_budget`` is
    called in 'medium' mode to obtain a time budget and trial count, and
    those values are passed straight to ``LightningAutoML``.  The summary,
    the share of the budget that the fit consumed, and the R² of the
    predictions on the held-out split are printed.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🧪 TEST 2: Regression with Custom Time Budget")
    print(rule)

    data, target = load_diabetes(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=0.2, random_state=42
    )

    # Budget and trial count are derived from the training matrix.
    time_budget, n_trials = calculate_optimal_time_budget(X_train, mode='medium')

    n_samples, n_features = X_train.shape
    print("\n📊 Dataset Info:")
    print(f"   Training samples: {n_samples}")
    print(f"   Features: {n_features}")
    print(f"   Calculated time budget: {time_budget}s")
    print(f"   Trials per model: {n_trials}")

    settings = dict(
        ml_type='supervised',
        method='regression',
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        time_budget=time_budget,
        n_trials=n_trials,
        cv_folds=3,
        verbose=True,
    )
    automl = LightningAutoML(**settings)

    # Only the fit itself is timed here.
    fit_started = time.time()
    automl.fit()
    total_time = time.time() - fit_started

    automl.print_summary()

    budget_pct = (total_time / time_budget) * 100
    print(f"\n⏱️  Total Execution Time: {total_time:.2f}s")
    print(f"   Budget Used: {budget_pct:.1f}%")

    # Score the fitted AutoML object on the held-out split.
    test_r2 = r2_score(y_test, automl.predict(X_test))

    print("\n📈 Test Set Performance:")
    print(f"   R² Score: {test_r2:.4f}")
    print(f"   Best Model: {automl.best_model.model_name}")


def test_clustering_fast_mode():
    """Run a clustering AutoML search in 'basic' mode on synthetic blobs.

    Generates 500 five-feature samples around 4 centers, splits them 80/20,
    obtains a time budget and trial count from
    ``calculate_optimal_time_budget(..., mode='basic')``, fits an
    unsupervised ``LightningAutoML`` run, and prints the summary, the fit
    time and the distribution of labels predicted for the held-out split.
    """
    print("\n" + "="*80)
    print("🧪 TEST 3: Clustering with Fast Optimization")
    print("="*80)
    
    # Generate synthetic data (the true blob labels are not needed)
    X, _ = make_blobs(n_samples=500, centers=4, n_features=5, random_state=42)
    X_train, X_test = train_test_split(X, test_size=0.2, random_state=42)
    
    # Use basic mode for quick testing
    time_budget, n_trials = calculate_optimal_time_budget(X_train, mode='basic')
    
    print(f"\n⚡ Fast Mode Configuration:")
    print(f"   Time Budget: {time_budget}s")
    print(f"   Trials: {n_trials}")
    
    automl = LightningAutoML(
        ml_type='unsupervised',
        method='clustering',
        X_train=X_train,
        X_test=X_test,
        time_budget=time_budget,
        n_trials=n_trials,
        verbose=True
    )
    
    # Fit models
    start_time = time.time()
    automl.fit()
    total_time = time.time() - start_time
    
    # Results
    automl.print_summary()
    
    print(f"\n⏱️  Completed in: {total_time:.2f}s")
    
    # Cluster predictions
    labels = automl.predict(X_test)

    # np.bincount rejects negative values with ValueError, and some
    # clustering estimators mark noise points with the label -1.
    # NOTE(review): whether LightningAutoML can select such an estimator is
    # not visible here -- np.unique is safe either way and also avoids
    # reporting empty bins for label ids that never occur.
    cluster_ids, cluster_sizes = np.unique(labels, return_counts=True)
    distribution = dict(zip(cluster_ids.tolist(), cluster_sizes.tolist()))

    print(f"\n🎯 Clustering Results:")
    print(f"   Unique clusters found: {len(cluster_ids)}")
    print(f"   Cluster distribution: {distribution}")


def test_large_dataset_optimization():
    """Exercise the time-budget calculation on the California housing data.

    The dataset is imported and fetched inside a ``try`` block; if that
    step fails for any reason the test prints a "Skipping" notice and
    returns.  Otherwise, for the 'basic' and 'medium' modes, a budget is
    computed with ``calculate_optimal_time_budget``, a regression
    ``LightningAutoML`` run is fitted, and the best score, elapsed time and
    number of result entries are printed.

    Errors raised after the dataset has been loaded are NOT swallowed:
    they propagate to the caller so that a broken training run is not
    misreported as a skipped test.
    """
    print("\n" + "="*80)
    print("🧪 TEST 4: Large Dataset Time Optimization")
    print("="*80)
    
    # Load larger dataset.  Only the import and fetch are guarded, so the
    # "Skipping" message is reserved for the dataset being unavailable.
    try:
        from sklearn.datasets import fetch_california_housing
        X, y = fetch_california_housing(return_X_y=True)
    except Exception as e:
        print(f"⚠️  Skipping large dataset test: {e}")
        return

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    print(f"\n📊 Large Dataset:")
    print(f"   Training samples: {X_train.shape[0]}")
    print(f"   Features: {X_train.shape[1]}")
    print(f"   Total data points: {X_train.shape[0] * X_train.shape[1]:,}")
    
    # Only the 'basic' and 'medium' modes are exercised on this dataset
    for mode in ['basic', 'medium']:
        print(f"\n{'='*80}")
        print(f"Testing {mode.upper()} mode on large dataset")
        print(f"{'='*80}")
        
        time_budget, n_trials = calculate_optimal_time_budget(X_train, mode=mode)
        
        print(f"⚙️  Configuration:")
        print(f"   Time Budget: {time_budget}s")
        print(f"   Trials: {n_trials}")
        
        automl = LightningAutoML(
            ml_type='supervised',
            method='regression',
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            time_budget=time_budget,
            n_trials=n_trials,
            cv_folds=3,
            verbose=True
        )
        
        # Time the fit only, not the budget calculation or construction
        start_time = time.time()
        automl.fit()
        total_time = time.time() - start_time
        
        print(f"\n✅ {mode.upper()} Mode Results:")
        print(f"   Best Score: {automl.best_model.score:.4f}")
        print(f"   Time Taken: {total_time:.2f}s")
        print(f"   Models Tried: {len(automl.results)}")


def test_model_persistence():
    """Round-trip the best model through disk and compare predictions.

    Trains a short classification run on the iris data, writes the best
    model, all models and the results file under ``test_models/``, loads
    the best model into a second ``LightningAutoML`` instance, and prints
    both accuracies on the held-out split together with whether the two
    prediction arrays are equal.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🧪 TEST 5: Model Persistence")
    print(rule)

    iris_X, iris_y = load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(
        iris_X, iris_y, test_size=0.2, random_state=42
    )

    # Arguments shared by the trained instance and the one used for loading.
    common_kwargs = dict(
        ml_type='supervised',
        method='classification',
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
        verbose=False,
    )
    best_model_path = "test_models/iris_best.pkl"

    # Small explicit budget keeps the training step short.
    automl = LightningAutoML(time_budget=20, n_trials=5, **common_kwargs)

    print("🔧 Training model...")
    automl.fit()

    print("\n💾 Saving models...")
    automl.save_best_model(best_model_path)
    automl.save_all_models("test_models/iris_all/")
    automl.export_results("test_models/iris_results.json")

    # The second instance is never fitted; it only receives the saved model.
    print("\n📂 Loading model...")
    automl_loaded = LightningAutoML(**common_kwargs)
    automl_loaded.load_model(best_model_path)

    original_pred = automl.predict(X_test)
    loaded_pred = automl_loaded.predict(X_test)

    print("\n✅ Verification:")
    original_acc = accuracy_score(y_test, original_pred)
    print(f"   Original accuracy: {original_acc:.4f}")
    loaded_acc = accuracy_score(y_test, loaded_pred)
    print(f"   Loaded accuracy: {loaded_acc:.4f}")
    same_predictions = np.array_equal(original_pred, loaded_pred)
    print(f"   Predictions match: {same_predictions}")


def test_different_datasets():
    """Run 'basic'-mode classification on three datasets and tabulate them.

    Iris, Wine and Breast Cancer are each split 80/20 and fitted through
    ``create_automl_with_auto_budget`` in 'basic' mode.  One row per
    dataset (training shape, best model name, best score, fit time) is
    collected and printed as a table.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("🧪 TEST 6: Multiple Datasets Comparison")
    print(rule)

    # All three datasets are loaded up front, before any fitting starts.
    named_bunches = (
        ('Iris', load_iris()),
        ('Wine', load_wine()),
        ('Breast Cancer', load_breast_cancer()),
    )

    results_summary = []
    for dataset_name, bunch in named_bunches:
        print(f"\n{rule}")
        print(f"Testing: {dataset_name}")
        print(f"{rule}")

        X_train, X_test, y_train, y_test = train_test_split(
            bunch.data, bunch.target, test_size=0.2, random_state=42
        )

        # 'basic' mode keeps each run quick.
        automl = create_automl_with_auto_budget(
            X_train=X_train,
            X_test=X_test,
            y_train=y_train,
            y_test=y_test,
            ml_type='supervised',
            method='classification',
            mode='basic'
        )

        # Only the fit is timed; construction happens before the clock starts.
        fit_started = time.time()
        automl.fit()
        fit_seconds = time.time() - fit_started

        row = {
            'Dataset': dataset_name,
            'Samples': X_train.shape[0],
            'Features': X_train.shape[1],
            'Best Model': automl.best_model.model_name,
            'Accuracy': automl.best_model.score,
            'Time (s)': fit_seconds
        }
        results_summary.append(row)

    # One row per dataset, printed without the index column.
    print(f"\n{rule}")
    print("📊 Dataset Comparison Summary")
    print(f"{rule}")
    print(pd.DataFrame(results_summary).to_string(index=False))
    print()


def run_all_tests():
    """Run the six test functions in order and report the total time.

    The first exception raised by any test stops the run; its message and
    traceback are printed and the remaining tests are not executed.  The
    exception is not re-raised.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("⚡ LIGHTNING AutoML - Comprehensive Testing Suite")
    print(rule)

    suite_started = time.time()

    try:
        # Order matters only for the printed "TEST n" numbering.
        for run_case in (
            test_classification_with_time_modes,
            test_regression_with_custom_budget,
            test_clustering_fast_mode,
            test_large_dataset_optimization,
            test_model_persistence,
            test_different_datasets,
        ):
            run_case()

        suite_seconds = time.time() - suite_started

        print("\n" + rule)
        print("✅ ALL TESTS COMPLETED SUCCESSFULLY!")
        print(rule)
        print(f"Total Testing Time: {suite_seconds:.2f}s")
        print(rule + "\n")

    except Exception as e:
        # Report the failure and return normally instead of propagating.
        print(f"\n❌ Test failed with error: {e}")
        import traceback
        traceback.print_exc()


In [None]:
# Execute the full test suite defined above when this cell is run.
run_all_tests()