In [2]:
"""
Lightning AutoML - Complete Usage Examples
Using sklearn built-in datasets
"""

import numpy as np
import pandas as pd
from sklearn.datasets import (
    make_classification, 
    make_regression, 
    make_blobs,
    load_iris,
    load_diabetes,
    load_wine
)
from sklearn.model_selection import train_test_split

# Import Lightning AutoML
from automl import LightningAutoML, auto_classification, auto_regression, auto_clustering  

In [3]:
# =============================================================================
# EXAMPLE 1: Classification with Iris Dataset
# =============================================================================
# Banner: rule, title, rule -- one per output line.
print("=" * 70, "EXAMPLE 1: Classification - Iris Dataset", "=" * 70, sep="\n")

# Features and class labels from scikit-learn's bundled iris data
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 30% of the rows; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Configure a supervised classification run
automl_cls = LightningAutoML(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    ml_type="supervised",
    method="classification",
    n_trials=30,      # 30 trials per model
    time_budget=180,  # 3 minutes
    verbose=True,
)

# Run the model search
automl_cls.fit()

# predict() is called with no arguments
# (presumably it scores the X_test given above -- confirm against the library)
predictions = automl_cls.predict()
print("\nSample predictions: {}".format(predictions[:10]))

# Evaluate; the returned metrics are kept in `results`
results = automl_cls.evaluate()

# Show how the candidate models ranked
print("\nModel Leaderboard:")
print(automl_cls.get_leaderboard())

# Report the winning model and its hyperparameters
params = automl_cls.get_model_params()
print("\nBest Model: {}".format(params['model_name']))
print("Hyperparameters: {}".format(params['hyperparameters']))

# Persist the model under the name "iris_model", then release cached GPU memory
automl_cls.save_model("iris_model")
automl_cls.clear_gpu_cache()


# =============================================================================
# EXAMPLE 2: Regression with Diabetes Dataset
# =============================================================================
# Banner: blank line, rule, title, rule.
print("\n" + "=" * 70, "EXAMPLE 2: Regression - Diabetes Dataset", "=" * 70, sep="\n")

# Features and continuous target from scikit-learn's bundled diabetes data
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Hold out 30% of the rows; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# Configure a supervised regression run
automl_reg = LightningAutoML(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    ml_type="supervised",
    method="regression",
    n_trials=30,
    time_budget=180,
    verbose=True,
)

# Run the model search
automl_reg.fit()

# Print the first ten predictions next to the first ten held-out targets
predictions = automl_reg.predict()
print("\nSample predictions: {}".format(predictions[:10]))
print("Actual values: {}".format(y_test[:10]))

# Evaluate; the returned metrics are kept in `results`
results = automl_reg.evaluate()

# Show how the candidate models ranked
print("\nModel Leaderboard:")
print(automl_reg.get_leaderboard())

# Persist the model under the name "diabetes_model", then release cached GPU memory
automl_reg.save_model("diabetes_model")
automl_reg.clear_gpu_cache()


# =============================================================================
# EXAMPLE 3: Clustering with Synthetic Data
# =============================================================================
# Banner: blank line, rule, title, rule.
print("\n" + "=" * 70, "EXAMPLE 3: Clustering - Synthetic Blob Data", "=" * 70, sep="\n")

# 500 synthetic points in 10 dimensions drawn around 4 centers.
# `true_labels` is returned by make_blobs but is not used further in this example.
X, true_labels = make_blobs(n_samples=500, n_features=10, centers=4, random_state=42)

# Only the features are split -- no target is passed for clustering
X_train, X_test = train_test_split(X, random_state=42, test_size=0.3)

# Configure an unsupervised clustering run (no y_train / y_test supplied)
automl_cluster = LightningAutoML(
    X_train=X_train,
    X_test=X_test,
    ml_type="unsupervised",
    method="cluster",
    n_trials=20,
    time_budget=120,
    verbose=True,
)

# Run the model search
automl_cluster.fit()

# Cluster assignments: show the first twenty and the distinct label values
cluster_labels = automl_cluster.predict()
print("\nCluster assignments: {}".format(cluster_labels[:20]))
print("Unique clusters: {}".format(np.unique(cluster_labels)))

# Evaluate; the returned metrics are kept in `results`
results = automl_cluster.evaluate()

# Show how the candidate models ranked
print("\nModel Leaderboard:")
print(automl_cluster.get_leaderboard())

# Persist the model under the name "clustering_model", then release cached GPU memory
automl_cluster.save_model("clustering_model")
automl_cluster.clear_gpu_cache()


# =============================================================================
# EXAMPLE 4: Quick Classification with Wine Dataset (convenience function)
# =============================================================================
print("\n" + "="*70)
print("EXAMPLE 4: Quick Classification - Wine Dataset")
print("="*70)

# Load wine dataset
wine = load_wine()
X, y = wine.data, wine.target

# Split data (20% held out, fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Use convenience function instead of constructing LightningAutoML directly.
# NOTE: `automl` is reused by Example 5 below, so keep this name.
automl = auto_classification(
    X_train, X_test, y_train, y_test,
    time_budget=120,
    n_trials=20,
    verbose=True
)

# Evaluate
results = automl.evaluate()
print("\nFinal Results:")
for metric, value in results.items():
    # The ".4f" spec only works for real numbers. The exact contents of
    # `results` are not visible from this notebook, so any non-numeric entry
    # is printed as-is instead of aborting the cell with a formatting error.
    if isinstance(value, (int, float, np.integer, np.floating)):
        print(f"  {metric}: {value:.4f}")
    else:
        print(f"  {metric}: {value}")

# View top 3 models
leaderboard = automl.get_leaderboard()
print("\nTop 3 Models:")
print(leaderboard.head(3))


# =============================================================================
# EXAMPLE 5: Custom Prediction on New Data
# =============================================================================
# Banner: blank line, rule, title, rule.
print("\n" + "=" * 70, "EXAMPLE 5: Predict on New Custom Data", "=" * 70, sep="\n")

# Two hand-written samples with 13 values each, laid out like the wine features
new_data = np.array(
    [
        [13.2, 2.8, 2.4, 20, 104, 2.9, 2.8, 0.5, 1.8, 5.7, 1.0, 3.2, 1100],
        [12.8, 3.1, 2.3, 21, 101, 2.7, 2.6, 0.4, 1.9, 5.5, 0.9, 3.1, 1050],
    ]
)

# Score the new rows with `automl`, the object created in Example 4
predictions = automl.predict(new_data)
print("Predictions for new data: {}".format(predictions))


# =============================================================================
# EXAMPLE 6: Large Classification Dataset
# =============================================================================
# Banner: blank line, rule, title, rule.
print("\n" + "=" * 70, "EXAMPLE 6: Large Synthetic Classification Dataset", "=" * 70, sep="\n")

# 5000 synthetic samples, 3 classes; 15 informative + 5 redundant = 20 features
X, y = make_classification(
    n_samples=5000,
    n_classes=3,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# Hold out 20% of the rows; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Supervised classification run with a 240 s budget, 40 trials per model,
# and cv_folds lowered to 3 to keep cross-validation cheaper.
automl_large = LightningAutoML(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    ml_type="supervised",
    method="classification",
    n_trials=40,
    time_budget=240,  # 4 minutes
    cv_folds=3,       # Faster with fewer folds
    verbose=True,
)

# Search, then evaluate; the returned metrics are kept in `results`
automl_large.fit()
results = automl_large.evaluate()

# Summarise the winning model: name, score with its CV spread, and fit time
print("\nBest model info:")
params = automl_large.get_model_params()
print("  Model: {}".format(params['model_name']))
print(f"  Score: {params['score']:.4f} ± {params['cv_std']:.4f}")
print(f"  Training time: {params['training_time']:.2f}s")

# Release cached GPU memory
automl_large.clear_gpu_cache()


# =============================================================================
# EXAMPLE 7: DataFrame Input
# =============================================================================
print("\n" + "="*70)
print("EXAMPLE 7: Using Pandas DataFrame")
print("="*70)

# Load the dataset once and reuse it for both the feature columns and the
# target column, rather than calling load_iris() separately for each.
iris_data = load_iris()

# Create DataFrame with short, underscore-style column names
df = pd.DataFrame(
    iris_data.data,
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
)
df['target'] = iris_data.target

# Split: a seeded 70% sample for training, the remaining rows for testing
train_df = df.sample(frac=0.7, random_state=42)
test_df = df.drop(train_df.index)

# Separate features from the target column in each partition
X_train = train_df.drop('target', axis=1)
y_train = train_df['target']
X_test = test_df.drop('target', axis=1)
y_test = test_df['target']

# Train, passing the DataFrame / Series objects directly to LightningAutoML
automl_df = LightningAutoML(
    ml_type="supervised",
    method="classification",
    X_train=X_train,
    X_test=X_test,
    y_train=y_train,
    y_test=y_test,
    time_budget=60,
    n_trials=15,
    verbose=True
)

automl_df.fit()
results = automl_df.evaluate()

print("\nDataFrame example completed!")
# Reads the 'accuracy' entry of the evaluation results
print(f"Test accuracy: {results['accuracy']:.4f}")

# Release cached GPU memory
automl_df.clear_gpu_cache()


# Closing banner: blank line, rule, message, rule.
print("\n" + "=" * 70, "ALL EXAMPLES COMPLETED!", "=" * 70, sep="\n")

2025-10-07 08:30:41 - LightningAutoML - INFO - Using device: cuda (NVIDIA GeForce RTX 3050 Laptop GPU)
2025-10-07 08:30:41 - LightningAutoML - INFO - GPU Memory: 4.00 GB
2025-10-07 08:30:41 - LightningAutoML - INFO - Lightning AutoML v1.2 - Memory Optimized
2025-10-07 08:30:41 - LightningAutoML - INFO - ML Type: SUPERVISED | Method: CLASSIFICATION
2025-10-07 08:30:41 - LightningAutoML - INFO - Device: cuda
2025-10-07 08:30:41 - LightningAutoML - INFO - Training samples: 105 | Test samples: 45
2025-10-07 08:30:41 - LightningAutoML - INFO - Features: 4
2025-10-07 08:30:41 - LightningAutoML - INFO - GPU Memory: 4095.5MB free / 4095.5MB total
2025-10-07 08:30:41 - LightningAutoML - INFO - Models to evaluate: 7
2025-10-07 08:30:41 - LightningAutoML - INFO - Time per model: ~25.7s
2025-10-07 08:30:41 - LightningAutoML - INFO - Trials per model: 30
2025-10-07 08:30:41 - LightningAutoML - INFO - CV folds: 5
2025-10-07 08:30:41 - LightningAutoML - INFO - 
2025-10-07 08:30:41 - LightningAutoML -

EXAMPLE 1: Classification - Iris Dataset


2025-10-07 08:30:46 - lightning_ml.base_model - INFO - Created optimizer: sgd
2025-10-07 08:30:52 - LightningAutoML - ERROR - Error in evaluation: 'Sequential' object has no attribute 'weight'
[I 2025-10-07 08:30:52,353] Trial 0 finished with value: -1.0 and parameters: {'epochs': 437, 'lr': 0.07114476009343425, 'batch_size': 16, 'optimizer': 'sgd'}. Best is trial 0 with value: -1.0.
2025-10-07 08:30:52 - lightning_ml.base_model - INFO - Initialized LogisticRegression on device: cuda
2025-10-07 08:30:52 - lightning_ml.base_model - INFO - Neural model initialized: epochs=737, lr=0.00011527987128232407, batch_size=16, optimizer=rmsprop
2025-10-07 08:30:52 - lightning_ml.base_model - INFO - Created optimizer: rmsprop
2025-10-07 08:31:12 - LightningAutoML - ERROR - Error in evaluation: 'Sequential' object has no attribute 'weight'
[I 2025-10-07 08:31:12,975] Trial 1 finished with value: -1.0 and parameters: {'epochs': 737, 'lr': 0.00011527987128232407, 'batch_size': 16, 'optimizer': 'rmspr

KeyboardInterrupt: 