# LightningCLI and Config-First Experiments

**File Location:** `notebooks/01_lightning_fundamentals/03_lightningcli_config_runs.ipynb`

## Introduction

This notebook introduces LightningCLI for config-driven experiments. You will learn how to structure experiments with YAML configuration files, which makes research reproducible and lets you manage hyperparameters without changing code.

## Basic LightningCLI Setup

### Creating CLI-Compatible Components

```python
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader, TensorDataset
from pytorch_lightning.cli import LightningCLI
import yaml
from pathlib import Path
import tempfile

class ConfigurableModel(pl.LightningModule):
    """One-hidden-layer MLP classifier configured entirely through its constructor.

    All constructor arguments are type-annotated, have defaults, and are
    stored via ``save_hyperparameters()`` so they are readable from
    ``self.hparams``.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the hidden linear layer.
        num_classes: Number of output logits.
        learning_rate: Learning rate handed to the optimizer.
        optimizer: Either ``"adam"`` or ``"sgd"``.
        dropout: Dropout probability applied after the hidden activation.
    """

    def __init__(
        self,
        input_size: int = 784,
        hidden_size: int = 128,
        num_classes: int = 10,
        learning_rate: float = 1e-3,
        optimizer: str = "adam",
        dropout: float = 0.2,
    ):
        super().__init__()
        # Store every constructor argument under self.hparams.
        self.save_hyperparameters()

        layers = [
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size, num_classes),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced by the MLP for ``x``."""
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, prog_bar=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Log cross-entropy loss and accuracy for one validation batch."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)

        # Accuracy: fraction of samples whose arg-max logit equals the target.
        predictions = logits.argmax(dim=1)
        acc = (predictions == targets).float().mean()

        self.log('val_loss', loss, prog_bar=True)
        self.log('val_acc', acc, prog_bar=True)
        return loss

    def configure_optimizers(self):
        """Build the optimizer named by ``hparams.optimizer``.

        Raises:
            ValueError: If the optimizer name is neither "adam" nor "sgd".
        """
        name = self.hparams.optimizer
        lr = self.hparams.learning_rate

        if name == "adam":
            return Adam(self.parameters(), lr=lr)
        if name == "sgd":
            return SGD(self.parameters(), lr=lr, momentum=0.9)
        raise ValueError(f"Unknown optimizer: {name}")

class ConfigurableDataModule(pl.LightningDataModule):
    """Synthetic classification data: random features with random integer labels.

    Args:
        num_samples: Number of training samples; validation uses a fifth of this.
        input_size: Number of features per sample.
        num_classes: Labels are drawn from ``[0, num_classes)``.
        batch_size: Batch size for both dataloaders.
        num_workers: Worker processes for both dataloaders.
    """

    def __init__(
        self,
        num_samples: int = 1000,
        input_size: int = 784,
        num_classes: int = 10,
        batch_size: int = 32,
        num_workers: int = 0,
    ):
        super().__init__()
        # Store every constructor argument under self.hparams.
        self.save_hyperparameters()

    def setup(self, stage=None):
        """Generate the train/validation tensors for the "fit" stage (or no stage)."""
        torch.manual_seed(42)  # For reproducibility

        if not (stage == "fit" or stage is None):
            return

        hp = self.hparams
        n_train = hp.num_samples
        n_val = n_train // 5

        # Training data
        self.train_x = torch.randn(n_train, hp.input_size)
        self.train_y = torch.randint(0, hp.num_classes, (n_train,))

        # Validation data
        self.val_x = torch.randn(n_val, hp.input_size)
        self.val_y = torch.randint(0, hp.num_classes, (n_val,))

    def _make_loader(self, features, labels, shuffle):
        """Wrap a feature/label tensor pair in a DataLoader using the stored settings."""
        return DataLoader(
            TensorDataset(features, labels),
            batch_size=self.hparams.batch_size,
            shuffle=shuffle,
            num_workers=self.hparams.num_workers,
        )

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return self._make_loader(self.train_x, self.train_y, shuffle=True)

    def val_dataloader(self):
        """Return the unshuffled validation dataloader."""
        return self._make_loader(self.val_x, self.val_y, shuffle=False)

# Confirmation that the cell defining both classes ran to completion
print("✓ CLI-compatible model and datamodule created")
```

### Creating Configuration Files

```python
# Directory that receives every YAML config written by this notebook
import os
config_dir = Path("temp_configs")
config_dir.mkdir(exist_ok=True)

# Baseline experiment: model, data and trainer sections
basic_config = dict(
    model=dict(
        class_path="__main__.ConfigurableModel",
        init_args=dict(
            input_size=784,
            hidden_size=128,
            num_classes=10,
            learning_rate=0.001,
            optimizer="adam",
            dropout=0.2,
        ),
    ),
    data=dict(
        class_path="__main__.ConfigurableDataModule",
        init_args=dict(
            num_samples=1000,
            input_size=784,
            num_classes=10,
            batch_size=32,
            num_workers=0,
        ),
    ),
    trainer=dict(
        max_epochs=3,
        enable_checkpointing=False,
        logger=False,
        enable_progress_bar=True,
    ),
)

# Serialize as block-style YAML and write it to disk
(config_dir / "basic_config.yaml").write_text(
    yaml.dump(basic_config, default_flow_style=False)
)

print("✓ Basic configuration created")
print(f"Config saved to: {config_dir / 'basic_config.yaml'}")

# Echo the file back so the generated YAML can be inspected
with open(config_dir / "basic_config.yaml", "r") as f:
    print("\n--- Basic Config Content ---")
    print(f.read())
```

### High Performance Configuration

```python
# Variant with a wider model, more data, larger batches and gradient clipping
high_perf_config = dict(
    model=dict(
        class_path="__main__.ConfigurableModel",
        init_args=dict(
            input_size=784,
            hidden_size=256,  # Larger model
            num_classes=10,
            learning_rate=0.003,  # Higher learning rate
            optimizer="adam",
            dropout=0.1,  # Less dropout
        ),
    ),
    data=dict(
        class_path="__main__.ConfigurableDataModule",
        init_args=dict(
            num_samples=2000,  # More data
            input_size=784,
            num_classes=10,
            batch_size=64,  # Larger batches
            num_workers=2,
        ),
    ),
    trainer=dict(
        max_epochs=5,
        enable_checkpointing=False,
        logger=False,
        gradient_clip_val=1.0,  # Gradient clipping
        enable_progress_bar=True,
    ),
)

# Serialize as block-style YAML and write it to disk
(config_dir / "high_perf_config.yaml").write_text(
    yaml.dump(high_perf_config, default_flow_style=False)
)

print("✓ High performance configuration created")
```

### Debugging Configuration

```python
# Small, fast variant intended for smoke-testing the pipeline
debug_config = dict(
    model=dict(
        class_path="__main__.ConfigurableModel",
        init_args=dict(
            input_size=784,
            hidden_size=64,  # Smaller for faster debugging
            num_classes=10,
            learning_rate=0.01,  # Higher LR for faster convergence
            optimizer="sgd",
            dropout=0.0,  # No dropout for debugging
        ),
    ),
    data=dict(
        class_path="__main__.ConfigurableDataModule",
        init_args=dict(
            num_samples=500,  # Less data
            input_size=784,
            num_classes=10,
            batch_size=16,  # Smaller batches
            num_workers=0,
        ),
    ),
    trainer=dict(
        fast_dev_run=True,  # Debug mode
        enable_checkpointing=False,
        logger=False,
        detect_anomaly=True,
    ),
)

# Serialize as block-style YAML and write it to disk
(config_dir / "debug_config.yaml").write_text(
    yaml.dump(debug_config, default_flow_style=False)
)

print("✓ Debug configuration created")
```

## Running with Configurations

### Manual CLI Usage (Programmatic)

```python
# Function to run training with config
def run_with_config(config_path):
    """Fit ``ConfigurableModel`` on ``ConfigurableDataModule`` from a YAML config.

    Args:
        config_path: Path (``str`` or ``Path``) to a YAML file with ``model``,
            ``data`` and ``trainer`` sections.

    Returns:
        ``True`` when the run finished without error, ``False`` otherwise
        (the failure reason is printed).
    """
    # With run=True the CLI expects a subcommand, so "fit" must come first.
    # Passing the list through `args=` keeps sys.argv (which holds the
    # kernel's own arguments inside a notebook) untouched.
    cli_args = ["fit", "--config", str(config_path)]

    print(f"Running with config: {config_path}")
    try:
        LightningCLI(
            model_class=ConfigurableModel,
            datamodule_class=ConfigurableDataModule,
            # The config files describe model and data in the
            # class_path / init_args layout, i.e. the subclass-mode format.
            subclass_mode_model=True,
            subclass_mode_data=True,
            run=True,  # Run training immediately
            save_config_callback=None,  # Don't save config
            args=cli_args,
        )
    except SystemExit as exc:
        # The argument parser reports invalid arguments/config by exiting,
        # which `except Exception` would not intercept.
        print(f"✗ Training failed: invalid CLI arguments or config (exit code {exc.code})")
        return False
    except Exception as exc:
        # Notebook-level boundary: report the failure instead of aborting the cell.
        print(f"✗ Training failed: {exc}")
        return False

    print("✓ Training completed successfully")
    return True

# Smoke-test with the debug config first; the basic config is only run
# when the debug run reports success.
print("=== Running Debug Configuration ===")
success = run_with_config(config_dir / "debug_config.yaml")

if success:
    print("\n=== Running Basic Configuration ===")
    run_with_config(config_dir / "basic_config.yaml")
```

### Configuration Inheritance and Overrides

```python
import copy


def _deep_merge(base, overrides):
    """Return a copy of ``base`` with ``overrides`` applied recursively.

    Where both sides hold a dict for the same key the dicts are merged key by
    key; in every other case the override value replaces (or adds to) the
    base value. Neither input is modified.
    """
    merged = copy.deepcopy(base)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = _deep_merge(merged[key], value)
        else:
            merged[key] = copy.deepcopy(value)
    return merged


# Create a base configuration
base_config = {
    "model": {
        "class_path": "__main__.ConfigurableModel",
        "init_args": {
            "input_size": 784,
            "hidden_size": 128,
            "num_classes": 10,
            "dropout": 0.2
        }
    },
    "data": {
        "class_path": "__main__.ConfigurableDataModule",
        "init_args": {
            "num_samples": 1000,
            "input_size": 784,
            "num_classes": 10,
            "batch_size": 32
        }
    },
    "trainer": {
        "max_epochs": 3,
        "enable_checkpointing": False,
        "logger": False
    }
}

# Save base config
with open(config_dir / "base_config.yaml", "w") as f:
    yaml.dump(base_config, f, default_flow_style=False)

# Create experiment configs that override base settings
experiments = {
    "adam_experiment": {
        "model": {"init_args": {"learning_rate": 0.001, "optimizer": "adam"}},
        "trainer": {"max_epochs": 5}
    },
    "sgd_experiment": {
        "model": {"init_args": {"learning_rate": 0.01, "optimizer": "sgd"}},
        "trainer": {"max_epochs": 5}
    },
    "large_model": {
        "model": {"init_args": {"hidden_size": 256, "learning_rate": 0.001}},
        "data": {"init_args": {"batch_size": 64}},
        "trainer": {"max_epochs": 4}
    }
}

# Save experiment configs: each file is the base config with the
# experiment's overrides merged in at every nesting level, so overrides
# next to "init_args" or in sections missing from the base are kept too.
for exp_name, exp_config in experiments.items():
    full_config = _deep_merge(base_config, exp_config)

    with open(config_dir / f"{exp_name}.yaml", "w") as f:
        yaml.dump(full_config, f, default_flow_style=False)

print("✓ Experiment configurations created:")
for exp_name in experiments:
    print(f"  - {exp_name}.yaml")
```

### Hyperparameter Sweeps via Config

```python
# Create multiple configs for hyperparameter sweep
import itertools

sweep_configs = []

learning_rates = [0.001, 0.003, 0.01]
hidden_sizes = [64, 128, 256]
optimizers = ["adam", "sgd"]

# Full grid, iterated in the same order as nested loops over
# learning rate -> hidden size -> optimizer; experiment ids start at 1.
grid = itertools.product(learning_rates, hidden_sizes, optimizers)
for experiment_id, (lr, hidden_size, optimizer) in enumerate(grid, start=1):
    config = {
        "model": {
            "class_path": "__main__.ConfigurableModel",
            "init_args": {
                "input_size": 784,
                "hidden_size": hidden_size,
                "num_classes": 10,
                "learning_rate": lr,
                "optimizer": optimizer,
                "dropout": 0.2
            }
        },
        "data": {
            "class_path": "__main__.ConfigurableDataModule",
            "init_args": {
                "num_samples": 1000,
                "input_size": 784,
                "num_classes": 10,
                "batch_size": 32
            }
        },
        "trainer": {
            "max_epochs": 3,
            "enable_checkpointing": False,
            "logger": False,
            "enable_progress_bar": False  # Reduce output for sweep
        }
    }

    # The file name encodes the grid point so runs are identifiable on disk
    config_name = f"sweep_{experiment_id:02d}_lr{lr}_h{hidden_size}_{optimizer}.yaml"
    with open(config_dir / config_name, "w") as f:
        yaml.dump(config, f, default_flow_style=False)

    sweep_configs.append((config_name, lr, hidden_size, optimizer))

print(f"✓ Created {len(sweep_configs)} sweep configurations")
print("First few experiments:")
for name, lr, hs, opt in sweep_configs[:3]:
    print(f"  {name}: lr={lr}, hidden={hs}, opt={opt}")
```

### Configuration Analysis and Comparison

```python
# Function to analyze and compare configs
def analyze_config(config_path):
    """Load a YAML config and extract its model, data and trainer settings.

    Args:
        config_path: Path to a YAML file with ``model``/``data`` sections
            (each holding ``init_args``) and a ``trainer`` section.

    Returns:
        Dict with keys ``model_params``, ``data_params`` and ``trainer_params``.
    """
    with open(config_path, 'r') as stream:
        loaded = yaml.safe_load(stream)

    return {
        'model_params': loaded['model']['init_args'],
        'data_params': loaded['data']['init_args'],
        'trainer_params': loaded['trainer'],
    }

def compare_configs(config_paths):
    """Analyze several config files, keyed by file name without extension.

    Args:
        config_paths: Iterable of ``Path`` objects pointing at YAML configs.

    Returns:
        Dict mapping each path's ``stem`` to its ``analyze_config`` result.
    """
    return {
        config_path.stem: analyze_config(config_path)
        for config_path in config_paths
    }

# Compare some configurations
configs_to_compare = [
    config_dir / "basic_config.yaml",
    config_dir / "high_perf_config.yaml", 
    config_dir / "debug_config.yaml"
]

comparisons = compare_configs(configs_to_compare)

print("=== Configuration Comparison ===")
for config_name, analysis in comparisons.items():
    model_params = analysis['model_params']
    data_params = analysis['data_params']
    trainer_params = analysis['trainer_params']

    print(f"\n{config_name.upper()}:")
    print(f"  Model: hidden_size={model_params['hidden_size']}, "
          f"lr={model_params['learning_rate']}, "
          f"optimizer={model_params['optimizer']}")
    print(f"  Data: samples={data_params['num_samples']}, "
          f"batch_size={data_params['batch_size']}")

    trainer_info = []
    # Look at the value, not just the key: a config may set
    # `fast_dev_run: false` explicitly, which is a normal run.
    fast_dev_run = trainer_params.get('fast_dev_run')
    if fast_dev_run:
        trainer_info.append(f"fast_dev_run={fast_dev_run}")
    else:
        # max_epochs may be omitted from the trainer section; show None then.
        trainer_info.append(f"max_epochs={trainer_params.get('max_epochs')}")

    grad_clip = trainer_params.get('gradient_clip_val')
    if grad_clip is not None:
        trainer_info.append(f"grad_clip={grad_clip}")

    print(f"  Trainer: {', '.join(trainer_info)}")
```

## Advanced CLI Features

### Config Validation and Schema

```python
# Create a configuration with validation
def validate_config(config_path):
    """Run basic structural and range checks on a YAML config file.

    Checks that the ``model``, ``data`` and ``trainer`` sections exist and
    that ``learning_rate``/``hidden_size`` (model) and
    ``batch_size``/``num_samples`` (data) are positive numbers when they are
    given. Arguments left out of ``init_args`` are not reported, because the
    classes supply defaults for them.

    Args:
        config_path: Path to the YAML file to check.

    Returns:
        List of human-readable error strings; empty when the config passes.
    """
    with open(config_path, 'r') as f:
        # An empty YAML document loads as None; treat it as "no sections".
        config = yaml.safe_load(f) or {}

    if not isinstance(config, dict):
        return ["Config root must be a mapping of sections"]

    errors = []

    # Check required sections
    for section in ('model', 'data', 'trainer'):
        if section not in config:
            errors.append(f"Missing required section: {section}")

    # Arguments that must be strictly positive, per section
    positive_args = {
        'model': ('learning_rate', 'hidden_size'),
        'data': ('batch_size', 'num_samples'),
    }
    for section, arg_names in positive_args.items():
        if section not in config:
            continue

        section_config = config[section]
        if not isinstance(section_config, dict):
            errors.append(f"Section '{section}' must be a mapping")
            continue

        init_args = section_config.get('init_args') or {}
        if not isinstance(init_args, dict):
            errors.append(f"{section}.init_args must be a mapping")
            continue

        for arg_name in arg_names:
            if arg_name not in init_args:
                continue  # omitted -> the class default applies
            value = init_args[arg_name]
            # bool is a subclass of int, so exclude it explicitly
            is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
            if not is_number or value <= 0:
                errors.append(f"{arg_name} must be positive")

    return errors

# Validate the three hand-written configs and report per file
print("=== Configuration Validation ===")
for config_file in ("basic_config.yaml", "high_perf_config.yaml", "debug_config.yaml"):
    config_path = config_dir / config_file
    errors = validate_config(config_path)

    if not errors:
        print(f"✅ {config_file}: Valid")
        continue

    print(f"❌ {config_file}: {len(errors)} errors")
    for error in errors:
        print(f"  - {error}")
```

### Config-Based Experiment Tracking

```python
# Create experiment tracking system
class ExperimentTracker:
    """Collect (simulated) results of config-driven experiments and summarize them.

    ``results`` maps an experiment name to a dict holding the config stem,
    the simulated accuracy/loss and the key hyperparameters.
    """

    def __init__(self):
        self.results = {}

    def _simulate_metrics(self, experiment_name, model_params):
        """Return a simulated ``(accuracy, loss)`` pair for an experiment.

        The noise is drawn from a private generator seeded from a CRC of the
        experiment name. Unlike ``hash()``, which is salted per process for
        strings, the CRC is stable, so the same name gives the same numbers
        in every session, and the global ``random`` state is left untouched.
        """
        import random
        import zlib

        seed = zlib.crc32(experiment_name.encode("utf-8")) % 1000
        rng = random.Random(seed)

        # Simulate training results based on config
        base_acc = 0.7
        lr_factor = min(model_params['learning_rate'] * 100, 1.0)
        size_factor = min(model_params['hidden_size'] / 256, 1.0)

        final_acc = base_acc + lr_factor * 0.1 + size_factor * 0.15 + rng.uniform(-0.05, 0.05)
        final_loss = max(0.1, 1.0 - final_acc + rng.uniform(-0.1, 0.1))
        return final_acc, final_loss

    def run_experiment(self, config_path, experiment_name):
        """Run experiment and track results.

        No training happens here: the config is loaded and metrics are
        simulated from its learning rate and hidden size.

        Args:
            config_path: Path (``str`` or ``Path``) to the experiment's YAML config.
            experiment_name: Key under which the result is stored.

        Returns:
            The result dict stored in ``self.results[experiment_name]``.
        """
        print(f"Running experiment: {experiment_name}")

        # Load config for reference
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)

        # Extract key parameters
        model_params = config['model']['init_args']
        data_params = config['data']['init_args']

        # For demonstration, simulate results
        final_acc, final_loss = self._simulate_metrics(experiment_name, model_params)

        # Store results
        self.results[experiment_name] = {
            # Path(...) so plain string paths work as well as Path objects
            'config': Path(config_path).stem,
            'final_accuracy': round(final_acc, 4),
            'final_loss': round(final_loss, 4),
            'learning_rate': model_params['learning_rate'],
            'hidden_size': model_params['hidden_size'],
            'optimizer': model_params['optimizer'],
            'batch_size': data_params['batch_size']
        }

        print(f"  Results: acc={final_acc:.4f}, loss={final_loss:.4f}")
        return self.results[experiment_name]

    def summarize_results(self):
        """Print all results as a table sorted by accuracy, then the best run."""
        if not self.results:
            print("No experiments run yet")
            return

        print("\n=== Experiment Results Summary ===")
        print(f"{'Experiment':<20} {'Accuracy':<10} {'Loss':<8} {'LR':<8} {'Hidden':<8} {'Optimizer':<10}")
        print("-" * 75)

        # Highest accuracy first
        sorted_results = sorted(
            self.results.items(),
            key=lambda item: item[1]['final_accuracy'],
            reverse=True,
        )

        for name, results in sorted_results:
            print(f"{name:<20} {results['final_accuracy']:<10} "
                  f"{results['final_loss']:<8} {results['learning_rate']:<8} "
                  f"{results['hidden_size']:<8} {results['optimizer']:<10}")

        # Best config analysis
        best_name, best_result = sorted_results[0]
        print(f"\n🏆 Best experiment: {best_name}")
        print(f"   Accuracy: {best_result['final_accuracy']}")
        print(f"   Config: {best_result['config']}.yaml")

# Track a handful of the generated configs and print the leaderboard
tracker = ExperimentTracker()

# (config file name, experiment label) pairs
experiments_to_run = [
    ("basic_config.yaml", "baseline"),
    ("high_perf_config.yaml", "high_perf"),
    ("adam_experiment.yaml", "adam_opt"),
    ("sgd_experiment.yaml", "sgd_opt"),
]

for config_file, exp_name in experiments_to_run:
    config_path = config_dir / config_file
    # Configs that were never generated are skipped
    if not config_path.exists():
        continue
    tracker.run_experiment(config_path, exp_name)

# Summarize results
tracker.summarize_results()
```

## Summary

This notebook covered LightningCLI and config-driven experiments:

1. **CLI-Compatible Components**: Models and DataModules with proper type hints and `save_hyperparameters()`
2. **YAML Configurations**: Structured configs for model, data, and trainer parameters
3. **Configuration Management**: Base configs, inheritance, and experiment variants
4. **Hyperparameter Sweeps**: Systematic exploration via multiple config files
5. **Config Validation**: Ensuring configurations are valid before training
6. **Experiment Tracking**: Managing and comparing multiple config-based experiments

Key benefits of config-driven approach:
- **Reproducibility**: Every experiment is fully specified in a config file
- **Version Control**: Configs can be tracked in git alongside code
- **Easy Comparison**: Side-by-side comparison of different approaches
- **No Code Changes**: Experiment with different settings without modifying source code
- **Systematic Exploration**: Organized hyperparameter sweeps and ablation studies

Best practices:
- Use descriptive config names that indicate the experiment
- Validate configs before long training runs
- Keep base configs for common settings
- Track experiment results alongside configurations
- Use version control for both code and configs

Next notebook: We'll dive into building custom DataModules for different data types.