# Module 21: MLOps - From Notebook to Production

**Goal:** Learn how to move ML models from experimentation to production with versioning, reproducibility, and deployment practices.

**Prerequisites:** Modules 1-20

**Expected Runtime:** ~25 minutes

**Outputs:**
- Implemented versioning for models
- Built experiment tracking
- Created a training pipeline

---

## Setup

In [None]:
import numpy as np
import pandas as pd
import json
import os
import hashlib
from datetime import datetime
from typing import Dict, Any, Optional
from dataclasses import dataclass, asdict
import joblib
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, precision_score, recall_score
import warnings
warnings.filterwarnings('ignore')

np.random.seed(42)

## Part 1: Model Versioning

In [None]:
@dataclass
class ModelMetadata:
    """Metadata describing one trained model version.

    ``ModelRegistry.register`` converts an instance with ``dataclasses.asdict``
    and writes it as JSON next to the model artifact, so every field must hold
    JSON-serializable values.
    """
    # Version label; combined with ``name`` to build the artifact filenames.
    version: str
    # Model name; the registry groups and filters versions by this value.
    name: str
    # When the model was trained (the pipeline passes an ISO-8601 string).
    trained_at: str
    # Identifier of the training data (the pipeline passes the data hash).
    data_version: str
    # Source-code revision identifier (the demo pipeline passes "demo").
    git_commit: str
    # Configuration the model was trained with.
    config: Dict[str, Any]
    # Evaluation metrics, metric name -> value.
    metrics: Dict[str, float]
    # Feature column names the model was trained on.
    features: list
    # Probability cut-off used to turn scores into class predictions, if any.
    threshold: Optional[float] = None

class ModelRegistry:
    """Simple file-based model registry for versioning.

    Files kept under ``base_path``:

    * ``registry.json`` - index with one entry per registered (name, version)
    * ``<name>_<version>.joblib`` - the serialized model
    * ``<name>_<version>.json`` - the model's metadata as JSON
    """

    def __init__(self, base_path: str = "models"):
        """Create ``base_path`` if needed and load any existing index."""
        self.base_path = base_path
        os.makedirs(base_path, exist_ok=True)
        self.registry_file = os.path.join(base_path, "registry.json")
        self._load_registry()

    def _load_registry(self):
        """Load the index from disk, or start with an empty one."""
        if os.path.exists(self.registry_file):
            with open(self.registry_file, encoding="utf-8") as f:
                self.registry = json.load(f)
            # Tolerate an index file that lacks the "models" key.
            self.registry.setdefault("models", [])
        else:
            self.registry = {"models": []}

    def _save_registry(self):
        """Write the in-memory index back to ``registry.json``."""
        with open(self.registry_file, "w", encoding="utf-8") as f:
            json.dump(self.registry, f, indent=2)

    @staticmethod
    def _version_key(version: str):
        """Return a sort key that orders dotted versions numerically.

        Plain string sorting ranks "v1.10.0" below "v1.9.0". Here a leading
        "v"/"V" is dropped and each dot-separated part is compared as an
        integer when it is numeric; non-numeric parts (e.g. "0rc1") sort
        after numeric ones and are compared as text.
        """
        parts = version.lstrip("vV").split(".")
        return tuple(
            (0, int(part), "") if part.isdecimal() else (1, 0, part)
            for part in parts
        )

    def _latest_entry(self, name: str):
        """Return the index entry with the highest version for ``name``, or None."""
        versions = [m for m in self.registry["models"] if m["name"] == name]
        if not versions:
            return None
        return max(versions, key=lambda m: self._version_key(m["version"]))

    def register(self, model, metadata: "ModelMetadata") -> str:
        """Register a new model version.

        Saves the model with joblib, writes the metadata as JSON, and records
        the version in the index. Registering a (name, version) pair that is
        already present overwrites its files and replaces its index entry
        instead of adding a duplicate.

        Args:
            model: Fitted model object to persist.
            metadata: Description of the model; ``name`` and ``version``
                determine the file names.

        Returns:
            Path of the saved model file.
        """
        stem = f"{metadata.name}_{metadata.version}"
        model_path = os.path.join(self.base_path, f"{stem}.joblib")
        meta_path = os.path.join(self.base_path, f"{stem}.json")

        # Save model
        joblib.dump(model, model_path)

        # Save metadata
        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(asdict(metadata), f, indent=2)

        # Update registry: drop any stale entry for this exact version first,
        # so re-running a training cell does not inflate the version list.
        self.registry["models"] = [
            m for m in self.registry["models"]
            if (m["name"], m["version"]) != (metadata.name, metadata.version)
        ]
        self.registry["models"].append({
            "name": metadata.name,
            "version": metadata.version,
            "path": model_path,
            "registered_at": datetime.now().isoformat()
        })
        self._save_registry()

        return model_path

    def get_latest(self, name: str):
        """Load the highest version of model ``name``.

        Returns:
            The deserialized model, or None when no version of ``name`` is
            registered.
        """
        latest = self._latest_entry(name)
        if latest is None:
            return None
        return joblib.load(latest["path"])

    def list_versions(self, name: Optional[str] = None):
        """List registered versions in registration order.

        Args:
            name: When given (and non-empty), only entries for that model
                name are returned; otherwise all entries are returned.
        """
        models = self.registry["models"]
        if name:
            models = [m for m in models if m["name"] == name]
        return models

# Create the demo registry. Constructing it creates ./demo_models if needed
# and loads the registry.json index left there by a previous run, if any.
registry = ModelRegistry("./demo_models")
print("Model registry initialized!")

## Part 2: Experiment Tracking

In [None]:
@dataclass
class Experiment:
    """Record of a single training experiment.

    ``ExperimentTracker.log`` converts an instance with ``dataclasses.asdict``
    and writes it as JSON, so every field must hold JSON-serializable values.
    """
    # Experiment label; also the prefix of the JSON log filename.
    name: str
    # Timestamp string; part of the log filename (':' is replaced by '-').
    timestamp: str
    # Settings used for the run; each key becomes a column in compare().
    params: Dict[str, Any]
    # Metric name -> value; each key becomes a column in compare().
    metrics: Dict[str, float]
    # Identifier of the dataset the run used.
    data_hash: str
    # Optional free-form notes about the run.
    notes: str = ""

class ExperimentTracker:
    """Lightweight experiment log: an in-memory list plus one JSON file per run."""

    def __init__(self, log_path: str = "experiments"):
        """Ensure the log directory exists and start with no experiments."""
        os.makedirs(log_path, exist_ok=True)
        self.log_path = log_path
        self.experiments = []

    def log(self, experiment: Experiment):
        """Remember ``experiment`` and persist it as JSON.

        Returns:
            Path of the JSON file that was written.
        """
        self.experiments.append(experiment)

        # ':' is replaced so the timestamp is safe to use in a filename
        safe_stamp = experiment.timestamp.replace(':', '-')
        filepath = os.path.join(self.log_path, f"{experiment.name}_{safe_stamp}.json")
        with open(filepath, "w") as out:
            json.dump(asdict(experiment), out, indent=2)

        return filepath

    def compare(self, metric: str = "auc"):
        """Tabulate all logged experiments, best ``metric`` first.

        Returns:
            A DataFrame with one row per experiment (name, timestamp, params,
            metrics). It is empty when nothing has been logged, and left
            unsorted when ``metric`` is not one of the columns.
        """
        if not self.experiments:
            return pd.DataFrame()

        # Later keys win on collision, in the order: base fields, params, metrics
        rows = [
            {"name": exp.name, "timestamp": exp.timestamp, **exp.params, **exp.metrics}
            for exp in self.experiments
        ]
        table = pd.DataFrame(rows)

        if metric not in table.columns:
            return table
        return table.sort_values(metric, ascending=False)

def hash_data(df: pd.DataFrame) -> str:
    """Return a short (8 hex character) fingerprint of a dataframe's contents."""
    # One hash per row, computed by pandas
    row_hashes = pd.util.hash_pandas_object(df)
    # Fold the per-row hashes into a single MD5 digest and keep a short prefix
    digest = hashlib.md5(row_hashes.values)
    return digest.hexdigest()[:8]

# Create the demo tracker. Constructing it creates ./demo_experiments if
# needed; the in-memory experiment list always starts empty (JSON logs from
# earlier runs are not reloaded).
tracker = ExperimentTracker("./demo_experiments")
print("Experiment tracker initialized!")

## Part 3: Training Pipeline

In [None]:
# Build a synthetic customer table (seeded so every run yields the same rows)
np.random.seed(42)
n_samples = 1000

# The draws share the global RNG stream, so the column order below is significant
data = pd.DataFrame(dict(
    tenure=np.random.exponential(20, n_samples),
    spend=np.random.normal(100, 30, n_samples),
    support_tickets=np.random.poisson(2, n_samples),
    engagement=np.random.beta(5, 2, n_samples) * 100,
))

# Churn probability: 0.2 base, +0.2 with more than 3 support tickets,
# -0.1 with engagement above 70
many_tickets = data['support_tickets'] > 3
highly_engaged = data['engagement'] > 70
churn_prob = 0.2 + 0.2 * many_tickets - 0.1 * highly_engaged

# Each customer churns when a uniform draw falls below their probability
data['churned'] = (np.random.random(n_samples) < churn_prob).astype(int)

print(f"Sample data: {len(data)} rows, churn rate: {data['churned'].mean():.1%}")

In [None]:
class TrainingPipeline:
    """End-to-end training pipeline.

    Steps, in order: ``load_data`` -> ``preprocess`` -> ``train`` ->
    ``evaluate`` -> ``register``. Each step returns ``self`` so calls can be
    chained; ``run`` executes all five.

    Expected ``config`` keys:
        features: list of input column names
        target: name of the label column
        test_size, random_state: forwarded to ``train_test_split``
            (``random_state`` also seeds the classifier)
        threshold: probability cut-off used to derive class predictions
        model: dict with ``n_estimators`` and ``max_depth``
    """

    def __init__(self, config: Dict):
        """Store a private snapshot of ``config`` and reset all step outputs."""
        import copy  # function-scope import keeps this block self-contained

        # Snapshot the config: if the caller later mutates its dict (e.g. to
        # try other hyperparameters), this pipeline and the metadata it
        # registers must still describe the model that was actually trained.
        self.config = copy.deepcopy(config)
        self.data = None
        self.data_hash = None
        self.X = None
        self.y = None
        self.X_train = None
        self.X_test = None
        self.y_train = None
        self.y_test = None
        self.model = None
        self.metrics = {}

    def _require(self, attribute: str, prerequisite: str) -> None:
        """Raise a clear error when a step runs before its prerequisite."""
        if getattr(self, attribute) is None:
            raise RuntimeError(
                f"Pipeline step out of order: call {prerequisite}() first."
            )

    def load_data(self, df: pd.DataFrame):
        """Step 1: Copy the input data and fingerprint it.

        The copy protects the caller's dataframe; the hash is recorded as the
        data version when the model is registered.
        """
        print("Step 1: Loading data...")
        self.data = df.copy()
        self.data_hash = hash_data(df)
        print(f"  Loaded {len(df)} rows, hash: {self.data_hash}")
        return self

    def preprocess(self):
        """Step 2: Select features/target and make the train/test split.

        Raises:
            RuntimeError: If ``load_data`` has not been called.
        """
        self._require("data", "load_data")
        print("Step 2: Preprocessing...")
        features = self.config["features"]
        target = self.config["target"]

        self.X = self.data[features]
        self.y = self.data[target]

        # Split
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y,
            test_size=self.config["test_size"],
            random_state=self.config["random_state"]
        )
        print(f"  Train: {len(self.X_train)}, Test: {len(self.X_test)}")
        return self

    def train(self):
        """Step 3: Fit a gradient-boosting classifier on the training split.

        Raises:
            RuntimeError: If ``preprocess`` has not been called.
        """
        self._require("X_train", "preprocess")
        print("Step 3: Training model...")
        model_config = self.config["model"]

        self.model = GradientBoostingClassifier(
            n_estimators=model_config["n_estimators"],
            max_depth=model_config["max_depth"],
            random_state=self.config["random_state"]
        )
        self.model.fit(self.X_train, self.y_train)
        print("  Model trained!")
        return self

    def evaluate(self):
        """Step 4: Score the model on the test split.

        AUC is computed from the predicted probabilities; precision and
        recall from predictions thresholded at ``config["threshold"]``.

        Raises:
            RuntimeError: If ``train`` has not been called.
        """
        self._require("model", "train")
        print("Step 4: Evaluating...")
        y_prob = self.model.predict_proba(self.X_test)[:, 1]
        y_pred = (y_prob >= self.config["threshold"]).astype(int)

        # Store plain Python floats so the metrics print and serialize the
        # same way regardless of the installed numpy version.
        self.metrics = {
            "auc": float(roc_auc_score(self.y_test, y_prob)),
            "precision": float(precision_score(self.y_test, y_pred)),
            "recall": float(recall_score(self.y_test, y_pred))
        }
        print(f"  AUC: {self.metrics['auc']:.3f}")
        print(f"  Precision: {self.metrics['precision']:.3f}")
        print(f"  Recall: {self.metrics['recall']:.3f}")
        return self

    def register(self, registry: ModelRegistry, version: str,
                 name: str = "churn_model", git_commit: str = "demo"):
        """Step 5: Register the trained model together with its metadata.

        Args:
            registry: Registry that stores the model and metadata.
            version: Version label for this model.
            name: Model name to register under.
            git_commit: Source revision to record in the metadata.

        Raises:
            RuntimeError: If ``train`` has not been called.
        """
        self._require("model", "train")
        print("Step 5: Registering model...")

        metadata = ModelMetadata(
            version=version,
            name=name,
            trained_at=datetime.now().isoformat(),
            data_version=self.data_hash,
            git_commit=git_commit,
            config=self.config,
            metrics=self.metrics,
            features=self.config["features"],
            threshold=self.config["threshold"]
        )

        path = registry.register(self.model, metadata)
        print(f"  Registered as {version} at {path}")
        return self

    def run(self, df: pd.DataFrame, registry: ModelRegistry, version: str,
            name: str = "churn_model", git_commit: str = "demo"):
        """Run the full pipeline on ``df`` and register the result.

        Returns:
            This pipeline, with ``model`` and ``metrics`` populated.
        """
        return (self
                .load_data(df)
                .preprocess()
                .train()
                .evaluate()
                .register(registry, version, name=name, git_commit=git_commit))

In [None]:
# Define configuration: everything the pipeline needs to train one model
config = {
    # Columns used as model inputs
    "features": ["tenure", "spend", "support_tickets", "engagement"],
    # Label column
    "target": "churned",
    # Fraction of rows held out for evaluation
    "test_size": 0.2,
    # Seed used for both the train/test split and the classifier
    "random_state": 42,
    # Probability cut-off for turning scores into class predictions
    "threshold": 0.5,
    # Hyperparameters passed to GradientBoostingClassifier
    "model": {
        "n_estimators": 100,
        "max_depth": 4
    }
}

# Run all pipeline steps and register the result as version v1.0.0
print("=== Running Training Pipeline ===")
pipeline = TrainingPipeline(config)
pipeline.run(data, registry, "v1.0.0")

## Part 4: Running Multiple Experiments

In [None]:
# Run experiments with different model hyperparameters
experiments_to_run = [
    {"name": "baseline", "n_estimators": 50, "max_depth": 3},
    {"name": "deeper", "n_estimators": 100, "max_depth": 5},
    {"name": "more_trees", "n_estimators": 200, "max_depth": 4},
]

print("=== Running Experiments ===")
for exp_config in experiments_to_run:
    print(f"\nExperiment: {exp_config['name']}")

    # Build a per-experiment config instead of mutating the shared `config`.
    # Mutating it in place would leave `config` holding the last experiment's
    # hyperparameters, so it would no longer describe the registered model.
    experiment_config = {
        **config,
        "model": {
            **config["model"],
            "n_estimators": exp_config["n_estimators"],
            "max_depth": exp_config["max_depth"],
        },
    }

    # Run pipeline (train + evaluate only; experiments are not registered)
    pipeline = TrainingPipeline(experiment_config)
    pipeline.load_data(data).preprocess().train().evaluate()

    # Log experiment
    experiment = Experiment(
        name=exp_config["name"],
        timestamp=datetime.now().isoformat(),
        params=exp_config,
        metrics=pipeline.metrics,
        data_hash=pipeline.data_hash
    )
    tracker.log(experiment)

In [None]:
# Compare experiments: compare("auc") returns one row per logged experiment,
# sorted with the highest AUC first. The column selection below assumes at
# least one experiment has been logged; on an empty tracker compare() returns
# an empty DataFrame and the selection raises KeyError.
print("=== Experiment Comparison ===")
comparison = tracker.compare("auc")
print(comparison[["name", "n_estimators", "max_depth", "auc", "precision", "recall"]])

## Part 5: Model Serving (Batch)

In [None]:
def batch_predict(model_path: str, data: pd.DataFrame, features: list,
                  id_columns: Optional[list] = None) -> pd.DataFrame:
    """
    Run batch predictions.

    Args:
        model_path: Path to saved model
        data: Input data
        features: Feature columns to use
        id_columns: Columns copied from ``data`` into the result so rows can
            be identified. Defaults to "tenure" and "spend", skipping any
            that ``data`` does not contain.

    Returns:
        DataFrame with the identifying columns plus ``churn_probability``,
        ``predicted_at`` and ``model_version``

    Raises:
        KeyError: If ``data`` lacks any of the requested feature columns.
    """
    # Fail early, before loading the model, with a message naming the gap
    missing = [col for col in features if col not in data.columns]
    if missing:
        raise KeyError(f"Input data is missing feature columns: {missing}")

    # Load model
    model = joblib.load(model_path)

    # Generate predictions (probability of the positive class)
    probabilities = model.predict_proba(data[features])[:, 1]

    # Create results
    if id_columns is None:
        id_columns = [col for col in ("tenure", "spend") if col in data.columns]
    results = data[list(id_columns)].copy()
    results["churn_probability"] = probabilities
    results["predicted_at"] = datetime.now().isoformat()

    # Derive the version from the file name only, so underscores in directory
    # names cannot leak into it: ".../churn_model_v1.0.0.joblib" -> "v1.0.0"
    filename = os.path.basename(model_path)
    results["model_version"] = filename.split("_")[-1].replace(".joblib", "")

    return results

# Test batch prediction on the first 10 rows of the sample data.
# list_versions returns entries in registration order, so [-1] is the most
# recently registered entry (not necessarily the highest version number).
print("=== Batch Prediction ===")
model_versions = registry.list_versions("churn_model")
if model_versions:
    latest = model_versions[-1]
    predictions = batch_predict(
        latest["path"], 
        data.head(10), 
        config["features"]
    )
    print(predictions)

## Part 6: Reproducibility Check

In [None]:
def check_reproducibility(config: Dict, data: pd.DataFrame, n_runs: int = 3) -> bool:
    """
    Train repeatedly with one config and report whether the test AUC is stable.

    Each run builds a fresh pipeline, trains and evaluates it, and records the
    AUC. Training counts as reproducible when every run produced exactly the
    same AUC value.
    """
    def _auc_of_fresh_run():
        # A new pipeline per run, so no state carries over between runs
        run = TrainingPipeline(config)
        run.load_data(data).preprocess().train().evaluate()
        return run.metrics["auc"]

    results = [_auc_of_fresh_run() for _ in range(n_runs)]

    # Exactly one distinct value means all runs agreed
    distinct_aucs = set(results)
    is_reproducible = len(distinct_aucs) == 1

    report_lines = (
        f"Reproducibility check ({n_runs} runs):",
        f"  Results: {results}",
        f"  Reproducible: {is_reproducible}",
    )
    for line in report_lines:
        print(line)

    return is_reproducible

# Run check: train three times (the default) with the same config and data.
# Identical AUCs are expected because both the train/test split and the
# classifier are seeded from config["random_state"].
check_reproducibility(config, data)

## Part 7: TODO - Create Your Pipeline

Extend the pipeline with additional capabilities: feature validation, model validation, and automated rollback.

In [None]:
# TODO: Add these capabilities to the pipeline:
# 1. Feature validation (check for missing values, expected ranges)
#    - e.g. run the checks before the train/test split and fail fast on bad data
# 2. Model validation (minimum performance thresholds)
#    - e.g. compare the evaluated metrics against minimums before registering
# 3. Automated rollback if new model is worse
#    - e.g. compare against the metrics saved with the previously registered model

print("Extend the pipeline with validation and rollback!")

## Self-Check

Run the cell below to verify your pipeline components are correct.

In [None]:
# SELF-CHECK: Verify your MLOps pipeline
# Run this after the cells above; each assert names the cell output it needs.

# A pipeline object must exist in the notebook namespace
assert 'pipeline' in dir(), "Pipeline should be defined"
# ...and it must expose a metrics attribute
assert hasattr(pipeline, 'metrics'), "Pipeline should have metrics"
# At least one churn_model version must be in the registry
assert len(registry.list_versions("churn_model")) >= 1, "Should have registered at least one model"
# At least one experiment must have been logged in this session
assert len(tracker.experiments) >= 1, "Should have logged at least one experiment"
print(f"✅ Self-check passed! {len(registry.list_versions('churn_model'))} model versions, {len(tracker.experiments)} experiments")

---

## Stakeholder Summary

### TODO: Write a 3-bullet summary (~100 words) for the VP of Engineering

Template:
- **Why MLOps:** Without it, we can't [reproduce results / debug production issues / compare experiments]. Think of it as version control for ML.
- **Reproducibility:** Same code + data + config = same model. Set random seeds, track data versions, and log all hyperparameters.
- **Minimum viable setup:** Start with [experiment tracking + model registry + config files]. This prevents "it worked on my laptop" syndrome.

## Key Takeaways

1. **Version everything**: Code, data, models, config
2. **Track experiments**: Parameters, metrics, artifacts
3. **Use pipelines**: Modular, reproducible, testable
4. **Register models**: With metadata for traceability
5. **Set random seeds**: For reproducibility

### Next Steps
- Explore the interactive playground
- Complete the quiz
- Move to Module 22: Monitoring