# FastAPI Model Deployment

This notebook demonstrates how to deploy machine learning models using FastAPI, focusing on production-ready deployment patterns.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List, Any, Optional
import sys
import os
import time
import pickle
import asyncio
from datetime import datetime
from enum import Enum

# Add project root to path.
# "__file__" is deliberately a string literal (a notebook defines no __file__), so
# the root is located two levels above the working directory. It is resolved to an
# absolute path so a later os.chdir() cannot change what the entry points at, and
# the append is skipped on cell re-runs so sys.path does not collect duplicates.
PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname("__file__"), '..', '..'))
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

# Set style for plots.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*";
# older releases only know the unprefixed name, and style.use() raises OSError
# for an unknown style. Fall back rather than failing the whole import cell.
try:
    plt.style.use('seaborn-v0_8')
except OSError:
    plt.style.use('seaborn')
sns.set_palette("husl")

## Production Model Deployment with FastAPI

Deploying ML models in production requires:

1. **API Design**: Clean, well-documented endpoints
2. **Performance**: Efficient inference and request handling
3. **Monitoring**: Metrics and logging for model performance
4. **Scalability**: Ability to handle multiple requests
5. **Security**: Input validation and protection against attacks
6. **Reliability**: Error handling and graceful degradation

In [None]:
# Create a simple model to deploy
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Generate sample data (fixed seed so the notebook is reproducible)
X, y = make_regression(n_samples=1000, n_features=10, noise=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Model Performance:")
print(f"MSE: {mse:.4f}")
# The label uses a real superscript-two character; the previous text was a
# mis-decoded UTF-8 sequence that printed as garbage.
print(f"R²: {r2:.4f}")

# Save the model so the API cell can load it from disk
model_path = "temp_model.pkl"
with open(model_path, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to {model_path}")

In [None]:
# Create a FastAPI application
from fastapi import FastAPI, HTTPException, BackgroundTasks
from pydantic import BaseModel
from typing import List
import logging
from contextlib import asynccontextmanager
import joblib

# Logging setup: a module-level logger, with the root logger emitting INFO and above
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


# Define Pydantic models for request/response
class PredictionRequest(BaseModel):
    """Request body for the prediction endpoints: a batch of feature rows."""
    # Outer list holds the samples; each inner list holds one sample's feature values.
    features: List[List[float]]  # 2D array for batch predictions

class PredictionResponse(BaseModel):
    """Response body returned by the `/predict` endpoint."""
    # Output of `model.predict(...)` converted with `.tolist()`.
    predictions: List[float]
    # Elapsed time for the request, computed as a difference of `time.time()` values.
    processing_time: float
    # `time.time()` value taken when the response is built.
    timestamp: float

class HealthCheckResponse(BaseModel):
    """Response body returned by the `/health` endpoint."""
    # Service state label; the health endpoint in this file always sends "healthy".
    status: str
    # `time.time()` value taken when the health check is answered.
    timestamp: float


# Global model variable read by the endpoints.
# When this code runs as a standalone module no `model` exists yet, so this is
# None until the lifespan handler loads it at startup. In the notebook an
# earlier cell has already bound the trained estimator to `model`; keep it
# instead of resetting to None, because later cells use `model` without ever
# starting a server (and therefore without the lifespan handler running).
model = globals().get("model")


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load the serialized model on startup and log when the app shuts down.

    Statements before ``yield`` run before the app starts serving; statements
    after it run when the app stops.
    """
    global model

    # --- startup ---
    logger.info("Loading model...")
    deserialized = joblib.load(model_path)
    model = deserialized  # publish under the module-level name the endpoints read
    logger.info("Model loaded successfully")

    yield  # the application runs while this generator is suspended

    # --- shutdown ---
    logger.info("Shutting down...")


# Application object: descriptive metadata kept together, lifecycle hook passed separately
_APP_METADATA = dict(
    title="ML Model API",
    description="Production-ready API for ML model inference",
    version="1.0.0",
)
app = FastAPI(lifespan=lifespan, **_APP_METADATA)


@app.get("/")
async def root():
    # Landing route: a static greeting that identifies the service.
    greeting = "Welcome to the ML Model API"
    return {"message": greeting}


@app.get("/health", response_model=HealthCheckResponse)
async def health_check():
    """Health check endpoint."""
    # Liveness probe: a fixed status label plus the current time.time() value.
    checked_at = time.time()
    return HealthCheckResponse(status="healthy", timestamp=checked_at)


@app.post("/predict", response_model=PredictionResponse)
async def predict(request: PredictionRequest):
    """Run batch inference on the submitted feature rows.

    Args:
        request: Validated body whose ``features`` field holds one list of
            floats per sample.

    Returns:
        PredictionResponse with the predictions, the elapsed processing time
        and the timestamp at which the response was built.

    Raises:
        HTTPException: 503 when no model is loaded, 400 when the features do
            not form a rectangular 2D array, 500 when inference itself fails.
    """
    start_time = time.time()

    if model is None:
        raise HTTPException(status_code=503, detail="Model is not loaded")

    # Input problems are checked outside the inference try-block: inside it,
    # the broad `except Exception` would catch the 400 and re-raise it as 500.
    try:
        features = np.asarray(request.features, dtype=float)
    except ValueError:
        # Rows of unequal length cannot be stacked into a numeric 2D array.
        raise HTTPException(status_code=400, detail="Features must be a 2D array")
    if features.ndim != 2:
        raise HTTPException(status_code=400, detail="Features must be a 2D array")

    try:
        predictions = model.predict(features).tolist()
    except Exception as e:
        logger.error(f"Prediction error: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

    processing_time = time.time() - start_time

    # Log the prediction
    logger.info(f"Prediction made for {len(predictions)} samples, processing_time: {processing_time:.4f}s")

    return PredictionResponse(
        predictions=predictions,
        processing_time=processing_time,
        timestamp=time.time()
    )


# Test the API endpoints
import requests
import json

# Define a function to test the API
def test_api(base_url="http://localhost:8000", timeout=5.0):
    """Smoke-test the /health and /predict endpoints of a running server.

    Prints the status code and JSON body of each response. If the server
    cannot be reached, prints a start-up hint once and returns without
    attempting the remaining request.

    Args:
        base_url: Root URL of the server under test.
        timeout: Seconds to wait on each request; without a timeout a
            stalled server would block the notebook cell indefinitely.
    """
    not_running_hint = "API is not running. Start it with: uvicorn this_app:app --reload"
    unreachable = (requests.exceptions.ConnectionError, requests.exceptions.Timeout)

    # Test health check
    try:
        response = requests.get(f"{base_url}/health", timeout=timeout)
        print(f"Health check: {response.status_code}, {response.json()}")
    except unreachable:
        print(not_running_hint)
        return

    # Test prediction
    try:
        test_data = {"features": X_test[:5].tolist()}  # Take first 5 samples
        response = requests.post(f"{base_url}/predict", json=test_data, timeout=timeout)
        print(f"Prediction: {response.status_code}, {response.json()}")
    except unreachable:
        print(not_running_hint)

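# Note: uvicorn serves an importable module, so to run the API export this code
# to a file (e.g. this_app.py) and start it with:
# uvicorn this_app:app --reload --port 8000
# In this notebook we only define the app; test_api() reports when no server is running.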

In [None]:
# Create a more advanced API with monitoring and caching
from fastapi import FastAPI, HTTPException, BackgroundTasks, Request
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List, Dict, Any
import time
import asyncio
from collections import deque
import threading
import statistics

# Advanced API with monitoring
class AdvancedModelAPI:
    """FastAPI application wrapper that serves predictions and tracks basic metrics.

    Counts prediction requests and failures, keeps the latency of the most
    recent successful predictions, and exposes them through ``/health`` and
    ``/metrics``.

    Args:
        model: Object with a ``predict`` method. When omitted, the
            module-level ``model`` is looked up on each request.
        allow_origins: Origins permitted by CORS. When omitted, every origin
            is allowed but credentialed requests are not; pass an explicit
            list of origins to enable credentials.
    """

    def __init__(self, model=None, allow_origins=None):
        self.app = FastAPI(title="Advanced ML Model API")
        self.model = model
        self.prediction_times = deque(maxlen=1000)  # Keep last 1000 prediction times
        self.request_count = 0
        self.error_count = 0

        # A wildcard origin combined with credentials would let any site make
        # authenticated cross-origin calls, so credentials are only enabled
        # when the caller names the allowed origins explicitly.
        wildcard = allow_origins is None
        self.app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"] if wildcard else list(allow_origins),
            allow_credentials=not wildcard,
            allow_methods=["*"],
            allow_headers=["*"],
        )

        # Add routes
        self._add_routes()

    def _resolve_model(self):
        """Return the injected model, else the module-level ``model`` (may be None)."""
        if self.model is not None:
            return self.model
        return globals().get("model")

    def _collect_metrics(self):
        """Summarize the request counters and recorded prediction latencies.

        Returns:
            dict with the counters, the error rate and the mean/p50/p95/p99
            of the recorded prediction times; the latency values are 0 when
            nothing has been recorded yet.
        """
        # Snapshot once so every statistic describes the same set of samples.
        samples = list(self.prediction_times)
        if samples:
            avg = statistics.mean(samples)
            p50 = statistics.median(samples)
            p95, p99 = (float(v) for v in np.percentile(samples, [95, 99]))
        else:
            avg = p50 = p95 = p99 = 0
        return {
            "request_count": self.request_count,
            "error_count": self.error_count,
            "error_rate": self.error_count / max(self.request_count, 1),
            "avg_prediction_time": avg,
            "p50_prediction_time": p50,
            "p95_prediction_time": p95,
            "p99_prediction_time": p99,
            "active_requests": 0  # In a real implementation, you'd track this
        }

    def _add_routes(self):
        """Register the root, health, predict and metrics endpoints on ``self.app``."""

        @self.app.get("/")
        async def root():
            return {"message": "Advanced ML Model API"}

        @self.app.get("/health")
        async def health_check():
            metrics = self._collect_metrics()
            health_keys = (
                "request_count",
                "error_count",
                "avg_prediction_time",
                "p95_prediction_time",
            )
            return {
                "status": "healthy",
                "timestamp": time.time(),
                "metrics": {key: metrics[key] for key in health_keys}
            }

        @self.app.post("/predict")
        async def predict(request: PredictionRequest):
            start_time = time.time()
            self.request_count += 1

            try:
                active_model = self._resolve_model()
                if active_model is None:
                    raise HTTPException(status_code=503, detail="Model is not loaded")

                # Convert features to a numeric array; rows of unequal length
                # cannot be stacked and are a client error.
                try:
                    features = np.asarray(request.features, dtype=float)
                except ValueError:
                    raise HTTPException(status_code=400, detail="Features must be a 2D array")

                # Validate input shape
                if features.ndim != 2:
                    raise HTTPException(status_code=400, detail="Features must be a 2D array")

                predictions = active_model.predict(features).tolist()

                # Record prediction time
                prediction_time = time.time() - start_time
                self.prediction_times.append(prediction_time)

                return {
                    "predictions": predictions,
                    "processing_time": prediction_time,
                    "timestamp": time.time()
                }
            except HTTPException:
                # Deliberate error responses still count as failures, but keep
                # their own status code instead of being rewrapped as 500.
                self.error_count += 1
                raise
            except Exception as e:
                self.error_count += 1
                raise HTTPException(status_code=500, detail=f"Prediction failed: {str(e)}")

        @self.app.get("/metrics")
        async def get_metrics():
            return self._collect_metrics()

    def run(self, host="0.0.0.0", port=8000):
        """Start a uvicorn server for this app on the given host and port."""
        import uvicorn
        uvicorn.run(self.app, host=host, port=port)

# Instantiate the monitored API wrapper
advanced_api = AdvancedModelAPI()

# Summarize what the wrapper exposes (one printed line per entry)
for summary_line in (
    "Advanced Model API created with monitoring capabilities",
    "Endpoints:",
    "  GET  / - Root endpoint",
    "  GET  /health - Health check with metrics",
    "  POST /predict - Make predictions",
    "  GET  /metrics - Performance metrics",
):
    print(summary_line)

In [None]:
# Demonstrate model serialization and versioning
import joblib
import hashlib
from datetime import datetime
import json

class ModelRegistry:
    """Simple model registry for versioning and management.

    Each registered model is written with joblib into ``registry_path``; the
    metadata for all versions is kept in ``registry.json`` in the same
    directory, keyed by version ID.
    """

    def __init__(self, registry_path="model_registry"):
        """Create the registry directory if needed and load existing metadata."""
        self.registry_path = registry_path
        os.makedirs(registry_path, exist_ok=True)
        self.registry_file = os.path.join(registry_path, "registry.json")
        self.models = self._load_registry()

    def _load_registry(self):
        """Return the metadata stored in the registry file, or {} if it does not exist."""
        if not os.path.exists(self.registry_file):
            return {}
        with open(self.registry_file, 'r') as f:
            return json.load(f)

    def _save_registry(self):
        """Write the in-memory metadata to the registry file as indented JSON."""
        with open(self.registry_file, 'w') as f:
            json.dump(self.models, f, indent=2)

    def _next_version_number(self, model_name):
        """Return the 1-based version number for the next ``model_name`` entry.

        Entries are matched on their recorded name. Matching on the key
        prefix would also count unrelated models whose name merely starts
        with ``model_name`` (e.g. "rf" would count "rf_v2" entries).
        """
        existing = sum(1 for info in self.models.values() if info.get("name") == model_name)
        return existing + 1

    @staticmethod
    def _file_md5(path, chunk_size=1 << 20):
        """Return the hex MD5 digest of the file at ``path``.

        The file is read in chunks so a large model file is never held in
        memory in full. The digest is a content fingerprint, not a security
        measure.
        """
        digest = hashlib.md5()
        with open(path, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def register_model(self, model, model_name, metrics=None, description=""):
        """Persist ``model`` and record it in the registry.

        Args:
            model: Object to serialize with joblib.
            model_name: Logical name; versions are numbered per name.
            metrics: Optional dict of evaluation metrics stored alongside.
            description: Free-text description stored alongside.

        Returns:
            The generated version ID, ``"<name>-v<N>-<unix seconds>"``.
        """
        # Create a unique version ID
        version_number = self._next_version_number(model_name)
        version_id = f"{model_name}-v{version_number}-{int(time.time())}"

        # Save the model
        model_path = os.path.join(self.registry_path, f"{version_id}.pkl")
        joblib.dump(model, model_path)

        # Fingerprint the serialized file
        model_hash = self._file_md5(model_path)

        # Register the model
        self.models[version_id] = {
            "name": model_name,
            "version_id": version_id,
            "path": model_path,
            "hash": model_hash,
            "registered_at": datetime.now().isoformat(),
            "metrics": metrics or {},
            "description": description
        }

        self._save_registry()
        return version_id

    def get_model(self, version_id):
        """Load a model by version ID.

        Raises:
            ValueError: If ``version_id`` is not present in the registry.
        """
        if version_id not in self.models:
            raise ValueError(f"Model {version_id} not found in registry")

        model_path = self.models[version_id]["path"]
        # NOTE: joblib files are pickle-based; only load registries you trust.
        return joblib.load(model_path)

    def list_models(self, model_name=None):
        """List all models or models with a specific name."""
        if model_name:
            return {k: v for k, v in self.models.items() if v["name"] == model_name}
        return self.models

# Open (or create) the on-disk registry
registry = ModelRegistry()

# Record the model trained earlier together with its evaluation metrics
evaluation_metrics = {"mse": mse, "r2": r2}
version_id = registry.register_model(
    model,
    "random_forest_regressor",
    metrics=evaluation_metrics,
    description="Random Forest Regressor trained on synthetic data",
)

print(f"Model registered with version ID: {version_id}")
print("Registry contents:")
for registered_id, entry in registry.list_models().items():
    print(f"  {registered_id}: {entry['description']}")

# Round trip: read the same version back from the registry
loaded_model = registry.get_model(version_id)
print(f"Model loaded from registry: {type(loaded_model).__name__}")

In [None]:
# Create a complete deployment configuration
import yaml

# Deployment settings, grouped by concern and then assembled into one document
api_settings = {
    "name": "ml-model-api",
    "version": "1.0.0",
    "host": "0.0.0.0",
    "port": 8000,
    "workers": 4,
    "timeout": 30,
    "keep_alive": 5,
}

model_settings = {
    "version_id": version_id,
    "registry_path": "model_registry",
    "max_batch_size": 1000,
    "cache_enabled": True,
    "cache_size": 10000,
}

monitoring_settings = {
    "metrics_enabled": True,
    "logging_level": "INFO",
    "prometheus_enabled": True,
    "prometheus_port": 9090,
}

security_settings = {
    "rate_limiting": {"enabled": True, "requests_per_minute": 1000},
    "authentication": {"enabled": False, "api_keys": []},
}

performance_settings = {
    "max_memory_usage": "2GB",
    "cpu_affinity": True,
    "threading": {"enabled": True, "max_workers": 10},
}

deployment_config = {
    "api": api_settings,
    "model": model_settings,
    "monitoring": monitoring_settings,
    "security": security_settings,
    "performance": performance_settings,
}

# Render the YAML once and reuse the same text for the file and the notebook output
config_yaml = yaml.dump(deployment_config, default_flow_style=False)
with open("deployment_config.yaml", "w") as f:
    f.write(config_yaml)

print("Deployment configuration saved to deployment_config.yaml")
print("\nConfiguration:")
print(config_yaml)

## Key Takeaways

1. **API Design**: Clean, well-documented endpoints with proper request/response models
2. **Model Serialization**: Proper model saving/loading with versioning and registry
3. **Monitoring**: Metrics collection for performance and error tracking
4. **Configuration**: Externalized configuration for different deployment environments
5. **Scalability**: Considerations for handling multiple requests and resource usage
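6. **Security**: Input validation on every request; rate limiting and authentication are declared in the deployment configuration but must still be enforced by the serving layer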