# In [None]:
"""
api/server.py - FastAPI Inference Server.

This module provides the functionality to create and run a FastAPI server
that serves the LLM chatbot model from the MLflow Model Registry. It defines
the API endpoints and handles the loading of the model at startup.
"""
import os
import logging
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
from pydantic import BaseModel
import mlflow.pyfunc

# Module-scoped logger; it is named after this module via __name__ so that
# records emitted here can be identified and filtered by module.
logger = logging.getLogger(__name__)

# Pydantic models for API request and response bodies
class ChatRequest(BaseModel):
    """
    Request payload accepted by the chat endpoint.

    Carries the single piece of user input that is forwarded to the model.
    """
    # The user's question, passed to the model as the "question" input.
    question: str

class ChatResponse(BaseModel):
    """
    Response payload returned by the chat endpoint.

    Holds the generated answer and, optionally, the name of the model that
    produced it.
    """
    # The answer text produced for the submitted question.
    answer: str
    # Name of the serving model; left as None when no name is supplied.
    model: Optional[str] = None

def _extract_answer(result) -> str:
    """
    Normalizes a raw pyfunc prediction into a single answer string.

    The chat endpoint expects the model to return a list of strings, but
    MLflow pyfunc models may also return numpy arrays or pandas Series.
    Containers exposing ``tolist()`` are converted to plain Python values
    first so the first element is returned instead of the container's repr.

    Args:
        result: The object returned by the model's ``predict`` call.

    Returns:
        The first prediction as a string, or ``str(result)`` when the
        prediction is not a sequence.

    Raises:
        ValueError: If the prediction is an empty sequence.
    """
    if isinstance(result, str):
        return result

    # numpy arrays and pandas Series expose tolist(); plain lists do not.
    to_list = getattr(result, "tolist", None)
    if callable(to_list):
        result = to_list()

    if isinstance(result, (list, tuple)):
        if not result:
            raise ValueError("Model returned an empty prediction.")
        return str(result[0])

    return str(result)


def create_app(model_name: str) -> FastAPI:
    """
    Creates and configures the FastAPI application.

    This function handles the application setup, including CORS middleware,
    endpoint definitions, and most importantly, loading the production model
    from MLflow at application startup.

    Args:
        model_name: The name of the model to serve from the MLflow registry.

    Returns:
        The configured FastAPI application instance.
    """
    logger.info("Initializing FastAPI application...")
    app = FastAPI(title="LLMOps Chatbot API", version="1.0.0")

    # Enable CORS for all origins to allow local development and broad access.
    # Credentials stay disabled: a wildcard origin must not be combined with
    # allow_credentials=True, and none of the endpoints defined here read
    # cookies or authorization headers.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=False,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # --- Application Startup Event ---
    @app.on_event("startup")
    def load_model():
        """
        Loads the production model from the MLflow Registry at startup.

        Raises:
            RuntimeError: If the model cannot be loaded; this aborts startup.
        """
        model_uri = f"models:/{model_name}/Production"
        logger.info("Loading production model from MLflow: '%s'...", model_name)
        try:
            app.state.model = mlflow.pyfunc.load_model(model_uri)
        except Exception as e:
            logger.exception("Failed to load production model: %s", e)
            # Re-raise the exception to prevent the application from starting
            raise RuntimeError("Application failed to start due to model loading error.") from e
        logger.info("Model loaded successfully.")

    # --- API Endpoints ---
    @app.get("/health")
    def health_check():
        """A simple health check endpoint."""
        return {"status": "ok"}

    @app.post("/chat", response_model=ChatResponse)
    def chat_endpoint(req: ChatRequest):
        """
        Main chat endpoint to receive a question and return a response.

        Raises:
            HTTPException: 503 if no model has been loaded, 500 if inference
                fails or the model returns an empty prediction.
        """
        # The model is only present once the startup hook has run.
        model = getattr(app.state, "model", None)
        if model is None:
            raise HTTPException(status_code=503, detail="Model is not loaded.")

        try:
            # The MLflow pyfunc model expects a pandas DataFrame or dict
            result = model.predict({"question": req.question})
            answer = _extract_answer(result)
        except Exception as exc:
            # Log the full traceback server-side; return a generic message so
            # internal details are not leaked to the client.
            logger.exception("Model inference failed.")
            raise HTTPException(status_code=500, detail="Model inference failed.") from exc

        # Return the response, including the model name if available
        return ChatResponse(
            answer=answer,
            model=os.getenv("MLFLOW_DEPLOYED_MODEL", model_name),
        )

    return app

def run_server(model_name: str, host: str, port: int):
    """
    Builds the chatbot application and serves it with Uvicorn.

    Args:
        model_name: The name of the model to serve.
        host: The host address to bind the server to.
        port: The port to listen on.
    """
    # Construct the app inline and hand it straight to Uvicorn.
    uvicorn.run(create_app(model_name), host=host, port=port)