In [None]:
# =============================================================================
# Stage 8 - API & Deploy: nb73_dockerfile_and_env.ipynb
# Topic: Docker containerization + environment management + CORS + production ready
# =============================================================================

# Cell 1: Shared Cache Bootstrap
import os, pathlib, torch
import sys
from datetime import datetime

# Shared cache configuration (copy this cell into every notebook).
# The root can be overridden through the AI_CACHE_ROOT environment variable.
AI_CACHE_ROOT = os.getenv("AI_CACHE_ROOT", "../ai_warehouse/cache")

# Environment variable name -> sub-directory below AI_CACHE_ROOT.
_cache_layout = {
    "HF_HOME": "hf",
    "TRANSFORMERS_CACHE": "hf/transformers",
    "HF_DATASETS_CACHE": "hf/datasets",
    "HUGGINGFACE_HUB_CACHE": "hf/hub",
    "TORCH_HOME": "torch",
}

for env_name, sub_dir in _cache_layout.items():
    cache_dir = f"{AI_CACHE_ROOT}/{sub_dir}"
    # Export the location and make sure the directory exists.
    os.environ[env_name] = cache_dir
    pathlib.Path(cache_dir).mkdir(parents=True, exist_ok=True)

print("[Cache]", AI_CACHE_ROOT, "| GPU:", torch.cuda.is_available())

In [None]:
# =============================================================================
# Cell 2: Environment Variables Design & .env.example
# =============================================================================


def create_env_example():
    """Write the ``.env.example`` template and return its text.

    The template is written (overwriting any existing file) to ``.env.example``
    in the current working directory, with leading/trailing whitespace
    stripped. A one-line summary with the number of active settings is printed.

    Returns:
        str: The template text exactly as defined below (not stripped).
    """

    env_template = """# ragent-text-lab Environment Configuration
# ===========================================

# AI Cache & Model Configuration
AI_CACHE_ROOT=/mnt/ai/cache
MODEL_ID=Qwen/Qwen2.5-7B-Instruct
BACKEND=transformers
DEVICE_MAP=auto
TORCH_DTYPE=auto
QUANTIZATION=none

# RAG Configuration
EMBEDDING_MODEL=BAAI/bge-m3
RERANKER_MODEL=BAAI/bge-reranker-base
INDEX_PATH=indices/general.faiss
RAG_CHUNK_SIZE=800
RAG_OVERLAP=80

# API Server Configuration
API_HOST=0.0.0.0
API_PORT=8000
API_WORKERS=1
DEBUG=false

# CORS Configuration
CORS_ORIGINS=*
CORS_METHODS=GET,POST,PUT,DELETE,OPTIONS
CORS_HEADERS=*
CORS_CREDENTIALS=true

# Rate Limiting
RATE_LIMIT_REQUESTS=100
RATE_LIMIT_WINDOW=3600
RATE_LIMIT_STORAGE=memory

# Security Configuration
MAX_PROMPT_LENGTH=4096
MAX_RESPONSE_LENGTH=2048
SAFETY_CHECK=true
ALLOWED_HOSTS=*

# Tool Configuration
WEB_SEARCH_ENABLED=true
CALCULATOR_ENABLED=true
FILE_LOOKUP_ENABLED=true
FILE_LOOKUP_WHITELIST=data/,outs/

# Logging Configuration
LOG_LEVEL=info
LOG_FORMAT=json
LOG_FILE=logs/ragent.log

# Database Configuration (if needed)
# DATABASE_URL=sqlite:///./ragent.db

# External Services (optional)
# OPENAI_API_KEY=your_openai_key_here
# ANTHROPIC_API_KEY=your_anthropic_key_here
"""

    # Count only active KEY=VALUE lines. Splitting the whole text on "=" would
    # also count every "=" of the banner rule and of the commented-out
    # examples, and would be off by one on top of that.
    setting_count = sum(
        1
        for line in env_template.splitlines()
        if "=" in line and not line.lstrip().startswith("#")
    )

    # Write to project root
    env_path = pathlib.Path(".env.example")
    env_path.write_text(env_template.strip(), encoding="utf-8")
    print(f"✅ Created {env_path} with {setting_count} configuration items")

    return env_template


# Write .env.example into the current working directory and keep the returned
# template text in `env_content`.
env_content = create_env_example()

In [None]:
# =============================================================================
# Cell 3: Dockerfile Multi-stage Build
# =============================================================================


def create_dockerfile():
    """Write a multi-stage ``Dockerfile`` and return its text.

    The file is written (overwriting any existing one) to ``Dockerfile`` in the
    current working directory, with leading/trailing whitespace stripped.

    The template is a raw string on purpose: in a regular triple-quoted string
    a backslash followed by a newline is swallowed by Python, which would
    collapse every multi-line ``ENV`` / ``RUN`` / ``HEALTHCHECK`` instruction
    onto a single line in the generated file.

    Returns:
        str: The Dockerfile text exactly as defined below (not stripped).
    """

    dockerfile_content = r"""# ragent-text-lab Dockerfile
# Multi-stage build for optimized production image

# ===========================================
# Stage 1: Base Python Environment
# ===========================================
FROM python:3.11-slim as base

# Set environment variables
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    g++ \
    git \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Create app user for security
RUN useradd --create-home --shell /bin/bash app

# ===========================================
# Stage 2: Dependencies Installation
# ===========================================
FROM base as deps

# Set working directory
WORKDIR /app

# Copy requirements first for better layer caching
COPY requirements.txt requirements-dev.txt ./

# Install Python dependencies
RUN pip install --no-cache-dir -r requirements.txt

# ===========================================
# Stage 3: Application Build
# ===========================================
FROM deps as app

# Copy application code
COPY --chown=app:app . .

# Create necessary directories
RUN mkdir -p logs outs indices data && \
    chown -R app:app /app

# Switch to app user
USER app

# Set cache directory
# (directories are listed explicitly: RUN uses /bin/sh, which has no brace expansion)
ENV AI_CACHE_ROOT=/app/cache
RUN mkdir -p /app/cache/hf /app/cache/torch

# ===========================================
# Stage 4: Production Image
# ===========================================
FROM app as production

# Expose API port
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Default command
CMD ["python", "-m", "uvicorn", "apps.api.main:app", "--host", "0.0.0.0", "--port", "8000"]
"""

    dockerfile_path = pathlib.Path("Dockerfile")
    dockerfile_path.write_text(dockerfile_content.strip(), encoding="utf-8")
    print(f"✅ Created {dockerfile_path}")

    return dockerfile_content


# Write the Dockerfile into the current working directory and keep the returned
# text in `dockerfile_content`.
dockerfile_content = create_dockerfile()

In [None]:
# =============================================================================
# Cell 4: docker-compose.yml Complete Service Orchestration
# =============================================================================


def create_docker_compose():
    """Generate ``docker-compose.yml`` for the API, UI, Redis and nginx services.

    The YAML below is written (stripped of surrounding whitespace) to
    ``docker-compose.yml`` in the current working directory, replacing any
    existing file, and a confirmation line is printed.

    Returns:
        str: The compose text exactly as defined below (not stripped).
    """

    compose_yaml = """# ragent-text-lab Docker Compose Configuration
version: '3.8'

services:
  # ===========================================
  # Main API Service
  # ===========================================
  ragent-api:
    build:
      context: .
      target: production
    container_name: ragent-api
    restart: unless-stopped
    ports:
      - "${API_PORT:-8000}:8000"
    environment:
      - AI_CACHE_ROOT=/app/cache
      - MODEL_ID=${MODEL_ID:-Qwen/Qwen2.5-7B-Instruct}
      - BACKEND=${BACKEND:-transformers}
      - API_HOST=0.0.0.0
      - API_PORT=8000
      - DEBUG=${DEBUG:-false}
      - LOG_LEVEL=${LOG_LEVEL:-info}
      - CORS_ORIGINS=${CORS_ORIGINS:-*}
      - RATE_LIMIT_REQUESTS=${RATE_LIMIT_REQUESTS:-100}
      - MAX_PROMPT_LENGTH=${MAX_PROMPT_LENGTH:-4096}
      - SAFETY_CHECK=${SAFETY_CHECK:-true}
    volumes:
      - ./data:/app/data:ro
      - ./outs:/app/outs
      - ./indices:/app/indices
      - ./logs:/app/logs
      - ai-cache:/app/cache
    networks:
      - ragent-network
    depends_on:
      - redis
    deploy:
      resources:
        limits:
          memory: 8G
        reservations:
          memory: 4G
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s

  # ===========================================
  # Gradio UI Service
  # ===========================================
  ragent-ui:
    build:
      context: .
      target: production
    container_name: ragent-ui
    restart: unless-stopped
    ports:
      - "${UI_PORT:-7860}:7860"
    environment:
      - API_URL=http://ragent-api:8000
      - GRADIO_SERVER_NAME=0.0.0.0
      - GRADIO_SERVER_PORT=7860
    command: ["python", "apps/gradio_app/app.py"]
    networks:
      - ragent-network
    depends_on:
      - ragent-api
    deploy:
      resources:
        limits:
          memory: 2G

  # ===========================================
  # Redis for Rate Limiting & Caching
  # ===========================================
  redis:
    image: redis:7-alpine
    container_name: ragent-redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis-data:/data
    networks:
      - ragent-network
    command: redis-server --appendonly yes
    deploy:
      resources:
        limits:
          memory: 512M

  # ===========================================
  # Nginx Reverse Proxy (Optional)
  # ===========================================
  nginx:
    image: nginx:alpine
    container_name: ragent-nginx
    restart: unless-stopped
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    networks:
      - ragent-network
    depends_on:
      - ragent-api
      - ragent-ui
    profiles:
      - production

# ===========================================
# Named Volumes
# ===========================================
volumes:
  ai-cache:
    driver: local
  redis-data:
    driver: local

# ===========================================
# Networks
# ===========================================
networks:
  ragent-network:
    driver: bridge
    ipam:
      config:
        - subnet: 172.20.0.0/16
"""

    # Persist the stripped YAML next to the Dockerfile and report the path.
    target_file = pathlib.Path("docker-compose.yml")
    yaml_on_disk = compose_yaml.strip()
    target_file.write_text(yaml_on_disk, encoding="utf-8")
    print(f"✅ Created {target_file}")

    return compose_yaml


# Write docker-compose.yml into the current working directory and keep the
# returned text in `compose_content`.
compose_content = create_docker_compose()

In [None]:
# =============================================================================
# Cell 5: CORS & Security Middleware Enhancement
# =============================================================================


def create_security_middleware():
    """Write ``shared_utils/api/security.py`` and return its source text.

    The generated module provides:

    * ``RateLimitMiddleware`` - in-memory sliding-window limiter keyed by client IP.
    * ``SecurityHeadersMiddleware`` - adds common security response headers.
    * ``monitoring_router`` - ``/health`` and ``/metrics`` routes. They live on a
      router because the generated module has no FastAPI instance of its own;
      decorating them with ``@app.get`` at module level would raise
      ``NameError`` on import.
    * ``setup_security(app)`` - wires CORS, trusted hosts, rate limiting,
      security headers and the monitoring routes into an application.

    The target directory is created together with any missing parents, so the
    function also works when ``shared_utils/`` does not exist yet.

    Returns:
        str: The exact source code written to disk.
    """

    # NOTE: this is a regular (non-raw, non-f) string. Keep backslashes out of
    # the generated code, and keep the doubled braces: they are literal here
    # and act as escapes inside the generated f-string.
    middleware_code = '''from fastapi import APIRouter, FastAPI, Request, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.middleware.trustedhost import TrustedHostMiddleware
from fastapi.responses import JSONResponse
import time
import asyncio
from collections import defaultdict
import os
import logging

logger = logging.getLogger(__name__)

# Health and metrics routes are collected on a router because this module has
# no FastAPI instance of its own; setup_security() attaches them to the app.
monitoring_router = APIRouter()


def _split_csv(raw: str) -> list:
    """Split a comma separated setting into trimmed, non-empty items."""
    return [item.strip() for item in raw.split(",") if item.strip()]


# ===========================================
# Rate Limiting Middleware
# ===========================================
class RateLimitMiddleware:
    """Sliding-window, per-client request limiter kept in process memory."""

    def __init__(self, requests_per_window: int = 100, window_seconds: int = 3600):
        self.requests_per_window = requests_per_window
        self.window_seconds = window_seconds
        self.client_requests = defaultdict(list)

    async def __call__(self, request: Request, call_next):
        # Starlette allows request.client to be None, so fall back to one shared bucket.
        client_ip = request.client.host if request.client else "unknown"
        now = time.time()

        # Keep only the timestamps that are still inside the window
        recent = [
            req_time for req_time in self.client_requests[client_ip]
            if now - req_time < self.window_seconds
        ]

        # Check rate limit
        if len(recent) >= self.requests_per_window:
            self.client_requests[client_ip] = recent
            return JSONResponse(
                status_code=429,
                content={"error": "Rate limit exceeded", "retry_after": self.window_seconds}
            )

        # Record request
        recent.append(now)
        self.client_requests[client_ip] = recent

        response = await call_next(request)
        return response

# ===========================================
# Security Headers Middleware
# ===========================================
class SecurityHeadersMiddleware:
    """Adds a fixed set of security headers to every response."""

    async def __call__(self, request: Request, call_next):
        response = await call_next(request)

        # Security headers
        response.headers["X-Content-Type-Options"] = "nosniff"
        response.headers["X-Frame-Options"] = "DENY"
        response.headers["X-XSS-Protection"] = "1; mode=block"
        response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
        response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
        response.headers["Content-Security-Policy"] = "default-src 'self'"

        return response

# ===========================================
# Setup Security for FastAPI App
# ===========================================
def setup_security(app: FastAPI):
    """Configure comprehensive security for production"""

    # CORS Configuration
    cors_origins = _split_csv(os.getenv("CORS_ORIGINS", "*")) or ["*"]
    allow_credentials = os.getenv("CORS_CREDENTIALS", "true").strip().lower() == "true"
    if "*" in cors_origins:
        logger.warning("CORS允許所有來源 - 生產環境建議限制")
        # A wildcard origin combined with credentials would let any site make
        # authenticated cross-origin calls, so credentials are switched off.
        allow_credentials = False

    app.add_middleware(
        CORSMiddleware,
        allow_origins=cors_origins,
        allow_credentials=allow_credentials,
        allow_methods=_split_csv(os.getenv("CORS_METHODS", "GET,POST,PUT,DELETE,OPTIONS")),
        allow_headers=_split_csv(os.getenv("CORS_HEADERS", "*")),
    )

    # Trusted Host Middleware
    allowed_hosts = _split_csv(os.getenv("ALLOWED_HOSTS", "*")) or ["*"]
    if "*" not in allowed_hosts:
        app.add_middleware(
            TrustedHostMiddleware,
            allowed_hosts=allowed_hosts
        )

    # Rate Limiting
    rate_limit_requests = int(os.getenv("RATE_LIMIT_REQUESTS", "100"))
    rate_limit_window = int(os.getenv("RATE_LIMIT_WINDOW", "3600"))

    app.middleware("http")(
        RateLimitMiddleware(rate_limit_requests, rate_limit_window)
    )

    # Security Headers
    app.middleware("http")(SecurityHeadersMiddleware())

    # Health / metrics routes
    app.include_router(monitoring_router)

    logger.info(f"🔒 安全設定完成: CORS={len(cors_origins)} origins, Rate={rate_limit_requests}/{rate_limit_window}s")

    return app

# ===========================================
# Health Check Endpoint
# ===========================================
@monitoring_router.get("/health")
def health_check(request: Request):
    """Comprehensive health check for container orchestration"""
    # Plain def on purpose: FastAPI runs it in a worker thread, so the one
    # second CPU sampling below does not stall the event loop.
    import psutil
    import torch

    try:
        # System metrics
        cpu_percent = psutil.cpu_percent(interval=1)
        memory = psutil.virtual_memory()
        disk = psutil.disk_usage("/")

        # GPU check
        gpu_available = torch.cuda.is_available()
        gpu_memory = None
        if gpu_available:
            gpu_memory = {
                "allocated": torch.cuda.memory_allocated(),
                "cached": torch.cuda.memory_reserved(),
                "total": torch.cuda.get_device_properties(0).total_memory
            }

        health_data = {
            "status": "healthy",
            "timestamp": time.time(),
            "system": {
                "cpu_percent": cpu_percent,
                "memory_percent": memory.percent,
                "disk_percent": disk.percent,
                "gpu_available": gpu_available,
                "gpu_memory": gpu_memory
            },
            "services": {
                "api": "running",
                "model_loaded": hasattr(request.app.state, "llm_adapter"),
                "rag_index": os.path.exists("indices/general.faiss")
            }
        }

        return health_data

    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return JSONResponse(
            status_code=503,
            content={"status": "unhealthy", "error": str(e)}
        )

# ===========================================
# Metrics Endpoint
# ===========================================
@monitoring_router.get("/metrics")
async def metrics(request: Request):
    """Prometheus-compatible metrics endpoint"""
    state = request.app.state

    metrics_text = f"""# HELP ragent_requests_total Total requests processed
# TYPE ragent_requests_total counter
ragent_requests_total {{method="GET"}} {getattr(state, 'get_requests', 0)}
ragent_requests_total {{method="POST"}} {getattr(state, 'post_requests', 0)}

# HELP ragent_response_duration_seconds Response duration
# TYPE ragent_response_duration_seconds histogram
ragent_response_duration_seconds_bucket {{le="0.1"}} {getattr(state, 'fast_responses', 0)}
ragent_response_duration_seconds_bucket {{le="1.0"}} {getattr(state, 'medium_responses', 0)}
ragent_response_duration_seconds_bucket {{le="10.0"}} {getattr(state, 'slow_responses', 0)}

# HELP ragent_model_inference_duration_seconds Model inference time
# TYPE ragent_model_inference_duration_seconds histogram
ragent_model_inference_duration_seconds_sum {getattr(state, 'total_inference_time', 0)}
ragent_model_inference_duration_seconds_count {getattr(state, 'inference_count', 0)}
"""

    return Response(content=metrics_text, media_type="text/plain")
'''

    # Write middleware to shared_utils; parents=True so a missing
    # shared_utils/ directory does not abort the cell.
    middleware_path = pathlib.Path("shared_utils/api/security.py")
    middleware_path.parent.mkdir(parents=True, exist_ok=True)
    middleware_path.write_text(middleware_code, encoding="utf-8")
    print(f"✅ Created security middleware at {middleware_path}")

    return middleware_code


# Write shared_utils/api/security.py and keep the generated source text in
# `security_code`.
security_code = create_security_middleware()

In [None]:
# =============================================================================
# Cell 6: Startup Scripts & Environment Validation
# =============================================================================


def create_startup_scripts():
    """Write ``scripts/startup.sh`` and ``scripts/validate_env.py``.

    Both files are created (overwriting existing ones) below ``scripts/`` in
    the current working directory and marked executable (mode 0o755).

    The validation script template is a raw string on purpose: it contains
    ``\\n`` escapes inside ``print(...)`` calls. In a regular string Python
    would turn them into real line breaks, leaving unterminated string
    literals in the generated file so that it could not even be parsed.

    Returns:
        tuple[str, str]: ``(startup_script, validate_script)`` - the exact
        texts written to disk.
    """

    # Startup script
    startup_script = """#!/bin/bash
# ragent-text-lab startup script

set -e

echo "🚀 Starting ragent-text-lab deployment..."

# ===========================================
# Environment Validation
# ===========================================
validate_env() {
    echo "📋 Validating environment variables..."

    required_vars=(
        "MODEL_ID"
        "BACKEND"
        "API_PORT"
        "AI_CACHE_ROOT"
    )

    for var in "${required_vars[@]}"; do
        if [[ -z "${!var}" ]]; then
            echo "❌ Required environment variable $var is not set"
            exit 1
        fi
    done

    echo "✅ Environment validation passed"
}

# ===========================================
# Docker Setup
# ===========================================
setup_docker() {
    echo "🐳 Setting up Docker environment..."

    # Create necessary directories
    mkdir -p data outs indices logs

    # Set permissions
    chmod 755 data outs indices logs

    echo "✅ Docker setup completed"
}

# ===========================================
# Model Download
# ===========================================
download_models() {
    echo "📥 Downloading required models..."

    python3 -c "
import os
from transformers import AutoTokenizer, AutoModel
from sentence_transformers import SentenceTransformer

# Download LLM tokenizer (lightweight check)
model_id = os.getenv('MODEL_ID', 'Qwen/Qwen2.5-7B-Instruct')
print(f'Checking {model_id}...')
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
print(f'✅ {model_id} tokenizer ready')

# Download embedding model
embed_model = os.getenv('EMBEDDING_MODEL', 'BAAI/bge-m3')
print(f'Downloading {embed_model}...')
embedder = SentenceTransformer(embed_model)
print(f'✅ {embed_model} ready')

print('🎯 All models downloaded successfully')
"
}

# ===========================================
# Health Check
# ===========================================
health_check() {
    echo "🔍 Running health checks..."

    # Wait for service to start
    sleep 10

    # Check API health
    if curl -f http://localhost:${API_PORT:-8000}/health > /dev/null 2>&1; then
        echo "✅ API health check passed"
    else
        echo "❌ API health check failed"
        return 1
    fi

    # Check model loading
    if curl -f http://localhost:${API_PORT:-8000}/v1/models > /dev/null 2>&1; then
        echo "✅ Model endpoint accessible"
    else
        echo "⚠️  Model endpoint not ready (may still be loading)"
    fi
}

# ===========================================
# Main Execution
# ===========================================
main() {
    validate_env
    setup_docker

    if [[ "${DOWNLOAD_MODELS:-true}" == "true" ]]; then
        download_models
    fi

    echo "🚀 Starting services with docker-compose..."
    docker-compose up -d

    health_check

    echo "🎉 ragent-text-lab deployment completed!"
    echo "📊 API: http://localhost:${API_PORT:-8000}"
    echo "🎨 UI: http://localhost:${UI_PORT:-7860}"
    echo "📈 Health: http://localhost:${API_PORT:-8000}/health"
}

# Run main function
main "$@"
"""

    startup_path = pathlib.Path("scripts/startup.sh")
    startup_path.parent.mkdir(parents=True, exist_ok=True)
    startup_path.write_text(startup_script, encoding="utf-8")
    startup_path.chmod(0o755)
    print(f"✅ Created startup script at {startup_path}")

    # Environment validation script (raw string: keeps the "\n" escapes that
    # belong to the generated code intact)
    validate_script = r'''#!/usr/bin/env python3
"""
ragent-text-lab environment validation script
Checks all dependencies and configurations before deployment
"""

import os
import sys
import importlib
import subprocess
import pathlib
import json
from typing import Dict, List, Tuple

class EnvironmentValidator:
    def __init__(self):
        self.errors = []
        self.warnings = []
        self.success = []

    def check_python_version(self) -> bool:
        """Check Python version compatibility"""
        version = sys.version_info
        if version.major == 3 and version.minor >= 8:
            self.success.append(f"✅ Python {version.major}.{version.minor}.{version.micro}")
            return True
        else:
            self.errors.append(f"❌ Python {version.major}.{version.minor} not supported (需要 ≥ 3.8)")
            return False

    def check_required_packages(self) -> bool:
        """Check required Python packages"""
        required_packages = [
            "torch", "transformers", "sentence_transformers",
            "faiss", "fastapi", "uvicorn", "gradio",
            "pydantic", "opencc", "trafilatura"
        ]

        missing = []
        for package in required_packages:
            try:
                importlib.import_module(package.replace("-", "_"))
                self.success.append(f"✅ {package}")
            except ImportError:
                missing.append(package)

        if missing:
            self.errors.append(f"❌ Missing packages: {', '.join(missing)}")
            return False

        return True

    def check_environment_variables(self) -> bool:
        """Check required environment variables"""
        required_vars = {
            "MODEL_ID": "LLM model identifier",
            "BACKEND": "LLM backend (transformers/llama_cpp/ollama)",
            "AI_CACHE_ROOT": "Cache directory path",
            "API_PORT": "API server port"
        }

        missing = []
        for var, desc in required_vars.items():
            value = os.getenv(var)
            if value:
                self.success.append(f"✅ {var}={value}")
            else:
                missing.append(f"{var} ({desc})")

        if missing:
            self.errors.append(f"❌ Missing env vars: {', '.join(missing)}")
            return False

        return True

    def check_docker_availability(self) -> bool:
        """Check Docker and docker-compose availability"""
        try:
            result = subprocess.run(["docker", "--version"],
                                  capture_output=True, text=True, check=True)
            self.success.append(f"✅ {result.stdout.strip()}")

            result = subprocess.run(["docker-compose", "--version"],
                                  capture_output=True, text=True, check=True)
            self.success.append(f"✅ {result.stdout.strip()}")
            return True

        except (subprocess.CalledProcessError, FileNotFoundError):
            self.errors.append("❌ Docker or docker-compose not available")
            return False

    def check_file_structure(self) -> bool:
        """Check required file structure"""
        required_files = [
            "Dockerfile",
            "docker-compose.yml",
            ".env.example",
            "requirements.txt",
            "shared_utils/__init__.py",
            "apps/api/main.py"
        ]

        missing = []
        for file_path in required_files:
            if pathlib.Path(file_path).exists():
                self.success.append(f"✅ {file_path}")
            else:
                missing.append(file_path)

        if missing:
            self.errors.append(f"❌ Missing files: {', '.join(missing)}")
            return False

        return True

    def check_gpu_availability(self) -> bool:
        """Check GPU availability and CUDA"""
        try:
            import torch
            if torch.cuda.is_available():
                gpu_count = torch.cuda.device_count()
                gpu_name = torch.cuda.get_device_name(0)
                memory_mb = torch.cuda.get_device_properties(0).total_memory // 1024**2

                self.success.append(f"✅ GPU: {gpu_name} ({memory_mb}MB)")

                if memory_mb < 8192:  # 8GB
                    self.warnings.append(f"⚠️  GPU memory ({memory_mb}MB) may be insufficient for large models")

                return True
            else:
                self.warnings.append("⚠️  No GPU available - will use CPU (slower)")
                return True

        except ImportError:
            self.errors.append("❌ PyTorch not available for GPU check")
            return False

    def generate_report(self) -> Dict:
        """Generate comprehensive validation report"""
        report = {
            "timestamp": __import__("time").time(),
            "python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
            "success_count": len(self.success),
            "warning_count": len(self.warnings),
            "error_count": len(self.errors),
            "success": self.success,
            "warnings": self.warnings,
            "errors": self.errors,
            "deployment_ready": len(self.errors) == 0
        }

        return report

    def run_all_checks(self) -> bool:
        """Run all validation checks"""
        print("🔍 Running ragent-text-lab environment validation...\n")

        checks = [
            ("Python Version", self.check_python_version),
            ("Required Packages", self.check_required_packages),
            ("Environment Variables", self.check_environment_variables),
            ("File Structure", self.check_file_structure),
            ("GPU Availability", self.check_gpu_availability),
            ("Docker Availability", self.check_docker_availability),
        ]

        all_passed = True
        for check_name, check_func in checks:
            print(f"📋 {check_name}...")
            try:
                passed = check_func()
                if not passed:
                    all_passed = False
            except Exception as e:
                self.errors.append(f"❌ {check_name} check failed: {e}")
                all_passed = False
            print()

        return all_passed

def main():
    validator = EnvironmentValidator()
    success = validator.run_all_checks()

    # Generate report
    report = validator.generate_report()

    # Print summary
    print("=" * 60)
    print("📊 VALIDATION SUMMARY")
    print("=" * 60)

    for item in report["success"]:
        print(item)

    for item in report["warnings"]:
        print(item)

    for item in report["errors"]:
        print(item)

    print(f"\n🎯 Result: {'READY FOR DEPLOYMENT' if success else 'NEEDS ATTENTION'}")

    # Save report
    report_path = pathlib.Path("outs/validation_report.json")
    report_path.parent.mkdir(exist_ok=True)
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2, ensure_ascii=False)

    print(f"📄 Detailed report saved to {report_path}")

    return 0 if success else 1

if __name__ == "__main__":
    sys.exit(main())
'''

    validate_path = pathlib.Path("scripts/validate_env.py")
    validate_path.write_text(validate_script, encoding="utf-8")
    validate_path.chmod(0o755)
    print(f"✅ Created validation script at {validate_path}")

    return startup_script, validate_script


# Write scripts/startup.sh and scripts/validate_env.py; keep both script texts
# (in that order) in `startup_content` and `validate_content`.
startup_content, validate_content = create_startup_scripts()

In [None]:
# =============================================================================
# Cell 7: Nginx Configuration for Production
# =============================================================================


def create_nginx_config():
    """Generate ``nginx.conf`` for the reverse proxy in front of the API and UI.

    The configuration below is written (stripped of surrounding whitespace) to
    ``nginx.conf`` in the current working directory, replacing any existing
    file, and a confirmation line is printed.

    Returns:
        str: The configuration text exactly as defined below (not stripped).
    """

    conf_text = """# ragent-text-lab Nginx Configuration
# Production-ready reverse proxy with SSL support

events {
    worker_connections 1024;
}

http {
    # Basic Settings
    sendfile on;
    tcp_nopush on;
    tcp_nodelay on;
    keepalive_timeout 65;
    types_hash_max_size 2048;

    # MIME Types
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Logging
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;
    error_log /var/log/nginx/error.log;

    # Gzip Compression
    gzip on;
    gzip_vary on;
    gzip_min_length 10240;
    gzip_proxied expired no-cache no-store private must-revalidate auth;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/xml+rss
        application/atom+xml
        image/svg+xml;

    # Rate Limiting
    limit_req_zone $binary_remote_addr zone=api:10m rate=10r/s;
    limit_req_zone $binary_remote_addr zone=ui:10m rate=5r/s;

    # Upstream Services
    upstream ragent_api {
        server ragent-api:8000;
        keepalive 32;
    }

    upstream ragent_ui {
        server ragent-ui:7860;
        keepalive 16;
    }

    # ===========================================
    # HTTP to HTTPS Redirect
    # ===========================================
    server {
        listen 80;
        server_name _;
        return 301 https://$host$request_uri;
    }

    # ===========================================
    # Main HTTPS Server
    # ===========================================
    server {
        listen 443 ssl http2;
        server_name _;

        # SSL Configuration
        ssl_certificate /etc/nginx/ssl/cert.pem;
        ssl_certificate_key /etc/nginx/ssl/key.pem;
        ssl_protocols TLSv1.2 TLSv1.3;
        ssl_ciphers ECDHE-RSA-AES256-GCM-SHA512:DHE-RSA-AES256-GCM-SHA512:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES256-GCM-SHA384;
        ssl_prefer_server_ciphers off;

        # Security Headers
        add_header Strict-Transport-Security "max-age=63072000" always;
        add_header X-Frame-Options DENY;
        add_header X-Content-Type-Options nosniff;
        add_header X-XSS-Protection "1; mode=block";
        add_header Referrer-Policy "strict-origin-when-cross-origin";

        # ===========================================
        # API Routes
        # ===========================================
        location /api/ {
            limit_req zone=api burst=20 nodelay;

            proxy_pass http://ragent_api/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # Timeouts
            proxy_connect_timeout 60s;
            proxy_send_timeout 300s;
            proxy_read_timeout 300s;

            # Buffering
            proxy_buffering on;
            proxy_buffer_size 4k;
            proxy_buffers 8 4k;
            proxy_busy_buffers_size 8k;

            # WebSocket Support
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";
        }

        # Health Check (no rate limit)
        location /health {
            proxy_pass http://ragent_api/health;
            proxy_set_header Host $host;
            access_log off;
        }

        # Metrics (restrict access)
        location /metrics {
            allow 127.0.0.1;
            allow 10.0.0.0/8;
            allow 172.16.0.0/12;
            allow 192.168.0.0/16;
            deny all;

            proxy_pass http://ragent_api/metrics;
            proxy_set_header Host $host;
        }

        # ===========================================
        # UI Routes
        # ===========================================
        location / {
            limit_req zone=ui burst=10 nodelay;

            proxy_pass http://ragent_ui/;
            proxy_set_header Host $host;
            proxy_set_header X-Real-IP $remote_addr;
            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
            proxy_set_header X-Forwarded-Proto $scheme;

            # WebSocket Support for Gradio
            proxy_http_version 1.1;
            proxy_set_header Upgrade $http_upgrade;
            proxy_set_header Connection "upgrade";

            # Timeouts
            proxy_connect_timeout 60s;
            proxy_send_timeout 60s;
            proxy_read_timeout 60s;
        }

        # Static Files (if any)
        location /static/ {
            expires 1y;
            add_header Cache-Control "public, immutable";
            try_files $uri =404;
        }

        # Favicon
        location = /favicon.ico {
            access_log off;
            log_not_found off;
            expires 1y;
        }

        # Security.txt
        location = /.well-known/security.txt {
            return 200 "Contact: security@yourcompany.com\nExpires: 2025-12-31T23:59:59.000Z\n";
            add_header Content-Type text/plain;
        }
    }
}
"""

    # Persist the stripped configuration and report where it went.
    target_file = pathlib.Path("nginx.conf")
    conf_on_disk = conf_text.strip()
    target_file.write_text(conf_on_disk, encoding="utf-8")
    print(f"✅ Created nginx configuration at {target_file}")

    return conf_text


# Write nginx.conf into the current working directory and keep the returned
# text in `nginx_content`.
nginx_content = create_nginx_config()

In [None]:
# =============================================================================
# Cell 8: Smoke Test - Container Startup & Validation
# =============================================================================


def _safe_read_text(path):
    """Return the UTF-8 text of ``path``, or "" when it is missing or unreadable."""
    try:
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        return ""


def run_deployment_smoke_test(readiness_threshold=0.8):
    """Run a file-based smoke test of the deployment artifacts in the working directory.

    The test inspects (but never executes) the files generated earlier in the
    notebook: ``.env.example``, ``Dockerfile``, ``docker-compose.yml``,
    ``shared_utils/api/security.py``, ``scripts/*`` and ``nginx.conf``. It prints
    a human-readable checklist and writes a JSON summary to
    ``outs/deployment_report.json``.

    Args:
        readiness_threshold: Fraction (0-1) of production-readiness items that
            must pass for the deployment to be reported as READY. Defaults to
            0.8, the value that was previously hard-coded.

    Returns:
        bool: True when the readiness score reaches ``readiness_threshold``.

    Raises:
        OSError: If the report directory or report file cannot be written.
    """
    # Local imports keep this cell runnable on a fresh kernel without relying
    # on another cell having imported these modules.
    import json
    import re
    import time

    print("🧪 Running Deployment Smoke Test")
    print("=" * 50)

    # Test 1: Environment validation
    print("\n📋 Test 1: Environment Validation")
    env_vars = {}
    try:
        # Load environment from .env.example
        env_example_path = pathlib.Path(".env.example")
        if env_example_path.exists():
            for raw_line in env_example_path.read_text(encoding="utf-8").splitlines():
                line = raw_line.strip()
                # Skip blanks, comments (including indented ones) and non-assignments.
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                env_vars[key.strip()] = value.strip()

        required_keys = ["MODEL_ID", "BACKEND", "API_PORT", "AI_CACHE_ROOT"]
        missing_env = [k for k in required_keys if k not in env_vars]

        if missing_env:
            print(f"❌ Missing environment variables: {missing_env}")
        else:
            print("✅ Environment template validation passed")

    except (OSError, UnicodeError) as e:
        print(f"❌ Environment validation failed: {e}")

    # Test 2: Docker files validation
    print("\n🐳 Test 2: Docker Configuration")
    docker_files = ["Dockerfile", "docker-compose.yml"]
    for file_name in docker_files:
        file_path = pathlib.Path(file_name)
        if file_path.exists():
            size_kb = file_path.stat().st_size / 1024
            print(f"✅ {file_name} ({size_kb:.1f}KB)")
        else:
            print(f"❌ Missing {file_name}")

    # Test 3: Security configuration test
    print("\n🔒 Test 3: Security Configuration")
    security_features = [
        "RateLimitMiddleware",
        "SecurityHeadersMiddleware",
        "CORSMiddleware",
        "TrustedHostMiddleware",
    ]
    # Initialised up front so the report below never has to probe locals().
    found_features = []
    # Read once here and reused by Test 5 instead of re-reading per item.
    security_text = ""
    try:
        security_path = pathlib.Path("shared_utils/api/security.py")
        if security_path.exists():
            security_text = security_path.read_text(encoding="utf-8")
            found_features = [f for f in security_features if f in security_text]
            print(
                f"✅ Security features: {len(found_features)}/{len(security_features)}"
            )

            if len(found_features) < len(security_features):
                # Listed in declaration order so the output is deterministic.
                missing_features = [
                    f for f in security_features if f not in found_features
                ]
                print(f"⚠️  Missing security features: {missing_features}")
        else:
            print("❌ Security middleware not found")

    except (OSError, UnicodeError) as e:
        print(f"❌ Security validation failed: {e}")

    # Test 4: Startup scripts validation
    print("\n🚀 Test 4: Startup Scripts")
    scripts = [
        ("scripts/startup.sh", "startup script"),
        ("scripts/validate_env.py", "environment validator"),
    ]

    for script_path, description in scripts:
        path = pathlib.Path(script_path)
        if path.exists():
            # Any of the user/group/other execute bits counts as executable.
            is_executable = path.stat().st_mode & 0o111
            status = "✅" if is_executable else "⚠️ "
            print(f"{status} {description}: {script_path}")
        else:
            print(f"❌ Missing {description}: {script_path}")

    # Test 5: Production readiness checklist
    print("\n🎯 Test 5: Production Readiness")

    # Backward compatibility: an earlier cell may have left the generated
    # middleware source in a `security_code` global. It is still honoured, but
    # the file on disk is the primary source so a fresh kernel works too.
    legacy_security_code = globals().get("security_code", "")
    if not isinstance(legacy_security_code, str):
        legacy_security_code = ""

    def in_security_code(token):
        return token in security_text or token in legacy_security_code

    dockerfile_text = _safe_read_text(pathlib.Path("Dockerfile"))
    nginx_text = _safe_read_text(pathlib.Path("nginx.conf"))

    # Dockerfile keywords are case-insensitive, so accept both
    # `FROM img as production` and `FROM img AS production`.
    has_production_stage = (
        re.search(
            r"^\s*FROM\s+\S.*\s+AS\s+production\b",
            dockerfile_text,
            re.IGNORECASE | re.MULTILINE,
        )
        is not None
    )

    production_items = [
        ("Health check endpoint", "/health" in security_text),
        ("Metrics endpoint", "/metrics" in security_text),
        (
            "CORS configuration",
            "CORS_ORIGINS" in env_vars or in_security_code("CORS_ORIGINS"),
        ),
        (
            "Rate limiting",
            "RATE_LIMIT_REQUESTS" in env_vars
            or in_security_code("RATE_LIMIT_REQUESTS"),
        ),
        ("Security headers", in_security_code("SecurityHeadersMiddleware")),
        ("Nginx configuration", pathlib.Path("nginx.conf").exists()),
        # SSL directives live in nginx.conf, so that is where they are checked.
        ("SSL support", "ssl_certificate" in nginx_text),
        ("Docker multi-stage", has_production_stage),
    ]

    ready_count = 0
    for item, found in production_items:
        if found:
            ready_count += 1
            print(f"✅ {item}")
        else:
            print(f"❌ {item}")

    # Final assessment
    print(f"\n📊 Production Readiness: {ready_count}/{len(production_items)} items")

    if ready_count >= len(production_items) * readiness_threshold:
        print("🎉 DEPLOYMENT READY - 可以進行生產部署")
        deployment_status = "READY"
    else:
        print("⚠️  NEEDS ATTENTION - 需要完善配置後部署")
        deployment_status = "NEEDS_WORK"

    # Generate deployment report
    report = {
        "timestamp": time.time(),
        "deployment_status": deployment_status,
        "readiness_score": f"{ready_count}/{len(production_items)}",
        "environment_vars_count": len(env_vars),
        "docker_files_ready": len(
            [f for f in docker_files if pathlib.Path(f).exists()]
        ),
        "security_features": len(found_features),
        "production_readiness": ready_count / len(production_items),
    }

    report_path = pathlib.Path("outs/deployment_report.json")
    report_path.parent.mkdir(parents=True, exist_ok=True)
    with open(report_path, "w", encoding="utf-8") as f:
        json.dump(report, f, indent=2)

    print(f"\n📄 Deployment report saved to {report_path}")

    return deployment_status == "READY"


# Run comprehensive smoke test. The function returns True only when it reports
# a READY deployment status, so deployment_ready is a plain bool.
deployment_ready = run_deployment_smoke_test()

# Print the stage summary. Only the final status line is interpolated from
# deployment_ready; the checklist sections above it are static text and are
# printed regardless of the smoke-test outcome.
print(
    f"""

🎯 **Stage 8 Complete: Docker Deployment & Production Setup**

## 完成項目 Completed Features
✅ **環境配置**: .env.example 模板與變數管理
✅ **容器化**: 多階段 Dockerfile 與 docker-compose 編排
✅ **安全防護**: CORS、速率限制、安全標頭中介軟體
✅ **監控健檢**: /health 與 /metrics 端點
✅ **生產部署**: Nginx 反向代理與 SSL 支援
✅ **自動化腳本**: 啟動與環境驗證腳本

## 核心概念 Key Concepts
- **Multi-stage Docker builds** 最佳化映像檔大小與安全性
- **Environment-driven configuration** 環境變數驅動的配置管理
- **Production security middleware** 生產級安全防護機制
- **Container orchestration** 多服務容器編排
- **Health monitoring & metrics** 服務健康監控與指標

## 常見陷阱 Pitfalls
⚠️ **記憶體限制**: 容器記憶體配置需考慮模型大小 (建議 ≥8GB)
⚠️ **快取掛載**: AI_CACHE_ROOT 需正確掛載避免重複下載
⚠️ **權限問題**: 容器內檔案權限與主機用戶映射
⚠️ **網路配置**: 容器間通訊與外部存取端口設定
⚠️ **SSL 憑證**: 生產環境需配置有效 SSL 憑證

## 使用時機 When to Use
🎯 **生產部署**: 正式環境的容器化部署
🎯 **開發環境**: 統一的開發環境配置
🎯 **CI/CD**: 自動化建構與部署流程
🎯 **擴展部署**: 多實例負載均衡部署
🎯 **雲端部署**: 雲平台容器服務部署

## 下一步 Next Steps
🚀 設定 SSL 憑證與域名
🚀 配置監控告警系統 (Prometheus + Grafana)
🚀 建立 CI/CD 自動化流程
🚀 效能調優與擴展策略
🚀 備份與災害復原計畫

部署狀態: {'🎉 READY FOR PRODUCTION' if deployment_ready else '⚠️ NEEDS CONFIGURATION'}
"""
)

# 🎯 End of nb73_dockerfile_and_env.ipynb