# CI/CD para ML - Caso Integrador: HealthCheck

## BLOQUE 1 - Configuración del Pipeline CI/CD con Triggers

In [4]:
yaml_example = """
name: HealthCheck ML Pipeline

# TRIGGERS: push, pull_request, schedule
on:
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'model/**'

  pull_request:
    branches: [main]
    types: [opened, synchronize]

  schedule:
    - cron: '0 2 * * 0'  # Domingos 2:00 AM para monitoreo

env:
  PYTHON_VERSION: '3.10'

jobs:
  # JOB 1: Tests automáticos (disparado por push y PR)
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout del repositorio
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Instalar dependencias
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest

      - name: Ejecutar tests de inferencia
        run: pytest src/inference.py -v

  # JOB 2: Monitoreo de drift (solo para schedule)
  monitor:
    runs-on: ubuntu-latest
    if: github.event_name == 'schedule'
    steps:
      - name: Checkout del repositorio
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Ejecutar monitoreo de drift
        run: python src/model_monitoring.py
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}

      - name: Subir reporte de drift
        uses: actions/upload-artifact@v3
        with:
          name: drift-report
          path: drift_report.json

  # JOB 3: Despliegue (solo en push a main, después de tests exitosos)
  deploy:
    runs-on: ubuntu-latest
    needs: test
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'
    steps:
      - name: Checkout del repositorio
        uses: actions/checkout@v3

      - name: Build Docker image
        run: docker build -t healthcheck-model:latest .

      - name: Push to ECR
        run: |
          aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin $ECR_REGISTRY
          docker tag healthcheck-model:latest $ECR_REGISTRY/healthcheck:latest
          docker push $ECR_REGISTRY/healthcheck:latest
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          ECR_REGISTRY: ${{ secrets.ECR_REGISTRY }}

  # JOB 4: Quality Gates con SonarCloud
  quality:
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request'
    steps:
      - name: Checkout del repositorio
        uses: actions/checkout@v3
        with:
          fetch-depth: 0  # Full history for better analysis

      - name: SonarCloud Scan
        uses: SonarSource/sonarcloud-github-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
        with:
          args: >
            -Dsonar.projectKey=HealthCheck
            -Dsonar.organization=tu-org
            -Dsonar.sources=src
            -Dsonar.python.coverage.reportPaths=coverage.xml
"""

print(yaml_example)


name: HealthCheck ML Pipeline

# TRIGGERS: push, pull_request, schedule
on:
  push:
    branches: [main]
    paths:
      - 'src/**'
      - 'model/**'
  
  pull_request:
    branches: [main]
    types: [opened, synchronize]
  
  schedule:
    - cron: '0 2 * * 0'  # Domingos 2:00 AM para monitoreo

env:
  PYTHON_VERSION: '3.10'

jobs:
  # JOB 1: Tests automáticos (disparado por push y PR)
  test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout del repositorio
        uses: actions/checkout@v3

      - name: Setup Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Instalar dependencias
        run: |
          pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest

      - name: Ejecutar tests de inferencia
        run: pytest src/inference.py -v

  # JOB 2: Monitoreo de drift (solo para schedule)
  monitor:
    runs-on: ubuntu-latest
    if: github.event_na

## BLOQUE 2 - Test de inferencia del modelo

In [3]:
# src/inference.py
#import sys
#sys.path.append('src')
from inference import predict_risk

def test_predict_risk():
    """
    Test que valida que el modelo responde correctamente
    con la estructura esperada.

    Este test se ejecuta automáticamente en cada push y PR.
    """
    sample_input = {
        "age": 35,
        "gender": "male",
        "smoker": "no",
        "bmi": 26.7
    }

    result = predict_risk(sample_input)

    # Validaciones
    assert "risk" in result, "La respuesta debe contener la clave 'risk'"
    assert isinstance(result["risk"], bool), "El riesgo debe ser un booleano"
    assert "confidence" in result, "La respuesta debe incluir confianza"
    assert 0 <= result["confidence"] <= 1, "Confianza entre 0 y 1"

    print(f"Test exitoso: {result}")

def test_predict_risk_invalid_input():
    """
    Test que verifica el manejo de inputs inválidos
    """
    invalid_input = {"age": -5}

    try:
        result = predict_risk(invalid_input)
        assert False, "Debería haber lanzado una excepción"
    except ValueError as e:
        print(f"Error manejado correctamente: {e}")

# Ejecutar si se corre directamente
if __name__ == "__main__":
    test_predict_risk()
    test_predict_risk_invalid_input()

Test exitoso: {'risk': True, 'confidence': 0.5791478427996359}
Error manejado correctamente: columns are missing: {'gender', 'bmi', 'smoker'}


## BLOQUE 3 - Monitoreo de drift (ejecutado semanalmente)

In [12]:
# src/model_monitoring.py
import json
import numpy as np
from datetime import datetime


def compute_drift_metrics():
    """
    Calcula métricas de drift entre distribución de entrenamiento
    y datos de producción.

    Este script se ejecuta automáticamente cada domingo a las 2 AM
    mediante el trigger 'schedule' del workflow.
    """
    # Simular cálculo de métricas (en producción: comparar distribuciones reales)
    psi = np.random.uniform(0.1, 0.4)  # Population Stability Index
    ks = np.random.uniform(0.1, 0.4)   # Kolmogorov-Smirnov

    # Umbrales de alerta
    drift_detected = psi > 0.25 or ks > 0.3

    result = {
        "timestamp": datetime.now().isoformat(),
        "metrics": {
            "psi": round(psi, 3),
            "ks": round(ks, 3)
        },
        "drift_detected": drift_detected,
        "status": "DRIFT DETECTED" if drift_detected else "OK",
        "recommendation": "Considerar reentrenamiento" if drift_detected else "No action needed"
    }

    # Guardar reporte
    with open("drift_report.json", "w") as f:
        json.dump(result, f, indent=2)

    print(json.dumps(result, indent=2))

    # Enviar alerta si hay drift
    #if drift_detected:
        #send_slack_alert(result)  # Implementar según necesidad

    return result

if __name__ == "__main__":
    compute_drift_metrics()

{
  "timestamp": "2025-11-23T19:01:59.756816",
  "metrics": {
    "psi": 0.336,
    "ks": 0.177
  },
  "drift_detected": true,
  "status": "DRIFT DETECTED",
  "recommendation": "Considerar reentrenamiento"
}


## BLOQUE 4 - Dockerfile para despliegue

In [5]:
# Dockerfile
dockerfile = """
FROM python:3.10-slim

WORKDIR /app

# Copiar modelo y código
COPY model/health_risk_model.pkl ./model/
COPY src/ ./src/
COPY requirements.txt .

# Instalar dependencias
RUN pip install --no-cache-dir -r requirements.txt

# Exponer puerto
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
  CMD curl -f http://localhost:8000/health || exit 1

# Comando de inicio
CMD ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "8000"]
"""

print(dockerfile)

# Este Dockerfile es usado por el job 'deploy' del workflow
# cuando hay un push exitoso a la rama main


FROM python:3.10-slim

WORKDIR /app

# Copiar modelo y código
COPY model/health_risk_model.pkl ./model/
COPY src/ ./src/
COPY requirements.txt .

# Instalar dependencias
RUN pip install --no-cache-dir -r requirements.txt

# Exponer puerto
EXPOSE 8000

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3   CMD curl -f http://localhost:8000/health || exit 1

# Comando de inicio
CMD ["uvicorn", "src.api:app", "--host", "0.0.0.0", "--port", "8000"]



## BLOQUE 5 - Calidad de código con SonarCloud

In [6]:
# Este análisis se ejecuta automáticamente en cada Pull Request
# mediante el job 'quality' del workflow.

# SonarCloud revisa:
# - Bugs potenciales
# - Vulnerabilidades de seguridad
# - Code smells
# - Duplicación de código
# - Cobertura de tests

# Ejemplo de configuración en sonar-project.properties:
sonar_properties = """
sonar.projectKey=HealthCheck
sonar.organization=tu-organizacion
sonar.projectName=HealthCheck ML

# Paths
sonar.sources=src
sonar.tests=tests
sonar.python.coverage.reportPaths=coverage.xml

# Exclusions
sonar.exclusions=**/*_test.py,**/test_*.py

# Quality Gates
sonar.qualitygate.wait=true
"""

print(sonar_properties)

# El workflow falla si SonarCloud detecta issues críticos,
# previniendo merge de código con problemas de calidad


sonar.projectKey=HealthCheck
sonar.organization=tu-organizacion
sonar.projectName=HealthCheck ML

# Paths
sonar.sources=src
sonar.tests=tests
sonar.python.coverage.reportPaths=coverage.xml

# Exclusions
sonar.exclusions=**/*_test.py,**/test_*.py

# Quality Gates
sonar.qualitygate.wait=true



In [2]:
############################