In [1]:
# === C5.3.2 — Generate the inference and API files ===
# All paths used in this cell are relative to the current working directory.
from pathlib import Path
Path("serve").mkdir(exist_ok=True)  # make sure the package directory exists before writing into it

# -------- serve/infer.py --------
(Path("serve")/"infer.py").write_text(r'''
from pathlib import Path
import joblib, numpy as np
from typing import List, Tuple, Optional

# =========================
# SENTIMENT (TF-IDF -> LR)
# =========================

def _find_sentiment_artifact() -> Path:
    """Locate the preferred sentiment model artifact on disk.

    Candidate groups are checked in priority order:

    1. ``models/clf_logreg_chi2_gridcal_final_*.joblib``
    2. ``artifacts/sentiment_grid_best_calibrated.joblib``
    3. ``models/clf_logreg_chi2_final_*.joblib``

    The first group containing at least one existing file wins; within that
    group the lexicographically last path is returned.

    Returns:
        Path of the selected artifact, relative to the working directory.

    Raises:
        FileNotFoundError: If none of the candidate files exist.
    """
    models_dir = Path("models")
    groups = (
        sorted(models_dir.glob("clf_logreg_chi2_gridcal_final_*.joblib")),
        [Path("artifacts") / "sentiment_grid_best_calibrated.joblib"],
        sorted(models_dir.glob("clf_logreg_chi2_final_*.joblib")),
    )
    for group in groups:
        existing = [p for p in group if p.exists()]
        if existing:
            # Stop at the first matching group so a lower-priority fallback
            # can never shadow a preferred artifact.
            return existing[-1]
    raise FileNotFoundError("No sentiment artifact found.")

def load_sentiment_bundle(path: Optional[str]=None):
    """Load the sentiment artifact and return it as a dict bundle.

    Args:
        path: Artifact to load. When empty or None the artifact is located
            with ``_find_sentiment_artifact()``.

    Returns:
        A dict with a ``"path"`` entry, plus either the keys of the loaded
        dict or the loaded object under ``"model"``. ``"tfidf"`` is always
        set last: the first existing ``tfidf_vectorizer.joblib`` under
        ``models/`` or ``artifacts/``, or None when neither file exists.
    """
    if path:
        artifact_path = Path(path)
    else:
        artifact_path = _find_sentiment_artifact()
    loaded = joblib.load(artifact_path)

    # The artifact may be a dict of objects or a bare estimator/pipeline.
    if isinstance(loaded, dict):
        bundle = {"path": str(artifact_path), **loaded}
    else:
        bundle = {"path": str(artifact_path), "model": loaded}

    # Optional stand-alone vectorizer; stays None when the pipeline embeds it.
    vectorizer_files = (
        Path("models/tfidf_vectorizer.joblib"),
        Path("artifacts/tfidf_vectorizer.joblib"),
    )
    found = next((vf for vf in vectorizer_files if vf.exists()), None)
    bundle["tfidf"] = joblib.load(found) if found is not None else None
    return bundle

def predict_sentiment_text(texts: List[str], bundle=None):
    """Predict sentiment labels and class-1 probabilities for raw texts.

    The model is taken from the bundle keys ``"model_cal"``, ``"model_uncal"``
    and ``"model"``, in that order. If the bundle carries no ``"tfidf"``
    vectorizer and the model exposes both ``predict`` and ``predict_proba``,
    the model is assumed to be a full pipeline and is fed the raw texts.
    Otherwise the texts are transformed with ``bundle["tfidf"]`` first.

    Args:
        texts: Raw input texts.
        bundle: Dict as returned by ``load_sentiment_bundle()``; loaded on
            demand when empty or None.

    Returns:
        ``(labels, probas)`` where ``labels`` is a list of predictions and
        ``probas`` is the list of column-1 probabilities, or None when the
        model has no ``predict_proba``.

    Raises:
        RuntimeError: If the bundle holds no model, or if a vectorizer is
            required but missing.
    """
    b = bundle or load_sentiment_bundle()
    # Compare against None rather than relying on truthiness: estimators may
    # define __len__ (pipelines, ensembles) and must not be skipped as "falsy".
    clf = next(
        (b[key] for key in ("model_cal", "model_uncal", "model") if b.get(key) is not None),
        None,
    )
    if clf is None:
        raise RuntimeError("No model found in sentiment bundle (expected 'model_cal', 'model_uncal' or 'model').")
    tfidf = b.get("tfidf")
    if tfidf is None and hasattr(clf, "predict") and hasattr(clf, "predict_proba"):
        # assume pipeline includes vectorizer
        y = clf.predict(texts)
        proba = clf.predict_proba(texts)
        return y.tolist(), proba[:, 1].tolist()
    # otherwise we need the vectorizer
    if tfidf is None:
        raise RuntimeError("No TF-IDF in bundle and model is not a full pipeline. Save your vectorizer as models/tfidf_vectorizer.joblib")
    X = tfidf.transform(texts)
    y = clf.predict(X)
    proba = clf.predict_proba(X)[:, 1] if hasattr(clf, "predict_proba") else None
    return y.tolist(), (proba.tolist() if proba is not None else None)

# =========================
# EMOTIONS (SBERT -> OVR)
# =========================

class OVRListWrapper:
    """Expose a list of per-label models through a single ``predict_proba``."""

    def __init__(self, models):
        # One model per output column, in column order.
        self.models = models

    def predict_proba(self, X: np.ndarray, batch: int = 8192) -> np.ndarray:
        """Score ``X`` with every wrapped model, ``batch`` rows at a time.

        Column ``k`` holds column 1 of ``models[k].predict_proba``; if that
        raises for any reason, ``models[k].decision_function`` is used instead.

        Returns:
            float32 array of shape ``(X.shape[0], len(self.models))``.
        """
        n_rows = X.shape[0]
        scores = np.empty((n_rows, len(self.models)), dtype=np.float32)
        for start in range(0, n_rows, batch):
            stop = min(n_rows, start + batch)
            chunk = np.asarray(X[start:stop], dtype=np.float32)
            for col, model in enumerate(self.models):
                try:
                    scores[start:stop, col] = model.predict_proba(chunk)[:, 1]
                except Exception:
                    # Fallback for models without a usable predict_proba.
                    scores[start:stop, col] = model.decision_function(chunk)
        return scores

def _find_emotions_artifact() -> Path:
    """Return the first existing emotions artifact under ``artifacts/``.

    Lookup order: ``emo_grid_best_bundle.joblib``, then
    ``emo_sgd_partial_models_resumed_final.joblib``, then
    ``emo_sgd_partial_models_final.joblib``.

    Raises:
        FileNotFoundError: If none of these files exist.
    """
    artifacts_dir = Path("artifacts")
    ordered = [artifacts_dir / "emo_grid_best_bundle.joblib"]
    for pattern in (
        "emo_sgd_partial_models_resumed_final.joblib",
        "emo_sgd_partial_models_final.joblib",
    ):
        ordered.extend(sorted(artifacts_dir.glob(pattern)))
    for candidate in ordered:
        if candidate.exists():
            return candidate
    raise FileNotFoundError("No emotions artifact found.")

def load_emotions_runtime(path: Optional[str]=None):
    """Load the emotions estimator together with its thresholds and label names.

    The loaded artifact is interpreted by type:

    * dict: estimator from ``"best_estimator"`` (falling back to
      ``"estimator"``), thresholds from ``"thresholds"``, labels from
      ``"label_names_kept"``.
    * list: wrapped in ``OVRListWrapper``; thresholds come from the last file
      (in sorted name order) matching ``artifacts/emo_thr_mean_floor*.joblib``,
      if any.
    * anything else: used directly as the estimator.

    Args:
        path: Artifact to load. When empty or None the artifact is located
            with ``_find_emotions_artifact()``.

    Returns:
        ``(estimator, thresholds, labels)``; thresholds and labels may be None.

    Raises:
        ValueError: If a dict artifact holds no estimator.
    """
    import joblib
    from glob import glob

    p = Path(path) if path else _find_emotions_artifact()
    # NOTE: joblib.load unpickles the file; only point this at trusted artifacts.
    obj = joblib.load(p)
    thresholds = None
    labels = None
    if isinstance(obj, dict):
        est = obj.get("best_estimator", obj.get("estimator", None))
        if est is None:
            raise ValueError(f"Bundle {p.name} has no best_estimator")
        thresholds = obj.get("thresholds")
        labels = obj.get("label_names_kept")
    elif isinstance(obj, list):
        est = OVRListWrapper(obj)
        # glob() returns matches in arbitrary order; sort so that picking the
        # last one is deterministic across filesystems.
        cand_thr = sorted(glob("artifacts/emo_thr_mean_floor*.joblib"))
        if cand_thr:
            thresholds = joblib.load(cand_thr[-1])
    else:
        est = obj
    return est, thresholds, labels

# Loaded SentenceTransformer instances, keyed by (model_name, device), so the
# model is built once per process instead of once per call.
_SBERT_MODELS = {}


def encode_sbert(texts: List[str], model_name: str = "all-MiniLM-L6-v2"):
    """Encode texts into normalized float32 sentence embeddings.

    Args:
        texts: Texts to embed.
        model_name: SentenceTransformer model identifier.

    Returns:
        The embeddings as a float32 NumPy array (presumably one row per
        text -- confirm against the sentence-transformers documentation).
    """
    from sentence_transformers import SentenceTransformer
    import torch
    dev = "cuda" if torch.cuda.is_available() else "cpu"
    cache_key = (model_name, dev)
    sbert = _SBERT_MODELS.get(cache_key)
    if sbert is None:
        # Constructing the model is expensive; keep it for later calls.
        sbert = SentenceTransformer(model_name, device=dev)
        _SBERT_MODELS[cache_key] = sbert
    X = sbert.encode(texts, batch_size=256, convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    return X

def predict_emotions_text(texts: List[str], runtime=None, thresholds=None, label_names=None):
    """Predict multi-label emotions for raw texts.

    Args:
        texts: Raw input texts; they are embedded with ``encode_sbert``.
        runtime: ``(estimator, thresholds, labels)`` tuple as returned by
            ``load_emotions_runtime()``; loaded on demand when falsy.
        thresholds: Overrides the runtime thresholds when not None. Either a
            scalar or one value per label; defaults to 0.5 when nothing is
            provided.
        label_names: Overrides the runtime label names when not None.

    Returns:
        ``(labels, multi_hot, probas)``: the label names as a list (generated
        as ``label_<i>`` when none are available), the 0/1 decisions as nested
        lists, and the estimator's ``predict_proba`` output as nested lists.
    """
    est, thr, labels = runtime if runtime else load_emotions_runtime()
    if thresholds is not None:
        thr = thresholds
    if label_names is not None:
        labels = label_names
    X = encode_sbert(texts)
    proba = est.predict_proba(X)
    if thr is None:
        thr = 0.5
    thr_arr = np.asarray(thr) if not np.isscalar(thr) else np.full(proba.shape[1], thr, dtype=float)
    Y = (proba >= thr_arr.reshape(1, -1)).astype(int)
    # Use an explicit None/empty check: truth-testing a NumPy array of label
    # names raises ValueError. list(...) also keeps the result JSON-friendly.
    if labels is None or len(labels) == 0:
        names = [f"label_{i}" for i in range(proba.shape[1])]
    else:
        names = list(labels)
    return names, Y.tolist(), proba.tolist()
''', encoding="utf-8")

# -------- serve/app.py --------
(Path("serve")/"app.py").write_text(r'''
from fastapi import FastAPI
from pydantic import BaseModel
from typing import List, Optional
from serve.infer import load_sentiment_bundle, predict_sentiment_text, load_emotions_runtime, predict_emotions_text

# ASGI application object; the generated Dockerfile serves it as "serve.app:app".
app = FastAPI(title="Amazon Reviews — Sentiment & Emotions API", version="1.0")

# Request body for POST /predict/sentiment.
class SentimentIn(BaseModel):
    # Raw texts to classify.
    texts: List[str]

# Request body for POST /predict/emotions.
class EmotionsIn(BaseModel):
    # Raw texts to tag with emotions.
    texts: List[str]

@app.get("/health")
def health():
    # Liveness probe: constant payload, no model artifacts are touched.
    return dict(status="ok")

# Sentiment bundle shared by all requests; filled on first use.
_SENTIMENT_BUNDLE = None


def _get_sentiment_bundle():
    """Return the sentiment bundle, loading it from disk on first use only."""
    global _SENTIMENT_BUNDLE
    if _SENTIMENT_BUNDLE is None:
        _SENTIMENT_BUNDLE = load_sentiment_bundle()
    return _SENTIMENT_BUNDLE


@app.post("/predict/sentiment")
def predict_sentiment(payload: SentimentIn):
    """Predict sentiment for ``payload.texts``.

    Returns a JSON object with ``labels`` (predictions) and ``probas``
    (class-1 probabilities, or null when the model provides none).
    """
    # Reuse the cached bundle instead of unpickling the model on every request.
    y, proba = predict_sentiment_text(payload.texts, bundle=_get_sentiment_bundle())
    return {"labels": y, "probas": proba}

# Emotions runtime (estimator, thresholds, labels) shared by all requests;
# filled on first use.
_EMOTIONS_RUNTIME = None


def _get_emotions_runtime():
    """Return the emotions runtime, loading it from disk on first use only."""
    global _EMOTIONS_RUNTIME
    if _EMOTIONS_RUNTIME is None:
        _EMOTIONS_RUNTIME = load_emotions_runtime()
    return _EMOTIONS_RUNTIME


@app.post("/predict/emotions")
def predict_emotions(payload: EmotionsIn):
    """Predict emotions for ``payload.texts``.

    Returns a JSON object with ``labels`` (label names), ``multi_hot``
    (0/1 decisions per text and label) and ``probas`` (scores per text and
    label).
    """
    # Reuse the cached runtime instead of unpickling the models on every request.
    labels, Y, proba = predict_emotions_text(payload.texts, runtime=_get_emotions_runtime())
    return {"labels": labels, "multi_hot": Y, "probas": proba}
''', encoding="utf-8")

# -------- serve/requirements.txt --------
# Runtime dependencies of the service, written one package per line (unpinned).
_requirements = (
    "fastapi",
    "uvicorn",
    "joblib",
    "scikit-learn",
    "numpy",
    "pandas",
    "sentence-transformers",
    "torch",
)
(Path("serve")/"requirements.txt").write_text("\n".join(_requirements) + "\n", encoding="utf-8")

# -------- Dockerfile --------
# The dependency layer is built before the code/artifact layers so that
# changing a model file does not force a full `pip install` on every rebuild.
Path("Dockerfile").write_text(r'''
FROM python:3.11-slim

WORKDIR /app

# System deps (optional but useful for torch CPU wheels)
RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ && rm -rf /var/lib/apt/lists/*

# Install python deps first so this layer stays cached until requirements.txt changes
COPY serve/requirements.txt ./serve/requirements.txt
RUN pip install --no-cache-dir -r serve/requirements.txt

# Copy code and artifacts
COPY serve ./serve
COPY artifacts ./artifacts
COPY models ./models
COPY docs ./docs

EXPOSE 8000
CMD ["uvicorn", "serve.app:app", "--host", "0.0.0.0", "--port", "8000"]
''', encoding="utf-8")

# -------- tests/test_smoke.py --------
Path("tests").mkdir(exist_ok=True, parents=True)
Path("tests/test_smoke.py").write_text(r'''
import sys, os
sys.path.append(os.path.abspath("."))

def test_import():
    # The inference module must import cleanly and expose its sentiment loader.
    from serve import infer
    assert hasattr(infer, "load_sentiment_bundle")

def test_health_like():
    # Smoke check: at least one of the two loaders must succeed.
    # A loader that raises for any reason simply counts as unavailable.
    import serve.infer as inf
    results = []
    for loader_name in ("load_sentiment_bundle", "load_emotions_runtime"):
        try:
            results.append(getattr(inf, loader_name)())
        except Exception:
            results.append(None)
    assert any(result is not None for result in results)
''', encoding="utf-8")

# -------- .github/workflows/ci.yml --------
# Create .github/workflows (including its parent) in one call, then write the
# GitHub Actions workflow: a test job, plus a Docker build job gated on main.
_workflow_dir = Path(".github/workflows")
_workflow_dir.mkdir(parents=True, exist_ok=True)
(_workflow_dir / "ci.yml").write_text(r'''
name: ci

on:
  push:
    branches: [ main, master ]
  pull_request:

jobs:
  test-build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install test deps
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install -r serve/requirements.txt
      - name: Run tests
        run: pytest -q

  docker:
    if: github.ref == 'refs/heads/main'
    needs: test-build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build image
        run: docker build -t reviews-api:latest .
      # - name: Login & Push (optional)
      #   run: |
      #     echo "${{ secrets.DOCKERHUB_TOKEN }}" | docker login -u "${{ secrets.DOCKERHUB_USER }}" --password-stdin
      #     docker tag reviews-api:latest ${{ secrets.DOCKERHUB_USER }}/reviews-api:latest
      #     docker push ${{ secrets.DOCKERHUB_USER }}/reviews-api:latest
''', encoding="utf-8")

# Confirmation message (in French) listing the files this cell writes.
print("✅ Fichiers générés: serve/infer.py, serve/app.py, serve/requirements.txt, Dockerfile, tests/test_smoke.py, .github/workflows/ci.yml")

✅ Fichiers générés: serve/infer.py, serve/app.py, serve/requirements.txt, Dockerfile, tests/test_smoke.py, .github/workflows/ci.yml
