# But du notebook
Ce notebook a pour but de faire la chaine de traitement complet pour pouvoir avoir les indicateurs rapidement. 

Ce notebook sert de **client** pour piloter ton **API (Application Programming Interface)** FastAPI : il enchaîne les phases du projet via appels HTTP, récupère les résultats, et affiche des tableaux/graphes.

Notebook client pour piloter l'API FastAPI en respectant l'architecture de ton projet :
- `app/routers/*` : routes HTTP
- `app/services/*` : logique métier
- `app/repositories/dataset_store.py` : stockage/caching datasets






## 0) Configuration et helpers
- Définition de `BASE_URL`
- Helpers `GET/POST`
- Fonctions utilitaires pour convertir des réponses API en `pandas.DataFrame`


In [73]:
from __future__ import annotations

import os
import json
from typing import Any, Dict, Optional, Tuple

import requests
import pandas as pd

pd.set_option("display.max_columns", 200)
pd.set_option("display.width", 160)

# -------------------------
# Configuration
# -------------------------
# Bonnes pratiques :
# - Utiliser une variable d'environnement pour BASE_URL afin de switcher facilement local/Docker/CI.
# - Ne jamais hardcoder des secrets (token, password) dans le notebook : passer par env vars.
BASE_URL = os.getenv("BASE_URL", "http://localhost:8000").rstrip("/")
TIMEOUT_S = int(os.getenv("TIMEOUT_S", "120"))

# Répertoire local pour sauvegarder des artefacts (rapports, métriques, exports...)
ARTIFACT_DIR = os.getenv("ARTIFACT_DIR", "artifacts")
os.makedirs(ARTIFACT_DIR, exist_ok=True)

session = requests.Session()
session.headers.update({"Content-Type": "application/json"})


def _url(path: str) -> str:
    if not path.startswith("/"):
        path = "/" + path
    return f"{BASE_URL}{path}"


def http_get(path: str, params: Optional[dict] = None) -> dict:
    r = session.get(_url(path), params=params, timeout=TIMEOUT_S)
    r.raise_for_status()
    return r.json()


def http_post(path: str, payload: Optional[dict] = None, params: Optional[dict] = None) -> dict:
    r = session.post(_url(path), json=payload, params=params, timeout=TIMEOUT_S)
    r.raise_for_status()
    return r.json()


def pretty(obj: Any) -> None:
    print(json.dumps(obj, indent=2, ensure_ascii=False))


def extract_dataset_id(resp: dict) -> Optional[str]:
    # Bonnes pratiques :
    # - Centraliser l'extraction des champs, pour éviter les répétitions et faciliter les changements de schéma.
    candidates = [
        ("meta", "dataset_id"),
        ("meta", "id"),
        ("result", "meta", "dataset_id"),
        ("result", "dataset_id"),
        ("dataset_id",),
    ]
    for path in candidates:
        cur: Any = resp
        ok = True
        for key in path:
            if isinstance(cur, dict) and key in cur:
                cur = cur[key]
            else:
                ok = False
                break
        if ok and isinstance(cur, str):
            return cur
    return None


def unwrap_result(resp: dict) -> Any:
    # Beaucoup de réponses API suivent: {"meta": {...}, "result": ...}
    if isinstance(resp, dict) and "result" in resp:
        return resp["result"]
    return resp


def to_df(obj: Any) -> pd.DataFrame:
    # Bonnes pratiques :
    # - Convertir proprement les payloads vers DataFrame, sans supposer un format unique.
    if obj is None:
        return pd.DataFrame()
    if isinstance(obj, pd.DataFrame):
        return obj
    if isinstance(obj, list):
        return pd.DataFrame(obj)
    if isinstance(obj, dict):
        for k in ("data", "rows", "sample", "data_sample", "preview"):
            if k in obj and isinstance(obj[k], list):
                return pd.DataFrame(obj[k])
    return pd.DataFrame()


def safe_call(fn, *args, **kwargs) -> dict:
    # Bonnes pratiques :
    # - Remonter proprement les erreurs HTTP avec le body pour debug.
    # - Éviter de masquer les exceptions : afficher un maximum de contexte, puis re-raise.
    try:
        return fn(*args, **kwargs)
    except requests.HTTPError as e:
        body = getattr(e.response, "text", "")
        print("HTTPError:", e)
        print("Body (truncated):", body[:2000])
        raise
    except Exception as e:
        print("Unexpected error:", repr(e))
        raise


print("BASE_URL =", BASE_URL)
print("ARTIFACT_DIR =", ARTIFACT_DIR)


BASE_URL = http://localhost:8000
ARTIFACT_DIR = artifacts


## 1) Définir les endpoints (alignés avec tes `routers/*`)

In [74]:
# Bonnes pratiques :
# - Centraliser ici les routes consommées par le notebook.
# - Nommer les endpoints par intention, pas par implémentation.
# - Garder une convention stable sur les prefixes (/dataset, /m15, /features, /eda, /baseline, /ml, /rl, /evaluate).

ENDPOINTS = {
    # Health
    "health": ["/health"],

    # Dataset / Import M1
    "load_m1": "/dataset/load_m1",          # POST {"year": 2022} (ou adapte)
    "dataset_info": "/dataset/info",        # GET  ?dataset_id=...
    "dataset_preview": "/dataset/preview",  # GET  ?dataset_id=...

    # M15 aggregation & clean (router m15.py / clean.py)
    "m15_aggregate": "/m15/aggregate",      # POST ?dataset_id=...  (ou JSON)
    "m15_clean": "/m15/clean",              # POST ?dataset_id=...  (ou JSON)

    # Features (router features.py)
    "features_compute": "/features/compute",  # POST ?dataset_id=...

    # EDA (router eda.py)
    "eda_run": "/eda/run",                  # POST ?dataset_id=...

    # Baseline (router baseline.py)
    "baseline_run": "/baseline/run",        # POST ?dataset_id=...

    # ML (router ml.py / trading_ml.py)
    "ml_train": "/ml/train",                # POST ?dataset_id=... + payload
    "ml_predict": "/ml/predict",            # POST ?dataset_id=... + payload

    # RL (router rl.py)
    "rl_design": "/rl/design",              # GET/POST ?dataset_id=...
    "rl_train": "/rl/train",                # POST ?dataset_id=... + payload

    # Evaluate (router evaluate.py)
    "evaluate_run": "/evaluate/run",        # POST ?dataset_id=... (+ ids modèles si besoin)
}

# Si tes routes diffèrent : modifie ENDPOINTS ici, et le reste du notebook suit.
pretty(ENDPOINTS)


{
  "health": [
    "/health"
  ],
  "load_m1": "/dataset/load_m1",
  "dataset_info": "/dataset/info",
  "dataset_preview": "/dataset/preview",
  "m15_aggregate": "/m15/aggregate",
  "m15_clean": "/m15/clean",
  "features_compute": "/features/compute",
  "eda_run": "/eda/run",
  "baseline_run": "/baseline/run",
  "ml_train": "/ml/train",
  "ml_predict": "/ml/predict",
  "rl_design": "/rl/design",
  "rl_train": "/rl/train",
  "evaluate_run": "/evaluate/run"
}


## 2) Healthcheck

In [75]:
health_resp = None
for ep in ENDPOINTS["health"]:
    try:
        health_resp = safe_call(http_get, ep)
        print("Health OK:", ep)
        break
    except Exception:
        continue

if health_resp is None:
    raise RuntimeError("Healthcheck impossible. Vérifie BASE_URL et les routes.")

pretty(health_resp)


Health OK: /health
{
  "status": "healthy",
  "service": "trading-gbpusd-api",
  "version": "2.0.0"
}


## 3) Phase 1 — Importation M1 (`routers/dataset.py`)

In [76]:
# Bonnes pratiques :
# - Paramétrer l'année et le couple/timeframe dans une seule cellule.
# - Conserver les IDs de datasets en variables pour enchaîner les phases.
YEAR = 2022

payload = {"year": YEAR}
resp_m1 = safe_call(http_post, ENDPOINTS["load_m1"], payload=payload)

pretty(resp_m1)
DATASET_ID_M1 = extract_dataset_id(resp_m1)
if not DATASET_ID_M1:
    raise RuntimeError("dataset_id introuvable dans la réponse. Vérifie le schéma de réponse.")
print("DATASET_ID_M1 =", DATASET_ID_M1)

df_m1_preview = to_df(unwrap_result(resp_m1))
df_m1_preview.head(10)


{
  "meta": {
    "dataset_id": "m1_2022_3a9c2fa5"
  },
  "result": {
    "file_path": "/code/data/DAT_MT_GBPUSD_M1_2022.csv",
    "shape": [
      372494,
      8
    ],
    "columns": [
      "timestamp",
      "Date",
      "Time",
      "Open",
      "High",
      "Low",
      "Close",
      "Volume"
    ],
    "sample": [
      {
        "timestamp": "2022-01-02T17:01:00",
        "Date": "2022.01.02",
        "Time": "17:01",
        "Open": 1.35199,
        "High": 1.35213,
        "Low": 1.35199,
        "Close": 1.35213,
        "Volume": 0
      },
      {
        "timestamp": "2022-01-02T17:02:00",
        "Date": "2022.01.02",
        "Time": "17:02",
        "Open": 1.35232,
        "High": 1.35232,
        "Low": 1.35232,
        "Close": 1.35232,
        "Volume": 0
      },
      {
        "timestamp": "2022-01-02T17:04:00",
        "Date": "2022.01.02",
        "Time": "17:04",
        "Open": 1.35233,
        "High": 1.35233,
        "Low": 1.35233,
        "Close": 1

Unnamed: 0,timestamp,Date,Time,Open,High,Low,Close,Volume
0,2022-01-02T17:01:00,2022.01.02,17:01,1.35199,1.35213,1.35199,1.35213,0
1,2022-01-02T17:02:00,2022.01.02,17:02,1.35232,1.35232,1.35232,1.35232,0
2,2022-01-02T17:04:00,2022.01.02,17:04,1.35233,1.35233,1.35233,1.35233,0
3,2022-01-02T17:06:00,2022.01.02,17:06,1.35233,1.35233,1.35233,1.35233,0
4,2022-01-02T17:10:00,2022.01.02,17:10,1.35233,1.35233,1.35233,1.35233,0
5,2022-01-02T17:11:00,2022.01.02,17:11,1.35232,1.35232,1.35226,1.35227,0
6,2022-01-02T17:13:00,2022.01.02,17:13,1.35223,1.35229,1.35223,1.35229,0
7,2022-01-02T17:14:00,2022.01.02,17:14,1.35223,1.35223,1.35223,1.35223,0
8,2022-01-02T17:16:00,2022.01.02,17:16,1.35231,1.35231,1.35231,1.35231,0
9,2022-01-02T17:17:00,2022.01.02,17:17,1.35232,1.35232,1.35232,1.35232,0


## 4) Phase 2 — Agrégation M1 → M15 (`routers/m15.py`)

In [77]:
# Le notebook tente d'abord en query param (?dataset_id=...), puis en JSON {"dataset_id": ...}.
resp_m15 = None

try:
    resp_m15 = safe_call(
        http_post,
        ENDPOINTS["m15_aggregate"],
        payload={"dataset_id": DATASET_ID_M1}
    )
except Exception:
    resp_m15 = safe_call(http_post, ENDPOINTS["m15_aggregate"], payload={"dataset_id": DATASET_ID_M1})

pretty(resp_m15)
DATASET_ID_M15 = extract_dataset_id(resp_m15) or (DATASET_ID_M1 + "_m15")
print("DATASET_ID_M15 =", DATASET_ID_M15)

df_m15_preview = to_df(unwrap_result(resp_m15))
df_m15_preview.head(10)


{
  "dataset_id": "m1_2022_3a9c2fa5_m15",
  "n_rows": 24247,
  "columns": [
    "timestamp",
    "open_15m",
    "high_15m",
    "low_15m",
    "close_15m",
    "volume"
  ],
  "report": {
    "input_rows": 372494,
    "dropped_bad_timestamp": 0,
    "resample_rows_before_dropna": 34752,
    "resample_rows_after_dropna": 24947,
    "dropped_incomplete_m15": 700
  }
}
DATASET_ID_M15 = m1_2022_3a9c2fa5_m15


## 5) Phase 3 — Nettoyage M15 (`routers/clean.py` ou `routers/m15.py`)

In [78]:
resp_clean = safe_call(
    http_post,
    ENDPOINTS["m15_clean"],
    payload={"dataset_id": DATASET_ID_M15}
)

pretty(resp_clean)
DATASET_ID_M15_CLEAN = extract_dataset_id(resp_clean) or (DATASET_ID_M15 + "_clean")
print("DATASET_ID_M15_CLEAN =", DATASET_ID_M15_CLEAN)

df_clean_preview = to_df(unwrap_result(resp_clean))
df_clean_preview.head(10)


{
  "dataset_id": "m1_2022_3a9c2fa5_m15_clean",
  "n_rows": 24246,
  "columns": [
    "timestamp",
    "open_15m",
    "high_15m",
    "low_15m",
    "close_15m",
    "volume"
  ],
  "report": {
    "input_rows": 24247,
    "dropped_bad_timestamp": 0,
    "dropped_incomplete_ohlc": 0,
    "dropped_negative_prices": 0,
    "dropped_ohlc_incoherence": 0,
    "flagged_gaps": 1,
    "dropped_gaps": 1,
    "gap_return_threshold": 0.02,
    "output_rows": 24246,
    "dropped_total": 1
  }
}
DATASET_ID_M15_CLEAN = m1_2022_3a9c2fa5_m15_clean


## 6) Feature Engineering (`routers/features.py`)

In [79]:
resp_feat = None

try:
    resp_feat = safe_call(http_post, ENDPOINTS["features_compute"], params={"dataset_id": DATASET_ID_M15_CLEAN})
except Exception:
    resp_feat = safe_call(http_post, ENDPOINTS["features_compute"], payload={"dataset_id": DATASET_ID_M15_CLEAN})

pretty(resp_feat)
DATASET_ID_FEAT = extract_dataset_id(resp_feat) or (DATASET_ID_M15_CLEAN + "_feat")
print("DATASET_ID_FEAT =", DATASET_ID_FEAT)

df_feat_preview = to_df(unwrap_result(resp_feat))
df_feat_preview.head(10)


HTTPError: 422 Client Error: Unprocessable Entity for url: http://localhost:8000/features/compute?dataset_id=m1_2022_3a9c2fa5_m15_clean
Body (truncated): {"detail":[{"type":"missing","loc":["body"],"msg":"Field required","input":null}]}
{
  "dataset_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "n_rows": 24147,
  "n_features": 20,
  "features": [
    "return_1",
    "return_4",
    "ema_20",
    "ema_50",
    "ema_diff",
    "rsi_14",
    "rolling_std_20",
    "range_15m",
    "body",
    "upper_wick",
    "lower_wick",
    "ema_200",
    "distance_to_ema200",
    "slope_ema50",
    "atr_14",
    "rolling_std_100",
    "volatility_ratio",
    "adx_14",
    "macd",
    "macd_signal"
  ],
  "report": {
    "n_input": 24246,
    "n_output": 24147,
    "n_dropped_warmup": 99,
    "features_computed": [
      "return_1",
      "return_4",
      "ema_20",
      "ema_50",
      "ema_diff",
      "rsi_14",
      "rolling_std_20",
      "range_15m",
      "body",
      "upper_wick",
      "lowe

## 7) EDA — Analyse exploratoire (`routers/eda.py`)

In [80]:
openapi = safe_call(http_get, "/openapi.json")
paths = list(openapi.get("paths", {}).keys())

# filtre “eda”
eda_paths = [p for p in paths if "eda" in p.lower()]
eda_paths[:50], len(eda_paths)


(['/eda/full_report/{dataset_id}',
  '/eda/returns/{dataset_id}',
  '/eda/volatility/{dataset_id}',
  '/eda/hourly/{dataset_id}',
  '/eda/autocorrelation/{dataset_id}',
  '/eda/adf/{dataset_id}'],
 6)

In [81]:
resp_eda = safe_call(
    http_get,
    f"/eda/full_report/{DATASET_ID_FEAT}"
)

pretty(resp_eda)

eda_path = os.path.join(ARTIFACT_DIR, f"eda_{DATASET_ID_FEAT}.json")
with open(eda_path, "w", encoding="utf-8") as f:
    json.dump(resp_eda, f, ensure_ascii=False, indent=2)

print("EDA report saved to:", eda_path)


{
  "dataset_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "eda_report": {
    "returns": {
      "n_observations": 24146,
      "mean": -4.126559793538866e-06,
      "std": 0.000827764056632929,
      "min": -0.022436910717274494,
      "max": 0.01639703625761757,
      "skewness": -0.7447752921334935,
      "kurtosis_excess": 49.96158173270455,
      "percentiles": {
        "p1": -0.0021341734129063815,
        "p5": -0.0011515293597946508,
        "p25": -0.00034399755620284633,
        "p50": 0.0,
        "p75": 0.0003263584902729044,
        "p95": 0.001149373069878612,
        "p99": 0.00222939020916757
      },
      "sharpe_annualised": -0.4035232124482408,
      "normality_shapiro": {
        "statistic": 0.8882206678390503,
        "p_value": 1.4051589001064658e-18,
        "is_normal_95": false
      },
      "histogram": {
        "counts": [
          1,
          0,
          0,
          0,
          0,
          0,
          0,
          0,
          1,
          0,
  

## 8) Baseline (`routers/baseline.py`)

In [82]:
openapi = safe_call(http_get, "/openapi.json")
paths = list(openapi.get("paths", {}).keys())

baseline_paths = [p for p in paths if "baseline" in p.lower()]
baseline_paths, len(baseline_paths)


(['/baseline/compare/{dataset_id}',
  '/baseline/random/{dataset_id}',
  '/baseline/buy_and_hold/{dataset_id}',
  '/baseline/fixed_rules/{dataset_id}'],
 4)

In [83]:
ENDPOINTS["baseline_run"] = "/baseline/compare/{dataset_id}"  

baseline_path = ENDPOINTS["baseline_run"].replace("{dataset_id}", DATASET_ID_FEAT)
resp_base = safe_call(http_get, baseline_path)

pretty(resp_base)

baseline_json_path = os.path.join(ARTIFACT_DIR, f"baseline_{DATASET_ID_FEAT}.json")
with open(baseline_json_path, "w", encoding="utf-8") as f:
    json.dump(resp_base, f, ensure_ascii=False, indent=2)

print("Baseline report saved to:", baseline_json_path)


{
  "dataset_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "baselines": {
    "random": {
      "strategy": "random",
      "seed": 42,
      "total_return_pct": -95.2214,
      "max_drawdown_pct": -95.2222,
      "sharpe": -17.3311,
      "profit_factor": 0.4338,
      "n_trades": 15123,
      "win_rate_pct": 18.45,
      "final_equity": 0.047786,
      "equity_curve": [
        1.0,
        0.998423,
        0.998351,
        0.998679,
        0.996232,
        0.995634,
        0.997319,
        0.9973,
        0.997452,
        0.995813,
        0.994243,
        0.993521,
        0.991419,
        0.990859,
        0.988375,
        0.986031,
        0.984325,
        0.981999,
        0.980502,
        0.97882,
        0.976282,
        0.973922,
        0.973085,
        0.970596,
        0.968837,
        0.967697,
        0.96593,
        0.963552,
        0.962376,
        0.961455,
        0.960643,
        0.958797,
        0.958389,
        0.957305,
        0.956231,
    

## 9) ML — entraînement + prédiction (`routers/ml.py`)

In [84]:
openapi = safe_call(http_get, "/openapi.json")
paths = list(openapi.get("paths", {}).keys())

ml_paths = [p for p in paths if "/ml" in p.lower() or "ml" in p.lower()]
ml_paths[:200], len(ml_paths)

(['/trading_ml/train',
  '/trading_ml/models',
  '/trading_ml/best_model',
  '/trading_ml/model/{model_id}',
  '/trading_ml/predict/{dataset_id}',
  '/trading_ml/predict_latest/{dataset_id}'],
 6)

In [86]:
# -------------------------
# Trading ML — Train (cellule complète, model_type string correct)
# -------------------------

ENDPOINTS["ml_train"] = "/trading_ml/train"
ENDPOINTS["ml_models"] = "/trading_ml/models"
ENDPOINTS["ml_best_model"] = "/trading_ml/best_model"
ENDPOINTS["ml_model_by_id"] = "/trading_ml/model/{model_id}"
ENDPOINTS["ml_predict"] = "/trading_ml/predict/{dataset_id}"
ENDPOINTS["ml_predict_latest"] = "/trading_ml/predict_latest/{dataset_id}"

# 1) OpenAPI -> schéma attendu
openapi = safe_call(http_get, "/openapi.json")
train_spec = openapi["paths"][ENDPOINTS["ml_train"]]["post"]

schema = (
    train_spec.get("requestBody", {})
    .get("content", {})
    .get("application/json", {})
    .get("schema", {})
)

def _resolve_ref(ref: str) -> dict:
    name = ref.split("/")[-1]
    return openapi["components"]["schemas"][name]

if isinstance(schema, dict) and "$ref" in schema:
    schema = _resolve_ref(schema["$ref"])

props = schema.get("properties", {}) if isinstance(schema, dict) else {}
required = schema.get("required", []) if isinstance(schema, dict) else []

print("ML train required:", required)
print("ML train properties:", list(props.keys()))

# 2) Récupérer les model_type autorisés
allowed_model_types = None

# 2.a) via OpenAPI enum si dispo
if isinstance(props.get("model_type"), dict):
    allowed_model_types = props["model_type"].get("enum")

# 2.b) via /trading_ml/models (peut renvoyer une liste d'objets)
if not allowed_model_types:
    try:
        models_resp = safe_call(http_get, ENDPOINTS["ml_models"])

        # cas 1 : {"models": [...]}
        if isinstance(models_resp, dict) and "models" in models_resp:
            models_list = models_resp["models"]
        else:
            models_list = models_resp

        # models_list peut être une liste de strings OU une liste d'objets
        if isinstance(models_list, list) and models_list:
            if all(isinstance(x, str) for x in models_list):
                allowed_model_types = models_list
            elif all(isinstance(x, dict) for x in models_list):
                # extraire model_type de chaque objet
                allowed_model_types = sorted(
                    {x.get("model_type") for x in models_list if isinstance(x.get("model_type"), str)}
                )
    except Exception:
        allowed_model_types = None

# 2.c) fallback hard
if not allowed_model_types:
    allowed_model_types = ["logreg", "rf", "gbm"]

print("Allowed model_type (strings):", allowed_model_types)

# 3) Choisir un model_type valide
preferred_order = ["gbm", "rf", "logreg"]
model_type = next((m for m in preferred_order if m in allowed_model_types), allowed_model_types[0])
print("Selected model_type:", model_type)

# 4) Construire payload conforme
DATASET_TRAIN_ID = os.getenv("DATASET_TRAIN_ID", DATASET_ID_FEAT)
DATASET_VAL_ID = os.getenv("DATASET_VAL_ID", DATASET_ID_FEAT)
DATASET_TEST_ID = os.getenv("DATASET_TEST_ID", "")

payload = {}
if "dataset_train_id" in props:
    payload["dataset_train_id"] = DATASET_TRAIN_ID
if "dataset_val_id" in props:
    payload["dataset_val_id"] = DATASET_VAL_ID
if "dataset_test_id" in props and DATASET_TEST_ID:
    payload["dataset_test_id"] = DATASET_TEST_ID
if "model_type" in props:
    payload["model_type"] = model_type  # string uniquement

missing = [k for k in required if k not in payload]
if missing:
    raise RuntimeError(f"Champs requis manquants: {missing}")

print("Payload envoyé à /trading_ml/train :")
pretty(payload)

# 5) Appel
resp_ml_train = safe_call(http_post, ENDPOINTS["ml_train"], payload=payload)
pretty(resp_ml_train)

# 6) MODEL_ID (si renvoyé)
MODEL_ID = None
for keypath in [("model_id",), ("result", "model_id"), ("result", "id"), ("id",)]:
    cur = resp_ml_train
    ok = True
    for k in keypath:
        if isinstance(cur, dict) and k in cur:
            cur = cur[k]
        else:
            ok = False
            break
    if ok and isinstance(cur, str):
        MODEL_ID = cur
        break

print("MODEL_ID =", MODEL_ID)

# 7) Artefacts
stamp = pd.Timestamp.utcnow().strftime("%Y%m%d_%H%M%S")
ml_payload_path = os.path.join(ARTIFACT_DIR, f"ml_train_payload_{DATASET_ID_FEAT}_{stamp}.json")
ml_train_path = os.path.join(ARTIFACT_DIR, f"ml_train_resp_{DATASET_ID_FEAT}_{stamp}.json")

with open(ml_payload_path, "w", encoding="utf-8") as f:
    json.dump(payload, f, ensure_ascii=False, indent=2)
with open(ml_train_path, "w", encoding="utf-8") as f:
    json.dump(resp_ml_train, f, ensure_ascii=False, indent=2)

print("ML train payload saved to:", ml_payload_path)
print("ML train response saved to:", ml_train_path)


ML train required: ['dataset_train_id', 'dataset_val_id']
ML train properties: ['dataset_train_id', 'dataset_val_id', 'dataset_test_id', 'model_type']
Allowed model_type (strings): ['gbm']
Selected model_type: gbm
Payload envoyé à /trading_ml/train :
{
  "dataset_train_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "dataset_val_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "model_type": "gbm"
}
{
  "model_id": "trading_gbm_20260213_110438_9c89",
  "model_type": "gbm",
  "version": "v3",
  "created_at": "2026-02-13T11:04:38.226787",
  "features": [
    "return_1",
    "return_4",
    "ema_20",
    "ema_50",
    "ema_diff",
    "rsi_14",
    "rolling_std_20",
    "range_15m",
    "body",
    "upper_wick",
    "lower_wick",
    "ema_200",
    "distance_to_ema200",
    "slope_ema50",
    "atr_14",
    "rolling_std_100",
    "volatility_ratio",
    "adx_14",
    "macd",
    "macd_signal"
  ],
  "n_features": 20,
  "n_train": 24146,
  "n_val": 24146,
  "n_test": 0,
  "hyperparams": {
    "ccp

## 10) RL — design + train (`routers/rl.py`)

In [None]:
resp_rl_design = None
try:
    resp_rl_design = safe_call(http_get, ENDPOINTS["rl_design"], params={"dataset_id": DATASET_ID_FEAT})
except Exception:
    try:
        resp_rl_design = safe_call(http_post, ENDPOINTS["rl_design"], params={"dataset_id": DATASET_ID_FEAT})
    except Exception:
        resp_rl_design = safe_call(http_post, ENDPOINTS["rl_design"], payload={"dataset_id": DATASET_ID_FEAT})

pretty(resp_rl_design)

rl_design_path = os.path.join(ARTIFACT_DIR, f"rl_design_{DATASET_ID_FEAT}.json")
with open(rl_design_path, "w", encoding="utf-8") as f:
    json.dump(resp_rl_design, f, ensure_ascii=False, indent=2)
print("RL design saved to:", rl_design_path)


{
  "1_probleme_metier": {
    "objectif": "Maximiser le PnL cumulé sur GBP/USD M15 sous contraintes réalistes.",
    "contraintes": [
      "Coûts de transaction : 2 pips par trade",
      "Drawdown maximum autorisé : 20%",
      "Horizon : épisodique (une année par episode)",
      "Décisions : toutes les 15 minutes"
    ]
  },
  "2_donnees": {
    "qualite": "CSV M1 → agrégé M15 → nettoyé → features V2",
    "alignement": "Split strict 2022 train / 2023 val / 2024 test",
    "couts": "Spread 2 pips = 0.02% par transaction"
  },
  "3_state": {
    "features": [
      "return_1",
      "return_4",
      "ema_diff",
      "rsi_14",
      "rolling_std_20",
      "distance_to_ema200",
      "slope_ema50",
      "atr_14",
      "macd",
      "adx_14"
    ],
    "normalisation": "z-score sur fenêtre glissante de 20 bougies",
    "warm_up": "20 bougies M15 nécessaires au démarrage",
    "dimension": "10 features + 3 (position, pnl, drawdown)"
  },
  "4_action": {
    "espace": "Discret",
  

In [None]:
# -------------------------
# RL — Train (cellule complète, alignée avec le schéma API)
# -------------------------
ENDPOINTS["rl_train"] = "/rl/train"

# 1) OpenAPI -> schéma RL train
openapi = safe_call(http_get, "/openapi.json")
train_spec = openapi["paths"][ENDPOINTS["rl_train"]]["post"]

schema = (
    train_spec.get("requestBody", {})
    .get("content", {})
    .get("application/json", {})
    .get("schema", {})
)

def _resolve_ref(ref: str) -> dict:
    name = ref.split("/")[-1]
    return openapi["components"]["schemas"][name]

if "$ref" in schema:
    schema = _resolve_ref(schema["$ref"])

props = schema.get("properties", {}) if isinstance(schema, dict) else {}
required = schema.get("required", []) if isinstance(schema, dict) else []

print("RL train required:", required)
print("RL train properties:", list(props.keys()))

# 2) Déterminer les algos autorisés (si OpenAPI expose enum)
allowed_algos = None
if isinstance(props.get("algo"), dict):
    allowed_algos = props["algo"].get("enum")

# Fallback : si pas d'enum, on garde ppo comme tentative (ou tu adaptes)
if not allowed_algos:
    allowed_algos = ["ppo"]

print("Allowed algo:", allowed_algos)

# Choix algo (prendre ppo si dispo)
algo = "ppo" if "ppo" in allowed_algos else allowed_algos[0]

# 3) Dataset IDs requis (train/val)
# Recommandé : de vrais splits temporels en amont.
DATASET_TRAIN_ID = os.getenv("RL_DATASET_TRAIN_ID", DATASET_ID_FEAT)
DATASET_VAL_ID = os.getenv("RL_DATASET_VAL_ID", DATASET_ID_FEAT)

# 4) Construire payload conforme
payload = {}

if "dataset_train_id" in props:
    payload["dataset_train_id"] = DATASET_TRAIN_ID
if "dataset_val_id" in props:
    payload["dataset_val_id"] = DATASET_VAL_ID

# Paramètres RL optionnels si présents
if "algo" in props:
    payload["algo"] = algo
if "timesteps" in props:
    payload["timesteps"] = 50000
if "seed" in props:
    payload["seed"] = 42

missing = [k for k in required if k not in payload]
if missing:
    print("Champs requis manquants:", missing)
    pretty(payload)
    raise RuntimeError("Complète le payload RL selon le schéma OpenAPI.")

print("Payload envoyé à /rl/train :")
pretty(payload)

# 5) Call
resp_rl_train = safe_call(http_post, ENDPOINTS["rl_train"], payload=payload)
pretty(resp_rl_train)

# 6) Artefacts
stamp = pd.Timestamp.utcnow().strftime("%Y%m%d_%H%M%S")
rl_payload_path = os.path.join(ARTIFACT_DIR, f"rl_train_payload_{DATASET_ID_FEAT}_{stamp}.json")
rl_train_path = os.path.join(ARTIFACT_DIR, f"rl_train_resp_{DATASET_ID_FEAT}_{stamp}.json")

with open(rl_payload_path, "w", encoding="utf-8") as f:
    json.dump(payload, f, ensure_ascii=False, indent=2)
with open(rl_train_path, "w", encoding="utf-8") as f:
    json.dump(resp_rl_train, f, ensure_ascii=False, indent=2)

print("RL train payload saved to:", rl_payload_path)
print("RL train response saved to:", rl_train_path)


RL train required: ['dataset_train_id', 'dataset_val_id']
RL train properties: ['dataset_train_id', 'dataset_val_id', 'dataset_test_id', 'n_episodes', 'seed']
Allowed algo: ['ppo']
Payload envoyé à /rl/train :
{
  "dataset_train_id": "m1_2022_986913dc_m15_clean_features",
  "dataset_val_id": "m1_2022_986913dc_m15_clean_features",
  "seed": 42
}
Unexpected error: JSONDecodeError('Expecting value: line 1 column 1 (char 0)')
Unexpected exception formatting exception. Falling back to standard exception


Traceback (most recent call last):
  File "/Users/raphc/Documents/Cours M2/Datascience/projet-COLNOT-METOIS/.venv/lib/python3.8/site-packages/requests/models.py", line 976, in json
  File "/Users/raphc/.pyenv/versions/3.8.18/lib/python3.8/json/__init__.py", line 357, in loads
    return _default_decoder.decode(s)
  File "/Users/raphc/.pyenv/versions/3.8.18/lib/python3.8/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/raphc/.pyenv/versions/3.8.18/lib/python3.8/json/decoder.py", line 355, in raw_decode
    raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/raphc/Documents/Cours M2/Datascience/projet-COLNOT-METOIS/.venv/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.

## 11) Evaluate — métriques finales et comparaison (`routers/evaluate.py`)

In [90]:
# ============================================================
# FIN DE PIPELINE — EVALUATION / BENCHMARK (cellule unique)
# ============================================================


import os
import json
import pandas as pd

stamp = pd.Timestamp.utcnow().strftime("%Y%m%d_%H%M%S")

def save_artifact(obj, name: str):
    path = os.path.join(ARTIFACT_DIR, f"{name}_{stamp}.json")
    with open(path, "w", encoding="utf-8") as f:
        json.dump(obj, f, ensure_ascii=False, indent=2)
    print("Saved:", path)
    return path

# ---------- Endpoints confirmés via OpenAPI ----------
ENDPOINTS["rl_evaluate"] = "/rl/evaluate/{dataset_id}"          # POST
ENDPOINTS["evaluate_compare"] = "/evaluate/compare/{dataset_id}" # GET
ENDPOINTS["evaluate_stress_test"] = "/evaluate/stress_test/{dataset_id}" # GET
ENDPOINTS["evaluate_robustness"] = "/evaluate/robustness/{model_id}"     # GET

results = {}

# ============================================================
# 1) COMPARE (ML vs Baseline) — GET /evaluate/compare/{dataset_id}
# ============================================================
try:
    path = ENDPOINTS["evaluate_compare"].replace("{dataset_id}", DATASET_ID_FEAT)
    resp_compare = safe_call(http_get, path)
    results["compare"] = resp_compare
    pretty(resp_compare)
    save_artifact(resp_compare, f"evaluate_compare_{DATASET_ID_FEAT}")
except Exception as e:
    results["compare_error"] = str(e)
    print("COMPARE failed:", repr(e))

# ============================================================
# 2) STRESS TEST — GET /evaluate/stress_test/{dataset_id}
# ============================================================
try:
    path = ENDPOINTS["evaluate_stress_test"].replace("{dataset_id}", DATASET_ID_FEAT)
    resp_stress = safe_call(http_get, path)
    results["stress_test"] = resp_stress
    pretty(resp_stress)
    save_artifact(resp_stress, f"stress_test_{DATASET_ID_FEAT}")
except Exception as e:
    results["stress_test_error"] = str(e)
    print("STRESS TEST failed:", repr(e))

# ============================================================
# 3) ROBUSTNESS (ML) — GET /evaluate/robustness/{model_id}
#    -> nécessite un MODEL_ID valide (ex: trading_gbm_...)
#    Si absent, on tente de le récupérer via /trading_ml/best_model
# ============================================================
try:
    if not globals().get("MODEL_ID"):
        # fallback best_model si tu as ENDPOINTS["ml_best_model"]
        if "ml_best_model" in ENDPOINTS and ENDPOINTS["ml_best_model"]:
            try:
                best = safe_call(http_get, ENDPOINTS["ml_best_model"])
                pretty(best)
                # extraire un model_id si possible
                mid = None
                if isinstance(best, dict):
                    for kp in [("model_id",), ("result", "model_id"), ("result", "id"), ("id",)]:
                        cur = best
                        ok = True
                        for k in kp:
                            if isinstance(cur, dict) and k in cur:
                                cur = cur[k]
                            else:
                                ok = False
                                break
                        if ok and isinstance(cur, str):
                            mid = cur
                            break
                if not mid and isinstance(best, dict) and "model" in best and isinstance(best["model"], dict):
                    if isinstance(best["model"].get("model_id"), str):
                        mid = best["model"]["model_id"]
                if mid:
                    MODEL_ID = mid
                    print("MODEL_ID récupéré via best_model:", MODEL_ID)
                    save_artifact(best, "ml_best_model")
            except Exception:
                pass

    if not globals().get("MODEL_ID"):
        raise RuntimeError("MODEL_ID absent (robustness skip). Entraîne ML ou récupère best_model.")

    path = ENDPOINTS["evaluate_robustness"].replace("{model_id}", MODEL_ID)
    resp_rob = safe_call(http_get, path)
    results["robustness"] = resp_rob
    pretty(resp_rob)
    save_artifact(resp_rob, f"robustness_{MODEL_ID}")
except Exception as e:
    results["robustness_error"] = str(e)
    print("ROBUSTNESS failed:", repr(e))

# ============================================================
# 4) RL EVALUATE — POST /rl/evaluate/{dataset_id}
#    -> nécessite un modèle RL déjà entraîné (sinon l'API renvoie "Aucun modèle RL")
# ============================================================
try:
    path = ENDPOINTS["rl_evaluate"].replace("{dataset_id}", DATASET_ID_FEAT)
    resp_rl_eval = safe_call(http_post, path, payload=None)
    results["rl_evaluate"] = resp_rl_eval
    pretty(resp_rl_eval)
    save_artifact(resp_rl_eval, f"rl_evaluate_{DATASET_ID_FEAT}")
except Exception as e:
    # Ici, un 404 métier "Aucun modèle RL" est attendu si /rl/train n'a pas abouti.
    results["rl_evaluate_error"] = str(e)
    print("RL EVALUATE failed (souvent normal si pas de RL):", repr(e))

# ============================================================
# 5) Résumé compact exploitable pour ton rapport
# ============================================================
summary = {
    "dataset_id_features": DATASET_ID_FEAT,
    "model_id_ml": globals().get("MODEL_ID"),
    "available_reports": [k for k in results.keys() if not k.endswith("_error")],
    "errors": {k: v for k, v in results.items() if k.endswith("_error")},
}
pretty(summary)
save_artifact(summary, f"summary_benchmark_{DATASET_ID_FEAT}")

summary


{
  "dataset_id": "m1_2022_3a9c2fa5_m15_clean_features",
  "strategies": {
    "random": {
      "strategy": "random",
      "seed": 42,
      "total_return_pct": -95.2214,
      "max_drawdown_pct": -95.2222,
      "sharpe": -17.3311,
      "profit_factor": 0.4338,
      "n_trades": 15123,
      "win_rate_pct": 18.45,
      "final_equity": 0.047786,
      "equity_curve": [
        1.0,
        0.998423,
        0.998351,
        0.998679,
        0.996232,
        0.995634,
        0.997319,
        0.9973,
        0.997452,
        0.995813,
        0.994243,
        0.993521,
        0.991419,
        0.990859,
        0.988375,
        0.986031,
        0.984325,
        0.981999,
        0.980502,
        0.97882,
        0.976282,
        0.973922,
        0.973085,
        0.970596,
        0.968837,
        0.967697,
        0.96593,
        0.963552,
        0.962376,
        0.961455,
        0.960643,
        0.958797,
        0.958389,
        0.957305,
        0.956231,
   

{'dataset_id_features': 'm1_2022_3a9c2fa5_m15_clean_features',
 'model_id_ml': 'trading_gbm_20260213_110438_9c89',
 'available_reports': ['compare', 'stress_test', 'robustness'],
 'errors': {'rl_evaluate_error': '404 Client Error: Not Found for url: http://localhost:8000/rl/evaluate/m1_2022_3a9c2fa5_m15_clean_features'}}