# KPI Baseline Calculator

Ce notebook télécharge le dataset Module 1 et calcule 5 KPIs de base.
Sorties attendues:
- module1_kpi_results.json
- module1_run_log.txt


In [None]:
import json
import pandas as pd
from datetime import datetime

# Baseline KPI script: fetch the Module 1 event dataset for a seed, derive
# five KPIs from the "event_type" / "user_id" columns, then write a JSON
# result file and a small text run log into the current working directory.
seed = "demo"
url = f"https://api.innovaplus.africa/innova/api/school/data-analyst/module-1/dataset?seed={seed}"
df = pd.read_csv(url)

event_type = df["event_type"]

# Distinct users with at least one "enrolled" event.
total_enrolled = df.loc[event_type == "enrolled", "user_id"].nunique()

# Distinct users with at least one "module_completed" event.
completed_users = df.loc[event_type == "module_completed", "user_id"].nunique()

# Share of enrolled users who completed the module; 0 when nobody is enrolled.
completion_rate = completed_users / total_enrolled if total_enrolled else 0

# Number of "lesson_completed" rows carrying a user_id, averaged over enrolled
# users. count() skips missing user_ids, matching groupby(...).size().sum().
lesson_events = df.loc[event_type == "lesson_completed", "user_id"].count()
# Guard the division like completion_rate does: with zero enrolled users the
# unguarded NumPy division produces NaN/inf, which json.dump would emit as an
# invalid JSON token.
avg_lessons_completed = float(lesson_events / total_enrolled) if total_enrolled else 0

# NOTE(review): despite the "_7d" suffix, no date window is applied here; this
# counts every distinct user with any event other than "enrolled". Confirm
# whether a 7-day filter on a timestamp column is intended.
active_users_7d = df.loc[event_type != "enrolled", "user_id"].nunique()

payload = {
    "seed": seed,
    # Naive UTC timestamp (no offset suffix) in ISO 8601 form.
    "generated_at": datetime.utcnow().isoformat(),
    "kpis": {
        # int() converts counts to plain Python ints for JSON serialization.
        "total_enrolled": int(total_enrolled),
        "completed_users": int(completed_users),
        "completion_rate": round(completion_rate, 4),
        "avg_lessons_completed": round(avg_lessons_completed, 2),
        "active_users_7d": int(active_users_7d),
    }
}

with open("module1_kpi_results.json", "w", encoding="utf-8") as f:
    json.dump(payload, f, ensure_ascii=True, indent=2)

# Minimal run log: when the script ran and how many rows were loaded.
with open("module1_run_log.txt", "w", encoding="utf-8") as f:
    f.write(f"run_at={datetime.utcnow().isoformat()}\n")
    f.write(f"rows={len(df)}\n")
