# Thème 5 — Capstone Packager (Notebook obligatoire)\n\n## Objectif\n- Générer automatiquement le pack final du Module 1 (ZIP)\n- Forcer la présence des fichiers Thème 2/3/4 + cohérence minimale\n- Produire un brief 1 page (500–800 mots) + checklist + README\n\n## Entrées attendues (dans le même dossier d’exécution)\n- `theme2_baseline_metrics.json`\n- `theme3_kpi_dictionary.csv`\n- `theme4_analysis_plan.md`\n- `theme4_data_requirements.csv`\n- `theme4_acceptance_criteria.json`\n\n## Sorties\n- `capstone_brief.md`\n- `capstone_checklist.json`\n- `README.md`\n- `module1_capstone_submission.zip`\n

In [None]:
import os
import json
import zipfile
from datetime import datetime, timezone
import pandas as pd

# Exports from Themes 2-4 that must be present in the working directory.
REQUIRED = [
    "theme2_baseline_metrics.json",
    "theme3_kpi_dictionary.csv",
    "theme4_analysis_plan.md",
    "theme4_data_requirements.csv",
    "theme4_acceptance_criteria.json",
]

# Columns the quality gates expect in the two CSV inputs.
REQUIRED_KPI_COLS = {
    "kpi_name", "definition", "objective", "formula", "unit",
    "granularity", "segment", "source", "owner", "refresh",
}
REQUIRED_REQ_COLS = {"field", "source", "grain", "refresh", "owner", "notes"}

# Static README shipped inside the ZIP.
README_TEXT = """# README — Module 1 Capstone Pack

## What this ZIP contains
- capstone_brief.md
- theme2_baseline_metrics.json
- theme3_kpi_dictionary.csv
- theme4_analysis_plan.md
- theme4_data_requirements.csv
- theme4_acceptance_criteria.json
- capstone_checklist.json

## How to reproduce
1) Make sure you have completed Theme 2, Theme 3, Theme 4 exports.
2) Run the Theme 5 notebook to regenerate:
   - capstone_brief.md
   - capstone_checklist.json
   - module1_capstone_submission.zip
3) Open capstone_checklist.json and confirm:
   - passed == true

## Notes
- If passed == false, fix the failing checks (see "checks" list).
"""

# Accumulates every quality-gate result; serialized into capstone_checklist.json.
checks = []


def word_count(text: str) -> int:
    """Return the number of whitespace-separated tokens in *text*.

    Any run of whitespace (spaces, tabs, newlines) separates two words, so
    tab-separated input is no longer counted as a single word.
    """
    return len(text.split())


def add_check(name, ok, detail=""):
    """Append one quality-gate result to the module-level ``checks`` list.

    Args:
        name: Human-readable label of the check.
        ok: Outcome; coerced to ``bool`` so the JSON stays serializable.
        detail: Optional diagnostic text stored next to the outcome.
    """
    checks.append({"check": name, "ok": bool(ok), "detail": detail})


def build_brief(*, context, decision, objective, main_kpi, guardrail,
                scope_in, scope_out, data_sources, method, deliverables,
                validation_summary):
    """Render the Markdown capstone brief from the student's answers.

    An empty ``guardrail`` is rendered as ``N/A``. Returns the full brief
    as a single string ending with a newline.
    """
    body = f"""# Capstone Brief — Module 1

## Contexte
{context}

## Décision à changer
{decision}

## Objectif (SMART)
{objective}

## KPIs
- KPI principal: {main_kpi}
- Guardrail: {guardrail if guardrail else 'N/A'}

## Scope
**IN**: {scope_in}
**OUT**: {scope_out}

## Données (sources + grain)
{data_sources}

## Méthode (résumé)
{method}

## Livrables
{deliverables}

## Validation (résumé)
{validation_summary}

"""
    # The rule line carries two trailing spaces; it is spelled as an explicit
    # literal so that editors stripping trailing whitespace cannot alter it.
    footer = (
        "---  \n"
        "Références fichiers: `theme2_baseline_metrics.json`, "
        "`theme3_kpi_dictionary.csv`, `theme4_analysis_plan.md`, "
        "`theme4_data_requirements.csv`, `theme4_acceptance_criteria.json`\n"
    )
    return body + footer


def run_quality_checks(*, baseline, kpi, req, acc, analysis_plan_text,
                       brief_words, readme, main_kpi, guardrail):
    """Record every content quality gate through ``add_check``.

    Malformed inputs (a missing ``kpi_name`` column, JSON documents that are
    not objects) are recorded as failed checks instead of raising, so the
    checklist file can always be written and shows what to fix.

    Args:
        baseline: Parsed ``theme2_baseline_metrics.json``.
        kpi: DataFrame read from ``theme3_kpi_dictionary.csv``.
        req: DataFrame read from ``theme4_data_requirements.csv``.
        acc: Parsed ``theme4_acceptance_criteria.json``.
        analysis_plan_text: Stripped text of ``theme4_analysis_plan.md``.
        brief_words: Word count of the generated brief.
        readme: README text that was written to disk.
        main_kpi: Slug of the main KPI entered by the student.
        guardrail: Slug of the guardrail KPI, or an empty string.
    """
    # --- Baseline metrics ---
    baseline_keys = list(baseline.keys()) if isinstance(baseline, dict) else []
    keys_detail = f"keys={baseline_keys}"
    add_check("Baseline has enrolled_users", "enrolled_users" in baseline_keys, keys_detail)
    add_check("Baseline has validated_users", "validated_users" in baseline_keys, keys_detail)
    add_check(
        "Baseline has completion rate key",
        "completion_rate" in baseline_keys or "completion_rate_m1" in baseline_keys,
        keys_detail,
    )

    # --- KPI dictionary ---
    kpi_columns = set(kpi.columns)
    add_check(
        "KPI dictionary has required columns",
        REQUIRED_KPI_COLS.issubset(kpi_columns),
        f"missing={sorted(REQUIRED_KPI_COLS - kpi_columns)}",
    )

    # Without the kpi_name column the name-based checks must fail, not crash.
    has_name_col = "kpi_name" in kpi_columns
    kpi_names = kpi["kpi_name"] if has_name_col else pd.Series(dtype="object")
    has_duplicates = bool(kpi_names.duplicated().any())
    if not has_name_col:
        unique_detail = "kpi_name column missing"
    elif has_duplicates:
        unique_detail = "duplicate_names_found"
    else:
        unique_detail = ""
    add_check(
        "KPI dictionary kpi_name is unique",
        has_name_col and not has_duplicates,
        unique_detail,
    )
    add_check("KPI dictionary has >= 6 KPIs", len(kpi) >= 6, f"rows={len(kpi)}")

    known_kpis = set(kpi_names)
    add_check("Main KPI exists in KPI dictionary", main_kpi in known_kpis, f"main_kpi={main_kpi}")
    if guardrail:
        add_check("Guardrail KPI exists in KPI dictionary", guardrail in known_kpis, f"guardrail={guardrail}")
    else:
        add_check("Guardrail provided (recommended)", True, "guardrail=N/A (allowed)")

    # --- Data requirements ---
    req_columns = set(req.columns)
    add_check(
        "Data requirements has required columns",
        REQUIRED_REQ_COLS.issubset(req_columns),
        f"missing={sorted(REQUIRED_REQ_COLS - req_columns)}",
    )
    add_check("Data requirements has >= 12 rows", len(req) >= 12, f"rows={len(req)}")

    # --- Acceptance criteria ---
    # A top-level JSON value that is not an object has no "acceptance_criteria"
    # key; treat it as "no list" so both checks below fail cleanly.
    acc_list = acc.get("acceptance_criteria", []) if isinstance(acc, dict) else None
    is_list = isinstance(acc_list, list)
    add_check("Acceptance criteria JSON has a list", is_list, f"type={type(acc_list)}")
    add_check(
        "Acceptance criteria has >= 8 items",
        is_list and len(acc_list) >= 8,
        f"count={len(acc_list) if is_list else 'N/A'}",
    )

    # --- Narrative artefacts ---
    add_check("Analysis plan MD is non-empty", len(analysis_plan_text) > 50, f"len={len(analysis_plan_text)}")
    add_check("Brief word count is within 500–800", 500 <= brief_words <= 800, f"words={brief_words}")
    # "reprodu" also matches "reproduce", so a single substring test suffices.
    mentions_repro = "reprodu" in readme.lower()
    add_check(
        "README mentions reproduction steps",
        mentions_repro,
        "ok" if mentions_repro else "missing keywords",
    )


def _read_json(path):
    """Parse the UTF-8 JSON file at *path*, closing the handle afterwards."""
    with open(path, "r", encoding="utf-8") as fh:
        return json.load(fh)


def _read_text(path):
    """Return the content of the UTF-8 text file at *path*."""
    with open(path, "r", encoding="utf-8") as fh:
        return fh.read()


def _write_text(path, text):
    """Write *text* to *path* as UTF-8, replacing any existing file."""
    with open(path, "w", encoding="utf-8") as fh:
        fh.write(text)


def _write_checklist(path="capstone_checklist.json"):
    """Serialize the current ``checks`` to *path* and return the summary dict."""
    # Same "YYYY-MM-DDTHH:MM:SS.ffffffZ" shape as before, without relying on
    # the deprecated datetime.utcnow().
    now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
    summary = {
        "created_at": now_utc.isoformat() + "Z",
        "passed": all(c["ok"] for c in checks),
        "checks": checks,
    }
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(summary, fh, ensure_ascii=False, indent=2)
    return summary


def _build_zip(zip_name, paths):
    """Create (or overwrite) *zip_name* containing every file in *paths*."""
    with zipfile.ZipFile(zip_name, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for path in paths:
            archive.write(path)


# The guard is true inside a notebook kernel and when run as a script, so the
# cell behaves as before; it only keeps a plain import free of prompts and I/O.
if __name__ == "__main__":
    print("✅ Imports OK")

    # ---- 1) Existence checks ----
    missing = [f for f in REQUIRED if not os.path.exists(f)]
    add_check("Required input files exist", not missing, f"missing={missing}")
    if missing:
        raise FileNotFoundError(f"Missing required files: {missing}")

    baseline = _read_json("theme2_baseline_metrics.json")
    kpi = pd.read_csv("theme3_kpi_dictionary.csv")
    req = pd.read_csv("theme4_data_requirements.csv")
    acc = _read_json("theme4_acceptance_criteria.json")
    analysis_plan_text = _read_text("theme4_analysis_plan.md").strip()

    print("✅ Loaded all required inputs")

    # ---- 2) Build brief (student inputs) ----
    print("\n--- Capstone Brief Builder ---")
    context = input("Contexte (5–8 lignes): ").strip()
    decision = input("Décision à changer (1 phrase): ").strip()
    objective = input("Objectif SMART (1 phrase): ").strip()
    main_kpi = input("KPI principal (slug, doit exister dans le dictionnaire): ").strip()
    guardrail = input("Guardrail (slug, optionnel): ").strip()
    scope_in = input("Scope IN (comma): ").strip()
    scope_out = input("Scope OUT (comma): ").strip()
    data_sources = input("Données (sources + grain, 1–3 lignes): ").strip()
    method = input("Méthode (3–6 étapes courtes, en 1 bloc): ").strip()
    deliverables = input("Livrables (comma): ").strip()
    validation_summary = input("Validation (résumé 3–6 bullets, en 1 bloc): ").strip()

    brief = build_brief(
        context=context,
        decision=decision,
        objective=objective,
        main_kpi=main_kpi,
        guardrail=guardrail,
        scope_in=scope_in,
        scope_out=scope_out,
        data_sources=data_sources,
        method=method,
        deliverables=deliverables,
        validation_summary=validation_summary,
    )
    _write_text("capstone_brief.md", brief)

    brief_words = word_count(brief)
    print("✅ Wrote capstone_brief.md | words =", brief_words)

    # ---- 3) README (reproduction) ----
    readme = README_TEXT
    _write_text("README.md", readme)

    print("✅ Wrote README.md")

    # ---- 4) Automated checklist (quality gates) ----
    run_quality_checks(
        baseline=baseline,
        kpi=kpi,
        req=req,
        acc=acc,
        analysis_plan_text=analysis_plan_text,
        brief_words=brief_words,
        readme=readme,
        main_kpi=main_kpi,
        guardrail=guardrail,
    )

    summary = _write_checklist()
    passed = summary["passed"]

    print("✅ Wrote capstone_checklist.json | passed =", passed)

    # ---- 5) Package ZIP ----
    zip_name = "module1_capstone_submission.zip"
    files_to_zip = REQUIRED + ["capstone_brief.md", "capstone_checklist.json", "README.md"]

    _build_zip(zip_name, files_to_zip)

    with zipfile.ZipFile(zip_name, "r") as z:
        names = set(z.namelist())
    expected = set(files_to_zip)
    add_check(
        "ZIP contains all required files",
        expected.issubset(names),
        f"missing_in_zip={sorted(expected - names)}",
    )

    summary = _write_checklist()
    passed = summary["passed"]

    # Re-package so the checklist inside the ZIP carries the ZIP check and the
    # final "passed" flag; otherwise the archived copy is stale.
    _build_zip(zip_name, files_to_zip)

    print("✅ Generated:", zip_name)
    print("✅ Final passed:", passed)