# Test Pipeline - ALL Notebooks

Ten notebook uruchamia **WSZYSTKIE** notebooki (Demo + Workshop) w kolejności.

**Uwaga:** Uruchamia 12 notebooków - może zająć kilka minut!

In [None]:
import time
from datetime import datetime

# Each entry is a (display name, notebook path) pair; the paths are passed
# as-is to the notebook runner in the execution cell.
DEMO_NOTEBOOKS = [
    ("Demo 00_Setup", "../demo/00_Setup"),
    ("Demo 01_EDA_and_Validation", "../demo/01_EDA_and_Validation"),
    ("Demo 02_Data_Splitting", "../demo/02_Data_Splitting"),
    ("Demo 03_Data_Imputing", "../demo/03_Data_Imputing"),
    ("Demo 04_Feature_Transformation", "../demo/04_Feature_Transformation"),
    ("Demo 05_Feature_Engineering", "../demo/05_Feature_Engineering"),
    ("Demo 06_ML_Pipelines", "../demo/06_ML_Pipelines"),
    ("Demo 07_Feature_Store_MLflow", "../demo/07_Feature_Store_MLflow"),
]

WORKSHOP_NOTEBOOKS = [
    ("Workshop 00_Setup", "../workshop/00_Workshop_Setup"),
    ("Workshop 01_Data_Exploration", "../workshop/01_Workshop_Data_Exploration"),
    ("Workshop 02_Data_Cleaning", "../workshop/02_Workshop_Data_Cleaning_and_Features"),
    ("Workshop 03_ML_Pipeline", "../workshop/03_Workshop_ML_Pipeline"),
]

# Which groups should be run? Flip a flag to False to skip that group.
RUN_DEMO = True
RUN_WORKSHOP = True

# Execution order: demo notebooks first, then workshop notebooks.
ALL_NOTEBOOKS = []
if RUN_DEMO:
    ALL_NOTEBOOKS += DEMO_NOTEBOOKS
if RUN_WORKSHOP:
    ALL_NOTEBOOKS += WORKSHOP_NOTEBOOKS

# Timeout value handed to the notebook runner for every notebook.
NOTEBOOK_TIMEOUT = 600
print(f" Zaplanowano: {len(ALL_NOTEBOOKS)} notebooków")

In [None]:
# Run every scheduled notebook in order. One record per notebook is collected
# in `results`: position, name, status, elapsed seconds and, on failure, the
# first 150 characters of the error message.
results = []
banner = "=" * 70
started_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
print(banner)
print(f" START FULL PIPELINE: {started_at}")
print(banner)

pipeline_start = time.time()
scheduled = len(ALL_NOTEBOOKS)

for i, (name, path) in enumerate(ALL_NOTEBOOKS, 1):
    print(f"\n[{i}/{scheduled}] ▶️ {name}")
    start_time = time.time()

    try:
        result = dbutils.notebook.run(path, NOTEBOOK_TIMEOUT)
    except Exception as e:
        # Deliberately broad: a failing notebook is recorded and the
        # pipeline moves on to the next one instead of aborting.
        elapsed = time.time() - start_time
        record = {"#": i, "notebook": name, "status": " FAILED", "time_sec": round(elapsed, 2)}
        record["error"] = str(e)[:150]
        results.append(record)
        print(f"          FAILED ({elapsed:.2f}s)")
    else:
        elapsed = time.time() - start_time
        record = {"#": i, "notebook": name, "status": " SUCCESS", "time_sec": round(elapsed, 2)}
        results.append(record)
        print(f"          SUCCESS ({elapsed:.2f}s)")

total = time.time() - pipeline_start
print(f"\n KONIEC: {total:.1f}s ({total/60:.1f} min)")

In [None]:
import pandas as pd

# Summarise the pipeline run: per-group and overall pass counts, a results
# table, and the stored error text of every failed notebook.
df = pd.DataFrame(results)
success = sum('SUCCESS' in r['status'] for r in results)
failed = sum('FAILED' in r['status'] for r in results)

# Group totals are derived from what was actually executed rather than being
# hard-coded, so the report stays correct when a group is disabled or the
# notebook lists change.
demo_results = [r for r in results if 'Demo' in r['notebook']]
workshop_results = [r for r in results if 'Workshop' in r['notebook']]
demo_ok = sum('SUCCESS' in r['status'] for r in demo_results)
workshop_ok = sum('SUCCESS' in r['status'] for r in workshop_results)

print(f"\n Demo: {demo_ok}/{len(demo_results)}")
print(f" Workshop: {workshop_ok}/{len(workshop_results)}")
print(f"\n Łącznie: {success}/{len(results)}")
print(f" Błędy: {failed}")

# reindex() instead of df[[...]]: an empty `results` list yields a DataFrame
# with no columns, on which direct column selection would raise KeyError.
display(df.reindex(columns=['#', 'notebook', 'status', 'time_sec']))

if failed == 0:
    print("\n" + "" * 15 + "\n WSZYSTKIE TESTY PRZESZŁY!\n" + "" * 15)
else:
    print(f"\n️ {failed} BŁĘDÓW:")
    for r in results:
        if 'FAILED' in r['status']:
            print(f" {r['notebook']}: {r.get('error', 'Unknown error')}")