In [2]:
from src.crispdm.api.preview_facade_api import run_preview
from pathlib import Path
from crispdm.core.logging_utils_core import init_logging, build_log_file, get_logger

def find_project_root(start: Path | None = None) -> Path:
    p = (start or Path.cwd()).resolve()
    for parent in [p, *p.parents]:
        if (parent / "pyproject.toml").exists() or (parent / ".git").exists():
            return parent
    raise RuntimeError("Project root not found (pyproject.toml/.git)")

# ---------- Project locations (IMPORTANT) ----------
# Every path below is derived from the detected repository root, so the cell
# does not depend on the directory the notebook was started from.
PROJECT_ROOT = find_project_root()
CONFIG_ROOT = PROJECT_ROOT / "config"
OUT_ROOT = PROJECT_ROOT / "out"

# ---------- Config files handed to the preview ----------
preset_path = CONFIG_ROOT / "pipelines" / "classification_pipeline_config.yml"
dataset_cfg_path = CONFIG_ROOT / "datasets" / "dataset_config.yml"

# ---------- Logging ----------
# Logging is initialised before the preview runs so its messages are captured.
log_file = build_log_file(output_root=OUT_ROOT, run_name="config_smoke_test")
init_logging(log_file, level="DEBUG")
log = get_logger(__name__)
log.info("=== CONFIG SMOKE TEST START ===")

# ---------- Preview run ----------
# Notebook-level overrides passed alongside the two config files.
# NOTE(review): the None entries presumably mean "take the value from the
# config files" -- confirm against run_preview.
preview_overrides = {
    "dataset_path": None,
    "target_col": None,
    "time_col": None,
    "id_cols": None,
    "output_root": str(OUT_ROOT),
}

# NOTE(review): run_preview is imported via the `src.crispdm` package path
# while the logging helpers are imported via `crispdm`; confirm the package
# is not being loaded twice under two different names.
res = run_preview(
    pipeline_config_path=preset_path,
    dataset_config_path=dataset_cfg_path,
    dataset_key="microsoft_security_incident",
    notebook_vars=preview_overrides,
)

# Bare trailing expression: the notebook renders it as the cell output.
res.suggestions



[10:35:36] [DEBUG] MPPRAI - Logging initialized. log_file=K:\00_Code\DataScience\Project_DS_Microsoft_Security_Incident_Prediction\out\logs\config_smoke_test_20260203_103536.log level=DEBUG
[10:35:36] [INFO] MPPRAI.__main__ - === CONFIG SMOKE TEST START ===
[10:35:36] [INFO] MPPRAI.src.crispdm.api.preview_facade_api - Start [build_factory_config.build_preview_config]...with params: pipeline_config_path=K:\00_Code\DataScience\Project_DS_Microsoft_Security_Incident_Prediction\config\pipelines\classification_pipeline_config.yml dataset_config_path=K:\00_Code\DataScience\Project_DS_Microsoft_Security_Incident_Prediction\config\datasets\dataset_config.yml dataset_key=microsoft_security_incident notebook_vars={'dataset_path': None, 'target_col': None, 'time_col': None, 'id_cols': None, 'output_root': 'K:\\00_Code\\DataScience\\Project_DS_Microsoft_Security_Incident_Prediction\\out'}
[10:35:36] [INFO] MPPRAI.crispdm.config.build_factory_config - Start [build_preview_config] notebooks_vars={'d

{'dataset_path': 'data/raw/train/GUIDE_Train.csv',
 'rows_profiled': 200000,
 'cols': 45,
 'suggestions': {'target_candidates': ['IncidentGrade',
   'Category',
   'EntityType',
   'EvidenceRole',
   'RegistryKey',
   'RegistryValueName',
   'RegistryValueData',
   'ApplicationName',
   'ResourceIdName',
   'OSFamily'],
  'time_candidates': ['Timestamp'],
  'id_candidates': ['Id',
   'OrgId',
   'IncidentId',
   'AlertId',
   'DetectorId',
   'DeviceId',
   'AccountSid',
   'AccountObjectId',
   'NetworkMessageId',
   'EmailClusterId']},
 'artifacts': {'profile_table_png': 'K:/00_Code/DataScience/Project_DS_Microsoft_Security_Incident_Prediction/out/tables_png/stage2/stage2_profile_top25.png'}}