# Stage 0 LightGBM Baseline

In [None]:
import sys
from pathlib import Path
# Resolve the parent of the current working directory to an absolute path and
# make sure it is importable, so the `src.*` imports used by this notebook
# can be found.
ROOT = Path('..').resolve()
root_entry = str(ROOT)
if root_entry not in sys.path:
    # Insert at the front so this location is searched before other entries.
    sys.path.insert(0, root_entry)
ROOT

In [None]:
from src.data.downsample import DownsampleConfig, create_downsampled_dataset
from src.train.lightgbm_runner import LightGBMConfig, train_lightgbm
from src.train.log_utils import append_metric, MetricRecord, default_run_id


## Generate / Load Downsampled Dataset

In [None]:
# Settings for the downsampling step. Both paths are relative, so they are
# interpreted against the notebook's current working directory.
# NOTE(review): negative_multiplier=2.0 presumably keeps two negatives per
# positive (the output file is named "1_2") — confirm in src.data.downsample.
downsample_settings = dict(
    raw_path='../data/train.parquet',
    output_path='../data/processed/train_downsample_1_2.parquet',
    negative_multiplier=2.0,
    seed=42,
    shuffle=True,
)
cfg = DownsampleConfig(**downsample_settings)

# Keep the returned value: the training cell passes it on as `train_path`.
downsample_path = create_downsampled_dataset(cfg)
downsample_path

## Train LightGBM

In [None]:
# Train on the dataset returned by the downsampling step. The path is passed
# as a plain string, with a fixed random_state and enable_wandb switched off.
train_path = str(downsample_path)
lgb_cfg = LightGBMConfig(train_path=train_path, random_state=42, enable_wandb=False)

result = train_lightgbm(lgb_cfg)
# Display the metrics reported by the training run.
result.metrics

## Log Metrics

In [None]:
# Record the metrics from the training result under that result's run id.
run_id = result.run_id
metric_record = MetricRecord(
    run_id=run_id,
    stage='stage0',
    dataset='validation',
    metrics=result.metrics,
    notes='stage0 baseline (notebook)',
)
append_metric(metric_record)
# Show the run id so the logged entry can be looked up later.
run_id