## Code skeleton example for PriorLabs Fine-Tuning Program

In [1]:
from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Callable, Dict, List, Tuple
import warnings

import numpy as np
import xgboost as xgb

In [2]:
from src.utils import  benchmark_datasets

#### 🧪  Minimal Example Implementation (synthetic data)

In [None]:
# Replace this with your own data module; ExampleDataModule is the minimal example implementation.
from src.data_processing.custom_datamodule import ExampleDataModule

#### 🚀  Quick sanity check: classification

In [4]:
# Instantiate the example data module and fetch whatever its
# test_datasets() hook returns (unpacked as features / targets).
dm = ExampleDataModule()
X_train_list, y_train_list = dm.test_datasets()

# Keep the hyper-parameters in one place so they are easy to tweak.
classifier_params = {
    "eval_metric": "logloss",
    "random_state": 42,
    "n_estimators": 200,
}
model = xgb.XGBClassifier(**classifier_params)

# Run the benchmark helper and report each metric it returns.
scores = benchmark_datasets(model, X_train_list, y_train_list)
print("Mean scores across synthetic datasets:")
for metric, mean_score in scores.items():
    print(f"  {metric:8s}: {mean_score:.4f}")

Datasets:   0%|          | 0/3 [00:00<?, ?it/s]

Datasets: 100%|██████████| 3/3 [00:02<00:00,  1.47it/s]

Mean scores across synthetic datasets:
  MetricType.ACCURACY: 0.8397
  MetricType.ROC_AUC: 0.9262
  MetricType.F1: 0.8396
  MetricType.LOG_LOSS: 0.4956





#### 🚀  Quick sanity check: regression

In [5]:
# Instantiate the example data module and fetch whatever its
# test_datasets() hook returns (unpacked as features / targets).
dm = ExampleDataModule()
X_train_list, y_train_list = dm.test_datasets()

# Random-forest regressor from XGBoost. The evaluation metric is "rmse":
# "logloss" is a classification metric and does not match a regression
# objective, so it must not be used here.
model = xgb.XGBRFRegressor(
    eval_metric="rmse", random_state=42, n_estimators=200,
)

# Run the benchmark helper and report each metric it returns.
scores = benchmark_datasets(model, X_train_list, y_train_list)
print("Mean scores across synthetic datasets:")
for name, val in scores.items():
    print(f"  {name:8s}: {val:.4f}")

Datasets:  67%|██████▋   | 2/3 [00:01<00:00,  1.12it/s]

Datasets: 100%|██████████| 3/3 [00:02<00:00,  1.14it/s]

Mean scores across synthetic datasets:
  MetricType.RMSE: 0.3216
  MetricType.MSE: 0.1034
  MetricType.MAE: 0.2088



