# Experiment Template (Industrial Notebook)

This notebook is a reusable experiment template. It covers:
- deterministic seeds
- configuration block
- dataset generation/loading
- split + leakage checks
- baseline model + metrics
- artifact export


In [None]:
from dataclasses import dataclass
import os
import json
import numpy as np
import pandas as pd

# One seed drives every source of randomness in the notebook: it initialises
# the NumPy generator below and is passed on wherever a random_state is needed.
SEED = 1337
rng = np.random.default_rng(seed=SEED)

@dataclass
class Config:
    """Run-level settings for the experiment.

    Attributes:
        n: Number of synthetic rows to generate.
        noise: Standard deviation of the Gaussian noise added to the logits.
        out_dir: Directory that receives the exported artifacts.
    """

    n: int = 5_000
    noise: float = 0.5
    out_dir: str = "artifacts"

# Use the default configuration and make sure the artifact directory exists
# before anything is written to it (no error if it is already there).
cfg = Config()
os.makedirs(name=cfg.out_dir, exist_ok=True)
cfg  # last expression of the cell: shows the active configuration

In [None]:
# Example: synthetic binary-classification dataset.
# The draw order (x1, x2, noise) is fixed so that a given seed always
# reproduces exactly the same rows.
n_rows = cfg.n
X1 = rng.normal(loc=0, scale=1, size=n_rows)
X2 = rng.normal(loc=0, scale=1, size=n_rows)
noise_term = rng.normal(loc=0, scale=cfg.noise, size=n_rows)

# Noisy linear score, squashed through the logistic function.
logits = 1.2 * X1 - 0.8 * X2 + noise_term
p = 1 / (1 + np.exp(-logits))

# Hard labels: 1 where the probability exceeds 0.5, otherwise 0.
y = (p > 0.5).astype(int)

df = pd.DataFrame({'x1': X1, 'x2': X2, 'y': y})
df.head()

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score

# Feature matrix and label vector as plain NumPy arrays.
feature_cols = ['x1', 'x2']
X = df[feature_cols].values
y = df['y'].values

# Stratified 80/20 hold-out split, seeded so the partition is reproducible.
Xtr, Xte, ytr, yte = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
    stratify=y,
)

# Baseline model: standardise the features, then fit a logistic regression.
scaler_step = ('scaler', StandardScaler())
clf_step = ('clf', LogisticRegression(max_iter=200))
pipe = Pipeline([scaler_step, clf_step])
pipe.fit(Xtr, ytr)

# Column 1 of predict_proba belongs to the second class in pipe.classes_
# (label 1 for the 0/1 labels used here).
p = pipe.predict_proba(Xte)[:, 1]
for label, metric in (('ROC-AUC', roc_auc_score), ('AP', average_precision_score)):
    print(label, metric(yte, p))

In [None]:
# Export artifacts
import joblib
# Persist the fitted pipeline as <out_dir>/baseline.joblib.
model_path = os.path.join(cfg.out_dir, 'baseline.joblib')
joblib.dump(pipe, model_path)

# Record the settings behind this run next to the model, as <out_dir>/run.json.
run_info = {'seed': SEED, 'n': cfg.n, 'noise': cfg.noise}
run_json_path = os.path.join(cfg.out_dir, 'run.json')
with open(run_json_path, 'w', encoding='utf-8') as f:
    f.write(json.dumps(run_info, indent=2))
model_path