# Quickstart: Synthetic Data Training
# Generate synthetic data, train a small model, and evaluate.

In [1]:
# Generate tiny synthetic dataset
import os, sys
from pathlib import Path

# Ensure project root is on sys.path so we can import `scripts` and `src`
# Notebook is in `<project>/notebook`; add parent directory to Python path.
PROJECT_ROOT = str(Path.cwd().parent)
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

from scripts.generate_synthetic import make_file

DATA_DIR = 'data/nb_quick'
os.makedirs(DATA_DIR, exist_ok=True)
labels = ['Healthy','Bearing','BrokenRotorBar','StatorShort']
for i, lab in enumerate(labels):
    make_file(os.path.join(DATA_DIR, f'short_{i}_{lab}.csv'), length=2048, fs=1000, label=lab)
print('Files generated:', os.listdir(DATA_DIR))

Files generated: ['short_0_Healthy.csv', 'short_1_Bearing.csv', 'short_2_BrokenRotorBar.csv', 'short_3_StatorShort.csv']


In [5]:
# Prepare config overrides
import yaml, json, os
cfg = yaml.safe_load(open('../config.yaml'))['defaults']
cfg.update({'epochs': 3, 'batch_size': 8, 'save_dir': 'runs/nb_demo', 'augment': {'enabled': True, 'noise': True, 'noise_snr_db': 25, 'scale': True, 'time_shift': True, 'freq_aug': False}})
os.makedirs(cfg['save_dir'], exist_ok=True)
json.dump(cfg, open(cfg['save_dir'] + '/cfg_snapshot.json','w'), indent=2)
cfg

{'window_length': 3000,
 'hop_length': 1500,
 'batch_size': 8,
 'epochs': 3,
 'lr': '1e-3',
 'seed': 42,
 'num_workers': 4,
 'classes': ['Healthy', 'Bearing', 'BrokenRotorBar', 'StatorShort'],
 'use_gpu': True,
 'save_dir': 'runs/nb_demo',
 'scheduler': 'plateau',
 'plateau_patience': 5,
 'early_stop_patience': 10,
 'mixed_precision': False,
 'oversample': False,
 'prefer_voltage': False,
 'include_aux': False,
 'scaling_type': 'zscore',
 'dropout': 0.5,
 'base_channels': 64,
 'augment': {'enabled': True,
  'noise': True,
  'noise_snr_db': 25,
  'scale': True,
  'time_shift': True,
  'freq_aug': False}}

In [2]:
# Train
from src.train import Trainer
trainer = Trainer(cfg, 'data/nb_quick')
trainer.fit()

ModuleNotFoundError: No module named 'src'

In [None]:
# Evaluate best checkpoint
from src.eval import evaluate_checkpoint
classes = cfg['classes']
metrics = evaluate_checkpoint('runs/nb_demo/best.pth', 'data/nb_quick', classes, save_dir='runs/nb_demo/eval')
metrics