# 90 · Generative Synthesis
        
Train a lightweight VAE on tabular features to synthesize anomalies and augment data.
- Load numeric fraud features (processed parquet, falling back to the raw CSV).
- Inspect feature stats.
- Fit the VAE and plot the training and validation loss.


In [None]:
from pathlib import Path
import sys
import pandas as pd
import matplotlib.pyplot as plt

# Put the project's `src/` directory on the import path so the `uais` package
# can be imported below. The project root is taken to be the parent of the
# current working directory (assumes the notebook is launched from a folder
# directly under the root — confirm if the layout changes).
project_root = Path('..').resolve()
src_path = project_root.joinpath('src')
if str(src_path) not in sys.path:
    # Guarded append keeps re-running this cell from adding duplicates.
    sys.path.append(str(src_path))

from uais.generative.train_vae import VAEConfig, run_vae_pipeline


In [None]:
# Load numeric features
# Candidate locations in priority order: the processed parquet first, then the
# raw credit-card CSV as a fallback.
candidate_paths = [
    project_root / 'data' / 'processed' / 'fraud' / 'fraud_features.parquet',
    project_root / 'data' / 'raw' / 'fraud' / 'creditcard.csv',
]
for dataset_path in candidate_paths:
    if dataset_path.exists():
        break
else:
    # No candidate exists: report every path that was tried, not only the
    # last fallback, so the user knows which files can satisfy this cell.
    tried = ', '.join(str(path) for path in candidate_paths)
    raise FileNotFoundError(f'Missing dataset for VAE; tried: {tried}')

# Choose the reader from the file extension of whichever candidate was found.
if dataset_path.suffix == '.parquet':
    df = pd.read_parquet(dataset_path)
else:
    df = pd.read_csv(dataset_path)

# Keep only numeric columns and drop any row containing a missing value.
num_df = df.select_dtypes(include=['number']).dropna()
print('Using dataset:', dataset_path)
print('Rows:', len(num_df), 'Cols:', num_df.shape[1])
# `.head()` limits the printed summary to the first five rows of `describe()`.
print(num_df.describe().head())


In [None]:
# Train VAE and plot loss
# Configure a short run (5 epochs) on the dataset file selected earlier.
config = VAEConfig(
    dataset_path=dataset_path,
    latent_dim=8,
    epochs=5,
    batch_size=256,
)
metrics = run_vae_pipeline(config)
print(
    'Recon error mean/std:',
    metrics['reconstruction_error_mean'],
    metrics['reconstruction_error_std'],
)

# Only draw the figure when the pipeline returned a non-empty history.
hist = metrics.get('history', {})
if hist:
    plt.figure(figsize=(5, 3))
    for curve_key, curve_label in (('loss', 'train'), ('val_loss', 'val')):
        # A key missing from the history is plotted as an empty series.
        plt.plot(hist.get(curve_key, []), label=curve_label)
    plt.legend()
    plt.title('VAE loss')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.tight_layout()
    plt.show()
