# Differential Privacy: Insurance Dataset


In [None]:
from pathlib import Path
import sys

import matplotlib.pyplot as plt

# Absolute path of the parent of the current working directory; the rest of
# the notebook treats it as the repository root.
repo_root = Path('..').resolve()

# Make <repo_root>/src importable. The membership check keeps sys.path free
# of duplicate entries when this cell is executed more than once.
_src_dir = str(repo_root / 'src')
if _src_dir not in sys.path:
    sys.path.append(_src_dir)

from dp.pipeline import load_dataset
from dp.models import build_model_registry
from dp.evaluation import privacy_utility_sweep, plot_privacy_utility, plot_roc_curves


In [None]:
# Location of the insurance CSV: <repo_root>/data/insurance.csv.
DATA_PATH = repo_root.joinpath('data', 'insurance.csv')

# Read the file through the project's loader and preview it with .head()
# (presumably a pandas DataFrame — confirm against dp.pipeline.load_dataset).
df = load_dataset(DATA_PATH)
df.head()


In [None]:
# Run the privacy/utility sweep over the loaded data. Everything except the
# data itself is passed by keyword: 'smoker' is the target, 'laplace' the
# mechanism, three epsilon values are supplied, and random_state is pinned
# to 42. The models come from the project's registry builder.
sweep = privacy_utility_sweep(
    df,
    models=build_model_registry(),
    target='smoker',
    mechanism='laplace',
    epsilons=[0.1, 0.5, 1.0],
    random_state=42,
)

# Last expression of the cell: display the sweep's results.
sweep.results


In [None]:
# Build the privacy/utility figure from the sweep's results. The bare `fig`
# on the last line is the cell's value, so the notebook displays it.
fig = plot_privacy_utility(sweep.results)
fig


In [None]:
# Plot the ROC curves stored under the key '1.0' in sweep.roc_curves.
# NOTE(review): the key here is the string '1.0', while the sweep was given
# float epsilons — confirm roc_curves is keyed by str(epsilon), not by float.
# The title text hard-codes the same epsilon; keep the two in sync.
fig = plot_roc_curves(sweep.roc_curves['1.0'], title='ROC Curves (epsilon=1.0)')
fig
