In [None]:
import sys
from pathlib import Path

# Make the repository root importable so that `src.*` modules resolve.
# The root is the working directory itself when it contains `src/`, otherwise
# the nearest ancestor that does; if none is found, fall back to the working
# directory.
cwd = Path.cwd().resolve()
repo_root = cwd if (cwd / "src").exists() else next((p for p in cwd.parents if (p / "src").exists()), cwd)

# Remove any earlier occurrence before inserting, so re-running this cell keeps
# exactly one entry (at the front) instead of stacking duplicates in sys.path.
repo_root_str = str(repo_root)
sys.path[:] = [entry for entry in sys.path if entry != repo_root_str]
sys.path.insert(0, repo_root_str)
print('Added to sys.path:', repo_root)


Added to sys.path: C:\Users\WT\Desktop\ChildSafe detc


In [15]:
import os
import pandas as pd
import matplotlib.pyplot as plt
from src.data.synth_generator import generate

# Ensure artifacts directory exists (path is relative to the working directory)
os.makedirs('artifacts', exist_ok=True)

# Generate synthetic data
df = generate()
if 'is_child' not in df.columns:
    raise KeyError("'is_child' column not found in generated dataframe")

# Split the frame by label once; the subsets are the same for every feature,
# so there is no need to re-filter inside the loop.
child_rows = df[df['is_child'] == 1]
adult_rows = df[df['is_child'] == 0]

# One overlaid histogram per feature column, saved as artifacts/<col>_dist.png
feature_cols = [c for c in df.columns if c != 'is_child']
for col in feature_cols:
    # Explicit figure/axes so each plot is isolated from pyplot's global state
    fig, ax = plt.subplots()
    child_rows[col].hist(ax=ax, alpha=0.5, label='Child', bins=30)
    adult_rows[col].hist(ax=ax, alpha=0.5, label='Adult', bins=30)
    ax.set_title(f'{col} Distribution')
    ax.set_xlabel(str(col))
    ax.set_ylabel('Count')
    ax.legend()
    fig.savefig(f'artifacts/{col}_dist.png')
    # Close this specific figure so figures do not accumulate in memory
    plt.close(fig)


In [19]:
from sklearn.calibration import calibration_curve
from src.evaluate import ensemble_predict
import joblib
import os
import matplotlib.pyplot as plt

# Resolve artifacts from repo_root (computed in the first cell) rather than a
# '../src/...' path, which only works when the working directory is a direct
# child of the repository root.
artifacts_dir = repo_root / 'src' / 'artifacts'
model_path = artifacts_dir / 'lgbm_ensemble.joblib'
if not model_path.exists():
    print(f"Model not found at {model_path}. Skipping calibration plot.")
else:
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code; only load model artifacts produced by a trusted source.
    models = joblib.load(model_path)

    # Features are every column except the label; uses `df` from the data
    # generation cell above.
    X, y = df.drop(columns=['is_child']), df['is_child']
    probs = ensemble_predict(models, X)

    # calibration_curve returns (fraction of positives, mean predicted
    # probability) per bin.
    prob_true, prob_pred = calibration_curve(y, probs, n_bins=10)

    fig, ax = plt.subplots()
    ax.plot(prob_pred, prob_true, marker='.')
    # Diagonal reference line: predicted probability equals observed frequency
    ax.plot([0, 1], [0, 1], linestyle='--')
    ax.set_title('Calibration Plot')
    ax.set_xlabel('Mean predicted probability')
    ax.set_ylabel('Fraction of positives')
    fig.savefig(artifacts_dir / 'calibration.png')
    plt.close(fig)