# Quick Inspect: Features & Augmented Samples

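This notebook loads `data/features_aggregated.csv`, inspects the top features (RandomForest importances from `models/baseline/feature_importances.csv` and per-class permutation importances from `models/baseline/permutation_importance_top10_per_class.csv`), and previews up to four augmented audio samples found under `data/augmented/sample/`.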

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import Audio, display
from pathlib import Path

sns.set(style="whitegrid")

# Locate the repository root. If the kernel's working directory is the
# `notebooks/` folder, the repo root is one level up; otherwise the working
# directory itself is used as the root.
# NOTE: the check must use Path.cwd().name -- Path('.').name is always the
# empty string, so comparing it to 'notebooks' could never succeed.
REPO = Path('..').resolve() if Path.cwd().name == 'notebooks' else Path('.')

# Inputs: the aggregated feature table plus the two importance tables written
# under models/baseline/.
feats = pd.read_csv(REPO / 'data' / 'features_aggregated.csv')
fi = pd.read_csv(REPO / 'models' / 'baseline' / 'feature_importances.csv')
perm = pd.read_csv(REPO / 'models' / 'baseline' / 'permutation_importance_top10_per_class.csv')

# show head of features
display(feats.head())

# plot top 10 RF importances
# Sort explicitly so the "top 10" does not depend on the row order of the CSV.
top10 = fi.sort_values('importance', ascending=False).head(10)
fig, ax = plt.subplots(figsize=(8, 5))
sns.barplot(x='importance', y='feature', data=top10, ax=ax)
ax.set_title('Top 10 RandomForest feature importances')
fig.tight_layout()
plt.show()

In [None]:
# Display the highest-importance permutation rows for the first three classes
# (classes are taken in order of first appearance in `perm`).
first_classes = perm['class'].unique()[:3]
for cls_name in first_classes:
    rows_for_class = perm[perm['class'] == cls_name]
    ranked = rows_for_class.sort_values('importance', ascending=False)
    print(f'Class: {cls_name}')
    # At most ten rows per class, strongest first.
    display(ranked.head(10))

In [None]:
# Preview up to four augmented audio files, if any are present.
aug_root = REPO / 'data' / 'augmented' / 'sample'

# Sort before slicing so the same files are previewed on every run; the
# order rglob yields entries in is filesystem-dependent.
examples = sorted(aug_root.rglob('*.wav'))[:4] if aug_root.exists() else []

if examples:
    for p in examples:
        print(p)
        # NOTE(review): `rate` is presumably only relevant for raw sample
        # arrays, not for file paths -- confirm before relying on it.
        display(Audio(str(p), rate=22050))
else:
    # Reached both when the directory is missing and when it exists but
    # contains no .wav files (previously the latter printed nothing).
    print('No augmented samples found at', aug_root)