# Prosodic Feature-Based Behaviour Classification

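This notebook demonstrates the complete workflow, from data loading through feature extraction and model training to evaluation and interpretability analysis. The relative paths it uses (`../src`, `../configs`, `../results`) resolve against the kernel's working directory, so run the notebook from the directory that contains it.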


In [None]:
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add src to path
sys.path.insert(0, str(Path('..') / 'src'))

from utils.config import load_config
from utils.logger import setup_logger
from data_loader import load_ravdess, load_crema_d, load_savee
from preprocessing import AudioPreprocessor
from feature_extraction import ProsodicFeatureExtractor
from models import ModelTrainer
from evaluation import Evaluator
from visualization import Plotter


## 1. Load Configuration


In [None]:
# Read the experiment settings from the YAML file, then echo the fields
# that the later cells depend on so the run is self-describing.
config = load_config('../configs/experiment.yaml')
print(
    f"Experiment: {config.experiment_name}",
    f"Datasets: {config.datasets}",
    f"Model: {config.model.model_type}",
    sep="\n",
)


## 2. Load Datasets


In [None]:
# Load every corpus named in config.datasets and merge them into one frame.
# Each loader receives its own sub-directory of config.data_dir and is called
# with download=True; the frames are collected in `datasets` in load order.
datasets = []

if 'ravdess' in config.datasets:
    df_ravdess = load_ravdess(data_dir=f"{config.data_dir}/ravdess", download=True)
    datasets.append(df_ravdess)

if 'crema_d' in config.datasets:
    df_crema = load_crema_d(data_dir=f"{config.data_dir}/crema_d", download=True)
    datasets.append(df_crema)

if 'savee' in config.datasets:
    df_savee = load_savee(data_dir=f"{config.data_dir}/savee", download=True)
    datasets.append(df_savee)

# pd.concat() on an empty list fails with an opaque "No objects to
# concatenate"; fail here instead with a message that points at the config.
if not datasets:
    raise ValueError(
        f"No datasets were loaded: config.datasets={config.datasets!r} "
        "contains none of 'ravdess', 'crema_d', 'savee'."
    )

# Combine datasets; ignore_index=True gives the merged frame a fresh 0..N-1 index
data_df = pd.concat(datasets, ignore_index=True)
print(f"Total samples: {len(data_df)}")
print("\nLabel distribution:")
print(data_df['label'].value_counts())


## 3. Extract Features


In [None]:
# Initialize feature extractor; which feature families are computed is taken
# from the feature_extraction section of the experiment config.
extractor = ProsodicFeatureExtractor(
    extract_prosodic=config.feature_extraction.extract_prosodic,
    extract_spectral=config.feature_extraction.extract_spectral,
    extract_mfcc=config.feature_extraction.extract_mfcc,
    extract_formants=config.feature_extraction.extract_formants
)

# Extract features (using a subset for demonstration).
# NOTE(review): head() keeps the first rows of data_df in their existing
# order. If data_df is ordered by dataset or speaker, this subset may cover
# only a few speakers/labels — confirm it is representative enough for the
# split and training cells.
N_DEMO_FILES = 100  # number of files used for the demo run
sample_df = data_df.head(N_DEMO_FILES)
features_list = []
failed_files = []  # paths whose extraction raised, kept for the summary below

for row in sample_df.itertuples(index=False):
    try:
        features = extractor.extract_from_file(row.file_path)
        # Attach the metadata needed downstream next to the feature values.
        features['label'] = row.label
        features['speaker_id'] = row.speaker_id
        features_list.append(features)
    except Exception as e:
        # Best effort: one unreadable file should not abort the whole run,
        # so report it and continue with the remaining files.
        failed_files.append(row.file_path)
        print(f"Error processing {row.file_path}: {e}")

# Stop here if nothing succeeded: an empty frame has no 'label' column and
# would only produce a less helpful error in a later cell.
if not features_list:
    raise RuntimeError(
        f"No features were extracted from {len(sample_df)} candidate files; "
        "see any errors printed above."
    )

features_df = pd.DataFrame(features_list)
print(f"Extracted features for {len(features_df)} files")
if failed_files:
    print(f"Skipped {len(failed_files)} files due to extraction errors")

# Count only the feature columns, not the metadata columns.
feature_columns = [
    c for c in features_df.columns
    if c not in ['label', 'speaker_id', 'file_path']
]
print(f"Feature columns: {len(feature_columns)}")


## 4. Train and Evaluate Model


In [None]:
# Build the trainer from the model section of the experiment config.
# Model-specific hyperparameters are looked up under an attribute named
# after the model type; an empty dict is used when that attribute is absent.
model_cfg = config.model
trainer = ModelTrainer(
    model_type=model_cfg.model_type,
    hyperparameters=getattr(model_cfg, model_cfg.model_type, {}),
    random_state=config.seed,
    test_size=model_cfg.test_size,
    cv_folds=model_cfg.cv_folds,
    speaker_independent=model_cfg.speaker_independent,
)

# Convert the feature table into the inputs the trainer works with
X, y, speaker_ids, feature_names = trainer.prepare_data(features_df)
print(f"Feature matrix shape: {X.shape}")
print(f"Number of classes: {len(np.unique(y))}")

# Hold out a test set (speaker_ids is handed to the splitter as well)
X_train, X_test, y_train, y_test = trainer.split_data(X, y, speaker_ids)
n_train, n_test = len(X_train), len(X_test)
print(f"Train set: {n_train}, Test set: {n_test}")

# Fit on the training portion and show what the trainer reports
train_metrics = trainer.train(X_train, y_train)
print(f"\nTraining metrics: {train_metrics}")

# Score the held-out portion and print the headline metrics
test_metrics = trainer.evaluate(X_test, y_test)
print(
    f"\nTest Accuracy: {test_metrics['accuracy']:.4f}",
    f"Test F1 (Macro): {test_metrics['f1_macro']:.4f}",
    f"Test UAR: {test_metrics['uar']:.4f}",
    sep="\n",
)


## 5. Generate Visualizations


In [None]:
# Plot the test-set results: a confusion matrix from the hard predictions
# and ROC curves from the predicted probabilities.
# NOTE(review): presumably the Evaluator saves figures under results_dir
# using the run name — confirm against the Evaluator implementation.
evaluator = Evaluator(results_dir='../results')

class_names = trainer.label_encoder.classes_
run_name = 'notebook_analysis'
predicted_labels = np.array(test_metrics['y_pred'])

evaluator.plot_confusion_matrix(y_test, predicted_labels, class_names, run_name)

evaluator.plot_roc_curves(
    y_test,
    np.array(test_metrics['y_pred_proba']),
    class_names,
    run_name,
)
