In [1]:
import numpy as np
import os
from sklearn.svm import SVC
from sklearn.model_selection import GroupKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.impute import SimpleImputer
import pandas as pd

from utils import load_labels_from_dataset, load_features_from_dataset

# Set random seeds for reproducibility
np.random.seed(42)

In [2]:
train_df = pd.read_csv(os.path.join('datasets', 'DAIC-WOZ', 'train_split_Depression_AVEC2017.csv'))
dev_df = pd.read_csv(os.path.join('datasets', 'DAIC-WOZ', 'dev_split_Depression_AVEC2017.csv'))
test_df = pd.read_csv(os.path.join('datasets', 'DAIC-WOZ', 'full_test_split.csv'))

dataset_name = "DAIC-WOZ-Cleaned"

y_train = load_labels_from_dataset(train_df)
y_dev = load_labels_from_dataset(dev_df) 
y_test = load_labels_from_dataset(test_df)

X_train_artic = load_features_from_dataset(train_df, dataset_name, 'articulation')
X_dev_artic = load_features_from_dataset(dev_df, dataset_name, 'articulation') 
X_test_artic = load_features_from_dataset(test_df, dataset_name, 'articulation')

X_train_phona = load_features_from_dataset(train_df, dataset_name, 'phonation')
X_dev_phona = load_features_from_dataset(dev_df, dataset_name, 'phonation') 
X_test_phona = load_features_from_dataset(test_df, dataset_name, 'phonation')

X_train_proso = load_features_from_dataset(train_df, dataset_name, 'prosody')
X_dev_proso = load_features_from_dataset(dev_df, dataset_name, 'prosody')
X_test_proso = load_features_from_dataset(test_df, dataset_name, 'prosody')

print(f"Training label distribution: {np.bincount(y_train)}")
print(f"Dev label distribution: {np.bincount(y_dev)}")
print(f"Test label distribution: {np.bincount(y_test)}")

imputer_proso = SimpleImputer(strategy='mean')
X_train_proso = imputer_proso.fit_transform(X_train_proso)
X_dev_proso = imputer_proso.transform(X_dev_proso) 
X_test_proso = imputer_proso.transform(X_test_proso)

# Scale features
scaler_artic = StandardScaler()
X_train_artic_scaled = scaler_artic.fit_transform(X_train_artic)
X_dev_artic_scaled = scaler_artic.transform(X_dev_artic) 
X_test_artic_scaled = scaler_artic.transform(X_test_artic)

scaler_phona = StandardScaler()
X_train_phona_scaled = scaler_phona.fit_transform(X_train_phona)
X_dev_phona_scaled = scaler_phona.transform(X_dev_phona) 
X_test_phona_scaled = scaler_phona.transform(X_test_phona)

scaler_proso = StandardScaler()
X_train_proso_scaled = scaler_proso.fit_transform(X_train_proso)
X_dev_proso_scaled = scaler_proso.transform(X_dev_proso)
X_test_proso_scaled = scaler_proso.transform(X_test_proso)

# Create baseline by fusing all three feature types
X_train_baseline = np.concatenate([X_train_artic_scaled, X_train_phona_scaled, X_train_proso_scaled], axis=1)
X_dev_baseline = np.concatenate([X_dev_artic_scaled, X_dev_phona_scaled, X_dev_proso_scaled], axis=1)
X_test_baseline = np.concatenate([X_test_artic_scaled, X_test_phona_scaled, X_test_proso_scaled], axis=1)

scaler_baseline = StandardScaler()
X_train_baseline_scaled = scaler_baseline.fit_transform(X_train_baseline)
X_dev_baseline_scaled = scaler_baseline.fit_transform(X_dev_baseline) 
X_test_baseline_scaled = scaler_baseline.fit_transform(X_test_baseline)

Training label distribution: [77 30]
Dev label distribution: [23 12]
Test label distribution: [33 14]


In [3]:
def train_svm(X_train, y_train, X_dev, y_dev):
    best_score = 0
    best_params = None
    best_model = None

    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [1e-3, 1e-2, 1e-1, 1, 'scale', 'auto'],
        'kernel': ['rbf', 'linear'],
        'class_weight': ['balanced']
    }

    for C in param_grid['C']:
        for gamma in param_grid['gamma']:
            for kernel in param_grid['kernel']:
                for class_weight in param_grid['class_weight']:
                    if kernel == 'linear' and gamma not in ['scale', 'auto']:
                        continue  # Linear non usa gamma

                    # Addestra sul training set
                    svm = SVC(C=C, gamma=gamma, kernel=kernel, random_state=42, class_weight=class_weight)
                    svm.fit(X_train, y_train)

                    # Valuta sul dev set
                    y_dev_pred = svm.predict(X_dev)
                    dev_accuracy = accuracy_score(y_dev, y_dev_pred)

                    if dev_accuracy > best_score:
                        best_score = dev_accuracy
                        best_params = {'C': C, 'gamma': gamma, 'kernel': kernel, 'class_weight': class_weight}
                        best_model = svm
                        
    print(f"Best parameters: {best_params}")
    print(f"Best dev accuracy: {best_score:.3f}")

    return best_model

In [4]:
'''
y_train = np.concatenate([y_train, y_dev], axis=0)
full_train_df = pd.concat([train_df, dev_df], ignore_index=True)
X_train_artic = load_features_from_dataset(full_train_df, dataset_name, 'articulation')
X_test_artic = load_features_from_dataset(test_df, dataset_name, 'articulation')

X_train_phona = load_features_from_dataset(full_train_df, dataset_name, 'phonation')
X_test_phona = load_features_from_dataset(test_df, dataset_name, 'phonation')

X_train_proso = load_features_from_dataset(full_train_df, dataset_name, 'prosody')
X_test_proso = load_features_from_dataset(test_df, dataset_name, 'prosody')

imputer_proso = SimpleImputer(strategy='mean')
X_train_proso = imputer_proso.fit_transform(X_train_proso)
X_test_proso = imputer_proso.transform(X_test_proso)
scaler_artic = StandardScaler()
X_train_artic_scaled = scaler_artic.fit_transform(X_train_artic)
X_test_artic_scaled = scaler_artic.transform(X_test_artic)
scaler_phona = StandardScaler()
X_train_phona_scaled = scaler_phona.fit_transform(X_train_phona)
X_test_phona_scaled = scaler_phona.transform(X_test_phona)
scaler_proso = StandardScaler()
X_train_proso_scaled = scaler_proso.fit_transform(X_train_proso)
X_test_proso_scaled = scaler_proso.transform(X_test_proso)
X_train_baseline = np.concatenate([X_train_artic_scaled, X_train_phona_scaled, X_train_proso_scaled], axis=1)
X_test_baseline = np.concatenate([X_test_artic_scaled, X_test_phona_scaled, X_test_proso_scaled], axis=1)
scaler_baseline = StandardScaler()
X_train_baseline_scaled = scaler_baseline.fit_transform(X_train_baseline)
X_test_baseline_scaled = scaler_baseline.fit_transform(X_test_baseline)

def train_svm(X, y):
    param_grid = {
        'C': [1e-3, 1e-2, 1e-1, 1, 10, 100, 1000],
        'gamma': [1e-3, 1e-2, 1e-1, 1, 'scale', 'auto'],
        'kernel': ['rbf', 'linear'],
        'class_weight': ['balanced', None]
    }

    groups = full_train_df['Participant_ID']

    group_kfold = GroupKFold(n_splits=10)

    best_score = 0
    best_params = None
    best_model = None

    for C in param_grid['C']:
        for gamma in param_grid['gamma']:
            for kernel in param_grid['kernel']:
                for class_weight in param_grid['class_weight']:
                    if kernel == 'linear' and gamma not in ['scale', 'auto']:
                        continue  # 'gamma' non si applica a kernel lineare

                    accuracies = []
                    
                    for train_idx, val_idx in group_kfold.split(X, y, groups):
                        X_train_fold, X_val_fold = X[train_idx], X[val_idx]
                        y_train_fold, y_val_fold = y[train_idx], y[val_idx]

                        model = SVC(C=C, gamma=gamma, kernel=kernel,
                                    class_weight=class_weight, random_state=42)
                        model.fit(X_train_fold, y_train_fold)
                        y_val_pred = model.predict(X_val_fold)
                        acc = accuracy_score(y_val_fold, y_val_pred)
                        accuracies.append(acc)

                    mean_accuracy = np.mean(accuracies)

                    if mean_accuracy > best_score:
                        best_score = mean_accuracy
                        best_params = {'C': C, 'gamma': gamma, 'kernel': kernel, 'class_weight': class_weight}
                        best_model = SVC(C=C, gamma=gamma, kernel=kernel,
                                         class_weight=class_weight, random_state=42)
                        best_model.fit(X, y)  # retrain su tutto il training set

    best_model = SVC(C=C, gamma=gamma, kernel=kernel,
                     class_weight=class_weight, random_state=42)
    best_model.fit(X, y)  # retrain su tutto il training set
    print(f"\nBest parameters: {best_params}")
    print(f"Best mean CV accuracy: {best_score:.3f}")
    return best_model
'''

'\ny_train = np.concatenate([y_train, y_dev], axis=0)\nfull_train_df = pd.concat([train_df, dev_df], ignore_index=True)\nX_train_artic = load_features_from_dataset(full_train_df, dataset_name, \'articulation\')\nX_test_artic = load_features_from_dataset(test_df, dataset_name, \'articulation\')\n\nX_train_phona = load_features_from_dataset(full_train_df, dataset_name, \'phonation\')\nX_test_phona = load_features_from_dataset(test_df, dataset_name, \'phonation\')\n\nX_train_proso = load_features_from_dataset(full_train_df, dataset_name, \'prosody\')\nX_test_proso = load_features_from_dataset(test_df, dataset_name, \'prosody\')\n\nimputer_proso = SimpleImputer(strategy=\'mean\')\nX_train_proso = imputer_proso.fit_transform(X_train_proso)\nX_test_proso = imputer_proso.transform(X_test_proso)\nscaler_artic = StandardScaler()\nX_train_artic_scaled = scaler_artic.fit_transform(X_train_artic)\nX_test_artic_scaled = scaler_artic.transform(X_test_artic)\nscaler_phona = StandardScaler()\nX_train_

In [5]:
#'''
svm_arti = train_svm(X_train_artic_scaled, y_train, X_dev_artic_scaled, y_dev)
svm_phon = train_svm(X_train_phona_scaled, y_train, X_dev_phona_scaled, y_dev)
svm_proso = train_svm(X_train_proso_scaled, y_train, X_dev_proso_scaled, y_dev)
svm_baseline = train_svm(X_train_baseline_scaled, y_train, X_dev_baseline_scaled, y_dev)

'''
svm_arti = train_svm(X_train_artic_scaled, y_train)
svm_phon = train_svm(X_train_phona_scaled, y_train)
svm_proso = train_svm(X_train_proso_scaled, y_train)
svm_baseline = train_svm(X_train_baseline_scaled, y_train)
'''

y_test_pred_artic = svm_arti.predict(X_test_artic_scaled)
y_test_pred_phona = svm_phon.predict(X_test_phona_scaled)
y_test_pred_proso = svm_proso.predict(X_test_proso_scaled)
y_test_pred_baseline = svm_baseline.predict(X_test_baseline_scaled)

print("\nClassification Report (Art):")
print(classification_report(y_test, y_test_pred_artic))
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred_artic).ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
print(f"Sensitivity: {sensitivity:.3f}")
print(f"Specificity: {specificity:.3f}")

print("\nClassification Report (Phon):")
print(classification_report(y_test, y_test_pred_phona))
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred_phona).ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
print(f"Sensitivity: {sensitivity:.3f}")
print(f"Specificity: {sensitivity:.3f}")

print("\nClassification Report (Proso):")
print(classification_report(y_test, y_test_pred_proso))
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred_proso).ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
print(f"Sensitivity: {sensitivity:.3f}")
print(f"Specificity: {specificity:.3f}")

print("\nClassification Report (Baseline - Fused Features):")
print(classification_report(y_test, y_test_pred_baseline))
tn, fp, fn, tp = confusion_matrix(y_test, y_test_pred_baseline).ravel()
sensitivity = tp / (tp + fn)
specificity = tn / (tn + fp)
print(f"Sensitivity: {sensitivity:.3f}")
print(f"Specificity: {specificity:.3f}")

Best parameters: {'C': 0.1, 'gamma': 'scale', 'kernel': 'linear', 'class_weight': 'balanced'}
Best dev accuracy: 0.657
Best parameters: {'C': 1, 'gamma': 0.1, 'kernel': 'rbf', 'class_weight': 'balanced'}
Best dev accuracy: 0.714
Best parameters: {'C': 10, 'gamma': 'scale', 'kernel': 'linear', 'class_weight': 'balanced'}
Best dev accuracy: 0.686
Best parameters: {'C': 1, 'gamma': 0.001, 'kernel': 'rbf', 'class_weight': 'balanced'}
Best dev accuracy: 0.714

Classification Report (Art):
              precision    recall  f1-score   support

           0       0.69      0.61      0.65        33
           1       0.28      0.36      0.31        14

    accuracy                           0.53        47
   macro avg       0.48      0.48      0.48        47
weighted avg       0.57      0.53      0.55        47

Sensitivity: 0.357
Specificity: 0.606

Classification Report (Phon):
              precision    recall  f1-score   support

           0       0.68      0.85      0.76        33
      