In [29]:
import os
import numpy as np
from disvoice.articulation import Articulation
import math
from joblib import Parallel, delayed, cpu_count

In [30]:
def process_participant_chunk(participant_chunk, input_dir, output_dir):
    """Extract static articulation features for a chunk of participants.

    Meant to run inside a single worker process: one ``Articulation``
    extractor is created and reused for every participant in the chunk.

    For each participant directory ``<id>_P`` the audio file
    ``<input_dir>/<id>_P/<id>_AUDIO.wav`` is read and the extracted features
    are saved to ``<output_dir>/<id>_P/articulation_features.npy``.

    Args:
        participant_chunk: List of participant directory names (expected to
            end with ``"_P"``).
        input_dir: Root directory containing the participant directories.
        output_dir: Root directory under which the feature files are written.

    Returns:
        None. Results are written to disk. Participants whose audio file is
        missing are reported and skipped, so they are picked up again on the
        next run.

    Raises:
        Any exception raised by ``Articulation.extract_features_file`` or by
        the file-system calls propagates to the caller.
    """
    # Nothing to do: avoid building the extractor for an empty chunk.
    if not participant_chunk:
        return

    suffix = "_P"
    articulation = Articulation()

    for participant_dir in participant_chunk:
        participant_path = os.path.join(input_dir, participant_dir)

        # Strip only the trailing "_P"; str.replace would also remove any
        # "_P" occurring elsewhere in the directory name.
        if participant_dir.endswith(suffix):
            participant_id = participant_dir[:-len(suffix)]
        else:
            participant_id = participant_dir
        audio_path = os.path.join(participant_path, f"{participant_id}_AUDIO.wav")

        # A single participant without audio must not abort the whole chunk
        # (and with it the whole parallel run).
        if not os.path.isfile(audio_path):
            print(f"Skipping {participant_dir}: audio file not found ({audio_path})")
            continue

        features = articulation.extract_features_file(audio_path, static=True, plots=False, fmt="npy")

        output_participant_path = os.path.join(output_dir, participant_dir)
        os.makedirs(output_participant_path, exist_ok=True)
        output_file = os.path.join(output_participant_path, "articulation_features.npy")

        # Write to a temporary file and rename it into place, so an
        # interrupted run never leaves a truncated file at the final path
        # that a later run would mistake for a completed result.
        tmp_file = output_file + ".tmp"
        with open(tmp_file, "wb") as fh:
            np.save(fh, features)
        os.replace(tmp_file, output_file)

In [31]:
def extract_articulation_features_parallel(input_dir, output_dir):
    """Extract articulation features for all pending participants in parallel.

    Participant directories are the sub-directories of ``input_dir`` whose
    name ends with ``"_P"``. A participant is pending when
    ``<output_dir>/<participant_dir>/articulation_features.npy`` does not
    exist yet, so the function can be re-run to resume an interrupted
    extraction. Pending participants are split into contiguous chunks and
    each chunk is handed to ``process_participant_chunk`` via joblib.

    Args:
        input_dir: Root directory containing the participant directories.
        output_dir: Root directory for the extracted features; created if it
            does not exist.

    Returns:
        None. Results are written to disk by the workers.
    """
    # Create the output directory if it does not exist.
    os.makedirs(output_dir, exist_ok=True)

    # Find all participant directories ending with "_P". Sorted because
    # os.listdir returns entries in arbitrary order, which would make the
    # chunk assignment differ from run to run.
    all_participant_dirs = sorted(
        d for d in os.listdir(input_dir)
        if d.endswith("_P") and os.path.isdir(os.path.join(input_dir, d))
    )

    # Keep only the participants whose features have not been extracted yet.
    participant_dirs = [
        d for d in all_participant_dirs
        if not os.path.exists(os.path.join(output_dir, d, "articulation_features.npy"))
    ]

    # With nothing pending the chunk size would be 0 and range() would raise
    # "arg 3 must not be zero"; return early instead.
    if not participant_dirs:
        print("No participants left to process, nothing to do.")
        return

    # Never start more workers than there are participants to process.
    n_jobs = min(cpu_count(), len(participant_dirs))

    # Split the participants into contiguous chunks, at most one per worker.
    chunk_size = math.ceil(len(participant_dirs) / n_jobs)
    participant_chunks = [participant_dirs[i:i + chunk_size] for i in range(0, len(participant_dirs), chunk_size)]

    print(f"Processing {len(participant_dirs)} participants in {len(participant_chunks)} chunks...")

    # Process the chunks in parallel.
    Parallel(n_jobs=n_jobs, verbose=0)(
        delayed(process_participant_chunk)(chunk, input_dir, output_dir)
        for chunk in participant_chunks
    )

In [32]:
# Feature extraction: run the parallel articulation-feature extraction over the
# participant directories found under `dataset_dir`, writing the results under
# `output_dir`. Both paths are relative to the current working directory.
dataset_dir = "datasets/DAIC-WOZ-Cleaned"
output_dir = "features/DAIC-WOZ-Cleaned"
extract_articulation_features_parallel(dataset_dir, output_dir)

Processing 1 participants in 1 chunks...


################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################

