Calculate the total duration of the .wav snippets in the database, grouped by labeled category.

In [None]:
import os
import wave

def get_wav_length(file_path):
    """
    Return the playback duration of a wav file in seconds
    Args:
        file_path: path to the wav file
    Returns:
        the number of audio frames divided by the frame rate, as a float
    """
    with wave.open(file_path, 'r') as reader:
        # duration [s] = frame count / frames per second
        return reader.getnframes() / float(reader.getframerate())

def categorize_wav_files(base_path):
    """
    Sum up the durations of wav files per label class based on their filenames

    The label class is the part of the filename before the first underscore,
    compared case-insensitively. Files whose label is not one of the known
    classes are skipped without being opened.

    Args:
        base_path: path to the directory containing the wav files
                   (sub-directories are searched as well)
    Returns:
        dict mapping each label class ("chewing", "swallowing", "others",
        "resting") to the total duration of its files in seconds (float)
    """
    # Start from 0.0 so that every value is a float, even for empty classes.
    categories = {
        "chewing": 0.0,
        "swallowing": 0.0,
        "others": 0.0,
        "resting": 0.0
    }

    for root, _, files in os.walk(base_path):
        for file in files:
            # Accept '.wav' regardless of case (e.g. '.WAV').
            if not file.lower().endswith('.wav'):
                continue

            # Check the label first: files with an untracked label are never
            # opened, which saves I/O and keeps unrelated or unreadable
            # files from aborting the scan.
            category = file.split('_')[0].lower()
            if category not in categories:
                continue

            categories[category] += get_wav_length(os.path.join(root, file))

    return categories

In [6]:
# Root directory of the labeled .wav snippet database (machine-specific path).
base_path = '/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database'
categories = categorize_wav_files(base_path)

# Print one summary line per label class.
for category in categories:
    total_duration = categories[category]
    print(f"Total duration for {category}: {total_duration} seconds")

Total duration for chewing: 191.0 seconds
Total duration for swallowing: 72.0 seconds
Total duration for others: 38.0 seconds
Total duration for resting: 3.0 seconds
