### Sound Snippet Extraction
Automation scripts for extracting labeled 1-second .wav snippets from session recordings.

##### Extracting and labeling food intake snippets

In [2]:
import wave
import csv
import os
from decimal import Decimal

In [10]:
def _write_snippet(output_wav_file, params, frames, start_frame, end_frame, bytes_per_frame):
    """Write frames [start_frame, end_frame) of the recording to a new wav file.

    Args:
        output_wav_file: Path of the wav file to create.
        params: Parameters of the source recording (from ``getparams()``).
        frames: Raw bytes of the whole source recording.
        start_frame: Index of the first frame of the snippet.
        end_frame: Index one past the last frame of the snippet.
        bytes_per_frame: Number of channels times the sample width in bytes.

    Returns:
        True if the snippet was written, False if the window is not fully
        inside the recording (nothing is written in that case).
    """
    total_frames = len(frames) // bytes_per_frame
    # A negative index would be interpreted by slicing as "from the end" and
    # an index past the end would silently truncate the snippet, so such
    # windows are rejected instead of producing a wrong or short file.
    if start_frame < 0 or end_frame > total_frames:
        return False

    segment_frames = frames[start_frame * bytes_per_frame:end_frame * bytes_per_frame]
    with wave.open(output_wav_file, 'wb') as output_wav:
        # params still carries the frame count of the source recording; the
        # wave module patches the header on close to match the data written.
        output_wav.setparams(params)
        output_wav.writeframes(segment_frames)
    return True


def extract_snippets(wav_file, label_file, output_dir):
    """Cut labeled snippets out of a recording and save them as wav files.

    The label file is a semicolon-delimited CSV with the columns
    ``start_time``, ``end_time`` and ``label`` (times in seconds);
    ``food_type`` is additionally read for chewing/swallowing rows.

    * ``chewing`` / ``swallowing`` rows produce up to three snippets: the
      labeled window shifted by -0.25 s, 0 s and +0.25 s.
    * ``others`` / ``resting`` rows are split into consecutive 1-second
      snippets; a trailing remainder shorter than one second is dropped.
    * Rows with any other label are ignored.

    Windows that are not fully inside the recording are skipped.

    Args:
        wav_file: Path of the source recording.
        label_file: Path of the CSV file with the labels.
        output_dir: Directory for the snippets; created if it is missing.
    """
    with wave.open(wav_file, 'rb') as wav:
        params = wav.getparams()
        framerate = wav.getframerate()
        bytes_per_frame = wav.getnchannels() * wav.getsampwidth()
        # The whole recording is held in memory and sliced per snippet.
        frames = wav.readframes(wav.getnframes())

    os.makedirs(output_dir, exist_ok=True)

    # Offsets applied to chewing/swallowing windows. They are built exactly as
    # Decimal(i * 0.25) so that the times embedded in the file names keep
    # their formatting.
    shifts = tuple(Decimal(i * 0.25) for i in range(-1, 2))

    # newline='' lets the csv module do its own newline handling.
    with open(label_file, 'r', newline='') as labelfile:
        reader = csv.DictReader(labelfile, delimiter=';')
        for row in reader:
            start_time = Decimal(row['start_time'])
            end_time = Decimal(row['end_time'])
            label = row['label']

            if label in {'chewing', 'swallowing'}:
                for shift in shifts:
                    shifted_start_time = start_time + shift
                    shifted_end_time = end_time + shift
                    output_wav_file = os.path.join(
                        output_dir,
                        f"{label}_{row['food_type']}_{shifted_start_time}_{shifted_end_time}.wav",
                    )
                    _write_snippet(
                        output_wav_file,
                        params,
                        frames,
                        int(shifted_start_time * framerate),
                        int(shifted_end_time * framerate),
                        bytes_per_frame,
                    )
            elif label in {'others', 'resting'}:
                # others and resting are labeled as longer segments -> split
                # them into 1 second segments
                i = start_time
                while (i + 1) <= end_time:
                    output_wav_file = os.path.join(output_dir, f"{label}_{i}_{i + 1}.wav")
                    _write_snippet(
                        output_wav_file,
                        params,
                        frames,
                        int(i * framerate),
                        int((i + 1) * framerate),
                        bytes_per_frame,
                    )
                    i += 1

In [None]:
# Batch run: call extract_snippets once for every label CSV found in the
# label_metadata folder of the database.
base_dir = '/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database'
label_dir = f"{base_dir}/label_metadata"

for file in filter(lambda entry: entry.endswith('.csv'), os.listdir(label_dir)):
    # The name is split as "<participant>_<session>.<ext>"; anything after
    # the first dot of the second part is discarded.
    name_parts = file.split('_')
    participant = name_parts[0]
    session = name_parts[1].split('.')[0]

    label_file = os.path.join(label_dir, file)
    # Recording and snippet folders are both laid out per participant/session.
    wav_file = f"{base_dir}/raw_wavs/{participant}/{session}/{participant}_{session}.wav"
    output_dir = f"{base_dir}/snippet_wavs/{participant}/{session}"
    extract_snippets(wav_file, label_file, output_dir)