### Spectrogram Extraction
Automation script that extracts spectrograms from .wav snippets and creates a metadata CSV file.

In [7]:
import librosa
import scipy
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
import csv

def high_pass_filter(data, sr, cutoff=500, order=4):
    """Apply a zero-phase Butterworth high-pass filter to a signal.

    The filter is designed with ``scipy.signal.butter`` and applied forwards
    and backwards with ``scipy.signal.filtfilt``, so the output has no phase
    shift relative to the input.

    Args:
        data: Array-like signal samples to filter.
        sr: Sampling rate of ``data`` in Hz.
        cutoff: Cutoff frequency in Hz; must lie strictly between 0 and the
            Nyquist frequency (``sr / 2``).
        order: Order of the Butterworth filter.

    Returns:
        The filtered signal as returned by ``scipy.signal.filtfilt``.

    Raises:
        ValueError: If ``cutoff`` is not strictly between 0 and ``sr / 2``.
            NOTE(review): ``filtfilt`` itself also raises ``ValueError`` for
            inputs shorter than its default padding length - confirm that
            very short snippets cannot reach this function.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.signal`` is loaded on every SciPy version.
    from scipy import signal

    nyquist = 0.5 * sr  # Nyquist frequency
    normal_cutoff = cutoff / nyquist  # Cutoff as a fraction of Nyquist
    if not 0 < normal_cutoff < 1:
        raise ValueError(
            f"cutoff must be between 0 and the Nyquist frequency "
            f"({nyquist} Hz), got {cutoff}"
        )

    # Design the digital filter, then apply it in both directions.
    b, a = signal.butter(order, normal_cutoff, btype="high", analog=False)
    return signal.filtfilt(b, a, data)

def audio_to_spectrogram(file_path, cutoff_frequency=40, resampling_rate=7920, fft_size=511, hop_length=31):
    """Turn an audio file into a dB-scaled STFT magnitude spectrogram.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        cutoff_frequency: Cutoff in Hz of the high-pass filter applied
            before the transform.
        resampling_rate: Sampling rate in Hz requested from ``librosa.load``.
        fft_size: ``n_fft`` passed to ``librosa.stft``.
        hop_length: Hop between successive STFT frames, in samples.

    Returns:
        The STFT magnitude converted with ``librosa.amplitude_to_db`` using
        the maximum magnitude as reference.
        NOTE(review): presumably a 2-D array of shape
        (1 + fft_size // 2, n_frames) per librosa's convention - confirm.
    """
    # Load the recording at the requested sampling rate.
    waveform, rate = librosa.load(file_path, sr=resampling_rate)

    # Suppress content below the cutoff before analysing the signal.
    filtered = high_pass_filter(waveform, rate, cutoff=cutoff_frequency)

    # Short-Time Fourier Transform with a Hann window; keep magnitudes only.
    magnitude = np.abs(
        librosa.stft(filtered, n_fft=fft_size, hop_length=hop_length, window='hann')
    )

    # Express magnitudes in dB relative to the largest value.
    return librosa.amplitude_to_db(magnitude, ref=np.max)

In [9]:
# Input tree: one sub-directory per participant (name contains 'participant'),
# each holding .wav snippets named '<label>_<food type>_...'.
base_path_wav = '/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database/snippet_wavs'
# Output tree: one sub-directory per participant with the saved .npy
# spectrograms, plus a single metadata.csv at the top level.
base_path_spectrograms = '/Users/jannisdaiber/Documents/Repos/github/ProjectMedicalWearables/Database/spectrograms'
metadata = []
food_type = None
label_class = None

# Recognised labels, mapped to whether the snippet file name carries a food
# type in its second '_'-separated field. Snippets with any other label are
# skipped so that no metadata row is written with a stale path.
food_type_in_name = {'chewing': True, 'swallowing': True, 'others': False, 'resting': False}
# Running counters per label; they continue across participants, so the
# generated file names are unique over the whole dataset.
label_counts = dict.fromkeys(food_type_in_name, 0)

# Only the direct children of the input directory are participant folders.
# A missing input directory yields no participants instead of an error.
_, participant_dirs, _ = next(os.walk(base_path_wav), (base_path_wav, [], []))

# Sorted traversal keeps the numbering reproducible between runs.
for participant in sorted(participant_dirs):
    if 'participant' not in participant:
        continue

    # np.save does not create missing directories.
    output_dir = os.path.join(base_path_spectrograms, participant)
    os.makedirs(output_dir, exist_ok=True)

    participant_path = os.path.join(base_path_wav, participant)
    for participant_root, subdirs, wav_snippets in os.walk(participant_path):
        subdirs.sort()  # in-place sort makes os.walk descend in a fixed order
        for snippet in sorted(wav_snippets):
            if not snippet.endswith('.wav'):
                continue

            label_class = snippet.split('_')[0].lower()
            if label_class not in label_counts:
                continue  # unknown label: nothing to save or record

            spectrogram = audio_to_spectrogram(os.path.join(participant_root, snippet))

            label_counts[label_class] += 1
            spectrogram_path = os.path.join(
                output_dir, f'{label_class}{label_counts[label_class]}.npy'
            )
            np.save(spectrogram_path, spectrogram)

            if food_type_in_name[label_class]:
                food_type = snippet.split('_')[1].lower()
            else:
                food_type = "none"

            metadata.append({
                'participant': participant,
                'label': label_class,
                'spectrogram_path': spectrogram_path,
                'food_type': food_type,
            })

# Expose the totals under their original names for any later cells.
chewing_count = label_counts['chewing']
swallowing_count = label_counts['swallowing']
others_count = label_counts['others']
resting_count = label_counts['resting']

# Make sure the output root exists even when no participant was processed.
os.makedirs(base_path_spectrograms, exist_ok=True)
with open(os.path.join(base_path_spectrograms, 'metadata.csv'), 'w', newline='') as csvfile:
    fieldnames = ["participant", "label", "spectrogram_path", "food_type"]
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(metadata)