In [1]:
import os
import librosa
import numpy as np
import pandas as pd
import noisereduce as nr
from scipy.fftpack import dct
from scipy.signal import lfilter

In [2]:
# Load the participant-level label table (one row per participant).
# Only the Participant_ID and PHQ8_Binary columns are read further down.
df = pd.read_csv(filepath_or_buffer='../labeled.csv')

In [3]:
# Display the label table to check its columns before feature extraction.
df

Unnamed: 0,Participant_ID,PHQ8_Binary
0,300,0
1,301,0
2,302,0
3,303,0
4,304,0
...,...,...
184,488,0
185,489,0
186,490,0
187,491,0


In [4]:
def compute_plp(y, sr, numcep=13, frame_length=0.025, frame_stride=0.01, n_fft=512, n_filters=26):
    """Compute PLP-style cepstral coefficients for an audio signal.

    Pipeline: pre-emphasis -> Hamming-windowed framing -> power spectrum ->
    mel filterbank -> cube-root compression -> DCT-II.

    NOTE(review): a mel filterbank (``librosa.filters.mel``) is used where
    classical PLP uses Bark critical bands, and there is no equal-loudness
    weighting or linear-prediction step, so the result only approximates PLP.

    Parameters
    ----------
    y : array-like
        Audio samples; expected to be 1-D (mono), as passed by the caller in
        this notebook.
    sr : int
        Sampling rate in Hz.
    numcep : int
        Number of leading DCT coefficients kept per frame.
    frame_length : float
        Frame duration in seconds.
    frame_stride : float
        Hop between consecutive frame starts, in seconds.
    n_fft : int
        Minimum FFT size. If a frame holds more samples than ``n_fft``, the
        FFT size is raised to the next power of two that fits the whole frame.
    n_filters : int
        Number of filterbank channels.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_frames, min(numcep, n_filters))``.
    """
    # Pre-emphasis filter: out[n] = y[n] - 0.97 * y[n-1].
    emphasized = lfilter([1, -0.97], 1, y)

    frame_len = int(sr * frame_length)
    frame_step = int(sr * frame_stride)

    # np.fft.rfft crops its input when the requested size is smaller than the
    # frame, which would silently discard the tail of every frame at higher
    # sampling rates (e.g. 44.1 kHz -> 1102 samples vs. n_fft=512). Grow the
    # FFT so the complete windowed frame is always analysed.
    if frame_len > n_fft:
        n_fft = 1 << (frame_len - 1).bit_length()

    # Transpose to (n_frames, frame_len); copy so the in-place windowing below
    # writes to its own buffer rather than to a view of `emphasized`.
    frames = librosa.util.frame(emphasized, frame_length=frame_len, hop_length=frame_step).T.copy()
    frames *= np.hamming(frame_len)

    mag_frames = np.abs(np.fft.rfft(frames, n_fft))
    pow_frames = (1.0 / n_fft) * (mag_frames ** 2)

    # Filterbank energies per frame: (n_frames, n_filters).
    mel_basis = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_filters)
    band_energies = np.dot(pow_frames, mel_basis.T)

    # Cube-root amplitude compression, then decorrelate with an orthonormal DCT-II.
    compressed = band_energies ** (1 / 3)
    return dct(compressed, type=2, axis=1, norm='ortho')[:, :numcep]

In [5]:
def extract_mfcc_plp_windows(audio_path, n_mfcc, n_plp, window_size, label):
    """Split one recording into fixed-length windows of stacked MFCC + PLP features.

    The audio is loaded at its native sampling rate, trimmed with
    ``librosa.effects.trim``, denoised with ``noisereduce`` and normalised,
    then cut into consecutive non-overlapping windows. A trailing remainder
    shorter than one window is dropped.

    NOTE(review): ``librosa.feature.mfcc`` is called with its default framing
    while ``compute_plp`` frames at 25 ms / 10 ms by default. The two feature
    matrices are truncated to the shorter frame count and concatenated row by
    row, so row ``i`` of each is not guaranteed to cover the same time span.
    Behaviour is kept as-is here to preserve the output shape; confirm whether
    aligned framing is wanted.

    Parameters
    ----------
    audio_path : str
        Path to the audio file.
    n_mfcc : int
        Number of MFCC coefficients per frame.
    n_plp : int
        Number of PLP coefficients per frame (passed to ``compute_plp`` as ``numcep``).
    window_size : int or float
        Window duration in seconds.
    label : scalar
        Label repeated once per extracted window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Features of shape ``(n_windows, n_frames, n_features, 1)``, where
        ``n_features`` is the MFCC width plus the PLP width, and labels of
        shape ``(n_windows,)``. Both arrays are empty 1-D arrays when the
        recording is shorter than one window.

    Raises
    ------
    ValueError
        If ``window_size`` does not correspond to at least one sample.
    """
    y, sr = librosa.load(audio_path, sr=None)
    y, _ = librosa.effects.trim(y)
    y = nr.reduce_noise(y=y, sr=sr)
    y = librosa.util.normalize(y)

    # range() needs an integer step; a fractional window_size would otherwise
    # produce a float here and fail with a TypeError.
    window_length = int(round(window_size * sr))
    if window_length <= 0:
        raise ValueError(f"window_size must cover at least one sample, got {window_size!r}")

    feature_list = []

    # Stop bound guarantees every start yields a complete window.
    for start in range(0, len(y) - window_length + 1, window_length):
        window = y[start:start + window_length]

        mfcc = librosa.feature.mfcc(y=window, sr=sr, n_mfcc=n_mfcc).T
        plp = compute_plp(window, sr, numcep=n_plp)

        # The two extractors yield different frame counts; keep the common prefix.
        min_frames = min(mfcc.shape[0], plp.shape[0])
        combined = np.concatenate((mfcc[:min_frames, :], plp[:min_frames, :]), axis=1)

        # Trailing singleton axis added per window (channel-last layout).
        feature_list.append(combined[..., np.newaxis])

    labels = [label] * len(feature_list)
    return np.array(feature_list), np.array(labels)

In [6]:
# Build the CNN dataset: 13 MFCC + 13 PLP coefficients over 5-second windows
# for every participant whose audio file is present.
labels_cnn = []
feature_matrix = []
skipped_pids = []  # participants with no audio file or no complete window

# Iterate the two columns directly: iterrows() builds one Series per row and
# may upcast integers to float, which would turn the ID into e.g. "300.0".
for pid, phq8_binary in zip(df['Participant_ID'].to_numpy(), df['PHQ8_Binary'].to_numpy()):
    audio_path = f'../participant_audio/{pid}_AUDIO.wav'

    if not os.path.exists(audio_path):
        skipped_pids.append(pid)
        continue

    mfcc_plp_windows, labels = extract_mfcc_plp_windows(audio_path, 13, 13, 5, phq8_binary)

    # A recording shorter than one window yields empty arrays of a different
    # rank, which would make the concatenation below fail.
    if len(labels) == 0:
        skipped_pids.append(pid)
        continue

    feature_matrix.append(mfcc_plp_windows)
    labels_cnn.append(labels)

if skipped_pids:
    print(f'Skipped {len(skipped_pids)} participant(s) with missing or too-short audio: {skipped_pids}')

if not feature_matrix:
    raise ValueError('No usable participant audio found under ../participant_audio')

labels_cnn = np.concatenate(labels_cnn)
feature_matrix = np.concatenate(feature_matrix)

In [7]:
# Persist the dataset for the training notebook. np.save does not create
# missing parent directories, so make sure the output folder exists first.
os.makedirs('../assets', exist_ok=True)
np.save('../assets/labels_cnn.npy', labels_cnn)
np.save('../assets/feature_matrix_cnn.npy', feature_matrix)