In [44]:
import librosa
import os
import pandas as pd
import numpy as np
import parselmouth  
from scipy.signal import lfilter
from scipy.fftpack import fft
from sklearn.preprocessing import StandardScaler
import numpy as np
from scipy.signal import lfilter
import speechpy

In [45]:
audio_directory = '../../data/Task 1/'
n_mels = 128 
fmax = None   
feature_list = []
labels = []
file_names = [] 

In [46]:
def lpc(y, order):
    # Calculate the autocorrelation of the signal
    n = len(y)
    r = np.correlate(y, y, mode='full')[n-1:]
    
    # Solve the Yule-Walker equations for LPC coefficients
    R = np.zeros((order + 1, order + 1))
    for i in range(order + 1):
        R[i, :] = r[i:i + order + 1]

    # Get the LPC coefficients using numpy's linear algebra solver
    a = np.linalg.solve(R, r[:order + 1])
    return a

In [47]:
def extract_lpc_plp(y, sr):
    # LPC calculation
    lpc_order = 16  # typically 12-16
    lpc_features = lpc(y, lpc_order)
    
    # PLP calculation
    plp_features = speechpy.feature.plp(y, sr)
    
    return lpc_features, plp_features

In [48]:
def extract_prosody_features(y, sr):
    # Pitch using librosa's pitch estimator
    pitch, _ = librosa.piptrack(y=y, sr=sr)
    pitch_mean = np.mean(pitch[pitch > 0])
    
    # Energy (RMS)
    energy = librosa.feature.rms(y=y)
    energy_mean = np.mean(energy)
    
    return pitch_mean, energy_mean

In [49]:
def extract_praat_features(y, sr):
    sound = parselmouth.Sound(y, sr)
    
    # Initialize default values
    hnr, jitter, shimmer = 0.0, 0.0, 0.0
    
    try:
        # Harmonic-to-Noise Ratio (HNR)
        hnr = sound.to_harmonicity().values.mean()
    except:
        pass
    
    try:
        # Extract the point process from the sound
        point_process = parselmouth.praat.call(sound, "To PointProcess (periodic, cc)", 75, 500)
        
        # Calculate Jitter
        jitter = parselmouth.praat.call(point_process, "Get jitter (local)", 0, 0, 0.0001, 0.02, 1.3)
    except:
        pass
    
    try:
        # Calculate Shimmer
        shimmer = parselmouth.praat.call([sound, point_process], "Get shimmer (local)", 0, 0, 0.0001, 0.02, 1.3)
    except:
        pass
    
    return hnr, jitter, shimmer

In [50]:
def extract_lpc_plp(y, sr):
    # LPC calculation
    lpc_order = 16  # typically 12-16
    lpc_features = lpc(y, sr, lpc_order)
    
    # PLP calculation
    plp_features = speechpy.feature.plp(y, sr)
    
    return lpc_features, plp_features


In [51]:
for file_name in os.listdir(audio_directory):
    if file_name.endswith('.wav'):
        file_path = os.path.join(audio_directory, file_name)
        label = int(file_name[-5])
        y, sr = librosa.load(file_path, sr=None)
        n_fft_value = min(512, max(256, len(y) // 2))

        # Existing feature extraction
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, n_fft=n_fft_value)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr, n_fft=n_fft_value)
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=n_fft_value)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft_value)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr, n_fft=n_fft_value)
        spectral_flatness = librosa.feature.spectral_flatness(y=y, n_fft=n_fft_value)
        spectral_rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, n_fft=n_fft_value)
        mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=n_fft_value, n_mels=n_mels, fmax=fmax)
        tonnetz = librosa.feature.tonnetz(y=librosa.effects.harmonic(y), sr=sr)
        
        # Extract new features
        hnr, jitter, shimmer = extract_praat_features(y, sr)
        lpc_features, plp_features = extract_lpc_plp(y, sr)
        pitch_mean, energy_mean = extract_prosody_features(y, sr)

        # Flatten existing features
        mfccs_mean = np.mean(mfcc, axis=1)
        chroma_mean = np.mean(chroma, axis=1)
        spectral_contrast_mean = np.mean(spectral_contrast, axis=1)
        spectral_centroid_mean = np.mean(spectral_centroid, axis=1)
        spectral_bandwidth_mean = np.mean(spectral_bandwidth, axis=1)
        spectral_flatness_mean = np.mean(spectral_flatness, axis=1)
        spectral_rolloff_mean = np.mean(spectral_rolloff, axis=1)
        mel_spectrogram_mean = np.mean(mel_spectrogram, axis=1)
        tonnetz_mean = np.mean(tonnetz, axis=1)

        # Combine all features into a single feature vector
        features = np.hstack([
            mfccs_mean, chroma_mean, spectral_contrast_mean, spectral_centroid_mean,
            spectral_bandwidth_mean, spectral_flatness_mean, spectral_rolloff_mean,
            mel_spectrogram_mean, tonnetz_mean, hnr, jitter, shimmer,
            lpc_features, plp_features, pitch_mean, energy_mean
        ])

        # Append features and label to the feature list
        feature_list.append(features)
        labels.append(label)
        file_names.append(file_name)

PraatError: Command "Get shimmer (local)" not available for given objects.

In [None]:
num_mel_spectrogram = mel_spectrogram_mean.shape[0]
num_tonnetz = tonnetz_mean.shape[0]
num_lpc_features = len(lpc_features)
num_plp_features = plp_features.shape[1] if plp_features.ndim > 1 else 1  # Check for PLP dimensions

columns = (
    [f'mfcc_{i}' for i in range(1, 14)] +
    [f'chroma_{i}' for i in range(1, 13)] +
    [f'spectral_contrast_{i}' for i in range(1, 8)] +
    ['spectral_centroid', 'spectral_bandwidth', 'spectral_flatness', 'spectral_rolloff'] +
    [f'mel_spectrogram_{i}' for i in range(1, num_mel_spectrogram + 1)] +
    [f'tonnetz_{i}' for i in range(1, num_tonnetz + 1)] +
    ['hnr', 'jitter', 'shimmer'] +
    [f'lpc_{i}' for i in range(1, num_lpc_features + 1)] +
    [f'plp_{i}' for i in range(1, num_plp_features + 1)] +
    ['pitch_mean', 'energy_mean'] +
    ['file_name', 'label'] 
)

# Create DataFrame
df = pd.DataFrame(feature_list, columns=columns)
df['label'] = labels  # Add label to DataFrame
df['file_name'] = file_names

# Display the DataFrame shape and first few rows
print(df.shape)
print(df.head())

In [None]:
df

In [None]:
df.to_csv('task1_updated.csv', index=False,header=True)