In [1]:
import librosa
import os
import pickle
import numpy as np
from scipy.stats import kurtosis,skew,mode

In [2]:
def get_file_paths_by_subfolder(root):
    """Group every file under ``root`` by the name of the folder that holds it.

    The tree is walked recursively with ``os.walk``. Each file contributes one
    ``(full_path, stem)`` tuple, where ``stem`` is the file name with its last
    extension removed.

    Args:
        root: Path of the directory to scan.

    Returns:
        dict mapping a folder's base name to the list of ``(full_path, stem)``
        tuples for the files directly inside it. Folders that contain no files
        are omitted. Folders that share a base name (at different places in
        the tree) share one key, and their files are merged into one list.
        Entries are ordered deterministically (sorted traversal, sorted file
        names).
    """
    file_dict = {}

    for subdir, dirs, files in os.walk(root):
        # os.walk reports entries in arbitrary, OS-dependent order. Sorting
        # ``dirs`` in place orders the traversal itself, and sorting the file
        # names orders each list, so repeated runs give identical results.
        dirs.sort()
        file_pairs = [
            (os.path.join(subdir, name), os.path.splitext(name)[0])
            for name in sorted(files)
        ]

        # Only folders that actually contain files get an entry.
        if not file_pairs:
            continue

        # The key is the base name only, so same-named folders in different
        # branches collide. Extend the existing list instead of overwriting
        # it, otherwise the earlier folder's files would be silently lost.
        subfolder_name = os.path.basename(subdir)
        file_dict.setdefault(subfolder_name, []).extend(file_pairs)

    return file_dict

In [9]:
# def extract_mfcc(file_dictionary):
#     mfcc_dict = {}
    
#     for reader in file_dictionary.keys():
        
#         if reader not in mfcc_dict:
#             mfcc_dict[reader] = []

#         for audio_path, filename_no_extension in file_dictionary[reader]:
#             signal, sample_rate = librosa.load(audio_path, sr=None)
#             mfcc_data = librosa.feature.mfcc(y=signal,sr=sample_rate)

#             # Calculating various statistic measures on the coefficients.
#             mean_mfcc = np.mean(mfcc_data, axis=1)
#             median_mfcc= np.median(mfcc_data,axis=1)
#             std_mfcc = np.std(mfcc_data, axis=1)
#             skew_mfcc = skew(mfcc_data, axis=1)
#             kurt_mfcc = kurtosis(mfcc_data, axis=1)
#             maximum_mfcc = np.amax(mfcc_data, axis=1)
#             minimum_mfcc = np.amin(mfcc_data, axis=1)
            
#             feature_list = np.concatenate((mean_mfcc,median_mfcc,std_mfcc,skew_mfcc,kurt_mfcc,maximum_mfcc,minimum_mfcc))
#             mfcc_dict[reader].append((feature_list, filename_no_extension))

#     with open("../../../data/extracted_features/mfcc_stats_that/mfcc_stats_that_v2.pickle", "wb") as file:
#         pickle.dump(mfcc_dict, file)

def extract_mfcc(
    file_dictionary,
    output_path="../../../data/extracted_features/mfcc_stats_that/mfcc_stats_that_v2.pickle",
):
    """Compute MFCC and pitch summary features per audio file and pickle them.

    For every audio file the MFCC matrix is reduced along the frame axis to
    seven per-coefficient statistics (mean, median, standard deviation,
    skewness, kurtosis, maximum, minimum). Two pitch statistics (mean and
    median of the pYIN f0 track, ignoring NaN frames) are appended, giving one
    flat feature vector per file.

    Args:
        file_dictionary: Mapping of a group key (named ``reader`` here) to a
            list of ``(audio_path, filename_no_extension)`` tuples.
        output_path: Where the resulting dictionary is pickled. Defaults to
            the location this notebook has always written to.

    Returns:
        None. The side effect is the pickle file at ``output_path``, holding a
        dict that maps each key of ``file_dictionary`` to a list of
        ``(feature_vector, filename_no_extension)`` tuples.
    """
    # Make sure the destination exists before the slow extraction starts, so a
    # missing folder cannot discard the work at the final write.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # The pitch search range is the same for every file; compute it once.
    pitch_fmin = librosa.note_to_hz('C2')
    pitch_fmax = librosa.note_to_hz('C7')

    mfcc_dict = {}

    for reader, file_pairs in file_dictionary.items():
        reader_features = mfcc_dict.setdefault(reader, [])

        for audio_path, filename_no_extension in file_pairs:
            # sr=None keeps the file's native sampling rate (no resampling).
            signal, sample_rate = librosa.load(audio_path, sr=None)
            mfcc_data = librosa.feature.mfcc(y=signal, sr=sample_rate)

            # Summary statistics of each coefficient across frames (axis=1).
            mean_mfcc = np.mean(mfcc_data, axis=1)
            median_mfcc = np.median(mfcc_data, axis=1)
            std_mfcc = np.std(mfcc_data, axis=1)
            skew_mfcc = skew(mfcc_data, axis=1)
            kurt_mfcc = kurtosis(mfcc_data, axis=1)
            maximum_mfcc = np.amax(mfcc_data, axis=1)
            minimum_mfcc = np.amin(mfcc_data, axis=1)

            # Pitch extraction. The sampling rate must be passed explicitly:
            # pyin otherwise assumes its own default rate, which scales every
            # f0 estimate wrongly for audio loaded at a different native rate.
            f0, _voiced_flag, _voiced_probs = librosa.pyin(
                signal, fmin=pitch_fmin, fmax=pitch_fmax, sr=sample_rate
            )

            # Frames without a pitch estimate are NaN in f0. If every frame is
            # NaN there is nothing to average, so fall back to 0 for both
            # statistics instead of propagating NaN into the feature vector.
            if np.all(np.isnan(f0)):
                mean_pitch, median_pitch = 0, 0
            else:
                mean_pitch = np.nanmean(f0)
                median_pitch = np.nanmedian(f0)

            # Feature layout: seven MFCC statistic blocks in the order below,
            # followed by [mean_pitch, median_pitch].
            feature_list = np.concatenate((
                mean_mfcc, median_mfcc, std_mfcc, skew_mfcc, kurt_mfcc,
                maximum_mfcc, minimum_mfcc,
                [mean_pitch, median_pitch],
            ))

            reader_features.append((feature_list, filename_no_extension))

    with open(output_path, "wb") as file:
        pickle.dump(mfcc_dict, file)

In [10]:
# Index every file under the extracted-words folder for "that", grouped by the
# name of the subfolder that contains it.
# NOTE(review): this is an absolute, machine-specific Windows path; the
# notebook will not run on another machine without editing it.
file_dictionary = get_file_paths_by_subfolder(r"C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that")
# Sanity checks: which subfolder keys were found (presumably reader IDs —
# confirm against the dataset layout), then the container type and contents
# for key "19".
print(file_dictionary.keys())
print(type(file_dictionary["19"]))
# NOTE(review): this prints the complete list of (path, stem) tuples for "19",
# which produces a very long cell output.
print(file_dictionary["19"])

dict_keys(['19', '201', '26', '27', '40', '87'])
<class 'list'>
[('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0003.wav', '19-198-0003'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0022.wav', '19-198-0022'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0023.wav', '19-198-0023'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0025.wav', '19-198-0025'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0026.wav', '19-198-0026'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0027.wav', '19-198-0027'), ('C:\\Computer Science Programs\\Fall_2024\\EE502_BioMed\\project\\data\\extracted_words\\that\\19\\19-198-0030.wav', '19-198-0030'

In [11]:
# Run feature extraction over every indexed file. The result is written to the
# pickle path configured in extract_mfcc; the call returns nothing, so this
# cell has no output.
extract_mfcc(file_dictionary)