In [6]:
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt

from fastdtw import fastdtw
from scipy.spatial.distance import euclidean

In [7]:
def get_file_paths_by_subfolder(root):
    """Group every file found under ``root`` by the name of its containing folder.

    The tree is walked recursively with ``os.walk``. Each directory that
    directly contains at least one file contributes an entry keyed by the
    directory's own name (its last path component, not its full path).

    Args:
        root: Path of the directory to scan.

    Returns:
        dict mapping folder name -> list of ``(file_path, stem)`` tuples, where
        ``file_path`` is ``os.path.join(directory, file_name)`` and ``stem`` is
        the file name without its final extension. Directories that contain no
        files are omitted. Folders that share a name in different branches of
        the tree are merged into one list instead of overwriting each other.
        Directories are visited, and files listed, in sorted name order so the
        result is the same on every run and platform.
    """
    file_dict = {}

    for subdir, dirs, files in os.walk(root):
        # Sorting in place controls the order in which os.walk descends.
        dirs.sort()
        if not files:
            continue

        # normpath strips a trailing separator, so root="data/" is keyed as
        # "data" rather than as the empty string.
        subfolder_name = os.path.basename(os.path.normpath(subdir))
        file_pairs = [
            (os.path.join(subdir, file), os.path.splitext(file)[0])
            for file in sorted(files)
        ]
        # Extend rather than assign: a same-named folder seen earlier keeps its files.
        file_dict.setdefault(subfolder_name, []).extend(file_pairs)

    return file_dict

In [12]:
def calculate_avg_dtw_distance(audio_files, n_mfcc=13, n_fft=1024, sr=None):
    """Return the mean pairwise DTW distance between the MFCCs of a set of recordings.

    Every file is loaded with librosa, converted to an MFCC matrix, and
    transposed so that rows are time frames. ``fastdtw`` (an approximate DTW)
    with a Euclidean frame distance is then run on every unordered pair of
    files, and the resulting distances are averaged. The distances are used as
    returned by ``fastdtw``; no length normalization is applied.

    Args:
        audio_files: Iterable of ``(file_path, label)`` pairs. Only the path is
            used; the second element is ignored.
        n_mfcc: Number of MFCC coefficients per frame (default 13).
        n_fft: FFT window size passed to ``librosa.feature.mfcc`` (default 1024).
        sr: Sample rate passed to ``librosa.load``. ``None`` (the default)
            keeps each file's own rate; pass a number to resample every file
            to a common rate.

    Returns:
        The arithmetic mean of the pairwise distances. When fewer than two
        files are given there are no pairs, and ``nan`` is returned without
        loading anything.
    """
    audio_files = list(audio_files)

    # No pair exists for 0 or 1 files. np.mean([]) would also give nan, but
    # with a RuntimeWarning and after needlessly decoding the single file.
    if len(audio_files) < 2:
        return float("nan")

    # Load the audio files and compute their MFCCs
    mfccs = []
    for file, _ in audio_files:
        y, file_sr = librosa.load(file, sr=sr)
        mfcc = librosa.feature.mfcc(y=y, sr=file_sr, n_mfcc=n_mfcc, n_fft=n_fft)
        mfccs.append(mfcc.T)  # Transpose to make time steps as rows

    # DTW distance for each unordered pair (i < j); fastdtw returns (distance, path)
    n = len(mfccs)
    distances = [
        fastdtw(mfccs[i], mfccs[j], dist=euclidean)[0]
        for i in range(n)
        for j in range(i + 1, n)
    ]

    return np.mean(distances)

In [9]:
# Index the files under the extracted_words dataset folder by containing subfolder.
file_dictionary = get_file_paths_by_subfolder(
    r"C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that_before_after"
)

print(file_dictionary.keys())

# Number of files found in each of the three subfolders, one count per line.
for subfolder in ('201', '311', '87'):
    print(len(file_dictionary[subfolder]))

dict_keys(['201', '311', '87'])
61
79
72


In [None]:
# Mean pairwise DTW distance within each subfolder, printed one value per line.
for subfolder in ('201', '311', '87'):
    print(calculate_avg_dtw_distance(file_dictionary[subfolder]))

1133.0583398227845
1246.7554320268562
966.3404960651809


In [19]:
# Load audio files and extract MFCC features
y1, sr1 = librosa.load(r'C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that_before_after\87\87-121553-0001_1.wav', sr=None)
y2, sr2 = librosa.load(r'C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that_before_after\201\201-122255-0006_1.wav',sr=None)

print(sr1)
print(sr2)

# Extract MFCCs (you can also use other features like chroma or spectrogram)
mfcc1 = librosa.feature.mfcc(y=y1, sr=sr1, n_mfcc=13, n_fft=1024)
mfcc2 = librosa.feature.mfcc(y=y2, sr=sr2, n_mfcc=13, n_fft=1024)

# Transpose MFCCs to get a (time, features) shape, which is necessary for DTW
mfcc1 = mfcc1.T
mfcc2 = mfcc2.T

distance, path = fastdtw(mfcc1, mfcc2, dist=euclidean)

print(f"DTW distance: {distance}")

16000
16000
DTW distance: 1001.8369781314818
