In [15]:
import numpy as np
import librosa
import os

In [16]:
def compute_mcd(reference_file, synthesized_file, target_sr=22050):
    """
    Computes the Mel-Cepstral Distortion (MCD) in dB between reference and synthesized audio files.
    Resamples both audio files to the target sample rate if necessary.

    Args:
        reference_file (str): Path to the reference audio file.
        synthesized_file (str): Path to the synthesized audio file.
        target_sr (int): Target sample rate for resampling.

    Returns:
        float: The Mel-Cepstral Distortion in dB between the two audio files.
    """
    # Load audio files
    reference, sr_ref = librosa.load(reference_file, sr=None)
    synthesized, sr_syn = librosa.load(synthesized_file, sr=None)

    # Resample if the sample rates are different
    if sr_ref != target_sr:
        reference = librosa.resample(reference, orig_sr=sr_ref, target_sr=target_sr)
    if sr_syn != target_sr:
        synthesized = librosa.resample(synthesized, orig_sr=sr_syn, target_sr=target_sr)

    # Compute Mel-frequency cepstral coefficients (MFCCs)
    mfcc_ref = librosa.feature.mfcc(y=reference, sr=target_sr, n_mfcc=13)
    mfcc_syn = librosa.feature.mfcc(y=synthesized, sr=target_sr, n_mfcc=13)

    # Compute MCD for each frame
    mcd_values = []
    for i in range(min(mfcc_ref.shape[1], mfcc_syn.shape[1])):
        mcd_frame = np.sqrt(np.sum((mfcc_ref[:, i] - mfcc_syn[:, i])**2))
        mcd_values.append(mcd_frame)

    # Average MCD over all frames
    avg_mcd = np.mean(mcd_values)

    # Convert MCD to dB
    if avg_mcd > 0:
        avg_mcd_db = 10 * np.log10(avg_mcd)
    else:
        avg_mcd_db = float('inf')  # Handle log(0) case

    return avg_mcd_db

In [17]:
# Directory where your audio files are stored
generated_audio_dir = 'generated_audio/'  # Directory for generated audio files
reference_audio_dir = 'reference_audio/'    # Directory for reference audio files

# List of generated audio files (sample_1.wav to sample_19.wav)
generated_audios = [os.path.join(generated_audio_dir, f'sample_{i}.wav') for i in range(1, 20)]

# List of reference audio files (20.wav to 25.wav)
reference_audios = [os.path.join(reference_audio_dir, f'{i}.wav') for i in range(20, 26)]

# Store MCD scores for each generated audio
mcd_scores = {audio: [] for audio in generated_audios}

# Calculate MCD for each generated audio against all reference audios
for generated_audio in generated_audios:
    for reference_audio in reference_audios:
        try:
            mcd_score = compute_mcd(reference_audio, generated_audio)
            mcd_scores[generated_audio].append(mcd_score)
        except Exception as e:
            print(f"Error processing {generated_audio} with {reference_audio}: {e}")

# Calculate average MCD for each generated audio across all reference audios
average_mcd_scores = {os.path.basename(audio): np.mean(scores) if scores else None for audio, scores in mcd_scores.items()}

# Print average MCD for each generated audio
for audio, avg_mcd in average_mcd_scores.items():
    if avg_mcd is not None:
        print(f"Average MCD for {audio}: {avg_mcd:.2f}")
    else:
        print(f"Could not calculate MCD for {audio}.")

# Calculate the overall average MCD across all generated audios
overall_average_mcd = np.mean([score for scores in mcd_scores.values() for score in scores if scores])
print(f"\nOverall Average MCD for all generated audios: {overall_average_mcd:.2f}")

Average MCD for sample_1.wav: 23.71
Average MCD for sample_2.wav: 23.48
Average MCD for sample_3.wav: 23.17
Average MCD for sample_4.wav: 23.42
Average MCD for sample_5.wav: 23.03
Average MCD for sample_6.wav: 23.55
Average MCD for sample_7.wav: 23.25
Average MCD for sample_8.wav: 23.85
Average MCD for sample_9.wav: 23.66
Average MCD for sample_10.wav: 23.62
Average MCD for sample_11.wav: 23.58
Average MCD for sample_12.wav: 23.79
Average MCD for sample_13.wav: 23.73
Average MCD for sample_14.wav: 22.58
Average MCD for sample_15.wav: 23.77
Average MCD for sample_16.wav: 23.31
Average MCD for sample_17.wav: 23.04
Average MCD for sample_18.wav: 23.75
Average MCD for sample_19.wav: 22.10

Overall Average MCD for all generated audios: 23.39
