# Evaluate audio base metrics

In [1]:
import glob
import numpy as np
from tqdm import tqdm
import scipy.stats as stats

from audio_tools.scapeplot import (
  compute_fitness_scape_plot,
  normalization_properties_SSM
)
from audio_tools.ssm_features import (
  compute_SM_from_filename,
  compute_tempo_rel_set
)

In [2]:
def preprocess_audio(audio_file_path):  
    """
    Preprocess audio file to compute features and SM

    Params
    ------
    audio_file_path : str #Path to audio file

    Returns
    -------
    feature : np.array #Feature matrix
    SM : np.array #Self-similarity matrix

    """
    tempo_rel_set = compute_tempo_rel_set(0.5, 2, 7) # for tempo invariance
    shift_set = np.array([x for x in range(12)])     # for tranposition invariance
    rel_threshold = 0.25                             # the proportion of (highest) values to retain
    penalty = -2                                     # all values below ``rel_threshold`` are set to this
    _, _, feature, _, SM, _ = compute_SM_from_filename(
        audio_file_path,
        tempo_rel_set=tempo_rel_set, 
        shift_set=shift_set, 
        thresh=rel_threshold,
        penalty=penalty
    )
    feature *= 10
    return feature, SM

def calculate_fitness(SM):
    """
    Calculate fitness of a similarity matrix (SM) using the fitness scape plot.

    Params
    ------
    SM : np.ndarray #A similarity matrix.

    returns
    -------
    fitness : float #The fitness of the similarity matrix.
    """
    SSM = normalization_properties_SSM(SM)
    fitness = compute_fitness_scape_plot(SSM)[0]
    return fitness

Preprocess

In [3]:
#-----preprocess audio ground truth data------
# get list of audio files
audio_dir = "data/audio/classical_jazz_1024/train"
audio_files = glob.glob(audio_dir + "/*.wav")
#preprocess
SMs_ground_truth = []
features_ground_truth = [] #feature of the audio
for audio_file in tqdm(audio_files, desc="Preprocessing audio data"):
    feature, SM = preprocess_audio(audio_file)
    SMs_ground_truth.append(SM)
    features_ground_truth.append(feature)

  x, Fs = librosa.load(fn_wav, Fs)
  x, Fs = librosa.load(fn_wav, Fs)
Preprocessing audio data: 100%|██████████| 2022/2022 [12:59<00:00,  2.59it/s]


In [8]:
#-----preprocess audio prediction data------
# get list of audio files
audio_dir = "/raid/koki-sakurai/model/dataset/data/audio/samples/finetuning-1600-100/sample_10_B"
audio_files = glob.glob(audio_dir + "/*.wav")
#preprocess
SMs_presiction = []
features_presiction = [] #feature of the audio
for audio_file in tqdm(audio_files, desc="Preprocessing audio data"):
    feature, SM = preprocess_audio(audio_file)
    SMs_presiction.append(SM)
    features_presiction.append(feature)

Preprocessing audio data: 100%|██████████| 100/100 [00:31<00:00,  3.16it/s]


Evaluate diversity

In [10]:
def cal_diversity(melody_A, melody_B):
    """
    Calculate the diversity between two melodies

    Params
    ------
    melody_A, melody_B: np.array
    """
    #adjust the length of two melodies
    if not melody_A.shape == melody_B.shape:
        for dim in range(len(melody_A.shape)):
            min_length = min(melody_A.shape[dim], melody_B.shape[dim])
            if melody_A.shape[dim] > min_length:
                for _ in range(melody_A.shape[dim] - min_length):
                    melody_A = np.delete(melody_A, -1, axis=dim)
            else:
                for _ in range(melody_B.shape[dim] - min_length):
                    melody_B = np.delete(melody_B, -1, axis=dim)

    
    return np.mean(np.abs(melody_A - melody_B)**2)

#ground truth
ground_truths = features_ground_truth
ground_truths_A = ground_truths[:len(ground_truths)//2]
ground_truths_B = ground_truths[len(ground_truths)//2:]
#prediction
predictions = features_presiction
predictions_A = predictions[:len(predictions)//2]
predictions_B = predictions[len(predictions)//2:]

#------calculate diversity------
diversities_ground_truth = []
diversities_prediction = []
#ground truth
for idx in tqdm(range(len(ground_truths_A)), desc="calculating diversity_ground_truth"):
    diversity_ground_truth = cal_diversity(ground_truths_A[idx], ground_truths_B[idx])
    diversities_ground_truth.append(diversity_ground_truth)
#prediction
for idx in tqdm(range(len(predictions_A)), desc="calculating diversity_prediction"):
    diversity_prediction = cal_diversity(predictions_A[idx], predictions_B[idx])
    diversities_prediction.append(diversity_prediction)

#------print result------
print("diversity_ground_truth: ", np.mean(diversities_ground_truth))
print("diversity_prediction: ", np.mean(diversities_prediction))

#------t-test------
t, p = stats.ttest_ind(diversities_ground_truth, diversities_prediction)
print("t: ", t)
print("p: ", p)

calculating diversity_ground_truth: 100%|██████████| 1011/1011 [00:00<00:00, 75010.90it/s]
calculating diversity_prediction: 100%|██████████| 50/50 [00:00<00:00, 65027.97it/s]

diversity_ground_truth:  5.586691833606032
diversity_prediction:  4.983223407306591
t:  3.029841582718565
p:  0.002506190028094198





Calculate fitness

In [13]:
#-----calculate ground truth fitness------
fitnesses_ground_truth = []
for SM in tqdm(SMs_ground_truth, desc="Calculating fitness of ground truth data"):
    fitness = calculate_fitness(SM)
    fitnesses_ground_truth.append(fitness)


Calculating fitness of ground truth data: 100%|██████████| 2022/2022 [07:54<00:00,  4.26it/s]


In [24]:
#-----calculate prediction fitness------
fitnesses_prediction = []
for SM in tqdm(SMs_presiction, desc="Calculating fitness of prediction"):
    fitness = calculate_fitness(SM)
    fitnesses_prediction.append(fitness)

Calculating fitness of prediction: 100%|██████████| 100/100 [00:23<00:00,  4.26it/s]


Evaluate SI

In [15]:
def calculate_SI(fitness):
    """
    Calculate the SI of a fitness value.

    params
    ------
    fitness : float #The fitness value.

    return
    ------
    SI : float #The SI of the fitness value.
    """
    return np.max(fitness)

#-----calculate SI------
SIs_ground_truth = []
for fitness in tqdm(fitnesses_ground_truth, desc="Calculating SI of ground truth"):
    SI = calculate_SI(fitness)
    SIs_ground_truth.append(SI)
SIs_prediction = []
for fitness in tqdm(fitnesses_prediction, desc="Calculating SI of prediction"):
    SI = calculate_SI(fitness)
    SIs_prediction.append(SI)

#-----print results------
print("SI_ground_truth", np.mean(SIs_ground_truth))
print("SI_prediction", np.mean(SIs_prediction))

Calculating SI of ground truth: 100%|██████████| 2022/2022 [00:00<00:00, 123406.76it/s]
Calculating SI of prediction: 100%|██████████| 100/100 [00:00<00:00, 169947.49it/s]

SI_ground_truth 0.36214281988231506
SI_prediction 0.34857934528497203



