In [1]:
import os
import pickle
import numpy as np
import tensorflow as tf
import madmom
import mir_eval

from modules.labels import get_label_vector
from modules.madmom_cnn_prep import cnn_preprocessor
from datasets import Dataset
from modules.analysis_funcs import get_idx_to_fold, get_segmented_data, get_test_peaks
from analyze_detection import evaluate

%load_ext autoreload
%autoreload 2

FPS = 100
CONTEXT = 7

# Load Madmom normalization
def cnn_normalize(frames):
    inv_std = np.load("models/bock2013pret_inv_std.npy")
    mean = np.load("models/bock2013pret_mean.npy")
    frames_normalized = (frames - np.reshape(mean, (1,80,3)))*np.reshape(inv_std, (1,80,3))
    return frames_normalized

In [5]:
frame = madmom.audio.signal.FramedSignalProcessor(frame_size=2048, hop_size=441)
stft = madmom.audio.stft.STFTProcessor()
spect = madmom.audio.spectrogram.SpectrogramProcessor()
proc = madmom.processors.SequentialProcessor([frame, stft, spect])

In [2]:
ds0 = Dataset("initslurtest")
ds1 = Dataset("slurtest_add_1")

audio_fnames = ds0.get_audio_paths() + ds1.get_audio_paths()
label_fnames = ds0.get_annotation_paths() + ds1.get_annotation_paths()

audios = [madmom.audio.signal.load_wave_file(filename)[0] for filename in audio_fnames]
sample_rates = [madmom.audio.signal.load_wave_file(filename)[1] for filename in audio_fnames]
onset_schedules = [np.loadtxt(label_fname, usecols=0) for label_fname in label_fnames]

  file_sample_rate, signal = wavfile.read(filename, mmap=True)


In [17]:
base_path = "results/cnn-training-220409/"
folds_path = base_path + "folds.pkl"

model_name = "added-sample-gen-nostandard"

with open(folds_path, "rb") as f:
    folds = pickle.load(f)

itf = get_idx_to_fold(folds)

neural = False
TOL = 0.025


In [23]:
CD_list = []
FN_list = []
FP_list = []
for r in range(len(itf.keys())):
    fold = itf[r]
    rec_name = os.path.basename(audio_fnames[r])
    x = get_segmented_data(audio_fnames[r])
    if neural:
        model = tf.keras.models.load_model(base_path + "fold_" + str(fold) + "_" + model_name + "_model")
        out = model.predict(x)
        peaks = get_test_peaks(out, 1./FPS)
    else:
        spectrogram = proc(audios[r])
        peaks = madmom.features.onsets.peak_picking(
                                        activations=out, 
                                        threshold=0.01
    )
    
    
    [CD,FN,FP,doubles,merged] = evaluate(onset_schedules[r], peaks, tol_sec=TOL)
    CD_list.append(CD)
    FN_list.append(FN)
    FP_list.append(FP)

    scores = mir_eval.onset.evaluate(onset_schedules[r], peaks, window=TOL)
    print(rec_name + "\t" + "F-score: {:.2f}".format(100*scores["F-measure"]))



slurtest01.wav	F-score: 0.00
slurtest02.wav	F-score: 0.55
slurtest03.wav	F-score: 0.00
slurtest04.wav	F-score: 0.56
slurtest05.wav	F-score: 0.53
slurtest06.wav	F-score: 0.63


KeyboardInterrupt: 

In [40]:
np.sum(CD_list)/(np.sum(CD_list)+.5*(np.sum(FP_list) + np.sum(FN_list)))

0.8505654281098546