In [1]:
%matplotlib inline
%load_ext Cython

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import pickle
from numba import njit
import librosa as lb
import librosa.display
from skimage.filters import threshold_li, threshold_niblack, threshold_triangle, threshold_isodata, threshold_mean
from skimage.filters import threshold_otsu
import time
import scipy
import seaborn as sns
import os
import multiprocessing
from scipy.spatial.distance import cdist
import pandas as pd
from numba import prange, njit

In [36]:
def get_chroma(audio,sr=22050,H=1024,N=2048):
    chroma = lb.feature.chroma_stft(y=audio, sr=sr, norm=2, hop_length=H, n_fft=N)
    return chroma

In [37]:
def calculate_cqt(audio, sr = 22050, hop_length = 1024, bins = 12):
    return np.abs(lb.core.cqt(audio, n_bins = 8 * bins, bins_per_octave = bins, norm = 2))

In [5]:
def get_audio(path):
    audio, sr = lb.core.load(path)
    return audio

In [6]:
# piano_audio = get_audio("data/Audio/piano/piano1.m4a")
# violin_audio = get_audio('data/Audio/violin/violin1.mp3')
# cello_audio = get_audio('data/Audio/cello/cello1.mp3')
# fullmix_audio_1 = get_audio('data/Audio/fullmix/fullmix1.mp3')
# fullmix_audio_2 = get_audio('data/Audio/fullmix/fullmix2.mp3')

In [7]:
%%cython
import numpy as np
cimport numpy as np
cimport cython

import sys
import time


DTYPE_INT32 = np.int32
ctypedef np.int32_t DTYPE_INT32_t

DTYPE_FLOAT = np.float64
ctypedef np.float64_t DTYPE_FLOAT_t

cdef DTYPE_FLOAT_t MAX_FLOAT = float('inf')

# careful, without bounds checking can mess up memory - also can't use negative indices I think (like x[-1])
@cython.boundscheck(False) # turn off bounds-checking for entire function
def DTW_Cost_To_AccumCostAndSteps(Cin, parameter):
    '''
    Inputs
        C: The cost Matrix
    '''


    '''
    Section for checking and catching errors in the inputs
    '''

    cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] C
    try:
        C = np.array(Cin, dtype=DTYPE_FLOAT)
    except TypeError:
        print(bcolors.FAIL + "FAILURE: The type of the cost matrix is wrong - please pass in a 2-d numpy array" + bcolors.ENDC)
        return [-1, -1, -1]
    except ValueError:
        print(bcolors.FAIL + "FAILURE: The type of the elements in the cost matrix is wrong - please have each element be a float (perhaps you passed in a matrix of ints?)" + bcolors.ENDC)
        return [-1, -1, -1]

    cdef np.ndarray[np.uint32_t, ndim=1] dn
    cdef np.ndarray[np.uint32_t, ndim=1] dm
    cdef np.ndarray[DTYPE_FLOAT_t, ndim=1] dw
    # make sure dn, dm, and dw are setup
    # dn loading and exception handling
    if ('dn'  in parameter.keys()):
        try:

            dn = np.array(parameter['dn'], dtype=np.uint32)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dn (row steps) is wrong - please pass in a 1-d numpy array that holds uint32s" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "The type of the elements in dn (row steps) is wrong - please have each element be a uint32 (perhaps you passed a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.uint32)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        dn = np.array([1, 1, 0], dtype=np.uint32)
    # dm loading and exception handling
    if 'dm'  in parameter.keys():
        try:
            dm = np.array(parameter['dm'], dtype=np.uint32)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dm (col steps) is wrong - please pass in a 1-d numpy array that holds uint32s" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "FAILURE: The type of the elements in dm (col steps) is wrong - please have each element be a uint32 (perhaps you passed a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.uint32)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        print(bcolors.FAIL + "dm (col steps) was not passed in (gave default value [1,0,1]) " + bcolors.ENDC)
        dm = np.array([1, 0, 1], dtype=np.uint32)
    # dw loading and exception handling
    if 'dw'  in parameter.keys():
        try:
            dw = np.array(parameter['dw'], dtype=DTYPE_FLOAT)
        except TypeError:
            print(bcolors.FAIL + "FAILURE: The type of dw (step weights) is wrong - please pass in a 1-d numpy array that holds floats" + bcolors.ENDC)
            return [-1, -1, -1]
        except ValueError:
            print(bcolors.FAIL + "FAILURE:The type of the elements in dw (step weights) is wrong - please have each element be a float (perhaps you passed ints or a long?). You can specify this when making a numpy array like: np.array([1,2,3],dtype=np.float64)" + bcolors.ENDC)
            return [-1, -1, -1]
    else:
        dw = np.array([1, 1, 1], dtype=DTYPE_FLOAT)
        print(bcolors.FAIL + "dw (step weights) was not passed in (gave default value [1,1,1]) " + bcolors.ENDC)

    
    '''
    Section where types are given to the variables we're going to use 
    '''
    # create matrices to store our results (D and E)
    cdef DTYPE_INT32_t numRows = C.shape[0] # only works with np arrays, use np.shape(x) will work on lists? want to force to use np though?
    cdef DTYPE_INT32_t numCols = C.shape[1]
    cdef DTYPE_INT32_t numDifSteps = np.size(dw)

    cdef unsigned int maxRowStep = max(dn)
    cdef unsigned int maxColStep = max(dm)

    cdef np.ndarray[np.uint32_t, ndim=2] steps = np.zeros((numRows,numCols), dtype=np.uint32)
    cdef np.ndarray[DTYPE_FLOAT_t, ndim=2] accumCost = np.ones((maxRowStep + numRows, maxColStep + numCols), dtype=DTYPE_FLOAT) * MAX_FLOAT

    cdef DTYPE_FLOAT_t bestCost
    cdef DTYPE_INT32_t bestCostIndex
    cdef DTYPE_FLOAT_t costForStep
    cdef unsigned int row, col
    cdef unsigned int stepIndex

    '''
    The start of the actual algorithm, now that all our variables are set up
    '''
    # initializing the cost matrix - depends on whether its subsequence DTW
    # essentially allow us to hop on the bottom anywhere (so could start partway through one of the signals)
    if parameter['SubSequence']:
        for col in range(numCols):
            accumCost[maxRowStep, col + maxColStep] = C[0, col]
    else:
        accumCost[maxRowStep, maxColStep] = C[0,0]

    # filling the accumulated cost matrix
    for row in range(maxRowStep, numRows + maxRowStep, 1):
        for col in range(maxColStep, numCols + maxColStep, 1):
            bestCost = accumCost[<unsigned int>row, <unsigned int>col] # initialize with what's there - so if is an entry point, then can start low
            bestCostIndex = 0
            # go through each step, find the best one
            for stepIndex in range(numDifSteps):
                #costForStep = accumCost[<unsigned int>(row - dn[<unsigned int>(stepIndex)]), <unsigned int>(col - dm[<unsigned int>(stepIndex)])] + dw[<unsigned int>(stepIndex)] * C[<unsigned int>(row - maxRowStep), <unsigned int>(col - maxColStep)]
                costForStep = accumCost[<unsigned int>((row - dn[(stepIndex)])), <unsigned int>((col - dm[(stepIndex)]))] + dw[stepIndex] * C[<unsigned int>(row - maxRowStep), <unsigned int>(col - maxColStep)]
                if costForStep < bestCost:
                    bestCost = costForStep
                    bestCostIndex = stepIndex
            # save the best cost and best cost index
            accumCost[row, col] = bestCost
            steps[<unsigned int>(row - maxRowStep), <unsigned int>(col - maxColStep)] = bestCostIndex

    # return the accumulated cost along with the matrix of steps taken to achieve that cost
    return [accumCost[maxRowStep:, maxColStep:], steps]

@cython.boundscheck(False) # turn off bounds-checking for entire function
def DTW_GetPath(np.ndarray[DTYPE_FLOAT_t, ndim=2] accumCost, np.ndarray[np.uint32_t, ndim=2] stepsForCost, parameter):
    '''

    Parameter should have: 'dn', 'dm', 'dw', 'SubSequence'
    '''

    cdef np.ndarray[unsigned int, ndim=1] dn
    cdef np.ndarray[unsigned int, ndim=1] dm
    cdef np.uint8_t subseq
    # make sure dn, dm, and dw are setup
    if ('dn'  in parameter.keys()):
        dn = parameter['dn']
    else:
        dn = np.array([1, 1, 0], dtype=DTYPE_INT32)
    if 'dm'  in parameter.keys():
        dm = parameter['dm']
    else:
        dm = np.array([1, 0, 1], dtype=DTYPE_INT32)
    if 'SubSequence' in parameter.keys():
        subseq = parameter['SubSequence']
    else:
        subseq = 0

    cdef np.uint32_t numRows
    cdef np.uint32_t numCols
    cdef np.uint32_t curRow
    cdef np.uint32_t curCol
    cdef np.uint32_t endCol
    cdef DTYPE_FLOAT_t endCost

    numRows = accumCost.shape[0]
    numCols = accumCost.shape[1]

    # either start at the far corner (non sub-sequence)
    # or start at the lowest cost entry in the last row (sub-sequence)
    # where all of the signal along the row has been used, but only a 
    # sub-sequence of the signal along the columns has to be used
    curRow = numRows - 1
    if subseq:
        curCol = np.argmin(accumCost[numRows - 1, :])
    else:
        curCol = numCols - 1

    endCol = curCol
    endCost = accumCost[curRow, curCol]

    cdef np.uint32_t curRowStep
    cdef np.uint32_t curColStep
    cdef np.uint32_t curStepIndex


    cdef np.ndarray[np.uint32_t, ndim=2] path = np.zeros((2, numRows + numCols), dtype=np.uint32) # make as large as could need, then chop at the end
    path[0, 0] = curRow
    path[1, 0] = curCol

    cdef np.uint32_t stepsInPath = 1 # starts at one, we add in one before looping
    cdef np.uint32_t stepIndex = 0
    cdef np.int8_t done = (subseq and curRow == 0) or (curRow == 0 and curCol == 0)
    while not done:
        if accumCost[curRow, curCol] == MAX_FLOAT:
            print('A path is not possible')
            break

        # you're done if you've made it to the bottom left (non sub-sequence)
        # or just the bottom (sub-sequence)
        # find the step size
        curStepIndex = stepsForCost[curRow, curCol]
        curRowStep = dn[curStepIndex]
        curColStep = dm[curStepIndex]
        # backtrack by 1 step
        curRow = curRow - curRowStep
        curCol = curCol - curColStep
        # add your new location onto the path
        path[0, stepsInPath] = curRow
        path[1, stepsInPath] = curCol
        stepsInPath = stepsInPath + 1
        # check to see if you're done
        done = (subseq and curRow == 0) or (curRow == 0 and curCol == 0)

    # reverse the path (a matrix with two rows) and return it
    return [np.fliplr(path[:, 0:stepsInPath]), endCol, endCost]

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

In [8]:
def cosine_distance(X,Y):
    cost = cdist(X,Y,'cosine')
    return cost

In [79]:
def align_part_to_fullmix(query, ref, steps = [1, 1, 1, 2, 2, 1], weights = [1, 1, 2]):
    assert len(steps) % 2 == 0, "The length of steps must be even."
    dn = np.array(steps[::2], dtype=np.uint32)
    dm = np.array(steps[1::2], dtype=np.uint32)
    dw = weights
    subsequence = True
    parameter = {'dn': dn, 'dm': dm, 'dw': dw, 'SubSequence': subsequence}

    # Compute cost matrix
    cost = calculate_cost_fast(query, ref)
    
    # DTW
    [D, s] = DTW_Cost_To_AccumCostAndSteps(cost, parameter)
    [wp, endCol, endCost] = DTW_GetPath(D, s, parameter)

    # Reformat the output
    wp = wp.T[::-1]
    return wp

In [9]:
def align_audio(query, ref, steps = [1,1,1,2,2,1], weights = [2, 3, 3]):
    # set params
    assert len(steps) % 2 == 0, "The length of steps must be even."
    dn = np.array(steps[::2], dtype=np.uint32)
    dm = np.array(steps[1::2], dtype=np.uint32)
    dw = weights
    subsequence = True
    parameter = {'dn': dn, 'dm': dm, 'dw': dw, 'SubSequence': subsequence}

    # Compute cost matrix
    cost = cosine_distance(query.T, ref.T)

    # DTW
    [D, s] = DTW_Cost_To_AccumCostAndSteps(cost, parameter)
    [wp, endCol, endCost] = DTW_GetPath(D, s, parameter)

    # Reformat the output
    wp = wp.T[::-1]
    return wp

In [10]:
def frame_to_time(frame, hop_length = 1024, sr = 22050):
    return frame * hop_length / sr

In [11]:
def time_to_frame(time, hop_length = 1024, sr = 22050):
    return time * sr / hop_length

In [12]:
def calculateErrors(data, annotfile, gtfile, hop_length = 1024, sr = 22050, debug = False):
    if not os.path.exists(annotfile):
        return []
    wp = np.array(sorted(data, key = lambda x: x[0]))
    query_preds = wp[:, 0]
    ref_preds = wp[:, 1]
    query_to_ref = np.interp(list(range(max(query_preds[-1], ref_preds[-1]) + 1)), query_preds, ref_preds)

    data_gt = np.genfromtxt(gtfile, delimiter=',')
    gt_mapping = {}
    with open(gtfile,'r') as data_gt:
        for line in data_gt:
            time = float(line.strip().split()[0])
            idx = int(line.strip().split()[1])
            gt_mapping[idx] = time
        
    data_annot = np.genfromtxt(annotfile, delimiter=',')
    errors = []
    clicks = []
    idxs = []
    with open(annotfile,'r') as data_annot:
        for line in data_annot:
            time = float(line.strip().split()[0])
            idx = int(line.strip().split()[1])
            
            frame = int(np.round(time_to_frame(time)))
            ref_frame = query_to_ref[frame]
            pred_ref_time = frame_to_time(ref_frame)
            if debug:
                clicks.append(pred_ref_time)
            if idx in gt_mapping:
                error = np.abs(pred_ref_time - gt_mapping[idx])
                idxs.append(idx)
                errors.append(error)
    if debug:
        return errors, clicks
    return errors

In [19]:
def get_tolerances(errors, tols = np.arange(0,1,1/1000)):
    errors = np.array(errors)
    errorRates = []
    for tol in tols:
        toAdd = np.sum(errors > tol) * 1.0 / len(errors)
        errorRates.append(toAdd)
    return errorRates

In [20]:
@njit(parallel = True)
def calculate_cost_fast(query, ref):
    m, n1 = query.shape
    m, n2 = ref.shape
    result = np.zeros((n1, n2))
    for j1 in prange(n1):
        for j2 in prange(n2):
            for i in prange(m):
                result[j1, j2] += query[i, j1] * ref[i, j2]
    return result

In [21]:
def calculate_cost(query, ref):
    cost = calculate_cost_fast(query, ref)
    row_sums = query.sum(axis = 0) * -1
    result = cost / row_sums[:, None]
    result[result == np.inf] = 0
    result = np.nan_to_num(result)
    return result

In [22]:
def align_binarized_cqts(query, ref, steps = [1,1,1,2,2,1], weights = [1,1,2]):
    # set params
    assert len(steps) % 2 == 0, "The length of steps must be even."
    dn = np.array(steps[::2], dtype=np.uint32)
    dm = np.array(steps[1::2], dtype=np.uint32)
    dw = weights
    subsequence = True
    parameter = {'dn': dn, 'dm': dm, 'dw': dw, 'SubSequence': subsequence}

    # Compute cost matrix
    cost = calculate_cost(query, ref)

    # DTW
    [D, s] = DTW_Cost_To_AccumCostAndSteps(cost, parameter)
    [wp, endCol, endCost] = DTW_GetPath(D, s, parameter)

    # Reformat the output
    wp = wp.T[::-1]
    return wp

In [23]:
def time_stretch_part(query, ref, alignment):
    m, n = ref.shape
    feature_stretch = np.zeros((m, n))
    used = set(alignment[:, 1])
    for query_idx, ref_idx in alignment:
        feature_stretch[:, ref_idx] = query[:, query_idx]
    for j in range(n):
        if j not in used:
            feature_stretch[:, j] = feature_stretch[:, j-1]
    return feature_stretch

In [24]:
@njit(parallel = True)
def subtract_part_helper(stretched_cqt, fullmix_cqt):
    m, n = stretched_cqt.shape
    assert stretched_cqt.shape == fullmix_cqt.shape
    
    for i in prange(m):
        for j in prange(n):
            fullmix_cqt[i, j] -= stretched_cqt[i, j]
            fullmix_cqt[i, j] = max(fullmix_cqt[i, j], 0)

In [25]:
def subtract_part(stretched_part, ref):
    subtract_part_helper(stretched_part, ref)
    return ref

In [54]:
def binarize_cqt(cqt):
    rows = cqt.shape[0]
    bin_size = 12
    context = 6
    binarized = []
    for i in range(0, rows, bin_size):
        if i - context < 0:
            data = cqt[:i + context]
        elif i + context >= rows:
            data = cqt[i - context:]
        else:
            data = cqt[i-context: i+context+bin_size]
        thresh = threshold_triangle(data)
        frequency_bin = cqt[i: i+bin_size]
        x1 = frequency_bin > thresh
        binarized.extend(x1)
    return np.array(binarized).astype(float)

In [27]:
def stretch_segments(segments, wp):
    wp = np.array(sorted(wp, key = lambda x: x[0]))
    query_preds = wp[:, 0]
    ref_preds = wp[:, 1]
    query_to_ref = np.interp(list(range(max(query_preds[-1], ref_preds[-1]) + 1)), query_preds, ref_preds)
    n = len(query_to_ref) - 1
    segments[-1][1] = min(segments[-1][1], n)
    return [[int(query_to_ref[a]), int(query_to_ref[b])] for (a, b) in segments]

In [28]:
def weight_segments(segments, part_cqt, fullmix_cqt):
    alphas = np.concatenate([np.linspace(0.1, 1.0, num = 20), np.arange(1, 11, 0.3), np.arange(10, 510, 10)])
    for segment in segments:
        part_segment = part_cqt[:, segment[0]: segment[1] + 1]
        fullmix_segment = fullmix_cqt[:, segment[0]: segment[1] + 1]
        assert part_segment.shape == fullmix_segment.shape
        best = float('-inf')
        result = 0
        for alpha in alphas:
            val = np.sum(np.minimum(part_segment*alpha, fullmix_segment) - np.maximum(part_segment*alpha - fullmix_segment, 0))
            if val > best:
                best = val
                result = alpha
        part_cqt[:, segment[0]: segment[1] + 1] *= result

In [50]:
def ssa(part_cqt, fullmix_cqt, segments = []):
    part_binarized, fullmix_binarized = binarize_cqt(part_cqt), binarize_cqt(fullmix_cqt)
    print(part_binarized.shape)
    wp = align_binarized_cqts(part_binarized, fullmix_binarized)
    stretched_part = time_stretch_part(part_cqt, fullmix_cqt, wp)
    if segments:
        stretched_segments = stretch_segments(segments, wp)
        weight_segments(stretched_segments, stretched_part, fullmix_cqt)
    subtract_part(stretched_part, fullmix_cqt)
    return fullmix_cqt, wp

In [30]:
def get_silence_intervals(silence_indices):
    cur_interval = []
    start = silence_indices[0]
    for i in range(len(silence_indices) - 1):
        if silence_indices[i] + 1 != silence_indices[i+1]:
            cur_interval.append((start, silence_indices[i]))
            start = silence_indices[i+1]
    cur_interval.append((start, silence_indices[-1]))
    silence_intervals = []
    for start, end in cur_interval:
        start_time = frame_to_time(start)
        end_time = frame_to_time(end)
        if end_time - start_time < 2:
            continue
        silence_intervals.append([start, end])
    return silence_intervals

In [31]:
def get_threshold(total_energies):
    model = mixture.GaussianMixture(n_components=3, covariance_type="full")
    model.fit(total_energies)
    pi, mu, sigma = model.weights_.flatten(), model.means_.flatten(), np.sqrt(model.covariances_.flatten())
    max_idx = np.argmax(mu)
    threshold = mu[max_idx] - 4 * sigma[max_idx]
    return threshold

In [85]:
def ssa_a(part_cqt, fullmix_cqt, segments = []):
    wp = align_part_to_fullmix(part_cqt, fullmix_cqt)
    stretched_part = time_stretch_part(part_cqt, fullmix_cqt, wp)
    if segments:
        stretched_segments = stretch_segments(segments, wp)
        weight_segments(stretched_segments, stretched_part, fullmix_cqt)
    subtract_part(stretched_part, fullmix_cqt)
    return fullmix_cqt, wp

In [38]:
def get_segments(audio, H=1024, N=2048):
    stft = librosa.stft(audio, n_fft=N, hop_length=H)
    energies = np.sum(np.square(abs(stft)), axis=0)
    L = 32
    total_energies = []
    for i in range(len(energies)-L):
        total_energies.append(sum(energies[i:i+L]))
        
    total_energies = np.log(total_energies).reshape(-1, 1)
    threshold = get_threshold(total_energies)
    
    is_silence = [False] * (L//2 - 1)
    for energy in total_energies:
        if energy <= threshold:
            is_silence.append(True)
        else:
            is_silence.append(False)
    is_silence.extend([False] * (L//2))
    silence_indices = np.where(np.array(is_silence) == True)[0]
    silence_intervals = get_silence_intervals(silence_indices)
    nonsilence_segments = []
    cur = 0
    for start, end in silence_intervals:
        nonsilence_segments.append([cur, start])
        cur = end + 1
    nonsilence_segments.append([cur, len(is_silence)])
    return nonsilence_segments

In [41]:
from sklearn import mixture

In [42]:
import glob
basepath = '/home/dyang/URMP-clean/data/train'
pieces = os.listdir(basepath)

In [43]:
pieces

['01_Jupiter_vn_vc',
 '02_Sonata_vn_vn',
 '03_Dance_fl_cl',
 '04_Allegro_fl_fl',
 '07_GString_tpt_tbn',
 '11_Maria_ob_vc',
 '12_Spring_vn_vn_vc',
 '15_Surprise_tpt_tpt_tbn',
 '16_Surprise_tpt_tpt_sax',
 '18_Nocturne_vn_fl_tpt',
 '21_Rejouissance_cl_tbn_tba',
 '22_Rejouissance_sax_tbn_tba',
 '27_King_vn_vn_va_sax',
 '28_Fugue_fl_ob_cl_bn',
 '29_Fugue_fl_fl_ob_cl',
 '33_Elise_tpt_tpt_hn_tbn',
 '35_Rondeau_vn_vn_va_db',
 '36_Rondeau_vn_vn_va_vc',
 '38_Jerusalem_vn_vn_va_vc_db',
 '39_Jerusalem_vn_vn_va_sax_db']

In [88]:
piece = pieces[1]
for num in range(3):
    piece_path = os.path.join(basepath, piece)
    fullmix_path = os.path.join(piece_path, f'Vocoder{num}.wav')
    fullmix_audio = get_audio(fullmix_path)
    fullmix_cqt = calculate_cqt(fullmix_audio)
    fullmix_annot = os.path.join(piece_path,f'Vocoder{num}.txt')
    instrument_paths = sorted(glob.glob(os.path.join(piece_path,'I*.wav')))
    cqts = []
    all_errs = []
    segments = []
    clicks = []
    for idx, instrument_path in enumerate(instrument_paths):
        print(instrument_path)
        audio = get_audio(instrument_path)
        cqt = calculate_cqt(audio)
        segments = get_segments(audio)
        cqts.append(cqt)
        fullmix_cqt, align = ssa_a(cqt, fullmix_cqt)
        annot = os.path.join(piece_path,os.path.basename(instrument_path).replace('wav','txt'))
        errors, click = calculateErrors(align, annot, fullmix_annot, debug = True)
        print(np.average(errors))
        all_errs.extend(errors)
        clicks.extend(click)
#     with open(f'tols_test/baseline1/{piece}{num}.pkl','wb') as f:
#         pickle.dump(all_errs, f)

/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I0.wav
8.918239277290963
/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I1.wav
11.572009753766972
/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I0.wav
8.881432357232478
/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I1.wav
11.194332148379736
/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I0.wav
8.050619289380737
/home/dyang/URMP-clean/data/train/02_Sonata_vn_vn/I1.wav
9.94669568343538
