In [1]:
import librosa
import soundfile as sf
import glob
import os
import copy
import sys

import numpy as np
from scipy.interpolate import interp1d
import pyrubberband as pyrb
import pretty_midi
from tqdm.auto import tqdm

from synctoolbox.dtw.mrmsdtw import sync_via_mrmsdtw
from synctoolbox.dtw.utils import (
    compute_optimal_chroma_shift,
    shift_chroma_vectors,
    make_path_strictly_monotonic,
)
from synctoolbox.feature.chroma import (
    pitch_to_chroma,
    quantize_chroma,
    quantized_chroma_to_CENS,
)
from synctoolbox.feature.dlnco import pitch_onset_features_to_DLNCO
from synctoolbox.feature.pitch import audio_to_pitch_features
from synctoolbox.feature.pitch_onset import audio_to_pitch_onset_features
from synctoolbox.feature.utils import estimate_tuning

import multiprocessing
from joblib import Parallel, delayed

In [2]:
def normalize(audio, min_y=-1.0, max_y=1.0, eps=1e-8):
    """Linearly rescale a 1-D signal so it spans ``[min_y + eps, max_y - eps]``.

    The smallest sample is mapped to ``min_y + eps`` and the largest to
    ``max_y - eps``; everything in between is scaled linearly.

    Parameters
    ----------
    audio : np.ndarray
        One-dimensional array of samples.
    min_y, max_y : float
        Nominal lower / upper bound of the output range.
    eps : float
        Margin kept between the output extrema and the nominal bounds.

    Returns
    -------
    np.ndarray
        New array with the same shape as ``audio``.  A constant input has no
        range to stretch, so it is returned as an array filled with the
        midpoint of the target range instead of NaNs.
    """
    assert len(audio.shape) == 1
    max_y -= eps
    min_y += eps
    amax = audio.max()
    amin = audio.min()
    span = amax - amin
    if span == 0:
        # Constant signal: (audio - amin) / span would be 0 / 0 and yield NaN
        # for every sample, so fall back to the centre of the target range.
        out_dtype = (
            audio.dtype if np.issubdtype(audio.dtype, np.floating) else np.float64
        )
        return np.full(audio.shape, (max_y + min_y) / 2, dtype=out_dtype)
    return (max_y - min_y) * (audio - amin) / span + min_y


def get_stereo(pop_y, midi_y, pop_scale=0.99):
    """Stack two mono signals into one 2-row array (MIDI first, pop second).

    The shorter signal is zero-padded at its end so both rows have the same
    length; the pop row is multiplied by ``pop_scale``.
    """
    target_len = max(len(pop_y), len(midi_y))
    channels = []
    for signal in (midi_y, pop_y):
        missing = target_len - len(signal)
        # Only the shorter signal is padded; the longer one is used as-is.
        channels.append(np.pad(signal, (0, missing)) if missing else signal)
    midi_channel, pop_channel = channels
    return np.stack((midi_channel, pop_channel * pop_scale))


def generate_variable_f0_sine_wave(f0, len_y, sr):
    """Render a sine tone whose instantaneous frequency follows ``f0``.

    ``f0`` is linearly interpolated to ``len_y`` points, NaNs are replaced via
    ``np.nan_to_num``, and the per-sample phase increments are accumulated
    before taking the sine.
    """
    n_frames = len(f0)
    frame_to_f0 = interp1d(np.arange(n_frames), f0, kind="linear")
    sample_positions = np.linspace(0, n_frames - 1, len_y)
    f0_per_sample = np.nan_to_num(frame_to_f0(sample_positions))
    # Phase is the running sum of per-sample angular increments.
    phase = np.cumsum(f0_per_sample / sr * 2 * np.pi)
    return np.sin(phase)


def fluidsynth_without_normalize(self, fs=44100, sf2_path=None):
    """Synthesize every instrument with fluidsynth and sum them, unnormalized.

    Parameters
    ----------
    fs : int
        Sampling rate to synthesize at.
    sf2_path : str
        Path to a .sf2 file, forwarded unchanged to each instrument's
        ``fluidsynth`` call.

    Returns
    -------
    synthesized : np.ndarray
        Sum of the per-instrument waveforms, as long as the longest of them.
        An empty array when there are no instruments or none has notes.
        The peak-normalization step is deliberately left out.
    """
    instruments = self.instruments
    # Nothing to render: no instruments at all, or none of them has a note.
    if not any(len(inst.notes) != 0 for inst in instruments):
        return np.array([])

    rendered = [inst.fluidsynth(fs=fs, sf2_path=sf2_path) for inst in instruments]

    # Output buffer sized to the longest rendered waveform.
    mix = np.zeros(max(wave.shape[0] for wave in rendered))
    for wave in rendered:
        mix[: wave.shape[0]] += wave
    return mix

In [3]:
# Sampling rate (Hz) used for librosa.load, fluidsynth rendering and the
# synctoolbox feature extractors in this notebook.
Fs = 22050
# Rate of the chroma/DLNCO feature sequences; warping-path indices are divided
# by this value to obtain times in seconds.
feature_rate = 50
# Forwarded as `step_weights` to sync_via_mrmsdtw.
step_weights = np.array([1.5, 1.5, 2.0])
# Forwarded as `threshold_rec` to sync_via_mrmsdtw.
threshold_rec = 10 ** 6

In [4]:
def simple_adjust_times(pm, original_times, new_times):
    """
    Warp the notes, pitch bends and control changes of ``pm`` in place by
    mapping ``original_times`` onto ``new_times`` with ``np.interp``.

    Most of this code is taken from the original pretty_midi implementation:
    https://github.com/craffel/pretty-midi/blob/main/pretty_midi/pretty_midi.py

    Parameters
    ----------
    pm : object with an ``instruments`` list and ``remove_invalid_notes()``
        Modified in place and also returned.
        (presumably a ``pretty_midi.PrettyMIDI`` - confirm against callers)
    original_times : array-like
        Reference times in the current timeline of ``pm``.
    new_times : array-like
        Times that the corresponding ``original_times`` entries map to.

    Returns
    -------
    The same ``pm`` object, after adjustment.
    """
    # Keep only notes that lie entirely inside the span covered by
    # original_times; anything starting before or ending after it is dropped.
    for instrument in pm.instruments:
        instrument.notes = [
            copy.deepcopy(note)
            for note in instrument.notes
            if note.start >= original_times[0] and note.end <= original_times[-1]
        ]
    # Get array of note-on locations and correct them
    note_ons = np.array(
        [note.start for instrument in pm.instruments for note in instrument.notes]
    )
    adjusted_note_ons = np.interp(note_ons, original_times, new_times)
    # Same for note-offs
    note_offs = np.array(
        [note.end for instrument in pm.instruments for note in instrument.notes]
    )
    adjusted_note_offs = np.interp(note_offs, original_times, new_times)
    # Correct notes
    for n, note in enumerate(
        [note for instrument in pm.instruments for note in instrument.notes]
    ):
        # Multiplying by the boolean clamps non-positive times to 0.
        note.start = (adjusted_note_ons[n] > 0) * adjusted_note_ons[n]
        note.end = (adjusted_note_offs[n] > 0) * adjusted_note_offs[n]
    # After performing alignment, some notes may have an end time which is
    # on or before the start time.  Remove these!
    pm.remove_invalid_notes()

    def adjust_events(event_getter):
        """This function calls event_getter with each instrument as the
        sole argument and adjusts the events which are returned.

        The returned lists are sorted, re-timed and filtered in place."""
        # Sort the events by time
        for instrument in pm.instruments:
            event_getter(instrument).sort(key=lambda e: e.time)
        # Correct the events by interpolating
        event_times = np.array(
            [
                event.time
                for instrument in pm.instruments
                for event in event_getter(instrument)
            ]
        )
        adjusted_event_times = np.interp(event_times, original_times, new_times)
        for n, event in enumerate(
            [
                event
                for instrument in pm.instruments
                for event in event_getter(instrument)
            ]
        ):
            event.time = adjusted_event_times[n]
        for instrument in pm.instruments:
            # We want to keep only the final event which has time ==
            # new_times[0]
            valid_events = [
                event
                for event in event_getter(instrument)
                if event.time == new_times[0]
            ]
            if valid_events:
                valid_events = valid_events[-1:]
            # Otherwise only keep events within the new set of times
            # (strictly inside: events landing exactly on new_times[-1] are dropped)
            valid_events.extend(
                event
                for event in event_getter(instrument)
                if event.time > new_times[0] and event.time < new_times[-1]
            )
            # Slice-assign so the instrument's own list object is updated.
            event_getter(instrument)[:] = valid_events

    # Correct pitch bends and control changes
    adjust_events(lambda i: i.pitch_bends)
    adjust_events(lambda i: i.control_changes)

    return pm

In [5]:
def get_features_from_audio(audio, tuning_offset, visualize=False):
    """Compute the quantized chroma and DLNCO features of one audio signal.

    Uses the module-level ``Fs`` and ``feature_rate``.  ``visualize`` is passed
    on as the ``verbose`` / ``visualize`` flag of the synctoolbox helpers.

    Returns
    -------
    (quantized chroma, DLNCO), with the DLNCO sequence length taken from the
    second dimension of the quantized chroma.
    """
    # Arguments shared by both synctoolbox audio front-ends.
    shared = dict(
        f_audio=audio, Fs=Fs, tuning_offset=tuning_offset, verbose=visualize
    )

    pitch_features = audio_to_pitch_features(feature_rate=feature_rate, **shared)
    chroma_quantized = quantize_chroma(
        f_chroma=pitch_to_chroma(f_pitch=pitch_features)
    )

    onset_peaks = audio_to_pitch_onset_features(**shared)
    dlnco = pitch_onset_features_to_DLNCO(
        f_peaks=onset_peaks,
        feature_rate=feature_rate,
        feature_sequence_length=chroma_quantized.shape[1],
        visualize=visualize,
    )
    return chroma_quantized, dlnco

In [6]:
def get_aligned_results(midi_pm, song_audio):
    """Align a MIDI object to a song recording and return the aligned material.

    The MIDI is synthesized, chroma + DLNCO features are extracted from both
    signals, a warping path is computed with MrMsDTW, and a deep copy of the
    MIDI is time-warped onto the song's timeline.  The song is pitch-shifted
    by the opposite of the estimated chroma shift (folded into -5..6
    semitones) and re-normalized.

    Returns
    -------
    dict with keys ``mix_song`` (2-row array from ``get_stereo``),
    ``song_pitch_shifted``, ``midi_warped_pm``, ``pitch_shift_for_song_audio``,
    ``tuning_offset_song`` and ``tuning_offset_piano``.
    """
    song_audio = normalize(song_audio)
    piano_audio = midi_pm.fluidsynth(Fs)

    # Tuning is estimated per signal; background on why:
    # https://www.audiolabs-erlangen.de/resources/MIR/FMP/C3/C3S1_TranspositionTuning.html
    tuning_song = estimate_tuning(song_audio, Fs)
    tuning_piano = estimate_tuning(piano_audio, Fs)

    # Quantized chroma plus DLNCO onset features (Ewert, Mueller, Grosche,
    # ICASSP 2009) for both signals.
    chroma_song, dlnco_song = get_features_from_audio(song_audio, tuning_song)
    chroma_piano, dlnco_piano = get_features_from_audio(piano_audio, tuning_piano)

    # Estimate the chroma shift on CENS features and apply it to the piano
    # side, so that a key difference between the two does not degrade the
    # alignment.
    cens_song = quantized_chroma_to_CENS(chroma_song, 201, 50, feature_rate)[0]
    cens_piano = quantized_chroma_to_CENS(chroma_piano, 201, 50, feature_rate)[0]
    chroma_shift = compute_optimal_chroma_shift(cens_song, cens_piano)
    chroma_piano = shift_chroma_vectors(chroma_piano, chroma_shift)
    dlnco_piano = shift_chroma_vectors(dlnco_piano, chroma_shift)

    warping_path = make_path_strictly_monotonic(
        sync_via_mrmsdtw(
            f_chroma1=chroma_song,
            f_onset1=dlnco_song,
            f_chroma2=chroma_piano,
            f_onset2=dlnco_piano,
            input_feature_rate=feature_rate,
            step_weights=step_weights,
            threshold_rec=threshold_rec,
            verbose=False,
        )
    )

    # Fold the inverse chroma shift into the range -5..6 semitones.
    semitones = -chroma_shift % 12
    if semitones > 6:
        semitones -= 12

    song_shifted = (
        song_audio if semitones == 0 else pyrb.pitch_shift(song_audio, Fs, semitones)
    )
    song_shifted = normalize(song_shifted)

    # Path indices -> seconds; row 1 is the piano side, row 0 the song side.
    path_seconds = warping_path / feature_rate
    warped_pm = simple_adjust_times(
        copy.deepcopy(midi_pm), path_seconds[1], path_seconds[0]
    )
    piano_warped = warped_pm.fluidsynth(Fs)

    return {
        "mix_song": get_stereo(song_shifted, piano_warped),
        "song_pitch_shifted": song_shifted,
        "midi_warped_pm": warped_pm,
        "pitch_shift_for_song_audio": semitones,
        "tuning_offset_song": tuning_song,
        "tuning_offset_piano": tuning_piano,
    }

In [7]:
def save_delayed_song(
    song_path,
    midi_path,
    song_output_path,
    midi_output_path,
    dry_run,
):
    """Align one (song, MIDI) pair and write the pitch-shifted song and warped MIDI.

    Parameters
    ----------
    song_path : str
        Audio file loaded with librosa at the module-level ``Fs``.
    midi_path : str
        MIDI file loaded with ``pretty_midi.PrettyMIDI``.
    song_output_path : str
        Destination of the pitch-shifted song (written as mp3).
    midi_output_path : str
        Destination of the time-warped MIDI.
    dry_run : bool
        When true, only print what would be written.  The alignment itself
        still runs.

    Returns
    -------
    None.  Failures while writing are reported with ``print`` (including the
    exception) instead of being raised, so one bad file does not abort a
    batch.  Songs whose summed squared amplitude is below 1 are skipped.
    """
    # Imported and configured here so the filter is installed in whichever
    # process actually executes this function.
    import warnings

    warnings.filterwarnings(action="ignore")

    song_audio, _ = librosa.load(song_path, sr=Fs)
    midi_pm = pretty_midi.PrettyMIDI(midi_path)

    if np.power(song_audio, 2).sum() < 1:  # low energy: invalid file
        print("invalid audio :", song_path)
        return

    rd = get_aligned_results(midi_pm=midi_pm, song_audio=song_audio)
    song_pitch_shifted = rd["song_pitch_shifted"]
    midi_warped_pm = rd["midi_warped_pm"]

    try:
        if dry_run:
            print("write audio files: ", song_output_path)
        else:
            # Make sure the destination directory exists before writing.
            os.makedirs(os.path.dirname(song_output_path) or ".", exist_ok=True)
            sf.write(
                file=song_output_path,
                data=song_pitch_shifted,
                samplerate=Fs,
                format="mp3",
            )
    except Exception as err:
        # Best effort: report and carry on with the MIDI output.
        print("Fail : ", song_path, repr(err))

    if dry_run:
        print("write warped midi :", midi_output_path)
        return

    try:
        os.makedirs(os.path.dirname(midi_output_path) or ".", exist_ok=True)
        midi_warped_pm.write(midi_output_path)
    except Exception as first_err:
        # Ad-hoc recovery: retry once with the tick scales of the source MIDI.
        midi_warped_pm._tick_scales = midi_pm._tick_scales
        try:
            midi_warped_pm.write(midi_output_path)
        except Exception as retry_err:
            print(
                "ad-hoc failed midi : ", midi_path, repr(first_err), repr(retry_err)
            )
        else:
            # Only announce the workaround when the retry actually succeeded.
            print("ad-hoc midi : ", midi_path)

In [8]:
def get_files(song_dir, midi_dir, output_dir):
    """List the input songs / MIDIs and derive their output paths.

    Returns four lists: the sorted ``*.mp3`` files of ``song_dir``, the sorted
    ``*.mid`` files of ``midi_dir``, and for each of them the path with the
    same base name under ``output_dir/song`` and ``output_dir/midi``.
    """

    def sorted_matches(directory, pattern):
        return sorted(glob.glob(os.path.join(directory, pattern)))

    def mirrored(paths, subdir):
        target_dir = os.path.join(output_dir, subdir)
        return [os.path.join(target_dir, os.path.basename(p)) for p in paths]

    song_files = sorted_matches(song_dir, "*.mp3")
    midi_files = sorted_matches(midi_dir, "*.mid")

    return (
        song_files,
        midi_files,
        mirrored(song_files, "song"),
        mirrored(midi_files, "midi"),
    )

In [9]:
# Collect the input songs / MIDIs and the paths their aligned versions go to.
songs, midis, songs_o, midis_o = get_files(
    song_dir="song2midi/dataset/song_blend",
    midi_dir="song2midi/dataset/midi_blend",
    output_dir="song2midi/dataset/output",
)

In [15]:
# One (song, midi, song_out, midi_out) tuple per item; the trailing [:] keeps
# every tuple.
# NOTE(review): the four lists are paired purely by position after sorting, and
# zip stops at the shortest list - confirm that the song and MIDI directories
# contain matching files in matching order.
file_to_process = list(zip(songs, midis, songs_o, midis_o))[:]

In [16]:
# Align every (song, midi) pair in 16 joblib workers; each tuple is unpacked
# into the four path arguments of save_delayed_song.
Parallel(n_jobs=16)(
    delayed(save_delayed_song)(*paths, dry_run=False)
    for paths in tqdm(file_to_process)
)

  0%|          | 0/780 [00:00<?, ?it/s]

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [25]:
import copy
import librosa
import essentia
import essentia.standard
import numpy as np
import scipy.interpolate as interp
import note_seq

In [26]:
# Sampling rate (Hz) that extract_rhythm passes to librosa.load when it loads
# the song itself.
SAMPLERATE = 44100

In [27]:
def nearest_onset_offset_digitize(on, off, bins):
    """Map onset / offset times to the index of the nearest entry of ``bins``.

    Times are binned against the midpoints between consecutive ``bins``
    values.  Wherever an offset lands on the same index as its onset, the
    offset index is bumped by one.  Offset indices are not clipped, so they
    can reach ``len(bins)``.
    """
    midpoints = 0.5 * (bins[:-1] + bins[1:])
    on_idx, off_idx = (np.digitize(times, midpoints) for times in (on, off))
    collapsed = off_idx == on_idx
    off_idx[collapsed] += 1
    return on_idx, off_idx


In [28]:

def apply_sustain_pedal(pm):
    """Round-trip ``pm`` through note_seq, applying sustain control changes.

    ``pm`` is converted to a note sequence, passed through
    ``note_seq.apply_sustain_control_changes`` and converted back to a
    pretty_midi object, which is returned.
    """
    sequence = note_seq.midi_to_note_sequence(pm)
    sustained = note_seq.apply_sustain_control_changes(sequence)
    return note_seq.note_sequence_to_pretty_midi(sustained)


In [29]:

def interpolate_beat_times(beat_times, steps_per_beat, extend=False):
    """Subdivide a sequence of beat times into ``steps_per_beat`` steps per beat.

    Beat index is mapped to beat time by linear interpolation (with linear
    extrapolation past the last beat), and that mapping is sampled every
    ``1 / steps_per_beat`` beats.

    Parameters
    ----------
    beat_times : np.ndarray
        1-D array of beat times.
    steps_per_beat : int
        Number of grid steps per beat.
    extend : bool
        If false, the grid runs from the first to the last beat
        (``(size - 1) * steps_per_beat + 1`` points).  If true, it runs one
        full beat past the last beat (``size * steps_per_beat + 1`` points),
        the extra beat being extrapolated.

    Returns
    -------
    np.ndarray
        Times of the grid steps.
    """
    n_beats = beat_times.size
    beat_index_to_time = interp.interp1d(
        np.arange(n_beats),
        beat_times,
        bounds_error=False,
        fill_value="extrapolate",
    )
    # Last beat position covered by the grid, in units of beats.
    last_position = n_beats if extend else n_beats - 1
    # One point every 1/steps_per_beat beats, both endpoints included.  The
    # previous non-extended count (size * steps_per_beat - 1) only matched
    # this spacing for steps_per_beat == 2.
    n_points = last_position * steps_per_beat + 1
    return beat_index_to_time(np.linspace(0, last_position, n_points))


In [30]:
def midi_quantize_by_beats(
    midi_path, beat_times, steps_per_beat, ignore_sustain_pedal=False
):
    """Snap the notes of a MIDI file onto a grid derived from ``beat_times``.

    Unless ``ignore_sustain_pedal`` is set, sustain control changes are applied
    to the note sequence first.  Note onsets / offsets are assigned indices on
    the non-extended beat-step grid, and the corresponding times are then
    looked up on the extended grid.

    Returns
    -------
    (quantized note sequence,
     array with columns [onset index, offset index, pitch, velocity] holding
     one note per (onset index, pitch) pair, ordered by onset then offset,
     extended beat-step grid)
    """
    sequence = note_seq.midi_file_to_note_sequence(midi_path)
    if not ignore_sustain_pedal:
        sequence = note_seq.apply_sustain_control_changes(sequence)

    # Re-time a copy so the sequence read above keeps its original timing.
    quantized = copy.deepcopy(sequence)

    source_notes = sequence.notes
    note_times = np.array([[n.start_time, n.end_time] for n in source_notes])
    note_attributes = np.array([[n.pitch, n.velocity] for n in source_notes])

    # Indices are computed against the non-extended grid ...
    inner_grid = interpolate_beat_times(beat_times, steps_per_beat, extend=False)
    on_idx, off_idx = nearest_onset_offset_digitize(
        np.array(note_times[:, 0]), np.array(note_times[:, 1]), inner_grid
    )
    # ... and resolved to times on the extended grid.
    extended_grid = interpolate_beat_times(beat_times, steps_per_beat, extend=True)

    def delete_duplicate_notes(dnotes):
        # Order by (onset index, pitch) so duplicates become adjacent rows.
        dnotes = dnotes[(dnotes[:, 0] * 128 + dnotes[:, 2]).argsort()]
        repeats_previous = (dnotes[1:, 0] == dnotes[:-1, 0]) & (
            dnotes[1:, 2] == dnotes[:-1, 2]
        )
        dnotes = np.delete(dnotes, np.flatnonzero(repeats_previous) + 1, axis=0)
        # Final order: by onset index, then offset index.
        return dnotes[(dnotes[:, 0] * 128 + dnotes[:, 1]).argsort()]

    discrete_notes = delete_duplicate_notes(
        np.column_stack((on_idx, off_idx, note_attributes))
    )

    for note, start, end in zip(
        quantized.notes, extended_grid[on_idx], extended_grid[off_idx]
    ):
        note.start_time = start
        note.end_time = end

    return quantized, discrete_notes, extended_grid


In [31]:

def extract_rhythm(song, y=None):
    """Run essentia's ``RhythmExtractor2013`` (multifeature) on a song.

    If ``y`` is not given, the audio is loaded from ``song`` at ``SAMPLERATE``.

    Returns
    -------
    (bpm, beat_times, confidence, estimates, beat_intervals) exactly as
    produced by the extractor.
    """
    if y is None:
        y, _ = librosa.load(song, sr=SAMPLERATE)

    tracker = essentia.standard.RhythmExtractor2013(method="multifeature")
    bpm, beat_times, confidence, estimates, beat_intervals = tracker(y)
    return bpm, beat_times, confidence, estimates, beat_intervals

In [37]:
def estimate(song_path, midi_path, qmidi_path, qmix_path, npy_path, ignore_sustain_pedal=False):
    """Beat-track a song, quantize its MIDI to the beat grid and save the results.

    Parameters
    ----------
    song_path : str
        Audio file used for beat tracking and for the stereo preview mix.
    midi_path : str
        MIDI file to quantize (with 2 steps per beat).
    qmidi_path : str
        Destination of the quantized MIDI.
    qmix_path : str
        Destination of the stereo mp3 mixing the synthesized quantized MIDI
        with the song (song scaled by 0.4).
    npy_path : str
        Directory that receives ``notes.npy``, ``beatstep.npy``,
        ``beattime.npy`` and ``beatinterval.npy``.
    ignore_sustain_pedal : bool
        Forwarded to ``midi_quantize_by_beats``.

    Returns
    -------
    None.  All output directories are created if they do not exist.
    """
    _, beat_times, _, _, essentia_beat_intervals = extract_rhythm(song_path)
    beat_times = np.array(beat_times)
    essentia_beat_intervals = np.array(essentia_beat_intervals)

    qns, discrete_notes, beat_steps_8th = midi_quantize_by_beats(
        midi_path, beat_times, 2, ignore_sustain_pedal=ignore_sustain_pedal
    )

    qpm = note_seq.note_sequence_to_pretty_midi(qns)
    # Control changes of the first instrument are discarded before writing.
    qpm.instruments[0].control_changes = []

    # Create the parent directories of the file outputs, as is already done
    # for npy_path below; otherwise the writes fail on a fresh output tree.
    for out_file in (qmidi_path, qmix_path):
        os.makedirs(os.path.dirname(out_file) or ".", exist_ok=True)

    qpm.write(qmidi_path)
    y, sr = librosa.load(song_path, sr=None)
    qpm_y = qpm.fluidsynth(sr)
    qmix = get_stereo(y, qpm_y, 0.4)
    # get_stereo returns (channels, samples); soundfile expects (samples, channels).
    sf.write(file=qmix_path, data=qmix.T, samplerate=sr, format="mp3")

    os.makedirs(npy_path, exist_ok=True)
    np.save(os.path.join(npy_path, "notes.npy"), discrete_notes)
    np.save(os.path.join(npy_path, "beatstep.npy"), beat_steps_8th)
    np.save(os.path.join(npy_path, "beattime.npy"), beat_times)
    np.save(os.path.join(npy_path, "beatinterval.npy"), essentia_beat_intervals)


In [39]:
def get_files_for_estimate(song_dir, midi_dir, qmidi_dir, qmix_dir, npy_dir):
    """List the aligned songs / MIDIs and derive the quantization output paths.

    Returns
    -------
    (song_files, midi_files, qmidi_files, qmix_files, npy_files)
        ``song_files`` / ``midi_files`` are the sorted ``*.mp3`` / ``*.mid``
        files of the input directories.  The remaining three lists are derived
        from each MIDI file's base name: the same name under ``qmidi_dir``,
        the name with its extension swapped for ``.mp3`` under ``qmix_dir``,
        and the name without extension under ``npy_dir``.
    """
    song_files = sorted(glob.glob(os.path.join(song_dir, "*.mp3")))
    midi_files = sorted(glob.glob(os.path.join(midi_dir, "*.mid")))

    midi_names = [os.path.basename(f) for f in midi_files]
    # splitext strips only the final extension; str.replace(".mid", ...) would
    # also rewrite a ".mid" that happens to occur inside the name.
    midi_stems = [os.path.splitext(name)[0] for name in midi_names]

    qmidi_files = [os.path.join(qmidi_dir, name) for name in midi_names]
    qmix_files = [os.path.join(qmix_dir, stem + ".mp3") for stem in midi_stems]
    npy_files = [os.path.join(npy_dir, stem) for stem in midi_stems]

    return song_files, midi_files, qmidi_files, qmix_files, npy_files

In [40]:
# Inputs are the aligned songs / MIDIs; the three remaining directories
# receive the quantized MIDI, the preview mix and the numpy arrays.
song_files, midi_files, qmidi_files, qmix_files, npy_files = get_files_for_estimate(
    song_dir="song2midi/dataset/output/song",
    midi_dir="song2midi/dataset/output/midi",
    qmidi_dir="song2midi/dataset/output/qmidi",
    qmix_dir="song2midi/dataset/output/qmix",
    npy_dir="song2midi/dataset/output/npy",
)

In [45]:
# One (song, midi, qmidi, qmix, npy_dir) tuple per item; the [:10] slice keeps
# only the first ten tuples.
# NOTE(review): songs and MIDIs are paired purely by sorted position, and zip
# stops at the shortest list - confirm the two directories stay in step.
file_to_estimate = list(zip(song_files, midi_files, qmidi_files, qmix_files, npy_files))[:10]

In [46]:
# Bare expression: echoes the selected tuples as the cell output for inspection.
file_to_estimate

[('song2midi/dataset/output/song/-7AMMFFiGLU.mp3',
  'song2midi/dataset/output/midi/-7AMMFFiGLU.3.mid',
  'song2midi/dataset/output/qmidi/-7AMMFFiGLU.3.mid',
  'song2midi/dataset/output/qmix/-7AMMFFiGLU.3.mp3',
  'song2midi/dataset/output/npy/-7AMMFFiGLU.3'),
 ('song2midi/dataset/output/song/-7jE5I1XaX8.mp3',
  'song2midi/dataset/output/midi/-7jE5I1XaX8.14.mid',
  'song2midi/dataset/output/qmidi/-7jE5I1XaX8.14.mid',
  'song2midi/dataset/output/qmix/-7jE5I1XaX8.14.mp3',
  'song2midi/dataset/output/npy/-7jE5I1XaX8.14'),
 ('song2midi/dataset/output/song/-CzU--_ZFus.mp3',
  'song2midi/dataset/output/midi/-CzU--_ZFus.11.mid',
  'song2midi/dataset/output/qmidi/-CzU--_ZFus.11.mid',
  'song2midi/dataset/output/qmix/-CzU--_ZFus.11.mp3',
  'song2midi/dataset/output/npy/-CzU--_ZFus.11'),
 ('song2midi/dataset/output/song/-HuYhO_KCbs.mp3',
  'song2midi/dataset/output/midi/-HuYhO_KCbs.13.mid',
  'song2midi/dataset/output/qmidi/-HuYhO_KCbs.13.mid',
  'song2midi/dataset/output/qmix/-HuYhO_KCbs.13.mp3'

In [44]:
# Quantize every selected item in 10 joblib workers; each tuple is unpacked
# into the five path arguments of estimate.
Parallel(n_jobs=10)(
    delayed(estimate)(*paths) for paths in tqdm(file_to_estimate)
)

  0%|          | 0/780 [00:00<?, ?it/s]

[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default
[   INFO   ] MusicExtractorSVM: no classifier models were configured by default


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [None]:

def main(meta_files, ignore_sustain_pedal):
    """Call ``estimate`` once per entry of ``meta_files`` in parallel workers.

    The number of joblib workers is half the CPU count reported by
    ``multiprocessing.cpu_count()``.  A tqdm bar shows the entry currently
    being handed out.

    NOTE(review): ``estimate`` as defined earlier in this file takes five path
    arguments (song_path, midi_path, qmidi_path, qmix_path, npy_path) plus
    ``ignore_sustain_pedal``.  The call below passes only
    ``(meta_file, ignore_sustain_pedal)``, which looks like it would raise a
    TypeError - confirm which ``estimate`` signature this entry point targets.
    """
    from tqdm import tqdm
    import multiprocessing
    from joblib import Parallel, delayed

    def files():
        # Generator wrapper so the progress bar can show the current file name.
        pbar = tqdm(meta_files)
        for meta_file in pbar:
            pbar.set_description(meta_file)
            yield meta_file

    Parallel(n_jobs=multiprocessing.cpu_count() // 2)(
        delayed(estimate)(meta_file, ignore_sustain_pedal) for meta_file in files()
    )


# Command-line entry point: collect the *.yaml files directly under data_dir
# and hand them to main().
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="bpm estimate using essentia")

    # NOTE(review): the help text describes {id}/{pop_filename.wav} entries,
    # while the glob below looks for "*.yaml" files directly under data_dir -
    # confirm which layout is expected.
    parser.add_argument(
        "data_dir",
        type=str,
        default=None,
        help="""directory contains {id}/{pop_filename.wav}
        """,
    )

    # Flag forwarded to main() as ignore_sustain_pedal; off unless given.
    parser.add_argument(
        "--ignore_sustain_pedal",
        default=False,
        action="store_true",
    )

    args = parser.parse_args()

    # Sorted so the processing order is deterministic.
    meta_files = sorted(glob.glob(args.data_dir + "/*.yaml"))
    print("meta ", len(meta_files))

    main(meta_files, args.ignore_sustain_pedal)