In [1]:
import mido

def midi_duration(hym: int):
    """Estimate the length of a hymn's MIDI file from its message delta times.

    Opens ``machinepianist_midi/output/hymnary{hym:03}.mid`` and walks every
    message of every track. Each message's delta time is scaled by the tempo
    in effect when the message is reached; the running total is finally
    divided by ``ticks_per_beat`` and rescaled by the last tempo seen.

    Things worth knowing when reading the result:

    * Delta times are accumulated across *all* tracks, so the total is the
      sum of the track lengths, not the length of the longest track.
    * The tempo is multiplied in per message and divided out at the end, so
      for a file whose tempo never changes the result equals
      ``summed_ticks / ticks_per_beat`` whatever the tempo value is.
    * NOTE(review): mido documents ``set_tempo`` values as microseconds per
      beat, whereas the fallback below is described as bpm — confirm the
      intended units before relying on files that change tempo.

    Args:
        hym: Hymn number; zero-padded to three digits to build the file name.

    Returns:
        The accumulated, tempo-normalised tick total described above.
    """
    midi_file = mido.MidiFile(f"machinepianist_midi/output/hymnary{hym:03}.mid")

    current_tempo = 120  # Default to 120 bpm
    scaled_tick_sum = 0
    # Tracks are visited one after another; the tempo carries over between them.
    for message in (m for trk in midi_file.tracks for m in trk):
        if message.type == "set_tempo":
            current_tempo = message.tempo
        scaled_tick_sum += message.time * current_tempo / 60

    return scaled_tick_sum / midi_file.ticks_per_beat * 60 / current_tempo

In [2]:
# Spot-check midi_duration on a single hymn (number 2).
midi_duration(2)

76.71875000000004

In [3]:
import pandas as pd

In [4]:
# One row per hymn id from 1 to 716, leaving out 649. The original row labels
# are kept, so the index jumps from 647 to 649.
# NOTE(review): presumably hymn 649 has no input files — confirm.
all_ids = pd.DataFrame({'id': range(1, 717)})
hym = all_ids.loc[all_ids['id'] != 649].copy()

In [5]:
# Derive the column from the raw per-hymn value in a single step, so that
# re-running this cell cannot halve the column a second time.
# NOTE(review): the division by 2 presumably compensates for midi_duration()
# adding up the ticks of every track — confirm against the MIDI files.
raw_midi_duration = hym['id'].apply(midi_duration)
hym['midi_duration'] = raw_midi_duration / 2

In [7]:
import soundfile as sf
import numpy as np

def duration_after_cut(hym, window_size=0.005, threshold=0.005):
    """Return the length in seconds of a rendered hymn with trailing silence cut.

    Reads ``pianoteq_mp/hymnary{hym:03}.wav``, smooths the absolute amplitude
    with a moving average and reports the time at which the last window whose
    average exceeds ``threshold`` ends.

    Args:
        hym: Hymn number; zero-padded to three digits to build the file name.
        window_size: Length of the moving-average window, in seconds.
        threshold: Smoothed amplitude above which audio counts as non-silent.

    Returns:
        Seconds from the start of the file to the end of the last non-silent
        window, or ``0.0`` when the file is empty or never exceeds
        ``threshold``.
    """
    signal, sample_rate = sf.read(f"pianoteq_mp/hymnary{hym:03}.wav")

    magnitude = np.abs(signal)
    # sf.read returns a 1-D array for mono files; only average across
    # channels when a channel axis is actually present.
    if magnitude.ndim > 1:
        magnitude = magnitude.mean(axis=1)
    if magnitude.size == 0:
        return 0.0

    # Keep the window between one sample and the whole signal so the kernel
    # is never empty and 'valid' mode always yields at least one value.
    window_length = min(max(int(window_size * sample_rate), 1), magnitude.size)
    kernel = np.ones(window_length) / window_length
    envelope = np.convolve(magnitude, kernel, mode='valid')

    above = envelope > threshold
    # argmax of an all-False mask is 0, which would otherwise be read as
    # "non-silent right up to the end" for a completely silent file.
    if not above.any():
        return 0.0

    # The offset of the last loud window, counted from the end of the
    # envelope, equals the number of trailing samples to discard.
    cutoff_idx = len(magnitude) - np.argmax(above[::-1])
    return cutoff_idx / sample_rate

In [8]:
# Spot-check duration_after_cut on a single hymn (number 2).
duration_after_cut(2)

38.83832199546485

In [9]:
# Trimmed audio length for every hymn id in the table (reads one WAV file per row).
hym['duration_after_cut'] = hym['id'].apply(duration_after_cut)

In [10]:
# Relative gap between the two estimates, as a fraction of the MIDI-derived
# duration: positive when the MIDI value is longer than the trimmed audio.
duration_gap = hym['midi_duration'] - hym['duration_after_cut']
hym['diff'] = duration_gap / hym['midi_duration']

In [11]:
# Ten hymns where the MIDI-derived duration most exceeds the trimmed audio length.
hym.sort_values(by='diff', ascending=False).iloc[:10]

Unnamed: 0,id,midi_duration,duration_after_cut,diff
199,200,23.882812,22.911293,0.040679
579,580,26.067708,25.085805,0.037667
412,413,23.888021,23.023673,0.036183
709,710,24.195312,23.4922,0.02906
582,583,23.864583,23.223832,0.026849
584,585,47.869792,46.793243,0.022489
159,160,47.877604,46.971247,0.018931
490,491,32.877604,32.5,0.011485
606,607,63.986979,63.415465,0.008932
564,565,48.854167,48.621429,0.004764


In [12]:
# Ten hymns where the trimmed audio most exceeds the MIDI-derived duration.
hym.sort_values(by='diff').iloc[:10]

Unnamed: 0,id,midi_duration,duration_after_cut,diff
80,81,11.921875,16.746077,-0.404651
261,262,16.007812,22.127415,-0.382288
266,267,12.005208,16.40644,-0.36661
607,608,15.505208,20.670771,-0.33315
243,244,15.864583,21.145329,-0.332864
667,668,18.223958,24.195692,-0.327686
157,158,16.489583,21.61415,-0.310776
664,665,19.0,24.748776,-0.302567
458,459,16.242188,21.128095,-0.300816
207,208,15.882813,20.437619,-0.286776
