In [60]:
import os
from collections import deque
import numpy as np
import aubio

In [61]:
notes = ['C', 'C#/Db', 'D', 'D#/Eb', 'E', 'F', 'F#/Gb', 'G', 'G#/Ab', 'A', 'A#/Bb', 'B']
def midi2char(midi_key: int) -> str:
    """Return the note name followed by its octave number for a MIDI key.

    The octave numbering places C0 at key 12 and C1 at key 24, so key 60 is
    'C4' and keys 0..11 belong to octave -1.

    Args:
        midi_key: integer MIDI key number.

    Returns:
        The entry of ``notes`` for the key's pitch class with the octave
        number appended, e.g. ``'D3'`` for 50 or ``'C#/Db-1'`` for 1.
    """
    # c1 = 24 (32.7hz)  -> c0 = 12 (16.4hz)
    # divmod floors, so the octave stays consistent with the wrapped pitch
    # class for keys below 12 (int(x / 12) truncated toward zero and reported
    # e.g. key 11 as 'B0' instead of 'B-1').
    octave, pitch_class = divmod(midi_key - 12, 12)
    return notes[pitch_class] + str(octave)

In [63]:
%%time
# Not referenced in this cell; presumably per-position weights for the
# 8-entry pitch history below -- TODO confirm against the cells that use it.
p_weights = {0: 0.1, 1: 0.2, 2: 0.3, 3: 0.4, 4: 0.75, 5: 1.0, 6: 1.2, 7: 1.3}

# Walk the recordings folder and, for every .wav file, print the pitch that is
# measured roughly 100 ms after each detected onset together with the last
# eight pitch readings.
folder = r'C:\Users\Silvan\Desktop\tony game samle recordings'
for subdir, dirs, files in os.walk(folder):
    for file in files:
        path = os.path.join(subdir, file)
        if not path.endswith('.wav'):
            continue

        src = aubio.source(path, hop_size=512)
        try:
            samples_per_100ms = src.samplerate/10
            o = aubio.onset(method='energy', buf_size=1024, hop_size=512, samplerate=src.samplerate)
            o.set_threshold(0.9) # default 0.3 -- testing showed that 0.9, 1.0 and 1.1 yield the same top results
            # o.set_minioi_ms(75) # default 50

            p = aubio.pitch(method="yinfft", buf_size=4096, hop_size=512, samplerate=src.samplerate)
            p.set_unit('midi')
            # p.set_tolerance() 0.15 yin 0.85 yinfft

            # `file` is already the base name; splitting the path on '\\'
            # only worked with Windows separators.
            print(file, src.samplerate, src.channels, src.duration)

            pitches = deque(maxlen=8)  # rolling history of the last 8 rounded pitch readings
            total_read = 0             # number of samples consumed so far
            # Start far enough in the past that the reporting window below
            # cannot match before a real onset has been seen.
            last_onset = -2*p.buf_size
            while True:
                samples, read = src()
                total_read += read

                # Feed the hop to the onset detector; a non-zero first element
                # marks an onset, whose sample position is remembered.
                onset = o(samples)
                if onset[0] != 0:
                    last_onset = o.get_last()
                    # print(f'{total_read}: onset at {round(o.get_last_ms())}ms, sample {o.get_last()} with value: {round(onset[0],3)}',
                    #       f'-- descriptor: {round(o.get_descriptor(),3)}/{round(o.get_thresholded_descriptor(),3)}')

                midi = int(round(p(samples)[0]))
                pitches.append(midi)

                # Report in the single hop for which
                #   last_onset + samples_per_100ms - hop < total_read < last_onset + samples_per_100ms
                # i.e. about 100 ms after the onset. Readings with midi <= 0 are
                # skipped (presumably "no pitch detected" -- TODO confirm).
                before_window_end = total_read - samples_per_100ms < last_onset
                after_window_start = total_read + p.hop_size - samples_per_100ms > last_onset
                if before_window_end and midi > 0 and after_window_start:
                    print(f'{total_read} ({round(total_read/src.samplerate*1000)}ms): pitch {midi2char(midi)} ({midi}) with {list(pitches)}')
                    #print(f'{total_read}: pitch {midi} with {round((total_read - last_onset) / src.samplerate * 1000, 1)}ms delay')

                # A short read means the end of the file was reached.
                if read < src.hop_size:
                    break
        finally:
            # Release the file handle even if analysis of this file fails,
            # instead of leaving it to garbage collection.
            src.close()


d3-c2 octave 1.wav 48000 2 743040
88576 (1845ms): pitch D3 (50) with [50, 50, 50, 50, 50, 50, 50, 50]
139776 (2912ms): pitch C3 (48) with [50, 48, 48, 48, 48, 48, 48, 48]
187904 (3915ms): pitch B2 (47) with [48, 48, 47, 47, 47, 47, 47, 47]
239104 (4981ms): pitch A2 (45) with [47, 45, 45, 45, 45, 45, 45, 45]
291840 (6080ms): pitch G2 (43) with [45, 52, 52, 43, 43, 43, 43, 43]
342528 (7136ms): pitch F2 (41) with [43, 43, 41, 41, 41, 41, 41, 41]
393728 (8203ms): pitch E2 (40) with [41, 29, 40, 40, 40, 40, 40, 40]
447488 (9323ms): pitch D2 (38) with [40, 40, 28, 38, 38, 38, 38, 38]
499712 (10411ms): pitch C3 (48) with [38, 38, 42, 36, 36, 36, 48, 48]
559104 (11648ms): pitch B1 (35) with [36, 36, 65, 65, 40, 35, 35, 35]
619520 (12907ms): pitch C2 (36) with [35, 35, 36, 36, 36, 36, 36, 36]
d3-c2octave 2.wav 48000 2 747648
97280 (2027ms): pitch D3 (50) with [50, 50, 50, 50, 50, 50, 50, 50]
150528 (3136ms): pitch C3 (48) with [50, 48, 48, 48, 48, 48, 48, 48]
202240 (4213ms): pitch B2 (47) with

staccato 2.wav 48000 2 709632
81408 (1696ms): pitch D6 (86) with [86, 86, 86, 86, 86, 86, 86, 86]
128512 (2677ms): pitch C6 (84) with [84, 84, 84, 84, 84, 84, 84, 84]
173056 (3605ms): pitch B5 (83) with [83, 83, 83, 83, 83, 83, 83, 83]
217088 (4523ms): pitch D6 (86) with [86, 86, 86, 86, 86, 86, 86, 86]
260096 (5419ms): pitch D5 (74) with [74, 74, 74, 74, 74, 74, 74, 74]
264704 (5515ms): pitch D5 (74) with [74, 74, 74, 74, 74, 74, 74, 74]
299520 (6240ms): pitch C5 (72) with [72, 72, 72, 72, 72, 72, 72, 72]
343040 (7147ms): pitch B4 (71) with [71, 71, 71, 71, 71, 71, 71, 71]
390144 (8128ms): pitch D4 (62) with [62, 62, 62, 62, 62, 62, 62, 62]
433152 (9024ms): pitch C4 (60) with [60, 60, 60, 60, 60, 60, 60, 60]
479232 (9984ms): pitch B3 (59) with [59, 59, 59, 59, 59, 59, 59, 59]
524800 (10933ms): pitch D3 (50) with [50, 50, 50, 50, 50, 50, 50, 50]
572416 (11925ms): pitch C3 (48) with [48, 48, 48, 48, 48, 48, 48, 48]
625152 (13024ms): pitch B2 (47) with [47, 47, 47, 47, 47, 47, 47, 47]
Wa