In [1]:
import numpy as np
import sys

import librosa
from importlib import reload
from itertools import islice

def window(seq, n=2):
    "Returns a sliding window (of width n) over data from the iterable"
    "   s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...                   "
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result    
    for elem in it:
        result = result[1:] + (elem,)
        yield result

In [2]:
# reload(librosa)
# librosa.onset.test()

In [3]:
audio_path = "/Applications/osu!.app/Contents/Resources/drive_c/osu!/Songs/13019 Daisuke Achiwa - BASARA/BASARA.mp3"

y, sr = librosa.load(audio_path,sr=None)

len(y), sr

(9107712, 44100)

In [4]:
for precise_window_size in [2,3,4]:
    print('precise_window_size: ', precise_window_size)
    for precise_hop_length in [4,8,16,32,64,128]:
        print('precise_hop_length: ', precise_hop_length)
        hop_length = 512
        onsets = librosa.onset.onset_detect(y=y, sr=sr, hop_length = hop_length, 
                                            precise=True, precise_hop_length=precise_hop_length,
                                            precise_window_size = precise_window_size,
                                            units='time')
        print('onsets: ', len(onsets))
        windowsize=1
        timeIntervals = [(w[windowsize]-w[0])/windowsize for w in window(onsets, windowsize+1)]
        quantiles = [40,45,50,55,60]
        print(60/np.percentile(timeIntervals, q=quantiles))

precise_window_size:  2
precise_hop_length:  4
onsets:  811
[ 271.55172414  261.21981559  258.90410959  258.29754002  257.79423227]
precise_hop_length:  8
onsets:  811
[ 271.55172414  261.22497334  258.90410959  258.3984375   257.79423227]
precise_hop_length:  16
onsets:  811
[ 271.55172414  261.23528947  258.8028169   258.3984375   257.59345794]
precise_hop_length:  32
onsets:  811
[ 271.10655738  260.84384858  259.20846395  258.3984375   257.59345794]
precise_hop_length:  64
onsets:  811
[ 271.99835526  260.02358491  258.3984375   258.3984375   258.3984375 ]
precise_hop_length:  128
onsets:  811
[ 271.99835526  261.6693038   258.3984375   258.3984375   258.3984375 ]
precise_window_size:  3
precise_hop_length:  4
onsets:  811
[ 265.4494382   262.80765181  260.79243051  259.61538462  258.25720309]
precise_hop_length:  8
onsets:  811
[ 265.4494382   262.70849881  260.84384858  259.61538462  258.31771321]
precise_hop_length:  16
onsets:  811
[ 265.4494382   262.5         260.84384858  25

In [5]:
librosa.frames_to_time(onsets, sr=sr)

array([  3.1230839 ,   3.36689342,   3.4829932 ,   3.58748299,
         3.69197279,   3.81968254,   3.93578231,   4.05188209,
         4.21442177,   4.28408163,   4.38857143,   4.5046712 ,
         4.73687075,   4.96907029,   5.21287982,   5.32897959,
         5.44507937,   5.56117914,   5.67727891,   5.79337868,
         5.89786848,   6.11845805,   6.23455782,   6.3506576 ,
         6.58285714,   6.69895692,   6.81505669,   7.04725624,
         7.17496599,   7.29106576,   7.51165533,   7.63936508,
         7.75546485,   7.99927438,   8.23147392,   8.46367347,
         8.57977324,   8.66104308,   8.92807256,   9.03256236,
         9.14866213,   9.2647619 ,   9.3692517 ,   9.61306122,
         9.729161  ,   9.82204082,   9.92653061,  10.05424036,
        10.29804989,  10.50702948,  10.97142857,  11.2152381 ,
        11.43582766,  11.66802721,  11.90022676,  12.1324263 ,
        12.36462585,  12.58521542,  12.71292517,  12.81741497,
        13.06122449,  13.28181406,  13.51401361,  13.74

In [6]:
samplesStarts = librosa.core.frames_to_samples(onsets, hop_length=hop_length)
samplesStarts

array([137728, 148480, 153600, 158208, 162816, 168448, 173568, 178688,
       185856, 188928, 193536, 198656, 208896, 219136, 229888, 235008,
       240128, 245248, 250368, 255488, 260096, 269824, 274944, 280064,
       290304, 295424, 300544, 310784, 316416, 321536, 331264, 336896,
       342016, 352768, 363008, 373248, 378368, 381952, 393728, 398336,
       403456, 408576, 413184, 423936, 429056, 433152, 437760, 443392,
       454144, 463360, 483840, 494592, 504320, 514560, 524800, 535040,
       545280, 555008, 560640, 565248, 576000, 585728, 595968, 606208,
       616448, 626176, 636928, 642048, 646656, 658432, 663040, 667136,
       677376, 682496, 687616, 698368, 708096, 728576, 758784, 769024,
       779264, 789504, 809984, 819712, 830464, 840192, 845312, 849920,
       860160, 870400, 881152])

In [7]:
onset_envelope = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
print("onset_envelope: ",len(onset_envelope))

onset_envelope:  1723


In [33]:
samplesStarts = librosa.core.frames_to_samples(onsets-2, hop_length=hop_length)
[print((end-start)//hop_length,np.argmax
    (librosa.onset.onset_strength(y[range(start, end)],
        sr=sr,hop_length=512))
)
for (start, end) in window(samplesStarts)]


21 3
10 3
9 3
9 3
11 9
10 3
10 3
14 3
6 3
9 3
10 3
20 3
20 3
21 3
10 3
10 3
10 10
10 8
10 3
9 3
19 3
10 3
10 3
20 3
10 3
10 3
20 3
11 3
10 3
19 3
11 3
10 5
21 3
20 3
20 3
10 3
7 3
23 3
9 8
10 10
10 3
9 3
21 3
10 3
8 3
9 3
11 10
21 3
18 3
40 3
21 3
19 4
20 3
20 3
20 3
20 12
19 5
11 3
9 3
21 3
19 4
20 3
20 4
20 3
19 12
21 3
10 3
9 9
23 3
9 8
8 3
20 3
10 4
10 3
21 12
19 10
40 3
59 42
20 13
20 3
20 13
40 22
19 3
21 13
19 9
10 3
9 3
20 3
20 3
21 3


[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None]

In [9]:
onsets = librosa.onset.onset_detect(y=y, sr=sr, hop_length = hop_length)
samplesStarts = librosa.core.frames_to_samples(onsets, hop_length=hop_length)
for startSample in samplesStarts:
    strength = librosa.onset.onset_strength(y[range(startSample-hop_length*3, startSample+hop_length*3)],sr=sr,hop_length=8)
    print(
        len(strength),
        np.argmax(strength),
        (np.argmax(strength)-(len(strength)-1)//2),
        (np.argmax(strength)-(len(strength)-1)//2)/(len(strength)-1)*6
    )

385 147 -45 -0.3515625
385 152 -40 -0.3125
385 159 -33 -0.2578125
385 210 18 0.140625
385 165 -27 -0.2109375
385 154 -38 -0.296875
385 191 -1 -0.0078125
385 188 -4 -0.03125
385 156 -36 -0.28125
385 169 -23 -0.1796875
385 179 -13 -0.1015625
385 154 -38 -0.296875
385 209 17 0.1328125
385 170 -22 -0.171875
385 182 -10 -0.078125
385 154 -38 -0.296875
385 143 -49 -0.3828125
385 156 -36 -0.28125
385 154 -38 -0.296875
385 155 -37 -0.2890625
385 188 -4 -0.03125
385 189 -3 -0.0234375
385 172 -20 -0.15625
385 208 16 0.125
385 160 -32 -0.25
385 184 -8 -0.0625
385 167 -25 -0.1953125
385 175 -17 -0.1328125
385 192 0 0.0
385 182 -10 -0.078125
385 163 -29 -0.2265625
385 240 48 0.375
385 158 -34 -0.265625
385 209 17 0.1328125
385 200 8 0.0625
385 166 -26 -0.203125
385 152 -40 -0.3125
385 166 -26 -0.203125
385 182 -10 -0.078125
385 156 -36 -0.28125
385 150 -42 -0.328125
385 236 44 0.34375
385 169 -23 -0.1796875
385 180 -12 -0.09375
385 160 -32 -0.25
385 151 -41 -0.3203125
385 173 -19 -0.1484375
385 152

In [9]:
np.argmax(np.array([1,2,3,2])++1)

2

In [10]:
len([a for a in librosa.onset.onset_strength(y=y[],sr=sr,hop_length=hop_length) if a>0])

SyntaxError: invalid syntax (<ipython-input-10-f3dc456319ef>, line 1)

In [14]:
%config InlineBackend.figure_format='retina'

# matplotlib for displaying the output
import matplotlib.pyplot as plt
import matplotlib.style as ms
ms.use('seaborn-muted')


# and IPython.display for audio output
import IPython.display

# And the display module for visualization
import librosa.display

In [13]:
D = librosa.stft(y)
times = librosa.frames_to_time(np.arange(D.shape[1]))
plt.figure()
ax1 = plt.subplot(2, 1, 1)
librosa.display.specshow(librosa.amplitude_to_db(D, ref=np.max),
                          y_axis='log', x_axis='time')
plt.title('Power spectrogram')

plt.subplot(2, 1, 2, sharex=ax1)

onset_env = librosa.onset.onset_strength(y=y, sr=sr)
plt.plot(times, 2 + onset_env / onset_env.max(), alpha=0.8, label='Mean (mel)')


onset_env = librosa.onset.onset_strength(y=y, sr=sr,
                                         aggregate=np.median,
                                         fmax=8000, n_mels=256)
plt.plot(times, 1 + onset_env / onset_env.max(), alpha=0.8,
         label='Median (custom mel)')


onset_env = librosa.onset.onset_strength(y=y, sr=sr,
                                         feature=librosa.cqt)
plt.plot(times, onset_env / onset_env.max(), alpha=0.8,
         label='Mean (CQT)')
plt.legend(frameon=True, framealpha=0.75)
plt.ylabel('Normalized strength')
plt.yticks([])
plt.axis('tight')
plt.tight_layout()

In [75]:
sr//8

5512