In [55]:
def frequency_spectrum(sample, max_frequency=4187):
    """
    Derive the one-sided frequency spectrum of a pydub.AudioSegment.

    Parameters
    ----------
    sample : pydub.AudioSegment (or any object exposing ``sample_width`` in
        bytes, ``frame_rate`` in Hz, the raw PCM bytes as ``_data`` and,
        optionally, ``channels``).
    max_frequency : upper bound (Hz) of the returned spectrum. The default,
        4187, sits just above the highest note of an 88-key piano
        (4186.009 Hz). A falsy value keeps the whole one-sided spectrum.

    Returns
    -------
    (freq_array, freq_magnitude) : two equally long arrays. ``freq_array``
        holds the frequency of each FFT bin and ``freq_magnitude`` how
        prevalent that frequency is in the sample, normalised to sum to 1.
        Both are empty for an empty sample; the magnitudes are all zero
        (not NaN) for a silent or constant sample.
    """
    # Convert pydub.AudioSegment to raw audio data
    # Copied from Jiaaro's answer on https://stackoverflow.com/questions/32373996/pydub-raw-audio-data
    bit_depth = sample.sample_width * 8
    array_type = get_array_type(bit_depth)
    samples = np.asarray(array.array(array_type, sample._data), dtype=np.float64)

    # pydub keeps multi-channel audio interleaved (L, R, L, R, ...). Feeding
    # the interleaved stream to the FFT scales every reported frequency down
    # by the channel count, so mix down to mono first.
    channels = getattr(sample, "channels", 1) or 1
    if channels > 1:
        usable = (len(samples) // channels) * channels
        samples = samples[:usable].reshape(-1, channels).mean(axis=1)

    n = len(samples)
    if n == 0:
        # Nothing to analyse; also avoids dividing by n below.
        return np.array([]), np.array([])

    # Compute FFT and frequency value for each index in FFT array
    # Inspired by Reveille's answer on https://stackoverflow.com/questions/53308674/audio-frequencies-in-python
    freq_array = np.arange(n) * (float(sample.frame_rate) / n)  # two sides frequency range
    freq_array = freq_array[: (n // 2)]  # one side frequency range

    samples = samples - np.average(samples)  # zero-centering
    freq_magnitude = fft(samples)
    freq_magnitude = freq_magnitude[: (n // 2)]  # one side

    if max_frequency:
        max_index = int(max_frequency * n / sample.frame_rate) + 1
        freq_array = freq_array[:max_index]
        freq_magnitude = freq_magnitude[:max_index]

    freq_magnitude = abs(freq_magnitude)
    total = np.sum(freq_magnitude)
    if total > 0:
        # Skip normalisation for an all-zero spectrum instead of producing NaN.
        freq_magnitude = freq_magnitude / total

    return freq_array, freq_magnitude

def detect_frequency(song, detected_onsets, detect_onset_before, detect_onset_after):
    """
    Estimate the dominant frequency following each detected onset.

    Parameters
    ----------
    song : sliceable audio (e.g. pydub.AudioSegment); ``song[a:b]`` must
        return the segment that is handed to ``frequency_spectrum``.
    detected_onsets : iterable of onset positions, in the same unit ``song``
        is sliced by (milliseconds for a pydub.AudioSegment).
    detect_onset_before : offset added to an onset to get the window start.
    detect_onset_after : offset added to an onset to get the window end.

    Returns
    -------
    list with one entry per onset: the frequency whose spectrum magnitude is
    largest inside that onset's window, or NaN when the window yields an
    empty spectrum (so the list stays aligned with ``detected_onsets``).
    """
    detected_freqs = []
    for start in detected_onsets:
        # Clamp at 0: a negative bound would be read as "relative to the end"
        # by Python-style slicing instead of "start of the recording".
        sample_from = max(0, int(start + detect_onset_before))
        sample_to = max(0, int(start + detect_onset_after))
        segment = song[sample_from:sample_to]
        freqs, freq_magnitudes = frequency_spectrum(segment)
        if len(freq_magnitudes) == 0:
            # np.argmax raises on an empty array; keep a placeholder instead.
            detected_freqs.append(float("nan"))
            continue
        detected_freqs.append(freqs[np.argmax(freq_magnitudes)])
    return detected_freqs

In [56]:
# Reference frequencies of the pitches used by the melody below
# (presumably Hz, matching the spectrum returned by frequency_spectrum).
c4 = 261.626
d4 = 293.665
e4 = 329.629
f4 = 349.228
g4 = 391.995
g3 = 195.998

# Note names of the reference melody, one source line per group of bars.
notes = (
    "E4 E4 F4 G4 G4 F4 E4 D4 C4 C4 D4 E4 E4 D4 D4 "
    "E4 E4 F4 G4 G4 F4 E4 D4 C4 C4 D4 E4 D4 C4 C4 "
    "D4 D4 E4 C4 D4 E4 F4 E4 C4 D4 E4 F4 E4 D4 C4 D4 G3 "
    "E4 E4 F4 G4 G4 F4 E4 D4 C4 C4 D4 E4 D4 C4 C4"
).split()

# Frequency of every note, looked up from its name so the two lists cannot drift apart.
standard_freqs = list(map(
    {"C4": c4, "D4": d4, "E4": e4, "F4": f4, "G4": g4, "G3": g3}.__getitem__,
    notes,
))

# Beat position at which each note starts.
beat_index = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13.5, 14,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29.5, 30,
    32, 33, 34, 35, 36, 37, 37.5, 38, 39, 40, 41, 41.5, 42, 43, 44, 45, 46,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61.5, 62,
]

# Length of each note in beats, aligned index-by-index with beat_index.
note_duration = (
    [1, 1, 1, 1] * 3 + [1.5, 0.5, 2]
    + [1, 1, 1, 1] * 3 + [1.5, 0.5, 2]
    + [1, 1, 1, 1] + [1, 0.5, 0.5, 1, 1] * 2 + [1, 1, 2]
    + [1, 1, 1, 1] * 3 + [1.5, 0.5, 2]
)

In [57]:
# get detected onsets

import array
import sys

import matplotlib.pyplot as plt
import numpy as np
import scipy
from pydub.utils import get_array_type
from scipy.fft import fft

#sys.path.append("/usr/local/lib/python3.7/site-packages")
sys.path.append("/usr/local/lib/python3.8/site-packages")
import essentia
# import essentia.standard as es
from essentia.standard import *

from pydub import AudioSegment
from pylab import plot, show, figure, imshow

def detect_onsets(file_name):
    """
    Load an audio file as mono and locate note onsets with the HFC
    onset-detection function.

    Prints the number of onsets found and the first one (or "no onset").

    Returns
    -------
    (audio, onsets_hfc) : the samples produced by MonoLoader and the onset
        positions returned by essentia's Onsets algorithm.
    """
    samples = MonoLoader(filename=file_name)()

    # Per-frame processing chain: window -> complex FFT -> (magnitude, phase) -> HFC value
    hfc_detector = OnsetDetection(method='hfc')
    window = Windowing(type = 'hann')
    spectrum = FFT()
    to_polar = CartesianToPolar()
    features = essentia.Pool()

    for frame in FrameGenerator(samples, frameSize = 1024, hopSize = 512):
        magnitude, phase = to_polar(spectrum(window(frame)))
        features.add('features.hfc', hfc_detector(magnitude, phase))

    # Turn the collected detection function into onset locations; it is passed
    # as a one-row matrix together with a single weight of 1.
    locate_onsets = Onsets()
    hfc_matrix = essentia.array([ features['features.hfc'] ])
    onsets_hfc = locate_onsets(hfc_matrix, [ 1 ])

    print("number of onsets:", len(onsets_hfc))
    if len(onsets_hfc) == 0:
        print("no onset")
    else:
        print("first onsets: ", onsets_hfc[0])

    return samples, onsets_hfc



# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
file_name = "/Users/linyaya/Desktop/ode1.m4a"

# detect_onsets returns the onsets as printed above (seconds, judging by the
# conversions below). Keep that array for plotting and expose a millisecond
# copy as `detected_onsets` for the pydub-based cells further down.
audio, onsets_sec = detect_onsets(file_name)
detected_onsets = onsets_sec * 1000

plt.rcParams['figure.figsize'] = (18, 6) # set plot sizes to something larger than default
plt.plot(audio)
# The waveform is plotted against sample index, so onsets are converted from
# seconds to samples (assumes MonoLoader's default 44100 Hz output rate).
# The loop uses the array returned above rather than `onsets_hfc`, which only
# exists inside detect_onsets.
for onset in onsets_sec:
    plt.axvline(x=onset*44100, color='red')
plt.title("Audio waveform and the estimated onset positions (HFC onset detection function)")
plt.show()

number of onsets: 67
first onsets:  0.023219954


In [58]:
from pydub import AudioSegment
# Load the same recording the onsets were detected on; reusing `file_name`
# keeps the two analyses from silently pointing at different files.
song = AudioSegment.from_file(file_name)

# Analysis window relative to each onset, in the unit `song` is sliced by
# (milliseconds for a pydub.AudioSegment).
DETECT_ONSET_BEFORE = 0
DETECT_ONSET_AFTER = 125
detected_freqs = detect_frequency(song, detected_onsets, DETECT_ONSET_BEFORE, DETECT_ONSET_AFTER)

In [59]:
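# TODO: compute `standard_onsets` here (presumably from `beat_index`, on the same time axis as `detected_onsets`); the plotting cell below raises NameError until it is defined.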


In [60]:
    import matplotlib.pyplot as plt

    # Overlay the reference melody and the detected melody: thin vertical lines
    # mark every onset, the two curves show frequency against onset position.
    plt.rcParams['figure.figsize'] = (18, 6) # set plot sizes to something larger than default

    # NOTE(review): `standard_onsets` is not assigned anywhere in the visible
    # notebook, so this cell raises NameError until it is defined.
    for x in standard_onsets:
        plt.axvline(x=x, color='g', linewidth=0.5, linestyle="-")

    for x in detected_onsets:
        plt.axvline(x=x, color='r', linewidth=0.5, linestyle="-")

    plt.plot(detected_onsets, detected_freqs, label="detected")
    plt.plot(standard_onsets, standard_freqs, label="standard")
    plt.xlabel("Onset position (ms)")  # detected_onsets are in ms; standard_onsets must use the same unit
    plt.ylabel("Frequency (Hz)")
    plt.title("Standard vs. detected note onsets and frequencies")
    plt.legend()
    plt.show()

NameError: name 'standard_onsets' is not defined