In [None]:


def remove_silences(audio_t, ref=1.0, top_db=75):
    """Return ``audio_t`` with its silent stretches cut out.

    Non-silent intervals are located with ``librosa.effects.split`` on the
    transposed input and the matching slices along the first axis of
    ``audio_t`` are joined back together in order.

    Args:
        audio_t: Audio array, sliced along its first axis.
            NOTE(review): presumably samples x channels (it is transposed
            before being handed to librosa) -- confirm against callers.
        ref: Reference amplitude forwarded to ``librosa.effects.split``.
        top_db: Threshold in dB below ``ref`` forwarded to
            ``librosa.effects.split``.

    Returns:
        A new array holding only the non-silent parts. When no non-silent
        interval is found, an empty 1-D array is returned.
    """
    # asarray instead of a copy: the input is only read, and concatenate
    # below always allocates a fresh output array.
    audio_t = np.asarray(audio_t)
    intervals = librosa.effects.split(audio_t.T, ref=ref, top_db=top_db)

    if len(intervals) == 0:
        # Keep the historical "empty 1-D array" result for all-silent input.
        return np.asarray([])

    # One vectorised join rather than appending sample rows one at a time.
    return np.concatenate([audio_t[start:end] for start, end in intervals])

def get_mean_peak(audio, sr=44100, percentile=75, del_silences=False):
    """Estimate the mean level of the loudest onset peaks in ``audio``.

    For every channel, onsets are detected with aubio's ``'hfc'`` method on
    non-overlapping 1024-sample frames. For each onset, the sample with the
    largest absolute value between that onset and the next one (or the end
    of the channel for the last onset) is taken as the peak and converted
    with ``amp_to_db``. Only peaks strictly above the ``percentile``-th
    percentile of the channel are kept; if that leaves nothing, all peaks of
    the channel are used.

    Args:
        audio: Array indexed as samples x channels.
        sr: Sample rate handed to the onset detector.
        percentile: Percentile of the per-channel peak values used as the
            lower cut-off (default 75, i.e. only the top quartile is kept).
        del_silences: When True, ``remove_silences`` is applied first.

    Returns:
        ``[mean_peak, mean_std]`` -- the per-channel mean and standard
        deviation of the kept peak values, each averaged over channels.
        ``None`` if any channel yields no onsets, or if there are no
        channels to analyse.
    """
    window_size = 2**10  # FFT size
    hop_size = window_size  # frames do not overlap

    audio_ = remove_silences(audio) if del_silences else audio

    peak = []
    std = []
    for ch in range(audio_.shape[-1]):
        x = np.ascontiguousarray(audio_[:, ch])

        # A fresh detector per channel so no state carries over.
        onset_func = aubio.onset('hfc', buf_size=window_size,
                                 hop_size=hop_size, samplerate=sr)
        frames = librosa.util.frame(x, frame_length=window_size,
                                    hop_length=hop_size)

        # NOTE(review): aubio normally expects frames in its own float type
        # (float32 by default) -- confirm the dtype callers pass in.
        onset_times = []
        for frame in frames.T:
            if onset_func(frame):
                onset_times.append(onset_func.get_last())

        if not onset_times:
            return None

        # Each onset owns the span up to the next onset; the last one runs
        # to the end of the channel.
        bounds = onset_times + [len(x)]
        p_value = []
        for start, end in zip(bounds[:-1], bounds[1:]):
            loudest = start + np.argmax(np.abs(x[start:end]))
            p_value.append(amp_to_db(np.abs(x[loudest])))

        # The cut-off does not depend on the element being tested, so
        # compute it once instead of once per peak.
        threshold = np.percentile(p_value, percentile)
        loud = [p for p in p_value if p > threshold]
        kept = loud or p_value  # fall back to all peaks if none pass

        peak.append(np.mean(kept))
        std.append(np.std(kept))

    if not peak:
        # No channels at all (e.g. everything was removed as silence):
        # report "no peaks" instead of averaging an empty list into NaN.
        return None
    return [np.mean(peak), np.mean(std)]

def get_feature(x):
    x = pyln.normalize.peak(x, -10)