In [1]:
import librosa
import numpy as np
from scipy.fftpack import fft
from scipy.stats import rankdata
import matplotlib.pyplot as plt
import librosa.display
import IPython.display as ipd
%matplotlib inline

In [2]:
signals = ['audio_data/bass.wav', 'audio_data/drums.wav', 'audio_data/other.wav', 'audio_data/vocals.wav']

In [3]:
y, sr = librosa.load(signals[0], sr=44100)

In [4]:
y.shape

(1942186,)

In [34]:
a = np.mean(np.abs(librosa.core.stft(y, n_fft=1024, hop_length=512)), axis=0)
a.shape

(3794,)

In [49]:
def forget_factor(time_constant, sr): 
    '''
    Alpha signifies the forget factor for parameter autonomation equations
    '''
    return np.exp(-1 / (time_constant * sr))

def rms_squared(audio_signal, time_constant, sr):
    alpha = forget_factor(time_constant, sr)
    rms = librosa.feature.rms(audio_signal, frame_length=1024, hop_length=512)
    rms_squared = np.zeros(rms.shape)
    for i in range(1, rms.shape[1]):
        rms_squared[0,i] = alpha * rms[0,i-1]**2 + (1-alpha)*np.absolute(audio_signal[i]**2)
    return np.squeeze(rms_squared, axis=0)

In [58]:
rms_sq = rms_squared(y, 200, sr)

In [50]:
librosa.feature.rms(y, frame_length=1024, hop_length=512).max()

0.09677552

In [8]:
on = librosa.onset.onset_detect(y, sr)

In [11]:
on.shape

(137,)

In [10]:
librosa.frames_to_time(on, sr=sr).shape

(137,)

In [54]:
def peak(audio_signal, time_constant, sr):
    onset_env = librosa.onset.onset_strength(y=audio_signal, sr=sr)
    x = np.mean(np.abs(librosa.core.stft(y, n_fft=1024, hop_length=512)), axis=0)
    alpha = forget_factor(time_constant, sr)
    peaks_squared = np.zeros(onset_env.shape)
    for i in range(1, len(peaks)):
        peak_factor = alpha * onset_env[i-1]**2 + (1 - alpha) * np.absolute(x[i]**2)
        peaks_squared[i] = max(x[i]**2, peak_factor)
    return peaks_squared

In [59]:
peaks = peak(y, 200, sr)

In [90]:
def crest_factor(rms, peaks): 
    crest_factor = np.zeros(rms.shape)
    crest_factor[0] = 0
    crest_factor[1:] = peaks[1:] / rms[1:]
    return np.sqrt(crest_factor)

In [91]:
c = crest_factor(rms_sq, peaks)

In [92]:
c.min(), c.max()

(0.0, 7265.276652077735)

In [16]:
onset_env = librosa.onset.onset_strength(y=y, sr=sr)
peaks = librosa.util.peak_pick(onset_env, 3, 3, 3, 5, 0.5, 10)

In [17]:
onset_env.shape, peaks.shape, y.shape

((3794,), (119,), (1942186,))

In [54]:
onset_env[0:100]

array([0.        , 0.        , 0.        , 0.3112303 , 0.12708595,
       0.11057308, 0.11878169, 0.21374717, 0.07286657, 0.34539852,
       1.2407556 , 0.4264236 , 0.6987854 , 0.29487702, 0.06401458,
       0.09641525, 0.09450664, 0.09868456, 0.19187562, 0.16227448,
       0.21471027, 0.65236485, 0.67280674, 0.3411281 , 1.3900677 ,
       0.8845742 , 0.7931112 , 0.09421846, 0.13882941, 0.34839094,
       0.15218246, 0.21057165, 0.22302027, 0.48494613, 3.8679395 ,
       2.473797  , 0.10030654, 1.0575626 , 0.6013916 , 0.5892565 ,
       0.2505203 , 0.06827833, 0.14719911, 0.16026837, 0.10090128,
       0.13873114, 0.0743132 , 0.14091025, 0.14641246, 0.23194903,
       2.5353937 , 2.6091757 , 0.54646707, 0.4353831 , 0.6103169 ,
       0.50026417, 0.08913529, 0.81014276, 0.12816143, 0.17252366,
       0.20587367, 0.18358293, 0.42461815, 0.09630018, 0.5010067 ,
       0.1730793 , 0.1884503 , 0.28795668, 0.02362141, 0.51603144,
       0.04251099, 0.60725224, 0.02587058, 0.09988533, 0.83209

In [81]:
def half_wave_rectifier(x): return (x + np.absolute(x)) / 2

def spectral_flux(fft_signal):
    difference = np.zeros(fft_signal.shape)
    difference[:, 1:] = np.diff(np.absolute(fft_signal), axis=1)
    hwr = half_wave_rectifier(difference)
    spectral_flux = hwr / np.absolute(fft_signal)
    return np.sum(spectral_flux, axis=0)

In [65]:
fft = librosa.core.stft(y, n_fft=1024, hop_length=512)

In [89]:
spectral_flux(fft).max()

327.97626813599356