In [6]:
import numpy as np

import librosa
import pywt
from scipy.stats import skew

In [8]:
# Utility functions
def stats(feature):
    return {
        'mean': np.mean(feature), 
        'median': np.median(feature), 
        'std': np.std(feature), 
        'var': np.var(feature)
    }

def extra_stats(feature):
    return {
        'sb_energy': np.mean(np.abs(feature)),
        'skewness': skew(feature)
    }

In [9]:
WAV_FILE = '/Users/macbookretina/blues.00042.wav'
MP3_FILE = '/Users/macbookretina/073192.mp3'

In [10]:
# get sample rate
sample_rate = librosa.core.get_samplerate(MP3_FILE)
# load audio file
time_series, _ = librosa.core.load(MP3_FILE, sample_rate)
sample_rate, time_series

FileNotFoundError: [Errno 2] No such file or directory: '/Users/macbookretina/073192.mp3'

In [11]:
# get duration of audio file in seconds
duration = librosa.core.get_duration(time_series, sample_rate)
duration

NameError: name 'time_series' is not defined

In [16]:
# compute spectral centroid
c = librosa.feature.spectral_centroid(time_series, sample_rate)
stats_spec_centroid = stats(spec_centroid)

stats_spec_centroid

{'mean': 2301.609667012493,
 'median': 2282.553550256012,
 'std': 452.9883111350955,
 'var': 205198.41002502607}

In [18]:
# compute spectral roll-off
spec_rolloff = librosa.feature.spectral_rolloff(time_series, sample_rate)
stats_spec_rolloff = stats(spec_rolloff)

stats_spec_rolloff

{'mean': 5187.578453541908,
 'median': 5189.501953125,
 'std': 750.5816483033326,
 'var': 563372.8107697476}

In [19]:
# compute zcr
zcr = librosa.feature.zero_crossing_rate(time_series)
stats_zcr = stats(zcr)

stats_zcr

{'mean': 0.09782391966357308,
 'median': 0.09619140625,
 'std': 0.042732002354603726,
 'var': 0.0018260240252338587}

In [21]:
# compute spectral bandwidth
spec_bw = librosa.feature.spectral_bandwidth(time_series, sample_rate)
stats_spec_bw = stats(spec_bw[0])

stats_spec_bw

{'mean': 2367.518780747192,
 'median': 2348.460727484699,
 'std': 217.51588196941861,
 'var': 47313.158908934056}

In [25]:
# compute spectral contrast
spec_contrast = librosa.feature.spectral_contrast(time_series, sample_rate)
stats_spec_contrast_1 = stats(spec_contrast[0])
stats_spec_contrast_2 = stats(spec_contrast[1])
stats_spec_contrast_3 = stats(spec_contrast[2])
stats_spec_contrast_4 = stats(spec_contrast[3])
stats_spec_contrast_5 = stats(spec_contrast[4])
stats_spec_contrast_6 = stats(spec_contrast[5])
stats_spec_contrast_7 = stats(spec_contrast[6])

stats_spec_contrast_1, stats_spec_contrast_2

({'mean': 22.57474197706149,
  'median': 22.733432733492467,
  'std': 4.055359631237688,
  'var': 16.44594173867228},
 {'mean': 17.494974326528283,
  'median': 17.106014296604254,
  'std': 4.771121676552711,
  'var': 22.763602052471146})

In [26]:
# compute mfcc 1 - 13
mfcc = librosa.feature.mfcc(time_series, sample_rate, n_mfcc=13)
stat_mfcc_1 = stats(mfcc[0])
stat_mfcc_2 = stats(mfcc[1])
stat_mfcc_3 = stats(mfcc[2])
stat_mfcc_4 = stats(mfcc[3])
stat_mfcc_5 = stats(mfcc[4])
stat_mfcc_6 = stats(mfcc[5])
stat_mfcc_7 = stats(mfcc[6])
stat_mfcc_8 = stats(mfcc[7])
stat_mfcc_9 = stats(mfcc[8])
stat_mfcc_10 = stats(mfcc[9])
stat_mfcc_11 = stats(mfcc[10])
stat_mfcc_12 = stats(mfcc[11])
stat_mfcc_13 = stats(mfcc[12])

stat_mfcc_1, stat_mfcc_2

({'mean': -159.55165,
  'median': -155.67007,
  'std': 49.407143,
  'var': 2441.0657},
 {'mean': 69.79762, 'median': 69.48889, 'std': 15.514424, 'var': 240.69736})

In [27]:
# computer 3rd order Linear Prediction Coefficients (LPC)
lpc = librosa.lpc(time_series, 3)
lpc

array([ 1.        , -1.4211732 ,  0.94541854, -0.47950742], dtype=float32)

In [28]:
# compute log-mel / constant-Q transform
cqt = librosa.cqt(time_series, sample_rate)
cqt.shape, cqt

((84, 1293), array([[ 0.05184316-3.17597742e-04j, -0.00135106-5.03471017e-02j,
         -0.04695281+4.15343480e-03j, ...,  0.08611077+7.04651426e-02j,
          0.07986752-9.41078889e-02j, -0.09459323-8.91543892e-02j],
        [ 0.03035726-5.02627023e-05j,  0.00905793-2.79752047e-02j,
         -0.02086024-1.36097574e-02j, ...,  0.09887046-4.33131787e-03j,
          0.02860533-1.08688594e-01j, -0.098653  -6.87599562e-02j],
        [ 0.15558674+2.67811374e-03j,  0.12240589-8.95206061e-02j,
          0.04038024-1.39511295e-01j, ...,  0.08168999-1.33454319e-01j,
         -0.04398992-1.58197861e-01j, -0.15295645-7.13537292e-02j],
        ...,
        [-0.00479006-8.85093963e-04j,  0.07690022+4.54300980e-02j,
         -0.03788237+1.03927417e-01j, ..., -0.00405467-7.87371230e-03j,
          0.0103419 -1.00843408e-02j, -0.02450254+1.78092560e-02j],
        [-0.03649381-1.29193823e-04j,  0.05282222+4.43045898e-02j,
         -0.05058878+4.56024022e-02j, ...,  0.00267781-3.96175992e-03j,
        

In [31]:
# compute mel-spectogram
mel_spect = librosa.feature.melspectrogram(time_series, sample_rate)
# scaled_mel_spect = librosa.power_to_db(mel_spect, ref=np.max) # convert spectogram to decibels unit
mel_spect.shape, mel_spect

((128, 2586),
 array([[1.8680854e-05, 6.2002707e-02, 1.1151104e+00, ..., 1.7733978e-01,
         2.8754392e-01, 1.6142677e-01],
        [6.5987210e-06, 2.5891724e-01, 2.1808436e+00, ..., 8.4348541e-01,
         4.4655958e-01, 2.6228645e-01],
        [3.2318534e-05, 4.6489990e-01, 2.0627596e+00, ..., 3.9542034e+00,
         3.7528203e+00, 5.1367295e-01],
        ...,
        [1.1018846e-11, 3.4859121e-10, 1.1011653e-09, ..., 5.1674506e-06,
         4.3752367e-05, 2.4380066e-05],
        [2.7074251e-11, 6.7583716e-10, 1.4731847e-09, ..., 5.1306511e-06,
         4.3441189e-05, 2.4208157e-05],
        [1.3928958e-11, 3.4815550e-10, 1.1026061e-09, ..., 5.1061379e-06,
         4.3238626e-05, 2.4084664e-05]], dtype=float32))

In [32]:
# compute tempo & beats
tempo, beats = librosa.beat.beat_track(time_series, sample_rate)
tempo

147.65625

In [34]:
stats_beats = stats(beats)
stats_beats

{'mean': 1246.0422535211267,
 'median': 1250.0,
 'std': 723.0792416906572,
 'var': 522843.58976393583}

In [36]:
# compute timestamps from beats
beat_timestamps = librosa.frames_to_time(beats, sample_rate)
stats_beat_timestamps = stats(beat_timestamps)
stats_beat_timestamps

{'mean': 14.466522308453893,
 'median': 14.512471655328799,
 'std': 8.394933599673843,
 'var': 70.47491014293281}

In [46]:
# compute coefficients for Db4 at level 4 decomposition
db4_coeffs = pywt.wavedec(time_series, 'db4', level=4)
cA4, cD4, cD3, cD2, cD1 = db4_coeffs
stats_cA4 = {**stats(cA4), **extra_stats(cA4)}
stats_cD4 = {**stats(cD4), **extra_stats(cD4)}
stats_cD3 = {**stats(cD3), **extra_stats(cD3)}
stats_cD2 = {**stats(cD2), **extra_stats(cD2)}
stats_cD1 = {**stats(cD1), **extra_stats(cD1)}
stats_cA4, stats_cD4, stats_cD3

({'mean': -1.5523201e-06,
  'median': 0.0019135531,
  'std': 0.44200575,
  'var': 0.19536908,
  'sb_energy': 0.3519586,
  'skewness': -0.014327818527817726},
 {'mean': 0.00079981604,
  'median': 0.0020318378,
  'std': 0.18456157,
  'var': 0.03406297,
  'sb_energy': 0.14433607,
  'skewness': -0.05667450278997421},
 {'mean': 2.7548062e-05,
  'median': -0.00031280774,
  'std': 0.07951908,
  'var': 0.006323284,
  'sb_energy': 0.061881058,
  'skewness': 0.0289473794400692})

In [47]:
# compute coefficients for Db5 at level 4 decomposition
db5_coeffs = pywt.wavedec(time_series, 'db5', level=4)
cA4, cD4, cD3, cD2, cD1 = db5_coeffs
stats_cA4 = {**stats(cA4), **extra_stats(cA4)}
stats_cD4 = {**stats(cD4), **extra_stats(cD4)}
stats_cD3 = {**stats(cD3), **extra_stats(cD3)}
stats_cD2 = {**stats(cD2), **extra_stats(cD2)}
stats_cD1 = {**stats(cD1), **extra_stats(cD1)}
stats_cA4, stats_cD4, stats_cD3

({'mean': -1.5523243e-06,
  'median': 0.0017831312,
  'std': 0.44204047,
  'var': 0.19539979,
  'sb_energy': 0.35210282,
  'skewness': -0.01582862064242363},
 {'mean': -0.00032373413,
  'median': 0.0004893675,
  'std': 0.18565981,
  'var': 0.034469567,
  'sb_energy': 0.14545022,
  'skewness': -0.015256845392286777},
 {'mean': -7.136517e-05,
  'median': -0.00021495076,
  'std': 0.07873312,
  'var': 0.0061989035,
  'sb_energy': 0.061209664,
  'skewness': 4.694569724961184e-05})

In [48]:
# compute coefficients for Db8 at level 7 decomposition
db8_coeffs = pywt.wavedec(time_series, 'db4', level=7)
cA7, cD7, cD6, cD5, cD4, cD3, cD2, cD1 = db8_coeffs
stats_cA7 = {**stats(cA7), **extra_stats(cA7)}
stats_cD7 = {**stats(cD7), **extra_stats(cD7)}
stats_cD6 = {**stats(cD6), **extra_stats(cD6)}
stats_cD5 = {**stats(cD5), **extra_stats(cD5)}
stats_cD4 = {**stats(cD4), **extra_stats(cD4)}
stats_cD3 = {**stats(cD3), **extra_stats(cD3)}
stats_cD2 = {**stats(cD2), **extra_stats(cD2)}
stats_cD1 = {**stats(cD1), **extra_stats(cD1)}
stats_cA7, stats_cD7, stats_cD6

({'mean': -4.38949e-06,
  'median': 0.009905862,
  'std': 0.6425138,
  'var': 0.41282403,
  'sb_energy': 0.5101531,
  'skewness': -0.04386148229241371},
 {'mean': -0.003294279,
  'median': 0.005104491,
  'std': 0.69103664,
  'var': 0.4775316,
  'sb_energy': 0.53049105,
  'skewness': -0.13731873035430908},
 {'mean': 0.0009034596,
  'median': 0.0019393812,
  'std': 0.43469727,
  'var': 0.18896171,
  'sb_energy': 0.33991796,
  'skewness': 0.04403303563594818})