In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import os
import librosa as lb
import glob
from pprint import pprint as pp
import scipy
import matplotlib.pyplot as plt
# plt.rcParams["figure.figsize"] = (14,5)

from  more_itertools import unique_everseen
from collections import OrderedDict
# import soundfile as sf
# import difflib
import statistics
import itertools
from scipy.stats import skew
from scipy.stats import kurtosis

In [7]:
data ={}
name_list =[]
path = './songs/'

files = [f for f in glob.glob(path + "*.wav", recursive=True)]

for file in files:
    name = file.split('/')[-1].split('.')[0]
    y, sr = lb.load(file, sr=44100)
    data[name] = {'y' : y, 'sr': sr}

In [8]:
def get_features_mean(song, sr, hop_length, n_fft):
    # try:
    print('extracting features...')
    y_harmonic, y_percussive = lb.effects.hpss(song)  # split song into harmonic and percussive parts
    stft_harmonic = lb.core.stft(y_harmonic, n_fft=n_fft, hop_length=hop_length)  # Compute power spectrogram.
    stft_percussive = lb.core.stft(y_percussive, n_fft=n_fft, hop_length=hop_length)  # Compute power spectrogram.

    rmsH = np.sqrt(np.mean(np.abs(lb.feature.rms(S=stft_harmonic)) ** 2, axis=0, keepdims=True))
    rmsH_a = np.mean(rmsH)
    rmsH_std = np.std(rmsH)
    rmsH_skew = skew(np.mean(rmsH, axis=0))
    rmsH_kurtosis = kurtosis(np.mean(rmsH, axis=0), fisher=True, bias=True)

    rmsP = np.sqrt(np.mean(np.abs(lb.feature.rms(S=stft_percussive)) ** 2, axis=0, keepdims=True))
    rmsP_a = np.mean(rmsP)
    rmsP_std = np.std(rmsP)
    rmsP_skew = skew(np.mean(rmsP, axis=0))
    rmsP_kurtosis = kurtosis(np.mean(rmsP, axis=0), fisher=True, bias=True)

    centroid = lb.feature.spectral_centroid(song, sr, n_fft=n_fft,
                                            hop_length=hop_length)  # Compute the spectral centroid.
    centroid_a = np.mean(centroid)
    centroid_std = np.std(centroid)

    bw = lb.feature.spectral_bandwidth(song, sr, n_fft=n_fft,
                                       hop_length=hop_length)  # Compute p’th-order spectral bandwidth:
    bw_a = np.mean(bw)
    bw_std = np.std(bw)

    contrast = lb.feature.spectral_contrast(song, sr, n_fft=n_fft,
                                            hop_length=hop_length)  # Compute spectral contrast [R16]
    contrast_a = np.mean(contrast)
    contrast_std = np.std(contrast)

    polyfeat = lb.feature.poly_features(y_harmonic, sr, n_fft=n_fft,
                                        hop_length=hop_length)  # Get coefficients of fitting an nth-order polynomial to the columns of a spectrogram.
    polyfeat_a = np.mean(polyfeat[0])
    polyfeat_std = np.std(polyfeat[0])

    tonnetz = lb.feature.tonnetz(lb.effects.harmonic(y_harmonic),
                                 sr)  # Computes the tonal centroid features (tonnetz), following the method of [R17].
    tonnetz_a = np.mean(tonnetz)
    tonnetz_std = np.std(tonnetz)

    zcr = lb.feature.zero_crossing_rate(song, sr, hop_length=hop_length)  # zero crossing rate
    zcr_a = np.mean(zcr)
    zcr_std = np.std(zcr)

    onset_env = lb.onset.onset_strength(y_percussive, sr=sr)
    onset_a = np.mean(onset_env)
    onset_std = np.std(onset_env)

    D = lb.stft(song)
    times = lb.frames_to_time(np.arange(D.shape[1]))  # not returned, but could be if you want to plot things as a time series

    bpm, beats = lb.beat.beat_track(y=y_percussive, sr=sr, onset_envelope=onset_env, units='time')
    beats_a = np.mean(beats)
    beats_std = np.std(beats)

    features_dict = OrderedDict({'rmseP_a': rmsP_a, 'rmseP_std': rmsP_std, 'rmseH_a': rmsH_a, 'rmseH_std': rmsH_std,
                                 'centroid_a': centroid_a, 'centroid_std': centroid_std, 'bw_a': bw_a, 'bw_std': bw_std,
                                 'contrast_a': contrast_a, 'contrast_std': contrast_std, 'polyfeat_a': polyfeat_a,
                                 'polyfeat_std': polyfeat_std, 'tonnetz_a': tonnetz_a, 'tonnetz_std': tonnetz_std,
                                 'zcr_a': zcr_a, 'zcr_std': zcr_std, 'onset_a': onset_a, 'onset_std': onset_std,
                                 'bpm': bpm, 'rmseP_skew': rmsP_skew, 'rmseP_kurtosis': rmsP_kurtosis,
                                 'rmseH_skew': rmsH_skew, 'rmseH_kurtosis': rmsH_kurtosis, 'beats_a':beats_a,
                                 'beats_std':beats_std})

    # combine_features = {**features_dict, **bands_dict}
    print('features extracted successfully')
    return features_dict

In [9]:
data.keys()

dict_keys(['0_house', '0_punk', '0_classic', '0_hardstyle'])

In [10]:
res = get_features_mean(data['0_house']['y'], data['0_house']['sr'], hop_length=512, n_fft=2048)
print('finish')

extracting features...
features extracted successfully
finish


In [11]:
res

OrderedDict([('rmseP_a', 0.03493496345047862),
             ('rmseP_std', 0.028009404957561445),
             ('rmseH_a', 0.1302421034021926),
             ('rmseH_std', 0.0900995213616632),
             ('centroid_a', 2640.2656095645175),
             ('centroid_std', 1526.926171527276),
             ('bw_a', 3113.6905856423964),
             ('bw_std', 1365.5514240291257),
             ('contrast_a', 18.815819675422635),
             ('contrast_std', 11.103575918384696),
             ('polyfeat_a', -0.00019204193177112765),
             ('polyfeat_std', 9.556297070831763e-05),
             ('tonnetz_a', 0.022138128162436318),
             ('tonnetz_std', 0.12800464669845535),
             ('zcr_a', 0.04860909178911409),
             ('zcr_std', 0.03019266369063947),
             ('onset_a', 1.6842921),
             ('onset_std', 2.4423685),
             ('bpm', 123.046875),
             ('rmseP_skew', 1.4477788254163841),
             ('rmseP_kurtosis', 2.6896722608796226),
         