In [1]:
import librosa
import numpy as np
import pandas as pd
import os
from os import listdir
from os.path import isfile, join
from natsort import natsorted
from tqdm import tqdm

from scipy.stats import kurtosis, skew

In [2]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Static features extraction

In [3]:
def extract_static_feature(path_chorus, path_VA):

    feature_set = pd.DataFrame()
    count = 1 # counter to cicle in VA values
        
    # fo every .mp3 file get a set of features
    for mp3_file in tqdm(natsorted(os.listdir(path_chorus))):
        if mp3_file.endswith(".mp3"):
            
            feature = {}
            
            file_name = os.path.basename(mp3_file)
            id = file_name.split('.')[0] # music_ID
            
            VA_std = pd.read_csv(path_VA + '/static_annotations_std.csv', header = None)
            VA_mean = pd.read_csv(path_VA + '/static_annotations.csv', header = None)
            
            # there are some .mp3 files with no VA data, so exclude them
            if id != VA_std.iloc[count][0]:
                continue
            
            # save VA values for the mp3 file
            v_std = VA_std.iloc[count][2]
            a_std = VA_std.iloc[count][1]
            v_mean = VA_mean.iloc[count][2]
            a_mean = VA_mean.iloc[count][1]
            count += 1
            
            # load the mp3 file thorugh librosa and get the STFT
            y,sr = librosa.load(path_chorus + '/' + mp3_file)
            S = np.abs(librosa.stft(y))
            
            # extract librosa features
            tempo, beats = librosa.beat.beat_track(y=y, sr=sr)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
            chroma_cq = librosa.feature.chroma_cqt(y=y, sr=sr)
            chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
            melspectrogram = librosa.feature.melspectrogram(y=y, sr=sr)
            cent = librosa.feature.spectral_centroid(y=y, sr=sr)
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
            contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
            poly_features = librosa.feature.poly_features(S=S, sr=sr)
            tonnetz = librosa.feature.tonnetz(y=y, sr=sr)
            zcr = librosa.feature.zero_crossing_rate(y)
            harmonic = librosa.effects.harmonic(y)
            percussive = librosa.effects.percussive(y)
            onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
            frames_to_time = librosa.frames_to_time(onset_frames[:20], sr=sr)
            
            # extract mfcc coefficients splitting in n_mfcc bands 
            n_mfcc = 12
            mfcc_ = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfcc_delta = librosa.feature.delta(mfcc_)
            
            # save features in a DataFrame
            feature['music_ID'] = id
            feature['tempo'] = tempo
            feature['total_beats'] = sum(beats)
            feature['average_beats'] = np.average(beats)
            feature['chroma_stft_mean'] = np.mean(chroma_stft)
            feature['chroma_stft_std'] = np.std(chroma_stft)
            feature['chroma_stft_var'] = np.var(chroma_stft)
            #feature['chroma_stft_kurt'] = kurtosis(chroma_stft.reshape(-1,))
            #feature['chroma_stft_skew'] = skew(chroma_stft.reshape(-1,))
            feature['chroma_cq_mean'] = np.mean(chroma_cq)
            feature['chroma_cq_std'] = np.std(chroma_cq)
            feature['chroma_cq_var'] = np.var(chroma_cq)
            #feature['chroma_cq_kurt'] = kurtosis(chroma_cq.reshape(-1,))
            #feature['chroma_cq_skew'] = skew(chroma_cq.reshape(-1,))
            feature['chroma_cens_mean'] = np.mean(chroma_cens)
            feature['chroma_cens_std'] = np.std(chroma_cens)
            feature['chroma_cens_var'] = np.var(chroma_cens)
            #feature['chroma_cens_kurt'] = kurtosis(chroma_cens.reshape(-1,))
            #feature['chroma_cens_skew'] = skew(chroma_cens.reshape(-1,))
            feature['melspectrogram_mean'] = np.mean(melspectrogram)
            feature['melspectrogram_std'] = np.std(melspectrogram)
            feature['melspectrogram_var'] = np.var(melspectrogram)
            #feature['melspectrogram_kurt'] = kurtosis(melspectrogram.reshape(-1,))
            #feature['melspectrogram_skew'] = skew(melspectrogram.reshape(-1,))
            feature['cent_mean'] = np.mean(cent)
            feature['cent_std'] = np.std(cent)
            feature['cent_var'] = np.var(cent)
            #feature['cent_kurt'] = kurtosis(cent.reshape(-1,))
            #feature['cent_skew'] = skew(cent.reshape(-1,))
            feature['spec_bw_mean'] = np.mean(spec_bw)
            feature['spec_bw_std'] = np.std(spec_bw)
            feature['spec_bw_var'] = np.var(spec_bw)
            #feature['spec_bw_kurt'] = kurtosis(spec_bw.reshape(-1,))
            #feature['spec_bw_skew'] = skew(spec_bw.reshape(-1,))            
            feature['contrast_mean'] = np.mean(contrast)
            feature['contrast_std'] = np.std(contrast)
            feature['contrast_var'] = np.var(contrast)
            #feature['contrast_kurt'] = kurtosis(contrast.reshape(-1,))
            #feature['contrast_skew'] = skew(contrast.reshape(-1,))
            feature['rolloff_mean'] = np.mean(rolloff)
            feature['rolloff_std'] = np.std(rolloff)
            feature['rolloff_var'] = np.var(rolloff)
            #feature['rolloff_kurt'] = kurtosis(rolloff.reshape(-1,))
            #feature['rolloff_skew'] = skew(rolloff.reshape(-1,))
            feature['poly_mean'] = np.mean(poly_features)
            feature['poly_std'] = np.std(poly_features)
            feature['poly_var'] = np.var(poly_features)
            #feature['poly_kurt'] = kurtosis(poly_features.reshape(-1,))
            #feature['poly_skew'] = skew(poly_features.reshape(-1,))
            feature['tonnetz_mean'] = np.mean(tonnetz)
            feature['tonnetz_std'] = np.std(tonnetz)
            feature['tonnetz_var'] = np.var(tonnetz)
            #feature['tonnetz_kurt'] = kurtosis(tonnetz.reshape(-1,))
            #feature['tonnetz_skew'] = skew(tonnetz.reshape(-1,))
            feature['zcr_mean'] = np.mean(zcr)
            feature['zcr_std'] = np.std(zcr)
            feature['zcr_var'] = np.var(zcr)
            #feature['zcr_kurt'] = kurtosis(zcr.reshape(-1,))
            #feature['zcr_skew'] = skew(zcr.reshape(-1,))
            feature['harm_mean'] = np.mean(harmonic)
            feature['harm_std'] = np.std(harmonic)
            feature['harm_var'] = np.var(harmonic)
            #feature['harm_kurt'] = kurtosis(harmonic.reshape(-1,))
            #feature['harm_skew'] = skew(harmonic.reshape(-1,))
            feature['perc_mean'] = np.mean(percussive)
            feature['perc_std'] = np.std(percussive)
            feature['perc_var'] = np.var(percussive)
            #feature['perc_kurt'] = kurtosis(percussive.reshape(-1,))
            #feature['perc_skew'] = skew(percussive.reshape(-1,))
            feature['frame_mean'] = np.mean(frames_to_time)
            feature['frame_std'] = np.std(frames_to_time)
            feature['frame_var'] = np.var(frames_to_time)
            #feature['frame_kurt'] = kurtosis(frames_to_time.reshape(-1,))
            #feature['frame_skew'] = skew(frames_to_time.reshape(-1,))
            
            for i in range(n_mfcc):
                mfccCoef = mfcc_[i,:]
                feature[f'meanMFCC[{i}]'] = np.mean(mfccCoef)
                feature[f'stdMFCC[{i}]'] = np.std(mfccCoef)
                feature[f'medianMFCC[{i}]'] = np.median(mfccCoef)
                feature[f'kurtMFCC[{i}]'] = kurtosis(mfccCoef.reshape(-1,))
                feature[f'skewMFCC[{i}]'] = skew(mfccCoef.reshape(-1,))
                
            feature['mfcc_delta_mean'] = np.mean(mfcc_delta)
            feature['mfcc_delta_std'] = np.std(mfcc_delta)
            feature['mfcc_delta_var'] = np.var(mfcc_delta)
            
            feature_set = feature_set.append(pd.DataFrame(data=feature, index=[0]))
            
    return feature_set

In [4]:
#path_chorus = '/Users/gioelepozzi/Desktop/MasterThesis/code/features_extraction/data'
path_chorus = '/Users/gioelepozzi/Desktop/data/chorus'

path_VA = '/Users/gioelepozzi/Desktop/data/annotations'

static = extract_static_feature(path_chorus, path_VA)

100%|██████████| 794/794 [1:33:26<00:00,  7.06s/it]


In [5]:
# Converting Dataframe into CSV and JSON file

static.to_csv('static_features.csv', index=False)
#static.to_json('Emotion_features.json')

# Dynamic features extraction

In [3]:
def window_with_overlap(a, window, stride):
    nrows = ((a.size-window)//stride)+1
    n = a.strides[0]
    # create a view into the array a with the given shape and strides
    return np.lib.stride_tricks.as_strided(a, shape=(nrows,window), strides=(stride*n,n))

In [4]:
def extract_dynamic_feature(path_chorus, path_VA, window, stride):

    feature_set = pd.DataFrame()
    count = 1
        
    for mp3_file in tqdm(natsorted(os.listdir(path_chorus))):
        if mp3_file.endswith(".mp3"):
            
            file_name = os.path.basename(mp3_file)
            id = file_name.split('.')[0]
            
            VA_std = pd.read_csv(path_VA + '/static_annotations_std.csv', header = None)
            VA_mean = pd.read_csv(path_VA + '/static_annotations.csv', header = None)
            
            if id != VA_std.iloc[count][0]:
                continue
            
            v_std = VA_std.iloc[count][2]
            a_std = VA_std.iloc[count][1]
            v_mean = VA_mean.iloc[count][2]
            a_mean = VA_mean.iloc[count][1]
            count = count + 1
            
            y,sr = librosa.load(path_chorus + '/' + mp3_file)
            
            times = np.arange(len(y))/sr
            frames = window_with_overlap(y, window, stride)
            time_frames = window_with_overlap(times, window, stride)

            for fidx, frame in enumerate(frames):
                
                feature = {}
                
                frame_time = fidx*0.5+1
                times = time_frames[fidx]
                S = np.abs(librosa.stft(frame))
                                
                tempo, beats = librosa.beat.beat_track(y=frame, sr=sr)
                chroma_stft = librosa.feature.chroma_stft(y=frame, sr=sr)
                chroma_cq = librosa.feature.chroma_cqt(y=frame, sr=sr)
                chroma_cens = librosa.feature.chroma_cens(y=frame, sr=sr)
                melspectrogram = librosa.feature.melspectrogram(y=frame, sr=sr)
                cent = librosa.feature.spectral_centroid(y=frame, sr=sr)
                spec_bw = librosa.feature.spectral_bandwidth(y=frame, sr=sr)
                contrast = librosa.feature.spectral_contrast(S=S, sr=sr)
                rolloff = librosa.feature.spectral_rolloff(y=frame, sr=sr)
                poly_features = librosa.feature.poly_features(S=S, sr=sr)
                tonnetz = librosa.feature.tonnetz(y=frame, sr=sr)
                zcr = librosa.feature.zero_crossing_rate(frame)
                harmonic = librosa.effects.harmonic(frame)
                percussive = librosa.effects.percussive(frame)
                onset_frames = librosa.onset.onset_detect(y=frame, sr=sr)
                frames_to_time = librosa.frames_to_time(onset_frames[:20], sr=sr)

                n_mfcc = 12
                mfcc_ = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
                mfcc_delta = librosa.feature.delta(mfcc_)

                feature['music_ID'] = id
                feature['frame'] = frame_time
                feature['tempo'] = tempo
                feature['total_beats'] = sum(beats)
                feature['average_beats'] = np.average(beats)
                feature['chroma_stft_mean'] = np.mean(chroma_stft)
                feature['chroma_stft_std'] = np.std(chroma_stft)
                feature['chroma_stft_var'] = np.var(chroma_stft)
                #feature['chroma_stft_kurt'] = kurtosis(chroma_stft.reshape(-1,))
                #feature['chroma_stft_skew'] = skew(chroma_stft.reshape(-1,))
                feature['chroma_cq_mean'] = np.mean(chroma_cq)
                feature['chroma_cq_std'] = np.std(chroma_cq)
                feature['chroma_cq_var'] = np.var(chroma_cq)
                #feature['chroma_cq_kurt'] = kurtosis(chroma_cq.reshape(-1,))
                #feature['chroma_cq_skew'] = skew(chroma_cq.reshape(-1,))
                feature['chroma_cens_mean'] = np.mean(chroma_cens)
                feature['chroma_cens_std'] = np.std(chroma_cens)
                feature['chroma_cens_var'] = np.var(chroma_cens)
                #feature['chroma_cens_kurt'] = kurtosis(chroma_cens.reshape(-1,))
                #feature['chroma_cens_skew'] = skew(chroma_cens.reshape(-1,))
                feature['melspectrogram_mean'] = np.mean(melspectrogram)
                feature['melspectrogram_std'] = np.std(melspectrogram)
                feature['melspectrogram_var'] = np.var(melspectrogram)
                #feature['melspectrogram_kurt'] = kurtosis(melspectrogram.reshape(-1,))
                #feature['melspectrogram_skew'] = skew(melspectrogram.reshape(-1,))
                feature['cent_mean'] = np.mean(cent)
                feature['cent_std'] = np.std(cent)
                feature['cent_var'] = np.var(cent)
                #feature['cent_kurt'] = kurtosis(cent.reshape(-1,))
                #feature['cent_skew'] = skew(cent.reshape(-1,))
                feature['spec_bw_mean'] = np.mean(spec_bw)
                feature['spec_bw_std'] = np.std(spec_bw)
                feature['spec_bw_var'] = np.var(spec_bw)
                #feature['spec_bw_kurt'] = kurtosis(spec_bw.reshape(-1,))
                #feature['spec_bw_skew'] = skew(spec_bw.reshape(-1,))            
                feature['contrast_mean'] = np.mean(contrast)
                feature['contrast_std'] = np.std(contrast)
                feature['contrast_var'] = np.var(contrast)
                #feature['contrast_kurt'] = kurtosis(contrast.reshape(-1,))
                #feature['contrast_skew'] = skew(contrast.reshape(-1,))
                feature['rolloff_mean'] = np.mean(rolloff)
                feature['rolloff_std'] = np.std(rolloff)
                feature['rolloff_var'] = np.var(rolloff)
                #feature['rolloff_kurt'] = kurtosis(rolloff.reshape(-1,))
                #feature['rolloff_skew'] = skew(rolloff.reshape(-1,))
                feature['poly_mean'] = np.mean(poly_features)
                feature['poly_std'] = np.std(poly_features)
                feature['poly_var'] = np.var(poly_features)
                #feature['poly_kurt'] = kurtosis(poly_features.reshape(-1,))
                #feature['poly_skew'] = skew(poly_features.reshape(-1,))
                feature['tonnetz_mean'] = np.mean(tonnetz)
                feature['tonnetz_std'] = np.std(tonnetz)
                feature['tonnetz_var'] = np.var(tonnetz)
                #feature['tonnetz_kurt'] = kurtosis(tonnetz.reshape(-1,))
                #feature['tonnetz_skew'] = skew(tonnetz.reshape(-1,))
                feature['zcr_mean'] = np.mean(zcr)
                feature['zcr_std'] = np.std(zcr)
                feature['zcr_var'] = np.var(zcr)
                #feature['zcr_kurt'] = kurtosis(zcr.reshape(-1,))
                #feature['zcr_skew'] = skew(zcr.reshape(-1,))
                feature['harm_mean'] = np.mean(harmonic)
                feature['harm_std'] = np.std(harmonic)
                feature['harm_var'] = np.var(harmonic)
                #feature['harm_kurt'] = kurtosis(harmonic.reshape(-1,))
                #feature['harm_skew'] = skew(harmonic.reshape(-1,))
                feature['perc_mean'] = np.mean(percussive)
                feature['perc_std'] = np.std(percussive)
                feature['perc_var'] = np.var(percussive)
                #feature['perc_kurt'] = kurtosis(percussive.reshape(-1,))
                #feature['perc_skew'] = skew(percussive.reshape(-1,))
                feature['frame_mean'] = np.mean(frames_to_time)
                feature['frame_std'] = np.std(frames_to_time)
                feature['frame_var'] = np.var(frames_to_time)
                #feature['frame_kurt'] = kurtosis(frames_to_time.reshape(-1,))
                #feature['frame_skew'] = skew(frames_to_time.reshape(-1,))
                
                for i in range(n_mfcc):
                    mfccCoef = mfcc_[i,:]
                    feature[f'meanMFCC[{i}]'] = np.mean(mfccCoef)
                    feature[f'stdMFCC[{i}]'] = np.std(mfccCoef)
                    feature[f'medianMFCC[{i}]'] = np.median(mfccCoef)
                    feature[f'kurtMFCC[{i}]'] = kurtosis(mfccCoef.reshape(-1,))
                    feature[f'skewMFCC[{i}]'] = skew(mfccCoef.reshape(-1,))
                
                feature['mfcc_delta_mean'] = np.mean(mfcc_delta)
                feature['mfcc_delta_std'] = np.std(mfcc_delta)
                feature['mfcc_delta_var'] = np.var(mfcc_delta)
                
                feature_set = feature_set.append(pd.DataFrame(data=feature, index=[0]))
                
    return feature_set

In [5]:
#path_chorus = '/Users/gioelepozzi/Desktop/MasterThesis/code/features_extraction/data'
path_chorus = '/Users/gioelepozzi/Desktop/data/chorus'

path_VA = '/Users/gioelepozzi/Desktop/data/annotations'

dynamic = extract_dynamic_feature(path_chorus, path_VA, window=22050, stride=11025)

  avg = a.mean(axis)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  keepdims=keepdims)
  arrmean, rcount, out=arrmean, casting='unsafe', subok=False)
  ret = ret.dtype.type(ret / rcount)
  **kwargs)
100%|██████████| 794/794 [5:34:55<00:00, 25.31s/it]


In [6]:
# Converting Dataframe into CSV and JSON file

dynamic.to_csv('dynamic_features.csv', index=False)
#feature_set.to_json('dynamic_features.json')