# PMEmo Dataset - Preprocessing

# Done on local computer

## Import relevant libraries

In [1]:
import numpy as np
import ast
import pandas as pd
import matplotlib.pyplot as plt

import os

Set file directory variables

In [2]:
import sys
sys.path.insert(1, '../../utils')
from paths import *

## Verify file count

Check that the number of .csv files in the './data/PMEmo/PMEmo2019/features' directory matches the number number of .mp3 files in the './data/PMEmo/PMEmo2019/chorus' directory

In [3]:
def extract_filenames(directory, file_format):
    raw_file_list = os.listdir(directory)
    raw_count = len(raw_file_list)
    print(f"Total files: {raw_count} \nFiles: {raw_file_list}\n")
    print("Processing files...")
    processed_list = []
    for filename in raw_file_list:
        if file_format in filename:
            filename = filename.replace(f".{file_format}", "")
            processed_list.append(filename)
    
    processed_count = len(processed_list)
    print(f"Total files: {processed_count} \nFiles: {processed_list}")
    return processed_list, processed_count

In [4]:
features_list, features_count = extract_filenames(get_pmemo_path('chorus'), "csv")

Total files: 794 
Files: ['752.mp3', '746.mp3', '791.mp3', '550.mp3', '236.mp3', '222.mp3', '544.mp3', '578.mp3', '587.mp3', '424.mp3', '342.mp3', '356.mp3', '418.mp3', '381.mp3', '395.mp3', '140.mp3', '626.mp3', '632.mp3', '96.mp3', '168.mp3', '829.mp3', '69.mp3', '197.mp3', '801.mp3', '815.mp3', '55.mp3', '7.mp3', '54.mp3', '814.mp3', '6.mp3', '40.mp3', '68.mp3', '828.mp3', '196.mp3', '97.mp3', '169.mp3', '83.mp3', '141.mp3', '627.mp3', '394.mp3', '419.mp3', '431.mp3', '425.mp3', '343.mp3', '592.mp3', '579.mp3', '551.mp3', '237.mp3', '974.mp3', '784.mp3', '948.mp3', '790.mp3', '747.mp3', '753.mp3', '745.mp3', '989.mp3', '751.mp3', '779.mp3', '786.mp3', '792.mp3', '962.mp3', '221.mp3', '235.mp3', '209.mp3', '584.mp3', '590.mp3', '433.mp3', '355.mp3', '341.mp3', '427.mp3', '396.mp3', '382.mp3', '157.mp3', '631.mp3', '625.mp3', '143.mp3', '95.mp3', '81.mp3', '180.mp3', '4.mp3', '816.mp3', '56.mp3', '42.mp3', '43.mp3', '803.mp3', '5.mp3', '57.mp3', '181.mp3', '80.mp3', '94.mp3', '624.mp3

## Process the annotation dataframes

Read static song-level annotations with song_id from 1 to 2000 (2013 and 2014 only)

In [5]:
df_annotations = pd.read_csv(get_pmemo_path('annotations/static_annotations.csv'))
display(df_annotations)
print(df_annotations.shape)

Unnamed: 0,musicId,Arousal(mean),Valence(mean)
0,1,0.4000,0.5750
1,4,0.2625,0.2875
2,5,0.1500,0.2000
3,6,0.5125,0.3500
4,7,0.7000,0.7250
...,...,...,...
762,993,0.8625,0.7625
763,996,0.8750,0.5625
764,997,0.7125,0.6625
765,999,0.8750,0.7750


(767, 3)


In [6]:
df_annotations = df_annotations.rename(columns={'musicId': 'song_id'})
df_annotations

Unnamed: 0,song_id,Arousal(mean),Valence(mean)
0,1,0.4000,0.5750
1,4,0.2625,0.2875
2,5,0.1500,0.2000
3,6,0.5125,0.3500
4,7,0.7000,0.7250
...,...,...,...
762,993,0.8625,0.7625
763,996,0.8750,0.5625
764,997,0.7125,0.6625
765,999,0.8750,0.7750


In [7]:
df_annotations.describe()

Unnamed: 0,song_id,Arousal(mean),Valence(mean)
count,767.0,767.0,767.0
mean,493.770535,0.622355,0.596581
std,285.958166,0.184812,0.161986
min,1.0,0.0875,0.125
25%,238.5,0.5,0.4875
50%,495.0,0.65,0.625
75%,739.5,0.7625,0.725
max,1000.0,0.975,0.9125


Map the valence and arousal values in the dataset, ranging from 1 to 9, to values ranging from -1 to 1, to follow convention

In [8]:
def map_va_value(value):
  old_min = 0
  old_max = 1

  new_min = -1
  new_max = 1

  mapped_value = ((value - old_min) * (new_max - new_min) / (old_max - old_min)) + new_min
  return mapped_value

# Test the function
for i in range(1, 10):
    print(f"Original value: {i}, Mapped value: {map_va_value(i)}")

Original value: 1, Mapped value: 1.0
Original value: 2, Mapped value: 3.0
Original value: 3, Mapped value: 5.0
Original value: 4, Mapped value: 7.0
Original value: 5, Mapped value: 9.0
Original value: 6, Mapped value: 11.0
Original value: 7, Mapped value: 13.0
Original value: 8, Mapped value: 15.0
Original value: 9, Mapped value: 17.0


In [9]:
df_annotations['valence_mean_mapped'] = df_annotations['Valence(mean)'].apply(map_va_value)
df_annotations['arousal_mean_mapped'] = df_annotations['Arousal(mean)'].apply(map_va_value)
df_annotations = df_annotations.drop(['Valence(mean)', 'Arousal(mean)',], axis=1)

In [10]:
df_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,1,0.150,-0.200
1,4,-0.425,-0.475
2,5,-0.600,-0.700
3,6,-0.300,0.025
4,7,0.450,0.400
...,...,...,...
762,993,0.525,0.725
763,996,0.125,0.750
764,997,0.325,0.425
765,999,0.550,0.750


Export the dataframe

In [11]:
df_annotations.to_csv(get_pmemo_path('processed/annotations/pmemo_static_annotations.csv'), index=False)

Import the dataframe

In [12]:
df_annotations = pd.read_csv(get_pmemo_path('processed/annotations/pmemo_static_annotations.csv'))
df_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,1,0.150,-0.200
1,4,-0.425,-0.475
2,5,-0.600,-0.700
3,6,-0.300,0.025
4,7,0.450,0.400
...,...,...,...
762,993,0.525,0.725
763,996,0.125,0.750
764,997,0.325,0.425
765,999,0.550,0.750


## Process the Essentia features datasets

Define function to check for any non-float/int columns and deal with them

In [13]:
df_essentia_features = pd.read_csv(get_pmemo_path('processed/features/essentia_features.csv'))
df_essentia_features

Unnamed: 0.1,Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.dmean,lowlevel.barkbands_crest.dmean2,lowlevel.barkbands_crest.dvar,lowlevel.barkbands_crest.dvar2,lowlevel.barkbands_crest.max,lowlevel.barkbands_crest.mean,lowlevel.barkbands_crest.median,...,metadata.version.essentia_git_sha,metadata.version.extractor,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,0,1,0.970365,2.386482,3.594657,6.740679,14.444414,25.484644,11.411017,10.707484,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
1,1,4,0.938222,2.208391,3.674144,5.080004,13.383171,24.118259,14.166083,14.635095,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F#,major,Bb,major,Bb,major,Bb,major
2,2,5,0.930816,2.393671,3.616534,9.753744,20.003422,26.715527,18.050819,18.184444,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,A,minor,D,minor,D,minor,D,minor
3,3,6,0.954429,1.709188,2.437101,4.900128,8.548371,24.963417,17.043421,18.244125,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,E,minor,C,major,C,major,C,major
4,4,7,0.970629,3.049778,4.716875,10.767557,23.465727,25.671312,10.193353,8.912777,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,789,993,0.900841,2.860650,4.524299,7.774405,19.612854,24.813204,11.216174,10.689340,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F,minor,Ab,major,Ab,major,Ab,major
790,790,996,0.923707,2.780330,4.445892,9.029428,20.219875,25.388409,10.426634,8.341273,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,C#,minor,C#,minor,C#,minor,C#,minor
791,791,997,0.913454,2.712144,4.339530,7.626777,20.528904,25.967821,11.849738,11.506009,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,Eb,major,Ab,major,Ab,major,Ab,major
792,792,999,0.955402,2.754516,3.943231,5.839927,11.535942,25.688959,11.403626,9.805414,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,B,minor,B,minor,B,minor,B,minor


Drop the 'Unnamed: 0	' column

In [14]:
# drop Unnamed:0 column
df_essentia_features = df_essentia_features[df_essentia_features.columns[1:]]
df_essentia_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.dmean,lowlevel.barkbands_crest.dmean2,lowlevel.barkbands_crest.dvar,lowlevel.barkbands_crest.dvar2,lowlevel.barkbands_crest.max,lowlevel.barkbands_crest.mean,lowlevel.barkbands_crest.median,lowlevel.barkbands_crest.min,...,metadata.version.essentia_git_sha,metadata.version.extractor,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,0.970365,2.386482,3.594657,6.740679,14.444414,25.484644,11.411017,10.707484,2.498400,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
1,4,0.938222,2.208391,3.674144,5.080004,13.383171,24.118259,14.166083,14.635095,4.408802,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F#,major,Bb,major,Bb,major,Bb,major
2,5,0.930816,2.393671,3.616534,9.753744,20.003422,26.715527,18.050819,18.184444,2.864332,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,A,minor,D,minor,D,minor,D,minor
3,6,0.954429,1.709188,2.437101,4.900128,8.548371,24.963417,17.043421,18.244125,2.253785,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,E,minor,C,major,C,major,C,major
4,7,0.970629,3.049778,4.716875,10.767557,23.465727,25.671312,10.193353,8.912777,2.406512,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,0.900841,2.860650,4.524299,7.774405,19.612854,24.813204,11.216174,10.689340,2.157647,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F,minor,Ab,major,Ab,major,Ab,major
790,996,0.923707,2.780330,4.445892,9.029428,20.219875,25.388409,10.426634,8.341273,2.059642,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,C#,minor,C#,minor,C#,minor,C#,minor
791,997,0.913454,2.712144,4.339530,7.626777,20.528904,25.967821,11.849738,11.506009,2.347258,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,Eb,major,Ab,major,Ab,major,Ab,major
792,999,0.955402,2.754516,3.943231,5.839927,11.535942,25.688959,11.403626,9.805414,2.655332,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,B,minor,B,minor,B,minor,B,minor


See what features are available

In [15]:
print(df_essentia_features.columns.to_list())

['song_id', 'lowlevel.average_loudness', 'lowlevel.barkbands_crest.dmean', 'lowlevel.barkbands_crest.dmean2', 'lowlevel.barkbands_crest.dvar', 'lowlevel.barkbands_crest.dvar2', 'lowlevel.barkbands_crest.max', 'lowlevel.barkbands_crest.mean', 'lowlevel.barkbands_crest.median', 'lowlevel.barkbands_crest.min', 'lowlevel.barkbands_crest.stdev', 'lowlevel.barkbands_crest.var', 'lowlevel.barkbands_flatness_db.dmean', 'lowlevel.barkbands_flatness_db.dmean2', 'lowlevel.barkbands_flatness_db.dvar', 'lowlevel.barkbands_flatness_db.dvar2', 'lowlevel.barkbands_flatness_db.max', 'lowlevel.barkbands_flatness_db.mean', 'lowlevel.barkbands_flatness_db.median', 'lowlevel.barkbands_flatness_db.min', 'lowlevel.barkbands_flatness_db.stdev', 'lowlevel.barkbands_flatness_db.var', 'lowlevel.barkbands_kurtosis.dmean', 'lowlevel.barkbands_kurtosis.dmean2', 'lowlevel.barkbands_kurtosis.dvar', 'lowlevel.barkbands_kurtosis.dvar2', 'lowlevel.barkbands_kurtosis.max', 'lowlevel.barkbands_kurtosis.mean', 'lowlevel.ba

Get song_ids

In [16]:
song_ids = df_annotations['song_id'].values.tolist()
print(song_ids)

[1, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 54, 55, 56, 57, 58, 59, 60, 61, 63, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 78, 79, 80, 81, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 96, 97, 98, 99, 100, 102, 103, 104, 105, 106, 107, 108, 109, 113, 114, 115, 116, 117, 119, 120, 123, 124, 126, 127, 128, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 156, 157, 158, 159, 160, 161, 162, 165, 166, 167, 168, 169, 170, 171, 172, 174, 175, 176, 177, 179, 180, 181, 184, 185, 186, 187, 188, 189, 191, 192, 196, 197, 198, 199, 200, 201, 202, 203, 204, 207, 209, 210, 211, 212, 213, 215, 216, 217, 219, 220, 221, 222, 224, 225, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 241, 242, 244, 249, 251, 252, 253, 257, 258, 260, 263, 265, 267, 269, 273, 275, 276, 277, 279, 280, 282, 283, 284, 285, 287,

Different number of annotations and features are available. Let's drop some rows

In [17]:
df_essentia_features = df_essentia_features[df_essentia_features['song_id'].isin(song_ids)]
df_essentia_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.dmean,lowlevel.barkbands_crest.dmean2,lowlevel.barkbands_crest.dvar,lowlevel.barkbands_crest.dvar2,lowlevel.barkbands_crest.max,lowlevel.barkbands_crest.mean,lowlevel.barkbands_crest.median,lowlevel.barkbands_crest.min,...,metadata.version.essentia_git_sha,metadata.version.extractor,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,0.970365,2.386482,3.594657,6.740679,14.444414,25.484644,11.411017,10.707484,2.498400,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
1,4,0.938222,2.208391,3.674144,5.080004,13.383171,24.118259,14.166083,14.635095,4.408802,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F#,major,Bb,major,Bb,major,Bb,major
2,5,0.930816,2.393671,3.616534,9.753744,20.003422,26.715527,18.050819,18.184444,2.864332,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,A,minor,D,minor,D,minor,D,minor
3,6,0.954429,1.709188,2.437101,4.900128,8.548371,24.963417,17.043421,18.244125,2.253785,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,E,minor,C,major,C,major,C,major
4,7,0.970629,3.049778,4.716875,10.767557,23.465727,25.671312,10.193353,8.912777,2.406512,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,0.900841,2.860650,4.524299,7.774405,19.612854,24.813204,11.216174,10.689340,2.157647,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F,minor,Ab,major,Ab,major,Ab,major
790,996,0.923707,2.780330,4.445892,9.029428,20.219875,25.388409,10.426634,8.341273,2.059642,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,C#,minor,C#,minor,C#,minor,C#,minor
791,997,0.913454,2.712144,4.339530,7.626777,20.528904,25.967821,11.849738,11.506009,2.347258,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,Eb,major,Ab,major,Ab,major,Ab,major
792,999,0.955402,2.754516,3.943231,5.839927,11.535942,25.688959,11.403626,9.805414,2.655332,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,B,minor,B,minor,B,minor,B,minor


Filter out only the mean features

In [18]:
def get_mean_features(df):
  col_names = df.columns.to_list()
  feature_mean_cols = [col for col in col_names if 'dmean' not in col and 'dmean2' not in col and 'dvar' not in col and 'dvar2' not in col and 'max' not in col and 'median' not in col and 'min' not in col and 'stdev' not in col and 'var' not in col and 'cov' not in col and 'icov' not in col]
  return df[feature_mean_cols]

In [19]:
df_essentia_features = get_mean_features(df_essentia_features)
df_essentia_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,metadata.version.essentia_git_sha,metadata.version.extractor,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F#,major,Bb,major,Bb,major,Bb,major
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,A,minor,D,minor,D,minor,D,minor
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,E,minor,C,major,C,major,C,major
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,F,minor,Ab,major,Ab,major,Ab,major
790,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,C#,minor,C#,minor,C#,minor,C#,minor
791,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,Eb,major,Ab,major,Ab,major,Ab,major
792,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,v2.1_beta5-1110-g77a6a954-dirty,music 2.0,B,minor,B,minor,B,minor,B,minor


Some features are irrelevant, or are metadata. Drop them

In [20]:
metadata_columns = [col for col in df_essentia_features.columns if 'metadata' in col]
df_essentia_features = df_essentia_features.drop(columns=metadata_columns)
df_essentia_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,tonal.chords_histogram,tonal.thpcp,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,[32.272068 13.748191 8.393633 0. ...,[1. 0.43556923 0.25200686 0.19656383 0...,G,major,G,major,G,major,G,major
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,[ 5.4545455 0. 0. 3.471074...,[1. 0.7764151 0.30383804 0.39174917 0...,F#,major,Bb,major,Bb,major,Bb,major
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,[ 6.5709553 6.0739923 75.98012 1.711761...,[1. 0.39827436 0.12710713 0.06779694 0...,A,minor,D,minor,D,minor,D,minor
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,[ 0. 72.64151 0. 0. ...,[1. 0.55658203 0.13316369 0.13195944 0...,E,minor,C,major,C,major,C,major
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,[49.760765 5.582137 17.22488 0. ...,[1. 0.9409427 0.34792826 0.22837171 0...,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,[25.365854 0.40650406 2.113821 0. ...,[1. 0.53521824 0.28652653 0.24161763 0...,F,minor,Ab,major,Ab,major,Ab,major
790,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,[49.304913 3.429101 3.892493 0. ...,[1. 0.6449748 0.6369306 0.527618 0...,C#,minor,C#,minor,C#,minor,C#,minor
791,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,[14.708234 10.231814 38.289368 1.918465...,[1. 0.4878843 0.25034007 0.41165787 0...,Eb,major,Ab,major,Ab,major,Ab,major
792,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,[62.23278 10.332541 1.7814727 4.275534...,[1. 0.4901825 0.24507648 0.21346456 0...,B,minor,B,minor,B,minor,B,minor


Find out if any columns are not of the type float or int

In [21]:
pd.set_option('display.max_columns', None)
df_essentia_features.select_dtypes(exclude=['int64', 'float64'])

Unnamed: 0,lowlevel.barkbands.mean,lowlevel.erbbands.mean,lowlevel.gfcc.mean,lowlevel.melbands.mean,lowlevel.melbands128.mean,lowlevel.mfcc.mean,lowlevel.spectral_contrast_coeffs.mean,lowlevel.spectral_contrast_valleys.mean,rhythm.beats_loudness_band_ratio.mean,tonal.hpcp.mean,rhythm.beats_position,rhythm.bpm_histogram,tonal.chords_histogram,tonal.thpcp,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,[3.0178686e-03 1.1141291e-02 8.9332712e-04 7.7...,[6.0949785e-01 1.3509623e+00 1.1622906e+00 2.0...,[ -18.483932 65.78962 -126.755196 26.2...,[4.2022024e-03 9.2927861e-04 3.6628402e-04 2.2...,[2.79276259e-03 6.20536832e-03 4.12436435e-03 ...,[-6.56206970e+02 9.60831909e+01 1.94060767e+...,[-0.72531193 -0.71691185 -0.7339848 -0.765902...,[ -7.6060724 -7.151893 -7.7001133 -8.01088...,[0.4145438 0.05675205 0.16019633 0.12468063 0...,[0.08938619 0.11562601 0.0685273 0.02887096 0...,[ 0.4643991 0.95201814 1.4164172 1.880816...,[0. 0. 0. 0. 0...,[32.272068 13.748191 8.393633 0. ...,[1. 0.43556923 0.25200686 0.19656383 0...,G,major,G,major,G,major,G,major
1,[8.2245916e-02 7.5655505e-02 2.1220266e-03 6.7...,[5.6544228e+00 4.2868652e+00 2.8798769e+00 2.4...,[-50.025883 115.83649 -92.1891 30.14364...,[3.9984886e-02 2.2016608e-03 4.3093192e-04 3.2...,[6.83880150e-02 6.52295128e-02 2.14435253e-02 ...,[-6.9855377e+02 1.4512706e+02 2.4600237e+01 ...,[-0.7527084 -0.7301582 -0.77621865 -0.825397...,[ -6.8088274 -6.782653 -7.7797394 -8.24941...,[0.68719506 0.07491369 0.13646096 0.07364323 0...,[0.01961876 0.01111388 0.16504459 0.38964278 0...,[ 0.37151927 0.7662585 1.1609977 1.555736...,[0. 0. 0. 0. 0...,[ 5.4545455 0. 0. 3.471074...,[1. 0.7764151 0.30383804 0.39174917 0...,F#,major,Bb,major,Bb,major,Bb,major
2,[8.8138906e-03 7.6573014e-02 3.1388358e-03 1.5...,[3.63373971e+00 1.15637579e+01 5.22267008e+00 ...,[-62.226883 104.65986 -75.136696 53.44722...,[2.64592804e-02 6.23792922e-03 8.07063887e-04 ...,[8.3128167e-03 3.3223473e-02 3.4469403e-02 1.3...,[-714.7264 128.25824 39.05503 39.6...,[-0.74215907 -0.73728615 -0.8107042 -0.823487...,[ -6.611424 -7.2615967 -8.380316 -8.52143...,[0.71344894 0.06430289 0.06801426 0.03494145 0...,[0.36658412 0.49308702 0.19638392 0.06267487 0...,[ 0.4527891 0.8939682 1.3235373 1.7647165 ...,[0. 0. 0. 0. 0...,[ 6.5709553 6.0739923 75.98012 1.711761...,[1. 0.39827436 0.12710713 0.06779694 0...,A,minor,D,minor,D,minor,D,minor
3,[6.3011521e-03 3.6585730e-02 1.9028852e-03 1.0...,[2.1608114e+00 3.7564065e+00 2.0137136e+00 3.8...,[ -46.242584 92.42371 -100.47229 47.5...,[1.3818758e-02 2.0177304e-03 5.9450179e-04 3.2...,[6.10772148e-03 2.21698973e-02 1.40665239e-02 ...,[-691.70605 116.25024 11.83004 2...,[-0.7515435 -0.74985373 -0.79138196 -0.780475...,[ -6.883027 -7.5208316 -8.230615 -8.13892...,[0.7006997 0.07236319 0.05754355 0.07572661 0...,[0.043404 0.03473848 0.02643559 0.0266388 0...,[ 0.4643991 0.9287982 1.3931973 1.8459864 ...,[0. 0. 0. 0. 0...,[ 0. 72.64151 0. 0. ...,[1. 0.55658203 0.13316369 0.13195944 0...,E,minor,C,major,C,major,C,major
4,[4.6498782e-04 9.8319445e-03 5.4105553e-03 2.3...,[2.8125489e-01 2.2236748e+00 5.7747264e+00 7.9...,[ 27.01986 72.39925 -112.586334 19.5...,[2.66810576e-03 2.97313510e-03 1.52259623e-03 ...,[4.14900831e-04 2.04694364e-03 3.79415927e-03 ...,[-615.9276 107.363815 10.946475 ...,[-0.648217 -0.72120893 -0.75433093 -0.775806...,[-6.890399 -6.532882 -7.1275434 -7.4947677 -...,[0.7726181 0.05904132 0.06053954 0.04129672 0...,[0.18653132 0.19554321 0.10502436 0.05691439 0...,[ 0.5456689 1.1029478 1.6602267 2.2058957 ...,[0. 0. 0. 0. 0...,[49.760765 5.582137 17.22488 0. ...,[1. 0.9409427 0.34792826 0.22837171 0...,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,[3.5178368e-03 1.5135601e-02 1.9693996e-03 1.5...,[7.8102082e-01 2.0012560e+00 2.4515338e+00 4.0...,[ 4.877607 53.859146 -104.731415 15.8...,[5.4654987e-03 1.6586656e-03 7.6211180e-04 5.8...,[3.2699502e-03 7.8566214e-03 5.4481556e-03 2.7...,[-646.34717 97.44762 8.238821 8.0...,[-0.71112514 -0.7262784 -0.734402 -0.794455...,[-7.239582 -6.954302 -7.4930725 -7.7608438 -...,[0.48283657 0.25367835 0.10937485 0.09307689 0...,[0.08248428 0.06955606 0.06757244 0.10833656 0...,[ 0.5572789 1.1377778 1.7182766 2.2987754 ...,[0. 0. 0. 0. 0...,[25.365854 0.40650406 2.113821 0. ...,[1. 0.53521824 0.28652653 0.24161763 0...,F,minor,Ab,major,Ab,major,Ab,major
790,[1.1546280e-03 1.2026374e-02 1.7000119e-03 7.9...,[5.3879452e-01 1.8922870e+00 2.2729483e+00 2.3...,[ 25.498896 38.018738 -114.41091 15.0...,[3.91058670e-03 1.55003543e-03 4.47034079e-04 ...,[1.05579849e-03 5.03111025e-03 4.67802258e-03 ...,[-611.81537 77.93499 4.731605 -6.2...,[-0.6808761 -0.77204776 -0.7607724 -0.810808...,[-7.3751197 -6.901618 -6.847089 -7.2618237 -...,[0.15982121 0.21989284 0.13302465 0.2677962 0...,[0.22539215 0.18670943 0.09066834 0.06981848 0...,[ 0.52244896 1.056508 1.5789568 2.113016...,[0. 0. 0. 0. 0...,[49.304913 3.429101 3.892493 0. ...,[1. 0.6449748 0.6369306 0.527618 0...,C#,minor,C#,minor,C#,minor,C#,minor
791,[1.1115515e-03 1.8697891e-02 2.8095006e-03 1.5...,[7.13423133e-01 3.45063257e+00 3.73951983e+00 ...,[-18.143194 42.52717 -99.91797 27.55029...,[5.73293772e-03 2.76341126e-03 7.89165264e-04 ...,[1.0615792e-03 6.0030175e-03 7.9094041e-03 4.9...,[-6.5590564e+02 8.0344185e+01 1.4993113e+01 ...,[-0.7286933 -0.75842535 -0.7590517 -0.783111...,[-7.445872 -7.4966383 -7.800202 -7.8799167 -...,[0.47496933 0.0911731 0.09027235 0.12803979 0...,[0.1281634 0.09204237 0.07920623 0.15585023 0...,[ 0.48761904 0.9752381 1.4512471 1.927256...,[0. 0. 0. 0. 0...,[14.708234 10.231814 38.289368 1.918465...,[1. 0.4878843 0.25034007 0.41165787 0...,Eb,major,Ab,major,Ab,major,Ab,major
792,[4.2990032e-03 2.2155575e-02 4.3489551e-03 1.3...,[9.05316353e-01 3.89563417e+00 5.54006767e+00 ...,[ -48.780575 22.957071 -105.544876 29.0638...,[7.24369148e-03 3.54450475e-03 8.38307198e-04 ...,[3.68734868e-03 8.20335001e-03 8.59119650e-03 ...,[-6.7323883e+02 5.8582005e+01 9.4512033e+00 ...,[-0.76104397 -0.7731787 -0.75603086 -0.796876...,[-7.9163246 -7.944481 -8.098653 -8.138778 -...,[0.61888707 0.01735113 0.05294096 0.15319249 0...,[0.12345421 0.13720885 0.07769132 0.05598477 0...,[ 0.42956915 0.8707483 1.3235373 1.787936...,[0. 0. 0. 0. 0...,[62.23278 10.332541 1.7814727 4.275534...,[1. 0.4901825 0.24507648 0.21346456 0...,B,minor,B,minor,B,minor,B,minor


Seems like there are 3 main types of column types:

1. ndarray
2. string
3. float64 or int64

Get the columns whose type is ndarray

In [22]:
pd.reset_option('display.max_columns')

string_columns = ['tonal.chords_key',
                  'tonal.chords_scale',
                  'tonal.key_edma.key',
                  'tonal.key_edma.scale',
                  'tonal.key_krumhansl.key',
                  'tonal.key_krumhansl.scale',
                  'tonal.key_temperley.key',
                  'tonal.key_temperley.scale'
                  ]

df_essentia_features_ndarray_columns = df_essentia_features.select_dtypes(exclude=['int64', 'float64'])
ndarray_columns = df_essentia_features_ndarray_columns.columns.difference(string_columns)
df_essentia_features_ndarray_columns = df_essentia_features_ndarray_columns[ndarray_columns]
df_essentia_features_ndarray_columns

Unnamed: 0,lowlevel.barkbands.mean,lowlevel.erbbands.mean,lowlevel.gfcc.mean,lowlevel.melbands.mean,lowlevel.melbands128.mean,lowlevel.mfcc.mean,lowlevel.spectral_contrast_coeffs.mean,lowlevel.spectral_contrast_valleys.mean,rhythm.beats_loudness_band_ratio.mean,rhythm.beats_position,rhythm.bpm_histogram,tonal.chords_histogram,tonal.hpcp.mean,tonal.thpcp
0,[3.0178686e-03 1.1141291e-02 8.9332712e-04 7.7...,[6.0949785e-01 1.3509623e+00 1.1622906e+00 2.0...,[ -18.483932 65.78962 -126.755196 26.2...,[4.2022024e-03 9.2927861e-04 3.6628402e-04 2.2...,[2.79276259e-03 6.20536832e-03 4.12436435e-03 ...,[-6.56206970e+02 9.60831909e+01 1.94060767e+...,[-0.72531193 -0.71691185 -0.7339848 -0.765902...,[ -7.6060724 -7.151893 -7.7001133 -8.01088...,[0.4145438 0.05675205 0.16019633 0.12468063 0...,[ 0.4643991 0.95201814 1.4164172 1.880816...,[0. 0. 0. 0. 0...,[32.272068 13.748191 8.393633 0. ...,[0.08938619 0.11562601 0.0685273 0.02887096 0...,[1. 0.43556923 0.25200686 0.19656383 0...
1,[8.2245916e-02 7.5655505e-02 2.1220266e-03 6.7...,[5.6544228e+00 4.2868652e+00 2.8798769e+00 2.4...,[-50.025883 115.83649 -92.1891 30.14364...,[3.9984886e-02 2.2016608e-03 4.3093192e-04 3.2...,[6.83880150e-02 6.52295128e-02 2.14435253e-02 ...,[-6.9855377e+02 1.4512706e+02 2.4600237e+01 ...,[-0.7527084 -0.7301582 -0.77621865 -0.825397...,[ -6.8088274 -6.782653 -7.7797394 -8.24941...,[0.68719506 0.07491369 0.13646096 0.07364323 0...,[ 0.37151927 0.7662585 1.1609977 1.555736...,[0. 0. 0. 0. 0...,[ 5.4545455 0. 0. 3.471074...,[0.01961876 0.01111388 0.16504459 0.38964278 0...,[1. 0.7764151 0.30383804 0.39174917 0...
2,[8.8138906e-03 7.6573014e-02 3.1388358e-03 1.5...,[3.63373971e+00 1.15637579e+01 5.22267008e+00 ...,[-62.226883 104.65986 -75.136696 53.44722...,[2.64592804e-02 6.23792922e-03 8.07063887e-04 ...,[8.3128167e-03 3.3223473e-02 3.4469403e-02 1.3...,[-714.7264 128.25824 39.05503 39.6...,[-0.74215907 -0.73728615 -0.8107042 -0.823487...,[ -6.611424 -7.2615967 -8.380316 -8.52143...,[0.71344894 0.06430289 0.06801426 0.03494145 0...,[ 0.4527891 0.8939682 1.3235373 1.7647165 ...,[0. 0. 0. 0. 0...,[ 6.5709553 6.0739923 75.98012 1.711761...,[0.36658412 0.49308702 0.19638392 0.06267487 0...,[1. 0.39827436 0.12710713 0.06779694 0...
3,[6.3011521e-03 3.6585730e-02 1.9028852e-03 1.0...,[2.1608114e+00 3.7564065e+00 2.0137136e+00 3.8...,[ -46.242584 92.42371 -100.47229 47.5...,[1.3818758e-02 2.0177304e-03 5.9450179e-04 3.2...,[6.10772148e-03 2.21698973e-02 1.40665239e-02 ...,[-691.70605 116.25024 11.83004 2...,[-0.7515435 -0.74985373 -0.79138196 -0.780475...,[ -6.883027 -7.5208316 -8.230615 -8.13892...,[0.7006997 0.07236319 0.05754355 0.07572661 0...,[ 0.4643991 0.9287982 1.3931973 1.8459864 ...,[0. 0. 0. 0. 0...,[ 0. 72.64151 0. 0. ...,[0.043404 0.03473848 0.02643559 0.0266388 0...,[1. 0.55658203 0.13316369 0.13195944 0...
4,[4.6498782e-04 9.8319445e-03 5.4105553e-03 2.3...,[2.8125489e-01 2.2236748e+00 5.7747264e+00 7.9...,[ 27.01986 72.39925 -112.586334 19.5...,[2.66810576e-03 2.97313510e-03 1.52259623e-03 ...,[4.14900831e-04 2.04694364e-03 3.79415927e-03 ...,[-615.9276 107.363815 10.946475 ...,[-0.648217 -0.72120893 -0.75433093 -0.775806...,[-6.890399 -6.532882 -7.1275434 -7.4947677 -...,[0.7726181 0.05904132 0.06053954 0.04129672 0...,[ 0.5456689 1.1029478 1.6602267 2.2058957 ...,[0. 0. 0. 0. 0...,[49.760765 5.582137 17.22488 0. ...,[0.18653132 0.19554321 0.10502436 0.05691439 0...,[1. 0.9409427 0.34792826 0.22837171 0...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,[3.5178368e-03 1.5135601e-02 1.9693996e-03 1.5...,[7.8102082e-01 2.0012560e+00 2.4515338e+00 4.0...,[ 4.877607 53.859146 -104.731415 15.8...,[5.4654987e-03 1.6586656e-03 7.6211180e-04 5.8...,[3.2699502e-03 7.8566214e-03 5.4481556e-03 2.7...,[-646.34717 97.44762 8.238821 8.0...,[-0.71112514 -0.7262784 -0.734402 -0.794455...,[-7.239582 -6.954302 -7.4930725 -7.7608438 -...,[0.48283657 0.25367835 0.10937485 0.09307689 0...,[ 0.5572789 1.1377778 1.7182766 2.2987754 ...,[0. 0. 0. 0. 0...,[25.365854 0.40650406 2.113821 0. ...,[0.08248428 0.06955606 0.06757244 0.10833656 0...,[1. 0.53521824 0.28652653 0.24161763 0...
790,[1.1546280e-03 1.2026374e-02 1.7000119e-03 7.9...,[5.3879452e-01 1.8922870e+00 2.2729483e+00 2.3...,[ 25.498896 38.018738 -114.41091 15.0...,[3.91058670e-03 1.55003543e-03 4.47034079e-04 ...,[1.05579849e-03 5.03111025e-03 4.67802258e-03 ...,[-611.81537 77.93499 4.731605 -6.2...,[-0.6808761 -0.77204776 -0.7607724 -0.810808...,[-7.3751197 -6.901618 -6.847089 -7.2618237 -...,[0.15982121 0.21989284 0.13302465 0.2677962 0...,[ 0.52244896 1.056508 1.5789568 2.113016...,[0. 0. 0. 0. 0...,[49.304913 3.429101 3.892493 0. ...,[0.22539215 0.18670943 0.09066834 0.06981848 0...,[1. 0.6449748 0.6369306 0.527618 0...
791,[1.1115515e-03 1.8697891e-02 2.8095006e-03 1.5...,[7.13423133e-01 3.45063257e+00 3.73951983e+00 ...,[-18.143194 42.52717 -99.91797 27.55029...,[5.73293772e-03 2.76341126e-03 7.89165264e-04 ...,[1.0615792e-03 6.0030175e-03 7.9094041e-03 4.9...,[-6.5590564e+02 8.0344185e+01 1.4993113e+01 ...,[-0.7286933 -0.75842535 -0.7590517 -0.783111...,[-7.445872 -7.4966383 -7.800202 -7.8799167 -...,[0.47496933 0.0911731 0.09027235 0.12803979 0...,[ 0.48761904 0.9752381 1.4512471 1.927256...,[0. 0. 0. 0. 0...,[14.708234 10.231814 38.289368 1.918465...,[0.1281634 0.09204237 0.07920623 0.15585023 0...,[1. 0.4878843 0.25034007 0.41165787 0...
792,[4.2990032e-03 2.2155575e-02 4.3489551e-03 1.3...,[9.05316353e-01 3.89563417e+00 5.54006767e+00 ...,[ -48.780575 22.957071 -105.544876 29.0638...,[7.24369148e-03 3.54450475e-03 8.38307198e-04 ...,[3.68734868e-03 8.20335001e-03 8.59119650e-03 ...,[-6.7323883e+02 5.8582005e+01 9.4512033e+00 ...,[-0.76104397 -0.7731787 -0.75603086 -0.796876...,[-7.9163246 -7.944481 -8.098653 -8.138778 -...,[0.61888707 0.01735113 0.05294096 0.15319249 0...,[ 0.42956915 0.8707483 1.3235373 1.787936...,[0. 0. 0. 0. 0...,[62.23278 10.332541 1.7814727 4.275534...,[0.12345421 0.13720885 0.07769132 0.05598477 0...,[1. 0.4901825 0.24507648 0.21346456 0...


Flatten the columns whose values are ndarrays, like tonal.chords_histogram

Credits to https://stackoverflow.com/questions/45704999/how-to-convert-vector-wrapped-as-string-to-numpy-array-in-pandas-dataframe

In [23]:
def string_to_ndarray(str):
  return np.fromstring(str.replace('\n','')
                       .replace('[','')
                       .replace(']','')
                       .replace('  ',' '), 
                       sep=' '
                       )

In [24]:
df_essentia_features_ndarray_columns = df_essentia_features_ndarray_columns.applymap(string_to_ndarray)
df_essentia_features_ndarray_columns

  df_essentia_features_ndarray_columns = df_essentia_features_ndarray_columns.applymap(string_to_ndarray)


Unnamed: 0,lowlevel.barkbands.mean,lowlevel.erbbands.mean,lowlevel.gfcc.mean,lowlevel.melbands.mean,lowlevel.melbands128.mean,lowlevel.mfcc.mean,lowlevel.spectral_contrast_coeffs.mean,lowlevel.spectral_contrast_valleys.mean,rhythm.beats_loudness_band_ratio.mean,rhythm.beats_position,rhythm.bpm_histogram,tonal.chords_histogram,tonal.hpcp.mean,tonal.thpcp
0,"[0.0030178686, 0.011141291, 0.00089332712, 0.0...","[0.60949785, 1.3509623, 1.1622906, 2.0258048, ...","[-18.483932, 65.78962, -126.755196, 26.254904,...","[0.0042022024, 0.00092927861, 0.00036628402, 0...","[0.00279276259, 0.00620536832, 0.00412436435, ...","[-656.20697, 96.0831909, 1.94060767, 6.5184054...","[-0.72531193, -0.71691185, -0.7339848, -0.7659...","[-7.6060724, -7.151893, -7.7001133, -8.01088, ...","[0.4145438, 0.05675205, 0.16019633, 0.12468063...","[0.4643991, 0.95201814, 1.4164172, 1.8808163, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[32.272068, 13.748191, 8.393633, 0.0, 2.894356...","[0.08938619, 0.11562601, 0.0685273, 0.02887096...","[1.0, 0.43556923, 0.25200686, 0.19656383, 0.15..."
1,"[0.082245916, 0.075655505, 0.0021220266, 0.000...","[5.6544228, 4.2868652, 2.8798769, 2.4525661, 3...","[-50.025883, 115.83649, -92.1891, 30.143646, -...","[0.039984886, 0.0022016608, 0.00043093192, 0.0...","[0.068388015, 0.0652295128, 0.0214435253, 0.00...","[-698.55377, 145.12706, 24.600237, 25.394375, ...","[-0.7527084, -0.7301582, -0.77621865, -0.82539...","[-6.8088274, -6.782653, -7.7797394, -8.249413,...","[0.68719506, 0.07491369, 0.13646096, 0.0736432...","[0.37151927, 0.7662585, 1.1609977, 1.5557369, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[5.4545455, 0.0, 0.0, 3.4710743, 7.4380164, 16...","[0.01961876, 0.01111388, 0.16504459, 0.3896427...","[1.0, 0.7764151, 0.30383804, 0.39174917, 0.413..."
2,"[0.0088138906, 0.076573014, 0.0031388358, 0.00...","[3.63373971, 11.5637579, 5.22267008, 4.6478900...","[-62.226883, 104.65986, -75.136696, 53.447224,...","[0.0264592804, 0.00623792922, 0.000807063887, ...","[0.0083128167, 0.033223473, 0.034469403, 0.013...","[-714.7264, 128.25824, 39.05503, 39.65911, 17....","[-0.74215907, -0.73728615, -0.8107042, -0.8234...","[-6.611424, -7.2615967, -8.380316, -8.521434, ...","[0.71344894, 0.06430289, 0.06801426, 0.0349414...","[0.4527891, 0.8939682, 1.3235373, 1.7647165, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[6.5709553, 6.0739923, 75.98012, 1.7117615, 0....","[0.36658412, 0.49308702, 0.19638392, 0.0626748...","[1.0, 0.39827436, 0.12710713, 0.06779694, 0.06..."
3,"[0.0063011521, 0.03658573, 0.0019028852, 0.001...","[2.1608114, 3.7564065, 2.0137136, 3.8941705, 2...","[-46.242584, 92.42371, -100.47229, 47.556934, ...","[0.013818758, 0.0020177304, 0.00059450179, 0.0...","[0.00610772148, 0.0221698973, 0.0140665239, 0....","[-691.70605, 116.25024, 11.83004, 29.930351, 2...","[-0.7515435, -0.74985373, -0.79138196, -0.7804...","[-6.883027, -7.5208316, -8.230615, -8.138925, ...","[0.7006997, 0.07236319, 0.05754355, 0.07572661...","[0.4643991, 0.9287982, 1.3931973, 1.8459864, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.0, 72.64151, 0.0, 0.0, 0.0, 0.0, 0.0, 2.830...","[0.043404, 0.03473848, 0.02643559, 0.0266388, ...","[1.0, 0.55658203, 0.13316369, 0.13195944, 0.11..."
4,"[0.00046498782, 0.0098319445, 0.0054105553, 0....","[0.28125489, 2.2236748, 5.7747264, 7.9591084, ...","[27.01986, 72.39925, -112.586334, 19.591005, -...","[0.00266810576, 0.0029731351, 0.00152259623, 0...","[0.000414900831, 0.00204694364, 0.00379415927,...","[-615.9276, 107.363815, 10.946475, 4.1894474, ...","[-0.648217, -0.72120893, -0.75433093, -0.77580...","[-6.890399, -6.532882, -7.1275434, -7.4947677,...","[0.7726181, 0.05904132, 0.06053954, 0.04129672...","[0.5456689, 1.1029478, 1.6602267, 2.2058957, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[49.760765, 5.582137, 17.22488, 0.0, 0.9569378...","[0.18653132, 0.19554321, 0.10502436, 0.0569143...","[1.0, 0.9409427, 0.34792826, 0.22837171, 0.235..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,"[0.0035178368, 0.015135601, 0.0019693996, 0.00...","[0.78102082, 2.001256, 2.4515338, 4.0438552, 6...","[4.877607, 53.859146, -104.731415, 15.871382, ...","[0.0054654987, 0.0016586656, 0.0007621118, 0.0...","[0.0032699502, 0.0078566214, 0.0054481556, 0.0...","[-646.34717, 97.44762, 8.238821, 8.098506, 5.0...","[-0.71112514, -0.7262784, -0.734402, -0.794455...","[-7.239582, -6.954302, -7.4930725, -7.7608438,...","[0.48283657, 0.25367835, 0.10937485, 0.0930768...","[0.5572789, 1.1377778, 1.7182766, 2.2987754, 2...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[25.365854, 0.40650406, 2.113821, 0.0, 0.0, 0....","[0.08248428, 0.06955606, 0.06757244, 0.1083365...","[1.0, 0.53521824, 0.28652653, 0.24161763, 0.23..."
790,"[0.001154628, 0.012026374, 0.0017000119, 0.000...","[0.53879452, 1.892287, 2.2729483, 2.3121567, 3...","[25.498896, 38.018738, -114.41091, 15.032679, ...","[0.0039105867, 0.00155003543, 0.000447034079, ...","[0.00105579849, 0.00503111025, 0.00467802258, ...","[-611.81537, 77.93499, 4.731605, -6.262985, -3...","[-0.6808761, -0.77204776, -0.7607724, -0.81080...","[-7.3751197, -6.901618, -6.847089, -7.2618237,...","[0.15982121, 0.21989284, 0.13302465, 0.2677962...","[0.52244896, 1.056508, 1.5789568, 2.113016, 2....","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[49.304913, 3.429101, 3.892493, 0.0, 0.0, 0.0,...","[0.22539215, 0.18670943, 0.09066834, 0.0698184...","[1.0, 0.6449748, 0.6369306, 0.527618, 0.256217..."
791,"[0.0011115515, 0.018697891, 0.0028095006, 0.00...","[0.713423133, 3.45063257, 3.73951983, 4.720855...","[-18.143194, 42.52717, -99.91797, 27.550295, -...","[0.00573293772, 0.00276341126, 0.000789165264,...","[0.0010615792, 0.0060030175, 0.0079094041, 0.0...","[-655.90564, 80.344185, 14.993113, 1.8364519, ...","[-0.7286933, -0.75842535, -0.7590517, -0.78311...","[-7.445872, -7.4966383, -7.800202, -7.8799167,...","[0.47496933, 0.0911731, 0.09027235, 0.12803979...","[0.48761904, 0.9752381, 1.4512471, 1.9272562, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[14.708234, 10.231814, 38.289368, 1.9184653, 5...","[0.1281634, 0.09204237, 0.07920623, 0.15585023...","[1.0, 0.4878843, 0.25034007, 0.41165787, 0.543..."
792,"[0.0042990032, 0.022155575, 0.0043489551, 0.00...","[0.905316353, 3.89563417, 5.54006767, 5.008196...","[-48.780575, 22.957071, -105.544876, 29.063808...","[0.00724369148, 0.00354450475, 0.000838307198,...","[0.00368734868, 0.00820335001, 0.0085911965, 0...","[-673.23883, 58.582005, 9.4512033, 8.3645639, ...","[-0.76104397, -0.7731787, -0.75603086, -0.7968...","[-7.9163246, -7.944481, -8.098653, -8.138778, ...","[0.61888707, 0.01735113, 0.05294096, 0.1531924...","[0.42956915, 0.8707483, 1.3235373, 1.7879364, ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[62.23278, 10.332541, 1.7814727, 4.2755346, 0....","[0.12345421, 0.13720885, 0.07769132, 0.0559847...","[1.0, 0.4901825, 0.24507648, 0.21346456, 0.202..."


In [25]:
pd.reset_option('display.max_columns')

Now that all values are proper ndarrays, let's flatten these columns, define a function to flatten a column into multiple new columns containing float64

In [26]:
def flatten_column(df, col):
  print(col)
  result_dict = {}
  num_of_new_cols = max([len(i) for i in df[col]])
  # num_of_new_cols = len(df[col][0])
  num_of_rows = len(df[col])

  for i in range(num_of_new_cols):
    result_col_name = f'{col}_{i}'
    result_dict[result_col_name] = []

  for i in range(num_of_rows):
    # print(f'i: {i}')
    for j in range(num_of_new_cols):
      result_col_name = f'{col}_{j}'

      # print(f'j: {j}')
      # print(f'len: {df[col][i]}')
      # do padding
      if j >= len(df.iloc[i][col]):
        value = 0
      else:
        value = df.iloc[i][col][j]
      
      result_dict[result_col_name].append(value)

  return pd.DataFrame(result_dict)

Apply the function to flatten all these columns

In [27]:
ndarray_columns = df_essentia_features_ndarray_columns.columns.to_list()
df_ndarray_columns = []

for column in ndarray_columns:
  df_ndarray_column = flatten_column(df_essentia_features_ndarray_columns, column)
  df_ndarray_columns.append(df_ndarray_column)

df_essentia_features_ndarray_columns = pd.concat(df_ndarray_columns, axis=1)
df_essentia_features_ndarray_columns.insert(0, column='song_id', value=song_ids)
df_essentia_features_ndarray_columns

lowlevel.barkbands.mean
lowlevel.erbbands.mean
lowlevel.gfcc.mean
lowlevel.melbands.mean
lowlevel.melbands128.mean
lowlevel.mfcc.mean
lowlevel.spectral_contrast_coeffs.mean
lowlevel.spectral_contrast_valleys.mean
rhythm.beats_loudness_band_ratio.mean
rhythm.beats_position
rhythm.bpm_histogram
tonal.chords_histogram
tonal.hpcp.mean
tonal.thpcp


Unnamed: 0,song_id,lowlevel.barkbands.mean_0,lowlevel.barkbands.mean_1,lowlevel.barkbands.mean_2,lowlevel.barkbands.mean_3,lowlevel.barkbands.mean_4,lowlevel.barkbands.mean_5,lowlevel.barkbands.mean_6,lowlevel.barkbands.mean_7,lowlevel.barkbands.mean_8,...,tonal.thpcp_26,tonal.thpcp_27,tonal.thpcp_28,tonal.thpcp_29,tonal.thpcp_30,tonal.thpcp_31,tonal.thpcp_32,tonal.thpcp_33,tonal.thpcp_34,tonal.thpcp_35
0,1,0.003018,0.011141,0.000893,0.000772,0.001004,0.000855,0.001722,0.001290,0.001407,...,0.431222,0.504071,0.259952,0.325301,0.402024,0.279380,0.259622,0.232648,0.244101,0.851880
1,4,0.082246,0.075656,0.002122,0.000678,0.001640,0.001543,0.001398,0.001654,0.001471,...,0.068841,0.130083,0.149334,0.078383,0.057536,0.071149,0.075440,0.050351,0.028523,0.423579
2,5,0.008814,0.076573,0.003139,0.001546,0.001760,0.002099,0.000830,0.000805,0.000394,...,0.062485,0.080485,0.081931,0.070034,0.069636,0.071415,0.080507,0.088939,0.098014,0.743447
3,6,0.006301,0.036586,0.001903,0.001071,0.001435,0.000847,0.000449,0.000439,0.000989,...,0.356605,0.734746,0.499623,0.543463,0.550155,0.140020,0.109984,0.096867,0.093510,0.554280
4,7,0.000465,0.009832,0.005411,0.002330,0.005109,0.002563,0.002642,0.002825,0.002037,...,0.249866,0.615913,0.588765,0.278053,0.244454,0.208635,0.249514,0.483272,0.497619,0.471051
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.003518,0.015136,0.001969,0.001531,0.002655,0.002045,0.002026,0.001056,0.001481,...,0.924196,0.959206,0.484637,0.280796,0.235990,0.246565,0.407393,0.462533,0.426415,0.860458
763,996,0.001155,0.012026,0.001700,0.000793,0.002385,0.002690,0.001475,0.000674,0.001708,...,0.362434,0.381794,0.384891,0.602852,0.574937,0.342436,0.281894,0.270360,0.401432,0.889561
764,997,0.001112,0.018698,0.002810,0.001533,0.001680,0.001629,0.000883,0.000674,0.001302,...,0.558074,0.672083,0.445612,0.344640,0.502234,0.482791,0.408797,0.390303,0.378446,0.842631
765,999,0.004299,0.022156,0.004349,0.001314,0.001426,0.000981,0.000505,0.000695,0.001428,...,0.191604,0.166138,0.191558,0.389773,0.433200,0.245289,0.176757,0.187847,0.294250,0.837339


For the string columns, convert these categorical data into numerical data, get the dataframe with only the string columns first

In [28]:
df_essentia_features_string_columns = df_essentia_features[string_columns]
df_essentia_features_string_columns

Unnamed: 0,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,G,major,G,major,G,major,G,major
1,F#,major,Bb,major,Bb,major,Bb,major
2,A,minor,D,minor,D,minor,D,minor
3,E,minor,C,major,C,major,C,major
4,G,major,G,major,G,major,G,major
...,...,...,...,...,...,...,...,...
789,F,minor,Ab,major,Ab,major,Ab,major
790,C#,minor,C#,minor,C#,minor,C#,minor
791,Eb,major,Ab,major,Ab,major,Ab,major
792,B,minor,B,minor,B,minor,B,minor


Then use cat.codes attribute to convert these categorical columns into numerical columns

In [29]:
for col in df_essentia_features_string_columns.columns:
  df_essentia_features_string_columns[col] = df_essentia_features_string_columns[col].astype('category')
  df_essentia_features_string_columns[col] = df_essentia_features_string_columns[col].cat.codes

df_essentia_features_string_columns.insert(0, column='song_id', value=song_ids)
df_essentia_features_string_columns

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_essentia_features_string_columns[col] = df_essentia_features_string_columns[col].astype('category')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_essentia_features_string_columns[col] = df_essentia_features_string_columns[col].cat.codes
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_essen

Unnamed: 0,song_id,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,11,0,11,0,11,0,11,0
1,4,10,0,3,0,3,0,3,0
2,5,0,1,6,1,6,1,6,1
3,6,7,1,4,0,4,0,4,0
4,7,11,0,11,0,11,0,11,0
...,...,...,...,...,...,...,...,...,...
789,993,9,1,1,0,1,0,1,0
790,996,5,1,5,1,5,1,5,1
791,997,8,0,1,0,1,0,1,0
792,999,2,1,2,1,2,1,2,1


Combine the ndarray columns, string columns, and the rest of the dataframe together in one flattened dataframe with just numerical data

In [30]:
df_essentia_features_numerical_columns = df_essentia_features.select_dtypes(include=['int64', 'float64'])

df_temp = pd.merge(df_essentia_features_numerical_columns, df_essentia_features_ndarray_columns, how='inner', on='song_id')
df_essentia_features_flattened = pd.merge(df_temp, df_essentia_features_string_columns, how='inner', on='song_id')

df_essentia_features_flattened

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,tonal.thpcp_34,tonal.thpcp_35,tonal.chords_key,tonal.chords_scale,tonal.key_edma.key,tonal.key_edma.scale,tonal.key_krumhansl.key,tonal.key_krumhansl.scale,tonal.key_temperley.key,tonal.key_temperley.scale
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,0.244101,0.851880,11,0,11,0,11,0,11,0
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,0.028523,0.423579,10,0,3,0,3,0,3,0
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,0.098014,0.743447,0,1,6,1,6,1,6,1
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,0.093510,0.554280,7,1,4,0,4,0,4,0
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,0.497619,0.471051,11,0,11,0,11,0,11,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,0.426415,0.860458,9,1,1,0,1,0,1,0
763,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,0.401432,0.889561,5,1,5,1,5,1,5,1
764,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,0.378446,0.842631,8,0,1,0,1,0,1,0
765,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,0.294250,0.837339,2,1,2,1,2,1,2,1


Export the flattened Essentia features dataset

In [31]:
df_essentia_features_flattened.to_csv(get_pmemo_path('processed/features/essentia_features_flattened.csv'))

## Create separate, more distinct features dataframes extracted by Essentia

Import the features lsit

In [32]:
from pmemo_essentia_best_features import *

### Create separate feature dataset for best features for building arousal regressor
According to https://ieeexplore-ieee-org.library.sutd.edu.sg:2443/stamp/stamp.jsp?tp=&arnumber=8001129

In [33]:
print(pmemo_essentia_arousal_features)

['lowlevel.average_loudness', 'lowlevel.barkbands_spread.mean', 'lowlevel.melbands_crest.mean', 'lowlevel.melbands_flatness_db.mean', 'lowlevel.melbands_kurtosis.mean', 'lowlevel.melbands_skewness.mean', 'lowlevel.melbands_spread.mean', 'lowlevel.spectral_energy.mean', 'lowlevel.spectral_entropy.mean', 'lowlevel.spectral_flux.mean', 'lowlevel.spectral_kurtosis.mean', 'lowlevel.spectral_rolloff.mean', 'lowlevel.spectral_skewness.mean', 'rhythm.bpm_histogram', 'rhythm.danceability', 'rhythm.onset_rate', 'rhythm.beats_loudness_band_ratio.mean', 'tonal.chords_strength.mean', 'tonal.hpcp_entropy.mean', 'tonal.key_edma.strength', 'tonal.key_temperley.strength', 'tonal.chords_histogram']


In [34]:
arousal_columns = [col for col in df_essentia_features_flattened.columns if any(substring in col for substring in pmemo_essentia_arousal_features)]
df_essentia_best_arousal_features = df_essentia_features_flattened[arousal_columns]
df_essentia_best_arousal_features.insert(0, 'song_id', song_ids)
df_essentia_best_arousal_features.to_csv(get_pmemo_path('processed/features/essentia_best_arousal_features.csv'))
df_essentia_best_arousal_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.970365,30.026752,21.315563,0.210012,19.044914,2.916830,34.354786,0.030159,7.647935,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,0.938222,8.677779,30.797611,0.372297,95.002518,6.664732,8.631808,0.173342,7.264801,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,0.930816,13.578498,26.574856,0.361869,154.612579,7.777407,15.093005,0.099738,7.315362,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,0.954429,20.610300,28.322435,0.300273,47.095280,5.359872,16.303146,0.056681,7.528450,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,0.970629,23.933802,13.910563,0.183233,15.404405,2.676914,29.061829,0.043341,7.649847,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,23.742342,16.773016,0.198606,17.451988,2.597063,29.571623,0.040386,7.658952,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,0.923707,29.687521,14.956428,0.155688,34.581482,3.150197,34.261261,0.039496,8.051765,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,0.913454,39.045204,18.180822,0.169300,14.138631,2.758755,38.427273,0.042173,8.011445,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,0.955402,44.180817,19.358309,0.162716,18.854839,3.191128,36.419331,0.049650,8.179975,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


Import the best feature dataset .csv for arousal

In [35]:
df_essentia_best_arousal_features = pd.read_csv(get_pmemo_path('processed/features/essentia_best_arousal_features.csv'))

# drop Unnamed:0 column
df_essentia_best_arousal_features = df_essentia_best_arousal_features[df_essentia_best_arousal_features.columns[1:]]

df_essentia_best_arousal_features

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.970365,30.026752,21.315563,0.210012,19.044914,2.916830,34.354786,0.030159,7.647935,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,0.938222,8.677779,30.797611,0.372297,95.002518,6.664732,8.631808,0.173342,7.264801,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,0.930816,13.578498,26.574856,0.361869,154.612579,7.777407,15.093005,0.099738,7.315362,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,0.954429,20.610300,28.322435,0.300273,47.095280,5.359872,16.303146,0.056681,7.528450,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,0.970629,23.933802,13.910563,0.183233,15.404405,2.676914,29.061829,0.043341,7.649847,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,23.742342,16.773016,0.198606,17.451988,2.597063,29.571623,0.040386,7.658952,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,0.923707,29.687521,14.956428,0.155688,34.581482,3.150197,34.261261,0.039496,8.051765,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,0.913454,39.045204,18.180822,0.169300,14.138631,2.758755,38.427273,0.042173,8.011445,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,0.955402,44.180817,19.358309,0.162716,18.854839,3.191128,36.419331,0.049650,8.179975,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


### Create separate feature dataset for best features for building valence regressor
According to https://ieeexplore-ieee-org.library.sutd.edu.sg:2443/stamp/stamp.jsp?tp=&arnumber=8001129

In [36]:
valence_columns = [col for col in df_essentia_features_flattened.columns if any(substring in col for substring in pmemo_essentia_valence_features)]
df_essentia_best_valence_features = df_essentia_features_flattened[valence_columns]
df_essentia_best_valence_features.insert(0, 'song_id', song_ids)
df_essentia_best_valence_features.to_csv(get_pmemo_path('processed/features/essentia_best_valence_features.csv'))
df_essentia_best_valence_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,19.044914,2.916830,0.030159,0.075053,0.049574,5.118037,0.483265,1.773851,0.647135,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,95.002518,6.664732,0.173342,0.020421,0.216983,5.026791,0.495532,1.739260,0.427637,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,154.612579,7.777407,0.099738,0.038424,0.148369,5.317868,0.548106,1.986469,0.754484,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,47.095280,5.359872,0.056681,0.052518,0.073169,4.851120,0.514703,1.623544,0.556869,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,15.404405,2.676914,0.043341,0.079573,0.114386,3.750890,0.541958,2.095964,0.773975,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.092792,0.066337,3.592518,0.578063,1.937530,0.694123,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,34.581482,3.150197,0.039496,0.107249,0.043886,4.314631,0.576943,2.071364,0.709678,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,14.138631,2.758755,0.042173,0.105412,0.054690,3.410177,0.442799,1.921766,0.659779,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,18.854839,3.191128,0.049650,0.111079,0.073621,2.894145,0.440451,1.786400,0.613418,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


Import the best feature dataset .csv for valence

In [37]:
df_essentia_best_valence_features = pd.read_csv(get_pmemo_path('processed/features/essentia_best_valence_features.csv'))

# drop Unnamed:0 column
df_essentia_best_valence_features = df_essentia_best_valence_features[df_essentia_best_valence_features.columns[1:]]

df_essentia_best_valence_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,19.044914,2.916830,0.030159,0.075053,0.049574,5.118037,0.483265,1.773851,0.647135,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,95.002518,6.664732,0.173342,0.020421,0.216983,5.026791,0.495532,1.739260,0.427637,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,154.612579,7.777407,0.099738,0.038424,0.148369,5.317868,0.548106,1.986469,0.754484,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,47.095280,5.359872,0.056681,0.052518,0.073169,4.851120,0.514703,1.623544,0.556869,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,15.404405,2.676914,0.043341,0.079573,0.114386,3.750890,0.541958,2.095964,0.773975,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.092792,0.066337,3.592518,0.578063,1.937530,0.694123,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,34.581482,3.150197,0.039496,0.107249,0.043886,4.314631,0.576943,2.071364,0.709678,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,14.138631,2.758755,0.042173,0.105412,0.054690,3.410177,0.442799,1.921766,0.659779,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,18.854839,3.191128,0.049650,0.111079,0.073621,2.894145,0.440451,1.786400,0.613418,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


### Create separate feature dataset for best overall features for detecting both arousal and valence

According to https://ieeexplore-ieee-org.library.sutd.edu.sg:2443/stamp/stamp.jsp?tp=&arnumber=8001129

In [38]:
overall_columns = [col for col in df_essentia_features_flattened.columns if any(substring in col for substring in pmemo_essentia_overall_features)]
df_essentia_best_overall_features = df_essentia_features_flattened[overall_columns]
df_essentia_best_overall_features.insert(0, 'song_id', song_ids)
df_essentia_best_overall_features.to_csv(get_pmemo_path('processed/features/essentia_best_overall_features.csv'))
df_essentia_best_overall_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


Import the best feature dataset .csv overall 

In [39]:
df_essentia_best_overall_features = pd.read_csv(get_pmemo_path('processed/features/essentia_best_overall_features.csv'))

# drop Unnamed:0 column
df_essentia_best_overall_features = df_essentia_best_overall_features[df_essentia_best_overall_features.columns[1:]]

df_essentia_best_overall_features

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,0.000000,0.000000,0.000000,0.000000,0.000000,4.630969,0.000000,10.998553,7.814761,18.958033
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,0.991735,4.793388,25.785124,0.000000,0.661157,6.115703,0.000000,5.289256,0.000000,10.247934
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.441745,0.220872,0.000000,0.000000,0.000000
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,0.000000,3.930818,0.000000,0.000000,0.000000,0.000000,0.000000,2.515723,0.000000,0.000000
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.017544,13.078150,6.379585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,0.000000,0.000000,0.000000,2.113821,0.000000,0.000000,0.000000,0.650406,12.357723,56.829270
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.521780,1.112141
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,0.479616,1.598721,0.000000,3.597122,0.000000,1.438849,0.000000,7.354117,2.797762,9.832134
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,0.000000,0.000000,0.000000,0.000000,0.118765,0.000000,1.543943,0.000000,8.907364,1.306413


## Integrate Essentia features into openSMILE features

Import openSMILE featuresets .csv

In [40]:
df_pmemo_opensmile_compare2016_features = pd.read_csv(get_pmemo_path('processed/features/opensmile_compare2016_features.csv'))
df_pmemo_opensmile_emobase_features = pd.read_csv(get_pmemo_path('processed/features/opensmile_emobase_features.csv'))
df_pmemo_opensmile_gemaps_features = pd.read_csv(get_pmemo_path('processed/features/opensmile_gemaps_features.csv'))
df_pmemo_opensmile_egemaps_features = pd.read_csv(get_pmemo_path('processed/features/opensmile_egemaps_features.csv'))

# drop Unnamed:0 column
df_pmemo_opensmile_compare2016_features = df_pmemo_opensmile_compare2016_features[df_pmemo_opensmile_compare2016_features.columns[1:]]
df_pmemo_opensmile_emobase_features = df_pmemo_opensmile_emobase_features[df_pmemo_opensmile_emobase_features.columns[1:]]
df_pmemo_opensmile_gemaps_features = df_pmemo_opensmile_gemaps_features[df_pmemo_opensmile_gemaps_features.columns[1:]]
df_pmemo_opensmile_egemaps_features = df_pmemo_opensmile_egemaps_features[df_pmemo_opensmile_egemaps_features.columns[1:]]

In [41]:
df_pmemo_opensmile_compare2016_features

Unnamed: 0,song_id,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,7.322202,0.160250,0.0,2.621674,3.417233,4.200210,0.795559,0.782976,1.578536,...,9.387893,0.527035,2.584944,2.586223,-20.000000,0.636703,108.890144,50.417366,107.016620,58.077972
1,4,6.582460,0.703399,0.0,1.656607,2.151616,2.964223,0.495009,0.812608,1.307617,...,7.649520,0.493504,2.592234,2.594137,-19.999989,0.637455,104.337410,59.133785,104.376335,56.402073
2,5,8.124249,0.368032,0.0,1.477557,2.042911,2.738106,0.565354,0.695195,1.260549,...,9.301864,0.578882,2.421328,2.420420,20.000000,0.547071,104.202470,53.521988,107.850940,53.353096
3,6,9.011615,0.281728,0.0,2.313058,3.485445,4.404951,1.172387,0.919506,2.091893,...,9.001396,0.577419,2.309364,2.310359,-20.000000,0.491508,97.956540,49.070080,99.153725,51.412000
4,7,8.349628,0.942354,0.0,4.132348,5.013152,5.750350,0.880803,0.737199,1.618002,...,8.049467,0.513050,2.204685,2.211791,-19.971230,0.553399,94.180350,49.567486,91.844630,46.409393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,8.483475,0.763250,0.0,3.067161,4.405553,5.770013,1.338392,1.364460,2.702852,...,9.415093,0.519761,2.557367,2.557284,20.000000,0.653659,103.939530,59.248730,107.771060,58.497486
790,996,8.911368,0.202281,0.0,4.574404,6.361631,7.147145,1.787228,0.785513,2.572741,...,9.883374,0.546208,2.423050,2.423151,-20.000000,0.616050,112.232630,57.986270,107.318695,52.195194
791,997,8.573480,0.461791,0.0,2.674772,5.131298,6.267901,2.456525,1.136603,3.593128,...,9.722556,0.604539,2.208963,2.208760,20.000000,0.511724,104.249886,53.459106,100.819820,54.904305
792,999,8.656339,0.792811,0.0,4.998912,6.388689,7.008387,1.389777,0.619698,2.009475,...,8.010264,0.556838,2.027305,2.026831,20.000000,0.565515,98.668120,51.425926,96.999370,52.407906


In [42]:
df_pmemo_opensmile_emobase_features

Unnamed: 0,song_id,pcm_intensity_sma_max,pcm_intensity_sma_min,pcm_intensity_sma_range,pcm_intensity_sma_maxPos,pcm_intensity_sma_minPos,pcm_intensity_sma_amean,pcm_intensity_sma_linregc1,pcm_intensity_sma_linregc2,pcm_intensity_sma_linregerrA,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,0.000069,0.0,0.000069,680.0,0.0,0.000010,1.561768e-09,0.000007,0.000007,...,24.663260,4.969058,1.753136,23.013176,-0.823868,0.0,0.684412,0.823868,0.684412,1.508279
1,4,0.000203,0.0,0.000203,2735.0,0.0,0.000035,9.982415e-09,0.000021,0.000025,...,6.708408,2.596165,0.676692,38.048588,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,5,0.000207,0.0,0.000207,7809.0,0.0,0.000025,9.522875e-10,0.000021,0.000023,...,31.890630,5.647370,0.340074,6.558109,-1.443779,0.0,1.079922,1.443779,1.079922,2.523702
3,6,0.000145,0.0,0.000145,324.0,0.0,0.000025,-4.371458e-09,0.000031,0.000022,...,42.263218,6.504123,-0.035226,4.723821,-2.500313,0.0,2.730563,2.500313,2.730563,5.230876
4,7,0.000140,0.0,0.000140,1182.0,0.0,0.000024,6.233813e-09,0.000015,0.000018,...,16.184336,4.024880,2.228209,31.618954,-0.120087,0.0,0.047111,0.120087,0.047111,0.167197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,0.000200,0.0,0.000200,4500.0,0.0,0.000025,-1.041277e-09,0.000028,0.000022,...,21.283978,4.614609,1.045086,23.684689,-0.047910,0.0,0.427303,0.047910,0.427303,0.475213
790,996,0.000157,0.0,0.000157,865.0,0.0,0.000028,-3.948720e-10,0.000029,0.000020,...,16.420145,4.055081,0.161662,11.100394,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
791,997,0.000161,0.0,0.000161,589.0,0.0,0.000024,-7.007779e-09,0.000045,0.000017,...,16.416569,4.052447,1.994095,28.915573,-0.340640,0.0,0.133139,0.340640,0.133139,0.473779
792,999,0.000155,0.0,0.000155,2217.0,0.0,0.000032,-8.242266e-09,0.000048,0.000022,...,21.666965,4.656510,0.786846,19.122852,0.000000,0.0,0.006744,0.000000,0.006744,0.006744


In [43]:
df_pmemo_opensmile_gemaps_features

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,33.337425,0.292724,26.119843,36.004000,40.604490,14.484646,305.86212,450.777680,146.386750,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,28.663845,0.257283,22.888890,28.891180,34.114970,11.226082,181.81364,127.028496,60.697544,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,17.131292,0.444438,12.300488,12.616845,24.207296,11.906808,164.74464,236.056610,100.181330,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,27.172243,0.447355,13.918818,25.472513,43.135548,29.216728,311.85764,277.610320,97.866790,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,27.857573,0.326031,16.902195,30.955670,36.047790,19.145596,331.95724,427.345400,121.956184,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,28.589037,0.445477,16.152197,24.940647,44.368267,28.216070,673.32890,974.933530,230.685240,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
790,996,34.963680,0.308940,23.050102,39.976578,43.117737,20.067635,337.40967,449.238000,146.028760,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
791,997,23.521826,0.515144,13.664935,16.773577,37.080620,23.415684,354.20798,692.777160,256.592250,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
792,999,28.412848,0.529474,14.129504,21.055300,48.873077,34.743572,832.96234,972.408140,152.829000,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [44]:
df_pmemo_opensmile_egemaps_features

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,33.337425,0.292724,26.119843,36.004000,40.604490,14.484646,305.86212,450.777680,146.386750,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,28.663845,0.257283,22.888890,28.891180,34.114970,11.226082,181.81364,127.028496,60.697544,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,17.131292,0.444438,12.300488,12.616845,24.207296,11.906808,164.74464,236.056610,100.181330,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,27.172243,0.447355,13.918818,25.472513,43.135548,29.216728,311.85764,277.610320,97.866790,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,27.857573,0.326031,16.902195,30.955670,36.047790,19.145596,331.95724,427.345400,121.956184,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,28.589037,0.445477,16.152197,24.940647,44.368267,28.216070,673.32890,974.933530,230.685240,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
790,996,34.963680,0.308940,23.050102,39.976578,43.117737,20.067635,337.40967,449.238000,146.028760,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
791,997,23.521826,0.515144,13.664935,16.773577,37.080620,23.415684,354.20798,692.777160,256.592250,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
792,999,28.412848,0.529474,14.129504,21.055300,48.873077,34.743572,832.96234,972.408140,152.829000,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


Integrate Essentia all features into openSMILE ComParE2016 features

In [45]:
df_pmemo_integrated_essentia_all_opensmile_compare2016 = pd.merge(df_essentia_features_flattened, df_pmemo_opensmile_compare2016_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_all_opensmile_compare2016.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_all_opensmile_compare2016.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_all_opensmile_compare2016

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,9.387893,0.527035,2.584944,2.586223,-20.000000,0.636703,108.890144,50.417366,107.016620,58.077972
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,7.649520,0.493504,2.592234,2.594137,-19.999989,0.637455,104.337410,59.133785,104.376335,56.402073
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,9.301864,0.578882,2.421328,2.420420,20.000000,0.547071,104.202470,53.521988,107.850940,53.353096
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,9.001396,0.577419,2.309364,2.310359,-20.000000,0.491508,97.956540,49.070080,99.153725,51.412000
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,8.049467,0.513050,2.204685,2.211791,-19.971230,0.553399,94.180350,49.567486,91.844630,46.409393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,9.415093,0.519761,2.557367,2.557284,20.000000,0.653659,103.939530,59.248730,107.771060,58.497486
763,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,9.883374,0.546208,2.423050,2.423151,-20.000000,0.616050,112.232630,57.986270,107.318695,52.195194
764,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,9.722556,0.604539,2.208963,2.208760,20.000000,0.511724,104.249886,53.459106,100.819820,54.904305
765,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,8.010264,0.556838,2.027305,2.026831,20.000000,0.565515,98.668120,51.425926,96.999370,52.407906


In [46]:
df_pmemo_integrated_essentia_all_opensmile_compare2016.to_csv(get_pmemo_path('processed/features/integrated/essentia_all_opensmile_compare2016_features.csv'))

Integrate Essentia all features into openSMILE emobase features

In [47]:
df_pmemo_integrated_essentia_all_opensmile_emobase = pd.merge(df_essentia_features_flattened, df_pmemo_opensmile_emobase_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_all_opensmile_emobase.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_all_opensmile_emobase.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_all_opensmile_emobase

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,24.663260,4.969058,1.753136,23.013176,-0.823868,0.0,0.684412,0.823868,0.684412,1.508279
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,6.708408,2.596165,0.676692,38.048588,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,31.890630,5.647370,0.340074,6.558109,-1.443779,0.0,1.079922,1.443779,1.079922,2.523702
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,42.263218,6.504123,-0.035226,4.723821,-2.500313,0.0,2.730563,2.500313,2.730563,5.230876
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,16.184336,4.024880,2.228209,31.618954,-0.120087,0.0,0.047111,0.120087,0.047111,0.167197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,21.283978,4.614609,1.045086,23.684689,-0.047910,0.0,0.427303,0.047910,0.427303,0.475213
763,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,16.420145,4.055081,0.161662,11.100394,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
764,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,16.416569,4.052447,1.994095,28.915573,-0.340640,0.0,0.133139,0.340640,0.133139,0.473779
765,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,21.666965,4.656510,0.786846,19.122852,0.000000,0.0,0.006744,0.000000,0.006744,0.006744


In [48]:
df_pmemo_integrated_essentia_all_opensmile_emobase.to_csv(get_pmemo_path('processed/features/integrated/essentia_all_opensmile_emobase_features.csv'))

Integrate Essentia all features into openSMILE GeMAPS features

In [49]:
df_pmemo_integrated_essentia_all_opensmile_gemaps = pd.merge(df_essentia_features_flattened, df_pmemo_opensmile_gemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_all_opensmile_gemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_all_opensmile_gemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_all_opensmile_gemaps

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
763,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
764,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
765,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [50]:
df_pmemo_integrated_essentia_all_opensmile_gemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_all_opensmile_gemaps_features.csv'))

Integrate Essentia all features into openSMILE eGeMAPS features

In [51]:
df_pmemo_integrated_essentia_all_opensmile_egemaps = pd.merge(df_essentia_features_flattened, df_pmemo_opensmile_egemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_all_opensmile_egemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_all_opensmile_egemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_all_opensmile_egemaps

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.970365,11.411017,0.161770,6.455077,1.609292,30.026752,0.459247,3.059849,9.847343,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,0.938222,14.166083,0.301084,37.839718,4.362428,8.677779,0.437324,2.800550,11.660427,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,0.930816,18.050819,0.278281,61.556568,5.047149,13.578498,0.447465,3.557904,9.755386,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,0.954429,17.043421,0.222608,15.175429,3.000883,20.610300,0.457072,4.498772,10.106106,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,0.970629,10.193353,0.128590,5.166962,1.568603,23.933802,0.453411,2.241942,9.153606,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,11.216174,0.141947,9.660921,1.687560,23.742342,0.455372,3.381016,10.037439,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
763,996,0.923707,10.426634,0.114360,11.828347,1.801686,29.687521,0.471651,3.373200,8.276285,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
764,997,0.913454,11.849738,0.114356,2.702694,1.236795,39.045204,0.469326,6.539768,8.761862,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
765,999,0.955402,11.403626,0.107581,2.736115,1.312780,44.180817,0.477721,9.656989,7.768255,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


In [52]:
df_pmemo_integrated_essentia_all_opensmile_egemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_all_opensmile_egemaps_features.csv'))

Integrate Essentia best overall features into openSMILE ComParE2016 features

In [53]:
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016 = pd.merge(df_essentia_best_overall_features, df_pmemo_opensmile_compare2016_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_overall_opensmile_compare2016.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,9.387893,0.527035,2.584944,2.586223,-20.000000,0.636703,108.890144,50.417366,107.016620,58.077972
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,7.649520,0.493504,2.592234,2.594137,-19.999989,0.637455,104.337410,59.133785,104.376335,56.402073
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,9.301864,0.578882,2.421328,2.420420,20.000000,0.547071,104.202470,53.521988,107.850940,53.353096
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,9.001396,0.577419,2.309364,2.310359,-20.000000,0.491508,97.956540,49.070080,99.153725,51.412000
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,8.049467,0.513050,2.204685,2.211791,-19.971230,0.553399,94.180350,49.567486,91.844630,46.409393
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,9.415093,0.519761,2.557367,2.557284,20.000000,0.653659,103.939530,59.248730,107.771060,58.497486
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,9.883374,0.546208,2.423050,2.423151,-20.000000,0.616050,112.232630,57.986270,107.318695,52.195194
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,9.722556,0.604539,2.208963,2.208760,20.000000,0.511724,104.249886,53.459106,100.819820,54.904305
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,8.010264,0.556838,2.027305,2.026831,20.000000,0.565515,98.668120,51.425926,96.999370,52.407906


In [54]:
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_overall_opensmile_compare2016_features.csv'))

Integrate Essentia best overall features into openSMILE emobase features

In [55]:
df_pmemo_integrated_essentia_best_overall_opensmile_emobase = pd.merge(df_essentia_best_overall_features, df_pmemo_opensmile_emobase_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_overall_opensmile_emobase.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_overall_opensmile_emobase.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_overall_opensmile_emobase

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,24.663260,4.969058,1.753136,23.013176,-0.823868,0.0,0.684412,0.823868,0.684412,1.508279
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,6.708408,2.596165,0.676692,38.048588,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,31.890630,5.647370,0.340074,6.558109,-1.443779,0.0,1.079922,1.443779,1.079922,2.523702
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,42.263218,6.504123,-0.035226,4.723821,-2.500313,0.0,2.730563,2.500313,2.730563,5.230876
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,16.184336,4.024880,2.228209,31.618954,-0.120087,0.0,0.047111,0.120087,0.047111,0.167197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,21.283978,4.614609,1.045086,23.684689,-0.047910,0.0,0.427303,0.047910,0.427303,0.475213
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,16.420145,4.055081,0.161662,11.100394,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,16.416569,4.052447,1.994095,28.915573,-0.340640,0.0,0.133139,0.340640,0.133139,0.473779
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,21.666965,4.656510,0.786846,19.122852,0.000000,0.0,0.006744,0.000000,0.006744,0.006744


In [56]:
df_pmemo_integrated_essentia_best_overall_opensmile_emobase.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_overall_opensmile_emobase_features.csv'))

Integrate Essentia best overall features into openSMILE GeMAPS features

In [57]:
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps = pd.merge(df_essentia_best_overall_features, df_pmemo_opensmile_gemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_overall_opensmile_gemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [58]:
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_overall_opensmile_gemaps_features.csv'))

Integrate Essentia best overall features into openSMILE eGeMAPS features

In [59]:
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps = pd.merge(df_essentia_best_overall_features, df_pmemo_opensmile_egemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_overall_opensmile_egemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,19.044914,2.916830,0.030159,0.483265,1.773851,0.647135,0.678089,0.414544,0.056752,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,95.002518,6.664732,0.173342,0.495532,1.739260,0.427637,0.442323,0.687195,0.074914,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,154.612579,7.777407,0.099738,0.548106,1.986469,0.754484,0.722807,0.713449,0.064303,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,47.095280,5.359872,0.056681,0.514703,1.623544,0.556869,0.529661,0.700700,0.072363,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,15.404405,2.676914,0.043341,0.541958,2.095964,0.773975,0.798527,0.772618,0.059041,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.578063,1.937530,0.694123,0.727199,0.482837,0.253678,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
763,996,34.581482,3.150197,0.039496,0.576943,2.071364,0.709678,0.728921,0.159821,0.219893,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
764,997,14.138631,2.758755,0.042173,0.442799,1.921766,0.659779,0.683450,0.474969,0.091173,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
765,999,18.854839,3.191128,0.049650,0.440451,1.786400,0.613418,0.644549,0.618887,0.017351,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


In [60]:
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_overall_opensmile_egemaps_features.csv'))

Integrate Essentia best valence features into openSMILE GeMAPS features

In [61]:
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps = pd.merge(df_essentia_best_valence_features, df_pmemo_opensmile_gemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_valence_opensmile_gemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,19.044914,2.916830,0.030159,0.075053,0.049574,5.118037,0.483265,1.773851,0.647135,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,95.002518,6.664732,0.173342,0.020421,0.216983,5.026791,0.495532,1.739260,0.427637,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,154.612579,7.777407,0.099738,0.038424,0.148369,5.317868,0.548106,1.986469,0.754484,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,47.095280,5.359872,0.056681,0.052518,0.073169,4.851120,0.514703,1.623544,0.556869,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,15.404405,2.676914,0.043341,0.079573,0.114386,3.750890,0.541958,2.095964,0.773975,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.092792,0.066337,3.592518,0.578063,1.937530,0.694123,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
763,996,34.581482,3.150197,0.039496,0.107249,0.043886,4.314631,0.576943,2.071364,0.709678,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
764,997,14.138631,2.758755,0.042173,0.105412,0.054690,3.410177,0.442799,1.921766,0.659779,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
765,999,18.854839,3.191128,0.049650,0.111079,0.073621,2.894145,0.440451,1.786400,0.613418,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [62]:
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_valence_opensmile_gemaps_features.csv'))

Integrate Essentia best valence features into openSMILE eGeMAPS features

In [63]:
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps = pd.merge(df_essentia_best_valence_features, df_pmemo_opensmile_egemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_valence_opensmile_egemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,19.044914,2.916830,0.030159,0.075053,0.049574,5.118037,0.483265,1.773851,0.647135,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,95.002518,6.664732,0.173342,0.020421,0.216983,5.026791,0.495532,1.739260,0.427637,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,154.612579,7.777407,0.099738,0.038424,0.148369,5.317868,0.548106,1.986469,0.754484,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,47.095280,5.359872,0.056681,0.052518,0.073169,4.851120,0.514703,1.623544,0.556869,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,15.404405,2.676914,0.043341,0.079573,0.114386,3.750890,0.541958,2.095964,0.773975,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,17.451988,2.597063,0.040386,0.092792,0.066337,3.592518,0.578063,1.937530,0.694123,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
763,996,34.581482,3.150197,0.039496,0.107249,0.043886,4.314631,0.576943,2.071364,0.709678,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
764,997,14.138631,2.758755,0.042173,0.105412,0.054690,3.410177,0.442799,1.921766,0.659779,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
765,999,18.854839,3.191128,0.049650,0.111079,0.073621,2.894145,0.440451,1.786400,0.613418,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


In [64]:
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_valence_opensmile_egemaps_features.csv'))

Integrate Essentia best arousal features into openSMILE GeMAPS features

In [65]:
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps = pd.merge(df_essentia_best_arousal_features, df_pmemo_opensmile_gemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.970365,30.026752,21.315563,0.210012,19.044914,2.916830,34.354786,0.030159,7.647935,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,0.938222,8.677779,30.797611,0.372297,95.002518,6.664732,8.631808,0.173342,7.264801,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,0.930816,13.578498,26.574856,0.361869,154.612579,7.777407,15.093005,0.099738,7.315362,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,0.954429,20.610300,28.322435,0.300273,47.095280,5.359872,16.303146,0.056681,7.528450,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,0.970629,23.933802,13.910563,0.183233,15.404405,2.676914,29.061829,0.043341,7.649847,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,23.742342,16.773016,0.198606,17.451988,2.597063,29.571623,0.040386,7.658952,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
763,996,0.923707,29.687521,14.956428,0.155688,34.581482,3.150197,34.261261,0.039496,8.051765,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
764,997,0.913454,39.045204,18.180822,0.169300,14.138631,2.758755,38.427273,0.042173,8.011445,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
765,999,0.955402,44.180817,19.358309,0.162716,18.854839,3.191128,36.419331,0.049650,8.179975,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [66]:
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_arousal_opensmile_gemaps_features.csv'))

Integrate Essentia best arousal features into openSMILE eGeMAPS features

In [67]:
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps = pd.merge(df_essentia_best_arousal_features, df_pmemo_opensmile_egemaps_features, on='song_id', how='inner')

# Identify identical columns for dropping
identical_cols = [col for col in df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps.columns if '_x' in col or '_y' in col]

# Drop identical columns
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps.drop(columns=identical_cols, inplace=True)

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.970365,30.026752,21.315563,0.210012,19.044914,2.916830,34.354786,0.030159,7.647935,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,0.938222,8.677779,30.797611,0.372297,95.002518,6.664732,8.631808,0.173342,7.264801,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,0.930816,13.578498,26.574856,0.361869,154.612579,7.777407,15.093005,0.099738,7.315362,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,0.954429,20.610300,28.322435,0.300273,47.095280,5.359872,16.303146,0.056681,7.528450,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,0.970629,23.933802,13.910563,0.183233,15.404405,2.676914,29.061829,0.043341,7.649847,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.900841,23.742342,16.773016,0.198606,17.451988,2.597063,29.571623,0.040386,7.658952,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
763,996,0.923707,29.687521,14.956428,0.155688,34.581482,3.150197,34.261261,0.039496,8.051765,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
764,997,0.913454,39.045204,18.180822,0.169300,14.138631,2.758755,38.427273,0.042173,8.011445,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
765,999,0.955402,44.180817,19.358309,0.162716,18.854839,3.191128,36.419331,0.049650,8.179975,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


In [68]:
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps.to_csv(get_pmemo_path('processed/features/integrated/essentia_best_arousal_opensmile_egemaps_features.csv'))

## Data Scaling

Import relevant libraries

In [69]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

### Data Standardisation

In [70]:
scaler = StandardScaler()

Essentia All + openSMILE ComParE2016

In [71]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_compare2016), columns=df_pmemo_integrated_essentia_all_opensmile_compare2016.columns)

df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised = df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_all_opensmile_compare2016_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_compare2016_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,0.426199,-0.099373,0.026608,-0.335592,-0.307756,0.932735,0.258703,-0.087297,-0.098705,...,0.414081,-0.574161,0.839269,0.844107,-1.041440,1.048951,0.418375,-0.413265,0.408912,0.834654
1,4,0.113776,1.008513,2.497473,2.074834,2.459013,-1.858236,-0.840371,-0.251327,0.987117,...,-0.607920,-1.151596,0.862690,0.869544,-1.041439,1.061399,0.004919,0.817544,0.155745,0.579720
2,5,0.041793,2.570669,2.093040,3.896353,3.147124,-1.217561,-0.331988,0.227768,-0.153776,...,0.363504,0.318682,0.313592,0.311192,0.960482,-0.433949,-0.007336,0.025125,0.488912,0.115916
3,6,0.271303,2.165567,1.105635,0.334155,1.090725,-0.298287,0.149669,0.822953,0.056263,...,0.186857,0.293497,-0.046137,-0.042560,-1.041440,-1.353200,-0.574560,-0.603510,-0.345032,-0.179360
4,7,0.428761,-0.589028,-0.561868,-0.434523,-0.348646,0.136198,-0.033895,-0.604697,-0.514171,...,-0.372789,-0.814992,-0.382455,-0.359371,-1.040000,-0.329263,-0.917495,-0.533274,-1.045875,-0.940345
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,-0.177725,-0.324966,-0.089374,-0.229101,0.111168,0.064434,0.115870,0.015140,...,0.430072,-0.699417,0.750665,0.751092,0.960482,1.329490,-0.031215,0.833775,0.481252,0.898469
763,996,-0.027310,-0.495220,-0.814255,0.077090,-0.114410,0.888387,0.880557,0.110926,-1.039582,...,0.705378,-0.243985,0.319124,0.319969,-1.041440,0.707271,0.721923,0.655508,0.437877,-0.060222
764,997,-0.126972,0.077048,-0.814325,-0.623785,-0.682098,2.111725,0.764013,2.114068,-0.748780,...,0.610832,0.760531,-0.368711,-0.369116,0.960482,-1.018734,-0.003030,0.016246,-0.185277,0.351882
765,999,0.280758,-0.102345,-0.934490,-0.621218,-0.605737,2.783108,1.184842,4.085993,-1.343832,...,-0.395837,-0.060931,-0.952355,-0.953861,0.960482,-0.128810,-0.509938,-0.270851,-0.551606,-0.027865


Essentia All + openSMILE emobase

In [72]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_emobase_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_emobase), columns=df_pmemo_integrated_essentia_all_opensmile_emobase.columns)

df_pmemo_integrated_essentia_all_opensmile_emobase_standardised = df_pmemo_integrated_essentia_all_opensmile_emobase_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_emobase_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_emobase_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_all_opensmile_emobase_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_emobase_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,0.426199,-0.099373,0.026608,-0.335592,-0.307756,0.932735,0.258703,-0.087297,-0.098705,...,0.648434,0.739197,-0.234297,-0.374198,-0.501003,0.068443,0.362839,0.503870,0.360561,0.435927
1,4,0.113776,1.008513,2.497473,2.074834,2.459013,-1.858236,-0.840371,-0.251327,0.987117,...,-1.229995,-1.424203,-0.674824,-0.059408,0.700582,0.068443,-0.661676,-0.701876,-0.660782,-0.686273
2,5,0.041793,2.570669,2.093040,3.896353,3.147124,-1.217561,-0.331988,0.227768,-0.153776,...,1.404558,1.357623,-0.812583,-0.718711,-1.405124,0.068443,0.954891,1.411122,0.950780,1.191428
3,6,0.271303,2.165567,1.105635,0.334155,1.090725,-0.298287,0.149669,0.822953,0.056263,...,2.489734,2.138737,-0.966172,-0.757114,-2.946044,0.068443,3.425784,2.957379,3.414022,3.205639
4,7,0.428761,-0.589028,-0.561868,-0.434523,-0.348646,0.136198,-0.033895,-0.604697,-0.514171,...,-0.238628,-0.121624,-0.039876,-0.194023,0.525439,0.068443,-0.591155,-0.526126,-0.590479,-0.561874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,-0.177725,-0.324966,-0.089374,-0.229101,0.111168,0.064434,0.115870,0.015140,...,0.294895,0.416041,-0.524061,-0.360139,0.630706,0.068443,-0.022035,-0.631758,-0.023121,-0.332702
763,996,-0.027310,-0.495220,-0.814255,0.077090,-0.114410,0.888387,0.880557,0.110926,-1.039582,...,-0.213957,-0.094089,-0.885597,-0.623611,0.700582,0.068443,-0.661676,-0.701876,-0.660782,-0.686273
764,997,-0.126972,0.077048,-0.814325,-0.623785,-0.682098,2.111725,0.764013,2.114068,-0.748780,...,-0.214331,-0.096490,-0.135686,-0.250622,0.203769,0.068443,-0.462377,-0.203342,-0.462100,-0.333769
765,999,0.280758,-0.102345,-0.934490,-0.621218,-0.605737,2.783108,1.184842,4.085993,-1.343832,...,0.334963,0.454243,-0.629744,-0.455648,0.700582,0.068443,-0.651581,-0.701876,-0.650718,-0.681255


Essentia All + openSMILE GeMAPS

In [73]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_gemaps), columns=df_pmemo_integrated_essentia_all_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised = df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_all_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_gemaps_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.426199,-0.099373,0.026608,-0.335592,-0.307756,0.932735,0.258703,-0.087297,-0.098705,...,0.010665,0.112443,-0.356517,0.241192,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513
1,4,0.113776,1.008513,2.497473,2.074834,2.459013,-1.858236,-0.840371,-0.251327,0.987117,...,-2.582425,2.521296,-1.587073,-1.756907,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127
2,5,0.041793,2.570669,2.093040,3.896353,3.147124,-1.217561,-0.331988,0.227768,-0.153776,...,-2.393781,2.181989,-1.341486,-0.316982,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650
3,6,0.271303,2.165567,1.105635,0.334155,1.090725,-0.298287,0.149669,0.822953,0.056263,...,-0.741640,0.768306,-1.876367,0.705660,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107
4,7,0.428761,-0.589028,-0.561868,-0.434523,-0.348646,0.136198,-0.033895,-0.604697,-0.514171,...,0.469653,-0.687136,1.506562,-0.467269,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,-0.177725,-0.324966,-0.089374,-0.229101,0.111168,0.064434,0.115870,0.015140,...,0.612326,-0.673763,0.581939,0.317231,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172
763,996,-0.027310,-0.495220,-0.814255,0.077090,-0.114410,0.888387,0.880557,0.110926,-1.039582,...,0.502963,-0.119092,0.708211,1.645550,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591
764,997,-0.126972,0.077048,-0.814325,-0.623785,-0.682098,2.111725,0.764013,2.114068,-0.748780,...,0.785432,-0.498027,0.274327,1.052788,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979
765,999,0.280758,-0.102345,-0.934490,-0.621218,-0.605737,2.783108,1.184842,4.085993,-1.343832,...,0.605529,-0.365492,-0.185980,1.193421,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346


Essentia All + openSMILE eGeMAPS

In [74]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_egemaps), columns=df_pmemo_integrated_essentia_all_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised = df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_all_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_egemaps_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.426199,-0.099373,0.026608,-0.335592,-0.307756,0.932735,0.258703,-0.087297,-0.098705,...,-0.356517,0.241192,-1.078395,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513,-1.197473
1,4,0.113776,1.008513,2.497473,2.074834,2.459013,-1.858236,-0.840371,-0.251327,0.987117,...,-1.587073,-1.756907,2.595686,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127,1.014336
2,5,0.041793,2.570669,2.093040,3.896353,3.147124,-1.217561,-0.331988,0.227768,-0.153776,...,-1.341486,-0.316982,-0.350587,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650,0.425787
3,6,0.271303,2.165567,1.105635,0.334155,1.090725,-0.298287,0.149669,0.822953,0.056263,...,-1.876367,0.705660,-1.005747,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107,0.429337
4,7,0.428761,-0.589028,-0.561868,-0.434523,-0.348646,0.136198,-0.033895,-0.604697,-0.514171,...,1.506562,-0.467269,-0.478589,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730,0.385611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,-0.177725,-0.324966,-0.089374,-0.229101,0.111168,0.064434,0.115870,0.015140,...,0.581939,0.317231,0.217800,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172,0.380340
763,996,-0.027310,-0.495220,-0.814255,0.077090,-0.114410,0.888387,0.880557,0.110926,-1.039582,...,0.708211,1.645550,0.171330,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591,0.647410
764,997,-0.126972,0.077048,-0.814325,-0.623785,-0.682098,2.111725,0.764013,2.114068,-0.748780,...,0.274327,1.052788,-0.688337,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979,0.387081
765,999,0.280758,-0.102345,-0.934490,-0.621218,-0.605737,2.783108,1.184842,4.085993,-1.343832,...,-0.185980,1.193421,0.299434,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346,0.977870


Essentia Best Overall + openSMILE ComParE2016

In [75]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_compare2016), columns=df_pmemo_integrated_essentia_best_overall_opensmile_compare2016.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised = df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_overall_opensmile_compare2016_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,-0.290830,-0.161553,-0.674310,-0.966508,-0.692188,-0.466809,-0.224314,-0.490847,-0.744314,...,0.414081,-0.574161,0.839269,0.844107,-1.041440,1.048951,0.418375,-0.413265,0.408912,0.834654
1,4,2.148017,2.355089,6.176068,-0.692793,-0.875022,-2.504826,-2.346037,1.052759,-0.515532,...,-0.607920,-1.151596,0.862690,0.869544,-1.041439,1.061399,0.004919,0.817544,0.155745,0.579720
2,5,4.061978,3.102229,2.654601,0.480322,0.431641,0.529917,0.178117,1.201395,-0.649196,...,0.363504,0.318682,0.313592,0.311192,0.960482,-0.433949,-0.007336,0.025125,0.488912,0.115916
3,6,0.609811,1.478902,0.594623,-0.265029,-1.486659,-1.304917,-1.560061,1.129215,-0.547660,...,0.186857,0.293497,-0.046137,-0.042560,-1.041440,-1.353200,-0.574560,-0.603510,-0.345032,-0.179360
4,7,-0.407720,-0.322652,-0.043632,0.343143,1.010394,0.710881,0.859542,1.536379,-0.715476,...,-0.372789,-0.814992,-0.382455,-0.359371,-1.040000,-0.329263,-0.917495,-0.533274,-1.045875,-0.940345
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.148765,0.172968,-0.030536,0.217640,-0.104210,1.736366,...,0.430072,-0.699417,0.750665,0.751092,0.960482,1.329490,-0.031215,0.833775,0.481252,0.898469
763,996,0.208018,-0.004852,-0.227602,1.123782,0.880366,0.113897,0.233136,-1.932951,1.310770,...,0.705378,-0.243985,0.319124,0.319969,-1.041440,0.707271,0.721923,0.655508,0.437877,-0.060222
764,997,-0.448361,-0.267697,-0.099497,-1.869459,0.089641,-0.349415,-0.176068,-0.148750,-0.310712,...,0.610832,0.760531,-0.368711,-0.369116,0.960482,-1.018734,-0.003030,0.016246,-0.185277,0.351882
765,999,-0.296933,0.022632,0.258205,-1.921844,-0.625854,-0.779872,-0.526150,0.666036,-1.240647,...,-0.395837,-0.060931,-0.952355,-0.953861,0.960482,-0.128810,-0.509938,-0.270851,-0.551606,-0.027865


Essentia Best Overall + openSMILE emobase

In [76]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_emobase), columns=df_pmemo_integrated_essentia_best_overall_opensmile_emobase.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised = df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_overall_opensmile_emobase_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,-0.290830,-0.161553,-0.674310,-0.966508,-0.692188,-0.466809,-0.224314,-0.490847,-0.744314,...,0.648434,0.739197,-0.234297,-0.374198,-0.501003,0.068443,0.362839,0.503870,0.360561,0.435927
1,4,2.148017,2.355089,6.176068,-0.692793,-0.875022,-2.504826,-2.346037,1.052759,-0.515532,...,-1.229995,-1.424203,-0.674824,-0.059408,0.700582,0.068443,-0.661676,-0.701876,-0.660782,-0.686273
2,5,4.061978,3.102229,2.654601,0.480322,0.431641,0.529917,0.178117,1.201395,-0.649196,...,1.404558,1.357623,-0.812583,-0.718711,-1.405124,0.068443,0.954891,1.411122,0.950780,1.191428
3,6,0.609811,1.478902,0.594623,-0.265029,-1.486659,-1.304917,-1.560061,1.129215,-0.547660,...,2.489734,2.138737,-0.966172,-0.757114,-2.946044,0.068443,3.425784,2.957379,3.414022,3.205639
4,7,-0.407720,-0.322652,-0.043632,0.343143,1.010394,0.710881,0.859542,1.536379,-0.715476,...,-0.238628,-0.121624,-0.039876,-0.194023,0.525439,0.068443,-0.591155,-0.526126,-0.590479,-0.561874
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.148765,0.172968,-0.030536,0.217640,-0.104210,1.736366,...,0.294895,0.416041,-0.524061,-0.360139,0.630706,0.068443,-0.022035,-0.631758,-0.023121,-0.332702
763,996,0.208018,-0.004852,-0.227602,1.123782,0.880366,0.113897,0.233136,-1.932951,1.310770,...,-0.213957,-0.094089,-0.885597,-0.623611,0.700582,0.068443,-0.661676,-0.701876,-0.660782,-0.686273
764,997,-0.448361,-0.267697,-0.099497,-1.869459,0.089641,-0.349415,-0.176068,-0.148750,-0.310712,...,-0.214331,-0.096490,-0.135686,-0.250622,0.203769,0.068443,-0.462377,-0.203342,-0.462100,-0.333769
765,999,-0.296933,0.022632,0.258205,-1.921844,-0.625854,-0.779872,-0.526150,0.666036,-1.240647,...,0.334963,0.454243,-0.629744,-0.455648,0.700582,0.068443,-0.651581,-0.701876,-0.650718,-0.681255


Essentia Best Overall + openSMILE GeMAPS

In [77]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_overall_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised = df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_overall_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,-0.290830,-0.161553,-0.674310,-0.966508,-0.692188,-0.466809,-0.224314,-0.490847,-0.744314,...,0.010665,0.112443,-0.356517,0.241192,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513
1,4,2.148017,2.355089,6.176068,-0.692793,-0.875022,-2.504826,-2.346037,1.052759,-0.515532,...,-2.582425,2.521296,-1.587073,-1.756907,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127
2,5,4.061978,3.102229,2.654601,0.480322,0.431641,0.529917,0.178117,1.201395,-0.649196,...,-2.393781,2.181989,-1.341486,-0.316982,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650
3,6,0.609811,1.478902,0.594623,-0.265029,-1.486659,-1.304917,-1.560061,1.129215,-0.547660,...,-0.741640,0.768306,-1.876367,0.705660,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107
4,7,-0.407720,-0.322652,-0.043632,0.343143,1.010394,0.710881,0.859542,1.536379,-0.715476,...,0.469653,-0.687136,1.506562,-0.467269,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.148765,0.172968,-0.030536,0.217640,-0.104210,1.736366,...,0.612326,-0.673763,0.581939,0.317231,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172
763,996,0.208018,-0.004852,-0.227602,1.123782,0.880366,0.113897,0.233136,-1.932951,1.310770,...,0.502963,-0.119092,0.708211,1.645550,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591
764,997,-0.448361,-0.267697,-0.099497,-1.869459,0.089641,-0.349415,-0.176068,-0.148750,-0.310712,...,0.785432,-0.498027,0.274327,1.052788,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979
765,999,-0.296933,0.022632,0.258205,-1.921844,-0.625854,-0.779872,-0.526150,0.666036,-1.240647,...,0.605529,-0.365492,-0.185980,1.193421,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346


Essentia Best Overall + openSMILE eGeMAPS

In [78]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_overall_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised = df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_overall_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,-0.290830,-0.161553,-0.674310,-0.966508,-0.692188,-0.466809,-0.224314,-0.490847,-0.744314,...,-0.356517,0.241192,-1.078395,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513,-1.197473
1,4,2.148017,2.355089,6.176068,-0.692793,-0.875022,-2.504826,-2.346037,1.052759,-0.515532,...,-1.587073,-1.756907,2.595686,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127,1.014336
2,5,4.061978,3.102229,2.654601,0.480322,0.431641,0.529917,0.178117,1.201395,-0.649196,...,-1.341486,-0.316982,-0.350587,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650,0.425787
3,6,0.609811,1.478902,0.594623,-0.265029,-1.486659,-1.304917,-1.560061,1.129215,-0.547660,...,-1.876367,0.705660,-1.005747,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107,0.429337
4,7,-0.407720,-0.322652,-0.043632,0.343143,1.010394,0.710881,0.859542,1.536379,-0.715476,...,1.506562,-0.467269,-0.478589,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730,0.385611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.148765,0.172968,-0.030536,0.217640,-0.104210,1.736366,...,0.581939,0.317231,0.217800,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172,0.380340
763,996,0.208018,-0.004852,-0.227602,1.123782,0.880366,0.113897,0.233136,-1.932951,1.310770,...,0.708211,1.645550,0.171330,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591,0.647410
764,997,-0.448361,-0.267697,-0.099497,-1.869459,0.089641,-0.349415,-0.176068,-0.148750,-0.310712,...,0.274327,1.052788,-0.688337,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979,0.387081
765,999,-0.296933,0.022632,0.258205,-1.921844,-0.625854,-0.779872,-0.526150,0.666036,-1.240647,...,-0.185980,1.193421,0.299434,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346,0.977870


Essentia Best Overall

In [79]:
# Fit and transform the selected columns
df_essentia_best_overall_features_standardised = pd.DataFrame(scaler.fit_transform(df_essentia_best_overall_features), columns=df_essentia_best_overall_features.columns)

df_essentia_best_overall_features_standardised = df_essentia_best_overall_features_standardised.drop('song_id', axis=1)
df_essentia_best_overall_features_standardised.insert(0, column='song_id', value=song_ids)

df_essentia_best_overall_features_standardised.to_csv(get_pmemo_path('processed/features/standardised_essentia_best_overall_features.csv'))

df_essentia_best_overall_features_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,-0.290830,-0.161553,-0.674310,-0.966508,-0.692188,-0.466809,-0.224314,-0.490847,-0.744314,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,0.345904,-0.285526,1.021745,-0.039876,0.671714
1,4,2.148017,2.355089,6.176068,-0.692793,-0.875022,-2.504826,-2.346037,1.052759,-0.515532,...,0.004772,0.540440,4.181020,-0.456893,-0.005882,0.576715,-0.285526,0.213223,-0.846705,-0.053671
2,5,4.061978,3.102229,2.654601,0.480322,0.431641,0.529917,0.178117,1.201395,-0.649196,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.305337,-0.203477,-0.535816,-0.846705,-0.907128
3,6,0.609811,1.478902,0.594623,-0.265029,-1.486659,-1.304917,-1.560061,1.129215,-0.547660,...,-0.196902,0.386979,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.179551,-0.846705,-0.907128
4,7,-0.407720,-0.322652,-0.043632,0.343143,1.010394,0.710881,0.859542,1.536379,-0.715476,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,0.457974,0.503538,-0.375831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.148765,0.172968,-0.030536,0.217640,-0.104210,1.736366,...,-0.196902,-0.312361,-0.251295,-0.129750,-0.278709,-0.374009,-0.285526,-0.443708,0.429159,3.825664
763,996,0.208018,-0.004852,-0.227602,1.123782,0.880366,0.113897,0.233136,-1.932951,1.310770,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.535816,-0.483101,-0.814508
764,997,-0.448361,-0.267697,-0.099497,-1.869459,0.089641,-0.349415,-0.176068,-0.148750,-0.310712,...,-0.099370,-0.027929,-0.251295,0.099811,-0.278709,-0.150331,-0.285526,0.505638,-0.557852,-0.088299
765,999,-0.296933,0.022632,0.258205,-1.921844,-0.625854,-0.779872,-0.526150,0.666036,-1.240647,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.229701,-0.374009,0.288011,-0.535816,0.072929,-0.798329


Essentia Best Valence

In [80]:
# Fit and transform the selected columns
df_essentia_best_valence_features_standardised = pd.DataFrame(scaler.fit_transform(df_essentia_best_valence_features), columns=df_essentia_best_valence_features.columns)

df_essentia_best_valence_features_standardised = df_essentia_best_valence_features_standardised.drop('song_id', axis=1)
df_essentia_best_valence_features_standardised.insert(0, column='song_id', value=song_ids)

df_essentia_best_valence_features_standardised.to_csv(get_pmemo_path('processed/features/standardised_essentia_best_valence_features.csv'))

df_essentia_best_valence_features_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,-0.290830,-0.161553,-0.674310,0.595025,-0.452851,1.298902,-0.966508,-0.692188,-0.466809,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,0.345904,-0.285526,1.021745,-0.039876,0.671714
1,4,2.148017,2.355089,6.176068,-2.027051,4.119475,1.198675,-0.692793,-0.875022,-2.504826,...,0.004772,0.540440,4.181020,-0.456893,-0.005882,0.576715,-0.285526,0.213223,-0.846705,-0.053671
2,5,4.061978,3.102229,2.654601,-1.162994,2.245453,1.518401,0.480322,0.431641,0.529917,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.305337,-0.203477,-0.535816,-0.846705,-0.907128
3,6,0.609811,1.478902,0.594623,-0.486537,0.191578,1.005714,-0.265029,-1.486659,-1.304917,...,-0.196902,0.386979,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.179551,-0.846705,-0.907128
4,7,-0.407720,-0.322652,-0.043632,0.811939,1.317306,-0.202803,0.343143,1.010394,0.710881,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,0.457974,0.503538,-0.375831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.446416,0.004970,-0.376761,1.148765,0.172968,-0.030536,...,-0.196902,-0.312361,-0.251295,-0.129750,-0.278709,-0.374009,-0.285526,-0.443708,0.429159,3.825664
763,996,0.208018,-0.004852,-0.227602,2.140284,-0.608213,0.416423,1.123782,0.880366,0.113897,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.535816,-0.483101,-0.814508
764,997,-0.448361,-0.267697,-0.099497,2.052115,-0.313137,-0.577048,-1.869459,0.089641,-0.349415,...,-0.099370,-0.027929,-0.251295,0.099811,-0.278709,-0.150331,-0.285526,0.505638,-0.557852,-0.088299
765,999,-0.296933,0.022632,0.258205,2.324076,0.203926,-1.143870,-1.921844,-0.625854,-0.779872,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.229701,-0.374009,0.288011,-0.535816,0.072929,-0.798329


Essentia Best Arousal

In [81]:
# Fit and transform the selected columns
df_essentia_best_arousal_features_standardised = pd.DataFrame(scaler.fit_transform(df_essentia_best_arousal_features), columns=df_essentia_best_arousal_features.columns)

df_essentia_best_arousal_features_standardised = df_essentia_best_arousal_features_standardised.drop('song_id', axis=1)
df_essentia_best_arousal_features_standardised.insert(0, column='song_id', value=song_ids)

df_essentia_best_arousal_features_standardised.to_csv(get_pmemo_path('processed/features/standardised_essentia_best_arousal_features.csv'))

df_essentia_best_arousal_features_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.426199,0.932735,0.787811,-0.239068,-0.290830,-0.161553,1.128864,-0.674310,0.320006,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,0.345904,-0.285526,1.021745,-0.039876,0.671714
1,4,0.113776,-1.858236,2.721361,2.193439,2.148017,2.355089,-1.742031,6.176068,-0.700916,...,0.004772,0.540440,4.181020,-0.456893,-0.005882,0.576715,-0.285526,0.213223,-0.846705,-0.053671
2,5,0.041793,-1.217561,1.860270,2.037130,4.061978,3.102229,-1.020908,2.654601,-0.566188,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.305337,-0.203477,-0.535816,-0.846705,-0.907128
3,6,0.271303,-0.298287,2.216631,1.113853,0.609811,1.478902,-0.885847,0.594623,0.001617,...,-0.196902,0.386979,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.179551,-0.846705,-0.907128
4,7,0.428761,0.136198,-0.722193,-0.640467,-0.407720,-0.322652,0.538127,-0.043632,0.325100,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,0.457974,0.503538,-0.375831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,0.111168,-0.138491,-0.410041,-0.341976,-0.376271,0.595024,-0.184984,0.349363,...,-0.196902,-0.312361,-0.251295,-0.129750,-0.278709,-0.374009,-0.285526,-0.443708,0.429159,3.825664
763,996,-0.027310,0.888387,-0.508924,-1.053345,0.208018,-0.004852,1.118426,-0.227602,1.396076,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.278709,-0.374009,-0.285526,-0.535816,-0.483101,-0.814508
764,997,-0.126972,2.111725,0.148585,-0.849312,-0.448361,-0.267697,1.583387,-0.099497,1.288636,...,-0.099370,-0.027929,-0.251295,0.099811,-0.278709,-0.150331,-0.285526,0.505638,-0.557852,-0.088299
765,999,0.280758,2.783108,0.388694,-0.947989,-0.296933,0.022632,1.359285,0.258205,1.737709,...,-0.196902,-0.312361,-0.251295,-0.456893,-0.229701,-0.374009,0.288011,-0.535816,0.072929,-0.798329


Essentia Best Valence + openSMILE GeMAPS

In [82]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_valence_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_valence_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised = df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_valence_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,-0.290830,-0.161553,-0.674310,0.595025,-0.452851,1.298902,-0.966508,-0.692188,-0.466809,...,0.010665,0.112443,-0.356517,0.241192,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513
1,4,2.148017,2.355089,6.176068,-2.027051,4.119475,1.198675,-0.692793,-0.875022,-2.504826,...,-2.582425,2.521296,-1.587073,-1.756907,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127
2,5,4.061978,3.102229,2.654601,-1.162994,2.245453,1.518401,0.480322,0.431641,0.529917,...,-2.393781,2.181989,-1.341486,-0.316982,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650
3,6,0.609811,1.478902,0.594623,-0.486537,0.191578,1.005714,-0.265029,-1.486659,-1.304917,...,-0.741640,0.768306,-1.876367,0.705660,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107
4,7,-0.407720,-0.322652,-0.043632,0.811939,1.317306,-0.202803,0.343143,1.010394,0.710881,...,0.469653,-0.687136,1.506562,-0.467269,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.446416,0.004970,-0.376761,1.148765,0.172968,-0.030536,...,0.612326,-0.673763,0.581939,0.317231,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172
763,996,0.208018,-0.004852,-0.227602,2.140284,-0.608213,0.416423,1.123782,0.880366,0.113897,...,0.502963,-0.119092,0.708211,1.645550,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591
764,997,-0.448361,-0.267697,-0.099497,2.052115,-0.313137,-0.577048,-1.869459,0.089641,-0.349415,...,0.785432,-0.498027,0.274327,1.052788,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979
765,999,-0.296933,0.022632,0.258205,2.324076,0.203926,-1.143870,-1.921844,-0.625854,-0.779872,...,0.605529,-0.365492,-0.185980,1.193421,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346


Essentia Best Valence + openSMILE eGeMAPS

In [83]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_valence_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_valence_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised = df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_valence_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,-0.290830,-0.161553,-0.674310,0.595025,-0.452851,1.298902,-0.966508,-0.692188,-0.466809,...,-0.356517,0.241192,-1.078395,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513,-1.197473
1,4,2.148017,2.355089,6.176068,-2.027051,4.119475,1.198675,-0.692793,-0.875022,-2.504826,...,-1.587073,-1.756907,2.595686,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127,1.014336
2,5,4.061978,3.102229,2.654601,-1.162994,2.245453,1.518401,0.480322,0.431641,0.529917,...,-1.341486,-0.316982,-0.350587,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650,0.425787
3,6,0.609811,1.478902,0.594623,-0.486537,0.191578,1.005714,-0.265029,-1.486659,-1.304917,...,-1.876367,0.705660,-1.005747,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107,0.429337
4,7,-0.407720,-0.322652,-0.043632,0.811939,1.317306,-0.202803,0.343143,1.010394,0.710881,...,1.506562,-0.467269,-0.478589,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730,0.385611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.341976,-0.376271,-0.184984,1.446416,0.004970,-0.376761,1.148765,0.172968,-0.030536,...,0.581939,0.317231,0.217800,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172,0.380340
763,996,0.208018,-0.004852,-0.227602,2.140284,-0.608213,0.416423,1.123782,0.880366,0.113897,...,0.708211,1.645550,0.171330,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591,0.647410
764,997,-0.448361,-0.267697,-0.099497,2.052115,-0.313137,-0.577048,-1.869459,0.089641,-0.349415,...,0.274327,1.052788,-0.688337,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979,0.387081
765,999,-0.296933,0.022632,0.258205,2.324076,0.203926,-1.143870,-1.921844,-0.625854,-0.779872,...,-0.185980,1.193421,0.299434,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346,0.977870


Essentia Best Arousal + openSMILE GeMAPS

In [84]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised = df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_arousal_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.426199,0.932735,0.787811,-0.239068,-0.290830,-0.161553,1.128864,-0.674310,0.320006,...,0.010665,0.112443,-0.356517,0.241192,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513
1,4,0.113776,-1.858236,2.721361,2.193439,2.148017,2.355089,-1.742031,6.176068,-0.700916,...,-2.582425,2.521296,-1.587073,-1.756907,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127
2,5,0.041793,-1.217561,1.860270,2.037130,4.061978,3.102229,-1.020908,2.654601,-0.566188,...,-2.393781,2.181989,-1.341486,-0.316982,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650
3,6,0.271303,-0.298287,2.216631,1.113853,0.609811,1.478902,-0.885847,0.594623,0.001617,...,-0.741640,0.768306,-1.876367,0.705660,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107
4,7,0.428761,0.136198,-0.722193,-0.640467,-0.407720,-0.322652,0.538127,-0.043632,0.325100,...,0.469653,-0.687136,1.506562,-0.467269,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,0.111168,-0.138491,-0.410041,-0.341976,-0.376271,0.595024,-0.184984,0.349363,...,0.612326,-0.673763,0.581939,0.317231,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172
763,996,-0.027310,0.888387,-0.508924,-1.053345,0.208018,-0.004852,1.118426,-0.227602,1.396076,...,0.502963,-0.119092,0.708211,1.645550,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591
764,997,-0.126972,2.111725,0.148585,-0.849312,-0.448361,-0.267697,1.583387,-0.099497,1.288636,...,0.785432,-0.498027,0.274327,1.052788,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979
765,999,0.280758,2.783108,0.388694,-0.947989,-0.296933,0.022632,1.359285,0.258205,1.737709,...,0.605529,-0.365492,-0.185980,1.193421,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346


Essentia Best Arousal + openSMILE eGeMAPS

In [85]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised = df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised.to_csv(get_pmemo_path('processed/features/integrated/standardised_essentia_best_arousal_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.426199,0.932735,0.787811,-0.239068,-0.290830,-0.161553,1.128864,-0.674310,0.320006,...,-0.356517,0.241192,-1.078395,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513,-1.197473
1,4,0.113776,-1.858236,2.721361,2.193439,2.148017,2.355089,-1.742031,6.176068,-0.700916,...,-1.587073,-1.756907,2.595686,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127,1.014336
2,5,0.041793,-1.217561,1.860270,2.037130,4.061978,3.102229,-1.020908,2.654601,-0.566188,...,-1.341486,-0.316982,-0.350587,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650,0.425787
3,6,0.271303,-0.298287,2.216631,1.113853,0.609811,1.478902,-0.885847,0.594623,0.001617,...,-1.876367,0.705660,-1.005747,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107,0.429337
4,7,0.428761,0.136198,-0.722193,-0.640467,-0.407720,-0.322652,0.538127,-0.043632,0.325100,...,1.506562,-0.467269,-0.478589,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730,0.385611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,-0.249560,0.111168,-0.138491,-0.410041,-0.341976,-0.376271,0.595024,-0.184984,0.349363,...,0.581939,0.317231,0.217800,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172,0.380340
763,996,-0.027310,0.888387,-0.508924,-1.053345,0.208018,-0.004852,1.118426,-0.227602,1.396076,...,0.708211,1.645550,0.171330,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591,0.647410
764,997,-0.126972,2.111725,0.148585,-0.849312,-0.448361,-0.267697,1.583387,-0.099497,1.288636,...,0.274327,1.052788,-0.688337,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979,0.387081
765,999,0.280758,2.783108,0.388694,-0.947989,-0.296933,0.022632,1.359285,0.258205,1.737709,...,-0.185980,1.193421,0.299434,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346,0.977870


Select the 767 songs

In [88]:
df_pmemo_opensmile_gemaps_features = df_pmemo_opensmile_gemaps_features[df_pmemo_opensmile_gemaps_features['song_id'].isin(song_ids)]
df_pmemo_opensmile_gemaps_features

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,33.337425,0.292724,26.119843,36.004000,40.604490,14.484646,305.86212,450.777680,146.386750,...,-9.104095,18.212526,-0.064266,-0.011526,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119
1,4,28.663845,0.257283,22.888890,28.891180,34.114970,11.226082,181.81364,127.028496,60.697544,...,-23.523148,34.651325,-0.106618,-0.022618,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009
2,5,17.131292,0.444438,12.300488,12.616845,24.207296,11.906808,164.74464,236.056610,100.181330,...,-22.474178,32.335780,-0.098165,-0.014625,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928
3,6,27.172243,0.447355,13.918818,25.472513,43.135548,29.216728,311.85764,277.610320,97.866790,...,-13.287336,22.688347,-0.116574,-0.008948,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769
4,7,27.857573,0.326031,16.902195,30.955670,36.047790,19.145596,331.95724,427.345400,121.956184,...,-6.551864,12.755936,-0.000146,-0.015459,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,28.589037,0.445477,16.152197,24.940647,44.368267,28.216070,673.32890,974.933530,230.685240,...,-5.758517,12.847198,-0.031968,-0.011104,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384
790,996,34.963680,0.308940,23.050102,39.976578,43.117737,20.067635,337.40967,449.238000,146.028760,...,-6.366639,16.632452,-0.027622,-0.003730,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034
791,997,23.521826,0.515144,13.664935,16.773577,37.080620,23.415684,354.20798,692.777160,256.592250,...,-4.795950,14.046479,-0.042555,-0.007021,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109
792,999,28.412848,0.529474,14.129504,21.055300,48.873077,34.743572,832.96234,972.408140,152.829000,...,-5.796313,14.950939,-0.058397,-0.006240,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921


In [89]:
df_pmemo_opensmile_egemaps_features = df_pmemo_opensmile_egemaps_features[df_pmemo_opensmile_egemaps_features['song_id'].isin(song_ids)]
df_pmemo_opensmile_egemaps_features

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,33.337425,0.292724,26.119843,36.004000,40.604490,14.484646,305.86212,450.777680,146.386750,...,-0.064266,-0.011526,3.343537,3.623868,3.161190,0.175842,0.189566,0.130000,0.104119,-14.376077
1,4,28.663845,0.257283,22.888890,28.891180,34.114970,11.226082,181.81364,127.028496,60.697544,...,-0.106618,-0.022618,9.082945,4.177080,0.500715,0.465000,0.899490,1.410667,1.558009,-8.796783
2,5,17.131292,0.444438,12.300488,12.616845,24.207296,11.906808,164.74464,236.056610,100.181330,...,-0.098165,-0.014625,4.480471,4.177577,2.227251,0.301123,0.359941,0.153117,0.170928,-10.281398
3,6,27.172243,0.447355,13.918818,25.472513,43.135548,29.216728,311.85764,277.610320,97.866790,...,-0.116574,-0.008948,3.457023,3.565365,1.734694,0.103922,0.143323,0.489787,0.522769,-10.272443
4,7,27.857573,0.326031,16.902195,30.955670,36.047790,19.145596,331.95724,427.345400,121.956184,...,-0.000146,-0.015459,4.280516,3.375818,2.485330,0.342917,0.373298,0.054167,0.046226,-10.382742
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
789,993,28.589037,0.445477,16.152197,24.940647,44.368267,28.216070,673.32890,974.933530,230.685240,...,-0.031968,-0.011104,5.368369,2.822581,3.123903,0.203596,0.721575,0.106313,0.100384,-10.396038
790,996,34.963680,0.308940,23.050102,39.976578,43.117737,20.067635,337.40967,449.238000,146.028760,...,-0.027622,-0.003730,5.295775,3.517186,3.542834,0.147345,0.171755,0.125217,0.128034,-9.722353
791,997,23.521826,0.515144,13.664935,16.773577,37.080620,23.415684,354.20798,692.777160,256.592250,...,-0.042555,-0.007021,3.952860,3.032914,2.966540,0.219593,0.315522,0.112945,0.254109,-10.379034
792,999,28.412848,0.529474,14.129504,21.055300,48.873077,34.743572,832.96234,972.408140,152.829000,...,-0.058397,-0.006240,5.495892,2.307101,3.567762,0.128705,0.298521,0.140853,0.374921,-8.888767


openSMILE GeMAPS

In [90]:
# Fit and transform the selected columns
df_pmemo_opensmile_gemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_opensmile_gemaps_features), columns=df_pmemo_opensmile_gemaps_features.columns)

df_pmemo_opensmile_gemaps_standardised = df_pmemo_opensmile_gemaps_standardised.drop('song_id', axis=1)
df_pmemo_opensmile_gemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_opensmile_gemaps_standardised.to_csv(get_pmemo_path('processed/features/standardised_opensmile_gemaps_features.csv'))

df_pmemo_opensmile_gemaps_standardised

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,1.434258,-1.328035,2.376117,1.515872,0.629944,-0.557790,-0.306150,-0.222493,-0.480440,...,0.010665,0.112443,-0.356517,0.241192,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513
1,4,0.535097,-1.724856,1.587093,0.622601,-0.144518,-0.966115,-1.054108,-1.558729,-1.832491,...,-2.582425,2.521296,-1.587073,-1.756907,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127
2,5,-1.683680,0.370638,-0.998679,-1.421227,-1.326903,-0.880814,-1.157027,-1.108728,-1.209495,...,-2.393781,2.181989,-1.341486,-0.316982,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650
3,6,0.248124,0.403294,-0.603470,0.193264,0.932001,1.288260,-0.269999,-0.937220,-1.246015,...,-0.741640,0.768306,-1.876367,0.705660,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107
4,7,0.379976,-0.955111,0.125094,0.881872,0.086146,0.026265,-0.148807,-0.319207,-0.865919,...,0.469653,-0.687136,1.506562,-0.467269,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.520704,0.382269,-0.058061,0.126470,1.079114,1.162870,1.909517,1.940898,0.849668,...,0.612326,-0.673763,0.581939,0.317231,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172
763,996,1.747137,-1.146469,1.626462,2.014772,0.929876,0.141804,-0.115931,-0.228848,-0.486089,...,0.502963,-0.119092,0.708211,1.645550,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591
764,997,-0.454189,1.162305,-0.665471,-0.899200,0.209404,0.561342,-0.014645,0.776331,1.258443,...,0.785432,-0.498027,0.274327,1.052788,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979
765,999,0.486807,1.322742,-0.552019,-0.361476,1.616720,1.980819,2.872037,1.930474,-0.378791,...,0.605529,-0.365492,-0.185980,1.193421,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346


openSMILE eGeMAPS

In [91]:
# Fit and transform the selected columns
df_pmemo_opensmile_egemaps_standardised = pd.DataFrame(scaler.fit_transform(df_pmemo_opensmile_egemaps_features), columns=df_pmemo_opensmile_egemaps_features.columns)

df_pmemo_opensmile_egemaps_standardised = df_pmemo_opensmile_egemaps_standardised.drop('song_id', axis=1)
df_pmemo_opensmile_egemaps_standardised.insert(0, column='song_id', value=song_ids)

df_pmemo_opensmile_egemaps_standardised.to_csv(get_pmemo_path('processed/features/standardised_opensmile_egemaps_features.csv'))

df_pmemo_opensmile_egemaps_standardised

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,1.434258,-1.328035,2.376117,1.515872,0.629944,-0.557790,-0.306150,-0.222493,-0.480440,...,-0.356517,0.241192,-1.078395,0.626061,0.381042,-0.336055,-0.374205,0.262637,-0.077513,-1.197473
1,4,0.535097,-1.724856,1.587093,0.622601,-0.144518,-0.966115,-1.054108,-1.558729,-1.832491,...,-1.587073,-1.756907,2.595686,1.246634,-2.081171,0.077305,0.364584,13.929680,12.635127,1.014336
2,5,-1.683680,0.370638,-0.998679,-1.421227,-1.326903,-0.880814,-1.157027,-1.108728,-1.209495,...,-1.341486,-0.316982,-0.350587,1.247192,-0.483298,-0.156962,-0.196903,0.509336,0.506650,0.425787
3,6,0.248124,0.403294,-0.603470,0.193264,0.932001,1.288260,-0.269999,-0.937220,-1.246015,...,-1.876367,0.705660,-1.005747,0.560435,-0.939150,-0.438867,-0.422328,4.102221,3.583107,0.429337
4,7,0.379976,-0.955111,0.125094,0.881872,0.086146,0.026265,-0.148807,-0.319207,-0.865919,...,1.506562,-0.467269,-0.478589,0.347808,-0.244452,-0.097217,-0.183003,-0.546643,-0.583730,0.385611
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.520704,0.382269,-0.058061,0.126470,1.079114,1.162870,1.909517,1.940898,0.849668,...,0.581939,0.317231,0.217800,-0.272794,0.346534,-0.296380,0.179435,0.009848,-0.110172,0.380340
763,996,1.747137,-1.146469,1.626462,2.014772,0.929876,0.141804,-0.115931,-0.228848,-0.486089,...,0.708211,1.645550,0.171330,0.506390,0.734247,-0.376792,-0.392740,0.211598,0.131591,0.647410
764,997,-0.454189,1.162305,-0.665471,-0.899200,0.209404,0.561342,-0.014645,0.776331,1.258443,...,0.274327,1.052788,-0.688337,-0.036849,0.200898,-0.273511,-0.243128,0.080631,1.233979,0.387081
765,999,0.486807,1.322742,-0.552019,-0.361476,1.616720,1.980819,2.872037,1.930474,-0.378791,...,-0.185980,1.193421,0.299434,-0.851040,0.757317,-0.403438,-0.260820,0.378455,2.290346,0.977870


### Data Normalisation

In [92]:
scaler = MinMaxScaler()

Essentia All + openSMILE ComParE2016

In [93]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_compare2016), columns=df_pmemo_integrated_essentia_all_opensmile_compare2016.columns)

df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised = df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_all_opensmile_compare2016_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_compare2016_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,0.984985,0.399331,0.312101,0.092938,0.244844,0.579073,0.801952,0.162367,0.379782,...,0.303279,0.399650,0.392199,0.392725,0.000000e+00,0.617706,0.370658,0.244224,0.351489,0.401564
1,4,0.952336,0.583063,0.725248,0.500226,0.681324,0.071979,0.652588,0.140435,0.549745,...,0.171655,0.304287,0.395144,0.395924,2.750000e-07,0.619267,0.308409,0.446880,0.316289,0.365470
2,5,0.944813,0.842132,0.657624,0.808006,0.789879,0.188384,0.721677,0.204492,0.371161,...,0.296765,0.547101,0.326090,0.325708,1.000000e+00,0.431744,0.306564,0.316406,0.362612,0.299804
3,6,0.968798,0.774950,0.492522,0.206105,0.465465,0.355408,0.787135,0.284071,0.404039,...,0.274015,0.542942,0.280851,0.281222,0.000000e+00,0.316465,0.221165,0.212899,0.246664,0.257999
4,7,0.985253,0.318126,0.213703,0.076222,0.238393,0.434349,0.762188,0.093188,0.314749,...,0.201938,0.359877,0.238556,0.241381,7.192500e-04,0.444872,0.169534,0.224464,0.149222,0.150258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.386337,0.253315,0.134541,0.257252,0.429802,0.775551,0.189531,0.397602,...,0.305339,0.378964,0.381056,0.381028,1.000000e+00,0.652887,0.302969,0.449552,0.361547,0.410599
763,996,0.937592,0.333683,0.171502,0.162668,0.275346,0.571015,0.886463,0.188870,0.232506,...,0.340795,0.454177,0.326786,0.326812,0.000000e+00,0.574858,0.416359,0.420200,0.355516,0.274867
764,997,0.927177,0.428588,0.171490,0.044242,0.185788,0.793285,0.870624,0.456700,0.278026,...,0.328619,0.620072,0.240284,0.240155,1.000000e+00,0.358409,0.307213,0.314944,0.268875,0.333213
765,999,0.969786,0.398838,0.151398,0.044676,0.197835,0.915269,0.927815,0.720355,0.184882,...,0.198969,0.484409,0.166886,0.166620,1.000000e+00,0.470009,0.230894,0.267673,0.217943,0.279448


Essentia All + openSMILE emobase

In [94]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_emobase_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_emobase), columns=df_pmemo_integrated_essentia_all_opensmile_emobase.columns)

df_pmemo_integrated_essentia_all_opensmile_emobase_normalised = df_pmemo_integrated_essentia_all_opensmile_emobase_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_emobase_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_emobase_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_all_opensmile_emobase_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_emobase_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,0.984985,0.399331,0.312101,0.092938,0.244844,0.579073,0.801952,0.162367,0.379782,...,0.403700,0.588364,0.125589,0.047908,0.758451,1.0,0.203203,0.243753,0.203203,0.229604
1,4,0.952336,0.583063,0.725248,0.500226,0.681324,0.071979,0.652588,0.140435,0.549745,...,0.096648,0.233917,0.067041,0.087292,1.000000,1.0,0.000000,0.000000,0.000000,0.000000
2,5,0.944813,0.842132,0.657624,0.808006,0.789879,0.188384,0.721677,0.204492,0.371161,...,0.527297,0.689686,0.048732,0.004805,0.576701,1.0,0.320631,0.427163,0.320631,0.384180
3,6,0.968798,0.774950,0.492522,0.206105,0.465465,0.355408,0.787135,0.284071,0.404039,...,0.704682,0.817662,0.028320,0.000000,0.266937,1.0,0.810709,0.739754,0.810709,0.796290
4,7,0.985253,0.318126,0.213703,0.076222,0.238393,0.434349,0.762188,0.093188,0.314749,...,0.258699,0.447329,0.151428,0.070450,0.964792,1.0,0.013987,0.035529,0.013987,0.025452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.386337,0.253315,0.134541,0.257252,0.429802,0.775551,0.189531,0.397602,...,0.345910,0.535419,0.087078,0.049667,0.985953,1.0,0.126867,0.014175,0.126867,0.072341
763,996,0.937592,0.333683,0.171502,0.162668,0.275346,0.571015,0.886463,0.188870,0.232506,...,0.262732,0.451840,0.039029,0.016703,1.000000,1.0,0.000000,0.000000,0.000000,0.000000
764,997,0.927177,0.428588,0.171490,0.044242,0.185788,0.793285,0.870624,0.456700,0.278026,...,0.262670,0.451447,0.138694,0.063369,0.900128,1.0,0.039529,0.100783,0.039529,0.072123
765,999,0.969786,0.398838,0.151398,0.044676,0.197835,0.915269,0.927815,0.720355,0.184882,...,0.352459,0.541678,0.073032,0.037717,1.000000,1.0,0.002002,0.000000,0.002002,0.001027


Essentia All + openSMILE GeMAPs

In [95]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_gemaps), columns=df_pmemo_integrated_essentia_all_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised = df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_all_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_gemaps_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.984985,0.399331,0.312101,0.092938,0.244844,0.579073,0.801952,0.162367,0.379782,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.952336,0.583063,0.725248,0.500226,0.681324,0.071979,0.652588,0.140435,0.549745,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.944813,0.842132,0.657624,0.808006,0.789879,0.188384,0.721677,0.204492,0.371161,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.968798,0.774950,0.492522,0.206105,0.465465,0.355408,0.787135,0.284071,0.404039,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.985253,0.318126,0.213703,0.076222,0.238393,0.434349,0.762188,0.093188,0.314749,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.386337,0.253315,0.134541,0.257252,0.429802,0.775551,0.189531,0.397602,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.937592,0.333683,0.171502,0.162668,0.275346,0.571015,0.886463,0.188870,0.232506,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.927177,0.428588,0.171490,0.044242,0.185788,0.793285,0.870624,0.456700,0.278026,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.969786,0.398838,0.151398,0.044676,0.197835,0.915269,0.927815,0.720355,0.184882,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641


Essentia All + openSMILE eGeMAPS

In [96]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_all_opensmile_egemaps), columns=df_pmemo_integrated_essentia_all_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised = df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_all_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_all_opensmile_egemaps_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_crest.mean,lowlevel.barkbands_flatness_db.mean,lowlevel.barkbands_kurtosis.mean,lowlevel.barkbands_skewness.mean,lowlevel.barkbands_spread.mean,lowlevel.dissonance.mean,lowlevel.dynamic_complexity,lowlevel.erbbands_crest.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.984985,0.399331,0.312101,0.092938,0.244844,0.579073,0.801952,0.162367,0.379782,...,0.360946,0.477083,0.354845,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829,0.644904
1,4,0.952336,0.583063,0.725248,0.500226,0.681324,0.071979,0.652588,0.140435,0.549745,...,0.194518,0.197515,0.997077,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000,0.893336
2,5,0.944813,0.842132,0.657624,0.808006,0.789879,0.188384,0.721677,0.204492,0.371161,...,0.227733,0.398985,0.482067,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709,0.827230
3,6,0.968798,0.774950,0.492522,0.206105,0.465465,0.355408,0.787135,0.284071,0.404039,...,0.155392,0.542070,0.367544,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536,0.827628
4,7,0.985253,0.318126,0.213703,0.076222,0.238393,0.434349,0.762188,0.093188,0.314749,...,0.612921,0.377957,0.459692,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670,0.822717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.386337,0.253315,0.134541,0.257252,0.429802,0.775551,0.189531,0.397602,...,0.487869,0.487722,0.581421,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431,0.822125
763,996,0.937592,0.333683,0.171502,0.162668,0.275346,0.571015,0.886463,0.188870,0.232506,...,0.504947,0.673577,0.573298,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178,0.852123
764,997,0.927177,0.428588,0.171490,0.044242,0.185788,0.793285,0.870624,0.456700,0.278026,...,0.446266,0.590639,0.423028,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099,0.822882
765,999,0.969786,0.398838,0.151398,0.044676,0.197835,0.915269,0.927815,0.720355,0.184882,...,0.384011,0.610316,0.595691,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641,0.889240


Essentia Best Overall + openSMILE ComParE2016

In [97]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_compare2016), columns=df_pmemo_integrated_essentia_best_overall_opensmile_compare2016.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised = df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_overall_opensmile_compare2016_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_compare2016_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,1,0.064362,0.214289,0.125964,0.267193,0.482615,0.533418,0.570785,0.423573,0.085510,...,0.303279,0.399650,0.392199,0.392725,0.000000e+00,0.617706,0.370658,0.244224,0.351489,0.401564
1,4,0.345452,0.539877,0.949078,0.304507,0.461919,0.163914,0.158169,0.721330,0.116353,...,0.171655,0.304287,0.395144,0.395924,2.750000e-07,0.619267,0.308409,0.446880,0.316289,0.365470
2,5,0.566046,0.636537,0.525953,0.464434,0.609828,0.714130,0.649047,0.750001,0.098333,...,0.296765,0.547101,0.326090,0.325708,1.000000e+00,0.431744,0.306564,0.316406,0.362612,0.299804
3,6,0.168166,0.426521,0.278434,0.362823,0.392684,0.381464,0.311019,0.736078,0.112022,...,0.274015,0.542942,0.280851,0.281222,0.000000e+00,0.316465,0.221165,0.212899,0.246664,0.257999
4,7,0.050890,0.193447,0.201743,0.445732,0.675341,0.746939,0.781565,0.814618,0.089398,...,0.201938,0.359877,0.238556,0.241381,7.192500e-04,0.444872,0.169534,0.224464,0.149222,0.150258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.555560,0.580548,0.612516,0.656733,0.498154,0.419939,...,0.305339,0.378964,0.381056,0.381028,1.000000e+00,0.652887,0.302969,0.449552,0.361547,0.410599
763,996,0.121857,0.234562,0.179638,0.552154,0.660622,0.638703,0.659746,0.145396,0.362563,...,0.340795,0.454177,0.326786,0.326812,0.000000e+00,0.574858,0.416359,0.420200,0.355516,0.274867
764,997,0.046206,0.200557,0.195031,0.144097,0.571115,0.554702,0.580168,0.489563,0.143965,...,0.328619,0.620072,0.240284,0.240155,1.000000e+00,0.358409,0.307213,0.314944,0.268875,0.333213
765,999,0.063658,0.238118,0.238011,0.136955,0.490124,0.476657,0.512086,0.646732,0.018598,...,0.198969,0.484409,0.166886,0.166620,1.000000e+00,0.470009,0.230894,0.267673,0.217943,0.279448


Essentia Best Overall + openSMILE emobase

In [98]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_emobase), columns=df_pmemo_integrated_essentia_best_overall_opensmile_emobase.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised = df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_overall_opensmile_emobase_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_emobase_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,F0env_sma_de_linregerrQ,F0env_sma_de_stddev,F0env_sma_de_skewness,F0env_sma_de_kurtosis,F0env_sma_de_quartile1,F0env_sma_de_quartile2,F0env_sma_de_quartile3,F0env_sma_de_iqr1-2,F0env_sma_de_iqr2-3,F0env_sma_de_iqr1-3
0,1,0.064362,0.214289,0.125964,0.267193,0.482615,0.533418,0.570785,0.423573,0.085510,...,0.403700,0.588364,0.125589,0.047908,0.758451,1.0,0.203203,0.243753,0.203203,0.229604
1,4,0.345452,0.539877,0.949078,0.304507,0.461919,0.163914,0.158169,0.721330,0.116353,...,0.096648,0.233917,0.067041,0.087292,1.000000,1.0,0.000000,0.000000,0.000000,0.000000
2,5,0.566046,0.636537,0.525953,0.464434,0.609828,0.714130,0.649047,0.750001,0.098333,...,0.527297,0.689686,0.048732,0.004805,0.576701,1.0,0.320631,0.427163,0.320631,0.384180
3,6,0.168166,0.426521,0.278434,0.362823,0.392684,0.381464,0.311019,0.736078,0.112022,...,0.704682,0.817662,0.028320,0.000000,0.266937,1.0,0.810709,0.739754,0.810709,0.796290
4,7,0.050890,0.193447,0.201743,0.445732,0.675341,0.746939,0.781565,0.814618,0.089398,...,0.258699,0.447329,0.151428,0.070450,0.964792,1.0,0.013987,0.035529,0.013987,0.025452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.555560,0.580548,0.612516,0.656733,0.498154,0.419939,...,0.345910,0.535419,0.087078,0.049667,0.985953,1.0,0.126867,0.014175,0.126867,0.072341
763,996,0.121857,0.234562,0.179638,0.552154,0.660622,0.638703,0.659746,0.145396,0.362563,...,0.262732,0.451840,0.039029,0.016703,1.000000,1.0,0.000000,0.000000,0.000000,0.000000
764,997,0.046206,0.200557,0.195031,0.144097,0.571115,0.554702,0.580168,0.489563,0.143965,...,0.262670,0.451447,0.138694,0.063369,0.900128,1.0,0.039529,0.100783,0.039529,0.072123
765,999,0.063658,0.238118,0.238011,0.136955,0.490124,0.476657,0.512086,0.646732,0.018598,...,0.352459,0.541678,0.073032,0.037717,1.000000,1.0,0.002002,0.000000,0.002002,0.001027


Essentia Best Overall + openSMILE GeMAPS

In [99]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_overall_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised = df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_overall_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_gemaps_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.064362,0.214289,0.125964,0.267193,0.482615,0.533418,0.570785,0.423573,0.085510,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.345452,0.539877,0.949078,0.304507,0.461919,0.163914,0.158169,0.721330,0.116353,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.566046,0.636537,0.525953,0.464434,0.609828,0.714130,0.649047,0.750001,0.098333,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.168166,0.426521,0.278434,0.362823,0.392684,0.381464,0.311019,0.736078,0.112022,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.050890,0.193447,0.201743,0.445732,0.675341,0.746939,0.781565,0.814618,0.089398,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.555560,0.580548,0.612516,0.656733,0.498154,0.419939,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.121857,0.234562,0.179638,0.552154,0.660622,0.638703,0.659746,0.145396,0.362563,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.046206,0.200557,0.195031,0.144097,0.571115,0.554702,0.580168,0.489563,0.143965,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.063658,0.238118,0.238011,0.136955,0.490124,0.476657,0.512086,0.646732,0.018598,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641


Essentia Best Overall + openSMILE eGeMAPS

In [100]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_overall_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_overall_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised = df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_overall_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_overall_opensmile_egemaps_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.064362,0.214289,0.125964,0.267193,0.482615,0.533418,0.570785,0.423573,0.085510,...,0.360946,0.477083,0.354845,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829,0.644904
1,4,0.345452,0.539877,0.949078,0.304507,0.461919,0.163914,0.158169,0.721330,0.116353,...,0.194518,0.197515,0.997077,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000,0.893336
2,5,0.566046,0.636537,0.525953,0.464434,0.609828,0.714130,0.649047,0.750001,0.098333,...,0.227733,0.398985,0.482067,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709,0.827230
3,6,0.168166,0.426521,0.278434,0.362823,0.392684,0.381464,0.311019,0.736078,0.112022,...,0.155392,0.542070,0.367544,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536,0.827628
4,7,0.050890,0.193447,0.201743,0.445732,0.675341,0.746939,0.781565,0.814618,0.089398,...,0.612921,0.377957,0.459692,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670,0.822717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.555560,0.580548,0.612516,0.656733,0.498154,0.419939,...,0.487869,0.487722,0.581421,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431,0.822125
763,996,0.121857,0.234562,0.179638,0.552154,0.660622,0.638703,0.659746,0.145396,0.362563,...,0.504947,0.673577,0.573298,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178,0.852123
764,997,0.046206,0.200557,0.195031,0.144097,0.571115,0.554702,0.580168,0.489563,0.143965,...,0.446266,0.590639,0.423028,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099,0.822882
765,999,0.063658,0.238118,0.238011,0.136955,0.490124,0.476657,0.512086,0.646732,0.018598,...,0.384011,0.610316,0.595691,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641,0.889240


Essentia Best Overall

In [101]:
# Fit and transform the selected columns
df_essentia_best_overall_features_normalised = pd.DataFrame(scaler.fit_transform(df_essentia_best_overall_features), columns=df_essentia_best_overall_features.columns)

df_essentia_best_overall_features_normalised = df_essentia_best_overall_features_normalised.drop('song_id', axis=1)
df_essentia_best_overall_features_normalised.insert(0, column='song_id', value=song_ids)

df_essentia_best_overall_features_normalised.to_csv(get_pmemo_path('processed/features/normalised_essentia_best_overall_features.csv'))

df_essentia_best_overall_features_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.064362,0.214289,0.125964,0.267193,0.482615,0.533418,0.570785,0.423573,0.085510,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.089160,0.000000,0.223899,0.098657,0.288181
1,4,0.345452,0.539877,0.949078,0.304507,0.461919,0.163914,0.158169,0.721330,0.116353,...,0.016972,0.077021,0.402838,0.000000,0.030457,0.117746,0.000000,0.107674,0.000000,0.155779
2,5,0.566046,0.636537,0.525953,0.464434,0.609828,0.714130,0.649047,0.750001,0.098333,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.008505,0.007329,0.000000,0.000000,0.000000
3,6,0.168166,0.426521,0.278434,0.362823,0.392684,0.381464,0.311019,0.736078,0.112022,...,0.000000,0.063161,0.000000,0.000000,0.000000,0.000000,0.000000,0.051213,0.000000,0.000000
4,7,0.050890,0.193447,0.201743,0.445732,0.675341,0.746939,0.781565,0.814618,0.089398,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.142857,0.165104,0.096976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.555560,0.580548,0.612516,0.656733,0.498154,0.419939,...,0.000000,0.000000,0.000000,0.031499,0.000000,0.000000,0.000000,0.013240,0.156009,0.863862
763,996,0.121857,0.234562,0.179638,0.552154,0.660622,0.638703,0.659746,0.145396,0.362563,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.016906
764,997,0.046206,0.200557,0.195031,0.144097,0.571115,0.554702,0.580168,0.489563,0.143965,...,0.008208,0.025688,0.000000,0.053602,0.000000,0.027702,0.000000,0.149709,0.035320,0.149458
765,999,0.063658,0.238118,0.238011,0.136955,0.490124,0.476657,0.512086,0.646732,0.018598,...,0.000000,0.000000,0.000000,0.000000,0.005471,0.000000,0.051231,0.000000,0.112450,0.019859


Essentia Best Valence

In [102]:
# Fit and transform the selected columns
df_essentia_best_valence_features_normalised = pd.DataFrame(scaler.fit_transform(df_essentia_best_valence_features), columns=df_essentia_best_valence_features.columns)

df_essentia_best_valence_features_normalised = df_essentia_best_valence_features_normalised.drop('song_id', axis=1)
df_essentia_best_valence_features_normalised.insert(0, column='song_id', value=song_ids)

df_essentia_best_valence_features_normalised.to_csv(get_pmemo_path('processed/features/normalised_essentia_best_valence_features.csv'))

df_essentia_best_valence_features_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.064362,0.214289,0.125964,0.416985,0.086404,0.810704,0.267193,0.482615,0.533418,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.089160,0.000000,0.223899,0.098657,0.288181
1,4,0.345452,0.539877,0.949078,0.050339,0.462009,0.795216,0.304507,0.461919,0.163914,...,0.016972,0.077021,0.402838,0.000000,0.030457,0.117746,0.000000,0.107674,0.000000,0.155779
2,5,0.566046,0.636537,0.525953,0.171160,0.308063,0.844622,0.464434,0.609828,0.714130,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.008505,0.007329,0.000000,0.000000,0.000000
3,6,0.168166,0.426521,0.278434,0.265749,0.139343,0.765399,0.362823,0.392684,0.381464,...,0.000000,0.063161,0.000000,0.000000,0.000000,0.000000,0.000000,0.051213,0.000000,0.000000
4,7,0.050890,0.193447,0.201743,0.447316,0.231818,0.578654,0.445732,0.675341,0.746939,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.142857,0.165104,0.096976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.536035,0.124013,0.551773,0.555560,0.580548,0.612516,...,0.000000,0.000000,0.000000,0.031499,0.000000,0.000000,0.000000,0.013240,0.156009,0.863862
763,996,0.121857,0.234562,0.179638,0.633059,0.073642,0.674339,0.552154,0.660622,0.638703,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.016906
764,997,0.046206,0.200557,0.195031,0.620730,0.097882,0.520823,0.144097,0.571115,0.554702,...,0.008208,0.025688,0.000000,0.053602,0.000000,0.027702,0.000000,0.149709,0.035320,0.149458
765,999,0.063658,0.238118,0.238011,0.658759,0.140357,0.433236,0.136955,0.490124,0.476657,...,0.000000,0.000000,0.000000,0.000000,0.005471,0.000000,0.051231,0.000000,0.112450,0.019859


Essentia Best Arousal

In [103]:
# Fit and transform the selected columns
df_essentia_best_arousal_features_normalised = pd.DataFrame(scaler.fit_transform(df_essentia_best_arousal_features), columns=df_essentia_best_arousal_features.columns)

df_essentia_best_arousal_features_normalised = df_essentia_best_arousal_features_normalised.drop('song_id', axis=1)
df_essentia_best_arousal_features_normalised.insert(0, column='song_id', value=song_ids)

df_essentia_best_arousal_features_normalised.to_csv(get_pmemo_path('processed/features/normalised_essentia_best_arousal_features.csv'))

df_essentia_best_arousal_features_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,1,0.984985,0.579073,0.466983,0.314570,0.064362,0.214289,0.548723,0.125964,0.763577,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.089160,0.000000,0.223899,0.098657,0.288181
1,4,0.952336,0.071979,0.811687,0.690897,0.345452,0.539877,0.075821,0.949078,0.593323,...,0.016972,0.077021,0.402838,0.000000,0.030457,0.117746,0.000000,0.107674,0.000000,0.155779
2,5,0.944813,0.188384,0.658175,0.666715,0.566046,0.636537,0.194606,0.525953,0.615791,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.008505,0.007329,0.000000,0.000000,0.000000
3,6,0.968798,0.355408,0.721706,0.523877,0.168166,0.426521,0.216854,0.278434,0.710481,...,0.000000,0.063161,0.000000,0.000000,0.000000,0.000000,0.000000,0.051213,0.000000,0.000000
4,7,0.985253,0.434349,0.197786,0.252470,0.050890,0.193447,0.451415,0.201743,0.764426,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.142857,0.165104,0.096976
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.429802,0.301846,0.288119,0.058467,0.186511,0.460787,0.184759,0.768472,...,0.000000,0.000000,0.000000,0.031499,0.000000,0.000000,0.000000,0.013240,0.156009,0.863862
763,996,0.937592,0.571015,0.235807,0.188595,0.121857,0.234562,0.547003,0.179638,0.943027,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.016906
764,997,0.927177,0.793285,0.353024,0.220160,0.046206,0.200557,0.623593,0.195031,0.925110,...,0.008208,0.025688,0.000000,0.053602,0.000000,0.027702,0.000000,0.149709,0.035320,0.149458
765,999,0.969786,0.915269,0.395830,0.204894,0.063658,0.238118,0.586678,0.238011,1.000000,...,0.000000,0.000000,0.000000,0.000000,0.005471,0.000000,0.051231,0.000000,0.112450,0.019859


Essentia Best Valence + openSMILE GeMAPS

In [104]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_valence_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_valence_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised = df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_valence_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_valence_opensmile_gemaps_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.064362,0.214289,0.125964,0.416985,0.086404,0.810704,0.267193,0.482615,0.533418,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.345452,0.539877,0.949078,0.050339,0.462009,0.795216,0.304507,0.461919,0.163914,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.566046,0.636537,0.525953,0.171160,0.308063,0.844622,0.464434,0.609828,0.714130,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.168166,0.426521,0.278434,0.265749,0.139343,0.765399,0.362823,0.392684,0.381464,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.050890,0.193447,0.201743,0.447316,0.231818,0.578654,0.445732,0.675341,0.746939,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.536035,0.124013,0.551773,0.555560,0.580548,0.612516,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.121857,0.234562,0.179638,0.633059,0.073642,0.674339,0.552154,0.660622,0.638703,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.046206,0.200557,0.195031,0.620730,0.097882,0.520823,0.144097,0.571115,0.554702,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.063658,0.238118,0.238011,0.658759,0.140357,0.433236,0.136955,0.490124,0.476657,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641


Essentia Best Valence + openSMILE eGeMAPS

In [105]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_valence_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_valence_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised = df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_valence_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_valence_opensmile_egemaps_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.064362,0.214289,0.125964,0.416985,0.086404,0.810704,0.267193,0.482615,0.533418,...,0.360946,0.477083,0.354845,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829,0.644904
1,4,0.345452,0.539877,0.949078,0.050339,0.462009,0.795216,0.304507,0.461919,0.163914,...,0.194518,0.197515,0.997077,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000,0.893336
2,5,0.566046,0.636537,0.525953,0.171160,0.308063,0.844622,0.464434,0.609828,0.714130,...,0.227733,0.398985,0.482067,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709,0.827230
3,6,0.168166,0.426521,0.278434,0.265749,0.139343,0.765399,0.362823,0.392684,0.381464,...,0.155392,0.542070,0.367544,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536,0.827628
4,7,0.050890,0.193447,0.201743,0.447316,0.231818,0.578654,0.445732,0.675341,0.746939,...,0.612921,0.377957,0.459692,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670,0.822717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.058467,0.186511,0.184759,0.536035,0.124013,0.551773,0.555560,0.580548,0.612516,...,0.487869,0.487722,0.581421,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431,0.822125
763,996,0.121857,0.234562,0.179638,0.633059,0.073642,0.674339,0.552154,0.660622,0.638703,...,0.504947,0.673577,0.573298,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178,0.852123
764,997,0.046206,0.200557,0.195031,0.620730,0.097882,0.520823,0.144097,0.571115,0.554702,...,0.446266,0.590639,0.423028,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099,0.822882
765,999,0.063658,0.238118,0.238011,0.658759,0.140357,0.433236,0.136955,0.490124,0.476657,...,0.384011,0.610316,0.595691,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641,0.889240


Essentia Best Arousal + openSMILE GeMAPS

In [106]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps), columns=df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps.columns)

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised = df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_arousal_opensmile_gemaps_features.csv'))

df_pmemo_integrated_essentia_best_arousal_opensmile_gemaps_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.984985,0.579073,0.466983,0.314570,0.064362,0.214289,0.548723,0.125964,0.763577,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.952336,0.071979,0.811687,0.690897,0.345452,0.539877,0.075821,0.949078,0.593323,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.944813,0.188384,0.658175,0.666715,0.566046,0.636537,0.194606,0.525953,0.615791,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.968798,0.355408,0.721706,0.523877,0.168166,0.426521,0.216854,0.278434,0.710481,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.985253,0.434349,0.197786,0.252470,0.050890,0.193447,0.451415,0.201743,0.764426,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.429802,0.301846,0.288119,0.058467,0.186511,0.460787,0.184759,0.768472,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.937592,0.571015,0.235807,0.188595,0.121857,0.234562,0.547003,0.179638,0.943027,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.927177,0.793285,0.353024,0.220160,0.046206,0.200557,0.623593,0.195031,0.925110,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.969786,0.915269,0.395830,0.204894,0.063658,0.238118,0.586678,0.238011,1.000000,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641


Essentia Best Arousal + openSMILE eGeMAPS

In [107]:
# Fit and transform the selected columns
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps), columns=df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps.columns)

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised = df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised.drop('song_id', axis=1)
df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised.to_csv(get_pmemo_path('processed/features/integrated/normalised_essentia_best_arousal_opensmile_egemaps_features.csv'))

df_pmemo_integrated_essentia_best_arousal_opensmile_egemaps_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,1,0.984985,0.579073,0.466983,0.314570,0.064362,0.214289,0.548723,0.125964,0.763577,...,0.360946,0.477083,0.354845,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829,0.644904
1,4,0.952336,0.071979,0.811687,0.690897,0.345452,0.539877,0.075821,0.949078,0.593323,...,0.194518,0.197515,0.997077,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000,0.893336
2,5,0.944813,0.188384,0.658175,0.666715,0.566046,0.636537,0.194606,0.525953,0.615791,...,0.227733,0.398985,0.482067,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709,0.827230
3,6,0.968798,0.355408,0.721706,0.523877,0.168166,0.426521,0.216854,0.278434,0.710481,...,0.155392,0.542070,0.367544,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536,0.827628
4,7,0.985253,0.434349,0.197786,0.252470,0.050890,0.193447,0.451415,0.201743,0.764426,...,0.612921,0.377957,0.459692,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670,0.822717
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.914366,0.429802,0.301846,0.288119,0.058467,0.186511,0.460787,0.184759,0.768472,...,0.487869,0.487722,0.581421,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431,0.822125
763,996,0.937592,0.571015,0.235807,0.188595,0.121857,0.234562,0.547003,0.179638,0.943027,...,0.504947,0.673577,0.573298,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178,0.852123
764,997,0.927177,0.793285,0.353024,0.220160,0.046206,0.200557,0.623593,0.195031,0.925110,...,0.446266,0.590639,0.423028,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099,0.822882
765,999,0.969786,0.915269,0.395830,0.204894,0.063658,0.238118,0.586678,0.238011,1.000000,...,0.384011,0.610316,0.595691,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641,0.889240


openSMILE GeMAPS

In [108]:
# Fit and transform the selected columns
df_pmemo_opensmile_gemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_opensmile_gemaps_features), columns=df_pmemo_opensmile_gemaps_features.columns)

df_pmemo_opensmile_gemaps_normalised = df_pmemo_opensmile_gemaps_normalised.drop('song_id', axis=1)
df_pmemo_opensmile_gemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_opensmile_gemaps_normalised.to_csv(get_pmemo_path('processed/features/normalised_opensmile_gemaps_features.csv'))

df_pmemo_opensmile_gemaps_normalised

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.612197,0.346402,0.561734,0.687943,0.685819,0.392799,0.217347,0.293118,0.233743,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.456603,0.284361,0.432422,0.478716,0.511988,0.301664,0.112021,0.060526,0.019030,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.072656,0.611982,0.008644,0.000000,0.246597,0.320703,0.097528,0.138855,0.117965,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.406944,0.617088,0.073414,0.378155,0.753616,0.804822,0.222438,0.168709,0.112166,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.429760,0.404707,0.192817,0.539444,0.563761,0.523155,0.239504,0.276283,0.172527,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.454112,0.613801,0.162800,0.362510,0.786637,0.776836,0.529353,0.669689,0.444971,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.666339,0.374789,0.438874,0.804797,0.753139,0.548943,0.244133,0.292012,0.232846,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.285413,0.735756,0.063253,0.122272,0.591427,0.642580,0.258396,0.466979,0.509887,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.448246,0.760840,0.081846,0.248221,0.907304,0.959395,0.664894,0.667875,0.249886,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641


openSMILE eGeMAPS

In [109]:
# Fit and transform the selected columns
df_pmemo_opensmile_egemaps_normalised = pd.DataFrame(scaler.fit_transform(df_pmemo_opensmile_gemaps_features), columns=df_pmemo_opensmile_gemaps_features.columns)

df_pmemo_opensmile_egemaps_normalised = df_pmemo_opensmile_egemaps_normalised.drop('song_id', axis=1)
df_pmemo_opensmile_egemaps_normalised.insert(0, column='song_id', value=song_ids)

df_pmemo_opensmile_egemaps_normalised.to_csv(get_pmemo_path('processed/features/normalised_opensmile_egemaps_features.csv'))

df_pmemo_opensmile_egemaps_normalised

Unnamed: 0,song_id,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,1,0.612197,0.346402,0.561734,0.687943,0.685819,0.392799,0.217347,0.293118,0.233743,...,0.655572,0.388723,0.360946,0.477083,0.618233,0.568024,0.012333,0.010856,0.092155,0.066829
1,4,0.456603,0.284361,0.432422,0.478716,0.511988,0.301664,0.112021,0.060526,0.019030,...,0.188228,0.814309,0.194518,0.197515,0.719396,0.075804,0.038816,0.059588,1.000000,1.000000
2,5,0.072656,0.611982,0.008644,0.000000,0.246597,0.320703,0.097528,0.138855,0.117965,...,0.222227,0.754362,0.227733,0.398985,0.719487,0.395234,0.023807,0.022551,0.108542,0.109709
3,6,0.406944,0.617088,0.073414,0.378155,0.753616,0.804822,0.222438,0.168709,0.112166,...,0.519987,0.504598,0.155392,0.542070,0.607535,0.304105,0.005747,0.007682,0.347203,0.335536
4,7,0.429760,0.404707,0.192817,0.539444,0.563761,0.523155,0.239504,0.276283,0.172527,...,0.738294,0.247457,0.612921,0.377957,0.572874,0.442982,0.027635,0.023468,0.038398,0.029670
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,993,0.454112,0.613801,0.162800,0.362510,0.786637,0.776836,0.529353,0.669689,0.444971,...,0.764008,0.249820,0.487869,0.487722,0.471706,0.561126,0.014875,0.047375,0.075363,0.064431
763,996,0.666339,0.374789,0.438874,0.804797,0.753139,0.548943,0.244133,0.292012,0.232846,...,0.744298,0.347816,0.504947,0.673577,0.598725,0.638633,0.009723,0.009633,0.088765,0.082178
764,997,0.285413,0.735756,0.063253,0.122272,0.591427,0.642580,0.258396,0.466979,0.509887,...,0.795206,0.280868,0.446266,0.590639,0.510169,0.532011,0.016340,0.019502,0.080065,0.163099
765,999,0.448246,0.760840,0.081846,0.248221,0.907304,0.959395,0.664894,0.667875,0.249886,...,0.762783,0.304284,0.384011,0.610316,0.377443,0.643245,0.008016,0.018335,0.099848,0.240641
