# Dataset Combination - Combining DEAM (2013 & 2014) and PMEmo datasets
For a larger dataset for training

## Import the relevant libraries

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sys

## Combining annotations

Import DEAM annotations

In [2]:
df_deam_annotations = pd.read_csv('../data/DEAM/processed/annotations/deam_static_annotations.csv')
df_deam_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,2,-0.475,-0.500
1,3,-0.375,-0.425
2,4,0.175,0.125
3,5,-0.150,0.075
4,7,0.200,0.350
...,...,...,...
1739,1996,-0.275,0.225
1740,1997,0.075,-0.275
1741,1998,0.350,0.300
1742,1999,-0.100,0.100


Add a "deam_" tag infront of the song IDs

In [3]:
df_deam_annotations['song_id'] = df_deam_annotations['song_id'].apply(lambda x: f'deam_{x}')
df_deam_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,deam_2,-0.475,-0.500
1,deam_3,-0.375,-0.425
2,deam_4,0.175,0.125
3,deam_5,-0.150,0.075
4,deam_7,0.200,0.350
...,...,...,...
1739,deam_1996,-0.275,0.225
1740,deam_1997,0.075,-0.275
1741,deam_1998,0.350,0.300
1742,deam_1999,-0.100,0.100


Import PMEmo anotations

In [4]:
df_pmemo_annotations = pd.read_csv('../data/PMEmo/PMEmo2019/processed/annotations/pmemo_static_annotations.csv')
df_pmemo_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,1,0.150,-0.200
1,4,-0.425,-0.475
2,5,-0.600,-0.700
3,6,-0.300,0.025
4,7,0.450,0.400
...,...,...,...
762,993,0.525,0.725
763,996,0.125,0.750
764,997,0.325,0.425
765,999,0.550,0.750


Add a "pmemo_" tag infront of the song IDs

In [5]:
df_pmemo_annotations['song_id'] = df_pmemo_annotations['song_id'].apply(lambda x: f'pmemo_{x}')
df_pmemo_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,pmemo_1,0.150,-0.200
1,pmemo_4,-0.425,-0.475
2,pmemo_5,-0.600,-0.700
3,pmemo_6,-0.300,0.025
4,pmemo_7,0.450,0.400
...,...,...,...
762,pmemo_993,0.525,0.725
763,pmemo_996,0.125,0.750
764,pmemo_997,0.325,0.425
765,pmemo_999,0.550,0.750


Combine the annotations and export as .csv

In [6]:
df_combined_annotations = pd.concat([df_deam_annotations, df_pmemo_annotations])
df_combined_annotations.to_csv('../data/combined/annotations/combined_static_annotations.csv', index=False)
df_combined_annotations

Unnamed: 0,song_id,valence_mean_mapped,arousal_mean_mapped
0,deam_2,-0.475,-0.500
1,deam_3,-0.375,-0.425
2,deam_4,0.175,0.125
3,deam_5,-0.150,0.075
4,deam_7,0.200,0.350
...,...,...,...
762,pmemo_993,0.525,0.725
763,pmemo_996,0.125,0.750
764,pmemo_997,0.325,0.425
765,pmemo_999,0.550,0.750


## Combining features

In [7]:
def standarise(df):
  scaler = StandardScaler()
  song_ids = df['song_id'].tolist()
  df = df.drop('song_id', axis=1)

  # Fit and transform the selected columns
  df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

  df.insert(0, column='song_id', value=song_ids)
  return df

In [8]:
def normalise(df):
  scaler = MinMaxScaler()
  song_ids = df['song_id'].tolist()
  df = df.drop('song_id', axis=1)

  # Fit and transform the selected columns
  df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)

  df.insert(0, column='song_id', value=song_ids)
  return df

The combine_featuresets() function will also perform standardisation and normalisation and export both dataframes

In [9]:
def combine_featuresets(deam_df_path, pmemo_df_path, output_path, standardised_output_path, normalised_output_path):
    df_deam = pd.read_csv(deam_df_path)
    df_pmemo = pd.read_csv(pmemo_df_path)

    # drop Unnamed:0 column
    df_deam = df_deam[df_deam.columns[1:]]
    df_pmemo = df_pmemo[df_pmemo.columns[1:]]

    # modify song_id column values
    df_deam['song_id'] = df_deam['song_id'].apply(lambda x: f'deam_{x}')
    df_pmemo['song_id'] = df_pmemo['song_id'].apply(lambda x: f'pmemo_{x}')

    # combine the two datasets
    df_combined = pd.concat([df_deam, df_pmemo])
    df_combined.to_csv(output_path, index=False)

    # # standardisation and normalisation
    df_combined_standardised = standarise(df_combined)
    df_combined_normalised = normalise(df_combined)

    # # exporting dataframes to .csv
    df_combined_standardised.to_csv(standardised_output_path, index=False)
    df_combined_normalised.to_csv(normalised_output_path, index=False)
    return df_combined_standardised, df_combined_normalised

### Essentia Best Overall + openSMILE GeMAPS

#### Standardised

In [10]:
df_combined_standardised, df_combined_normalised = combine_featuresets(
  deam_df_path='../data/DEAM/processed/features/integrated/essentia_best_overall_opensmile_gemaps_features.csv',
  pmemo_df_path= '../data/PMEmo/PMEmo2019/processed/features/integrated/essentia_best_overall_opensmile_gemaps_features.csv',
  output_path="../data/combined/features/essentia_best_overall_opensmile_gemaps_features.csv",
  standardised_output_path='../data/combined/features/standardised_essentia_best_overall_opensmile_gemaps_features.csv',
  normalised_output_path='../data/combined/features/normalised_essentia_best_overall_opensmile_gemaps_features.csv'
)

df_combined_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,deam_2,-0.098468,0.301399,-0.188070,-0.553131,0.877751,-0.174041,-0.388034,0.508848,0.163214,...,0.263687,-0.248631,0.048631,0.736841,-0.595414,-1.199546,0.050806,0.668138,-0.528256,-0.493215
1,deam_3,1.491691,1.964323,3.026911,-0.483520,0.191593,0.148780,0.124470,2.014884,-0.733999,...,-1.850559,1.591040,-1.633036,0.546719,-1.009914,0.001778,-0.317587,-0.376464,0.334998,0.266694
2,deam_4,-0.253373,0.159197,0.425925,1.247970,0.274049,-0.374255,-0.524174,0.999264,-0.184880,...,-0.008521,-0.146095,-0.538296,0.793107,0.020517,-0.063150,-0.308189,-0.420158,-0.301602,-0.425140
3,deam_5,-0.309106,-0.272476,-0.470838,1.179777,-0.972127,1.550632,1.472049,-0.895004,1.140672,...,0.273298,-0.541866,1.709836,-1.393514,-0.648359,-1.231729,0.121023,0.554996,-0.509953,-0.422295
4,deam_7,2.189271,1.975926,2.738867,-1.960243,0.648447,-1.271741,-1.365386,2.158976,-0.966910,...,-2.470887,2.114570,-0.604276,0.484982,1.247287,-1.150882,-0.024365,0.500863,-0.470296,-0.367104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,-0.225960,-0.229863,0.107514,1.156892,0.188517,0.170979,0.433127,0.118200,0.409695,...,0.630204,-0.641883,-0.108483,0.227677,-0.231259,0.853161,-0.344064,-0.318848,0.357957,0.212864
2507,pmemo_996,0.276125,0.154828,0.071388,1.135611,0.613240,0.306657,0.447987,-1.406353,0.161938,...,0.531978,-0.165747,-0.017226,1.168695,0.334053,1.158894,-0.351551,-0.501699,0.600170,0.477358
2508,pmemo_997,-0.323078,-0.117410,0.179979,-1.414053,0.138488,-0.128571,0.055583,0.081068,-0.781994,...,0.785680,-0.491029,-0.330795,0.748766,-0.060077,0.738318,-0.341934,-0.453887,0.442936,1.683393
2509,pmemo_999,-0.184841,0.183294,0.483195,-1.458675,-0.291097,-0.532937,-0.280126,0.760324,-1.323348,...,0.624099,-0.377259,-0.663460,0.848394,-0.650787,1.177086,-0.354033,-0.459541,0.800492,2.839080


#### Normalised

In [11]:
df_combined_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,alphaRatioUV_sma3nz_amean,hammarbergIndexUV_sma3nz_amean,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength
0,deam_2,0.023745,0.224209,0.054855,0.368995,0.631265,0.625203,0.588560,0.579839,0.267674,...,0.698817,0.312126,0.363082,0.600638,0.264131,0.049256,0.069750,0.164484,0.026330,0.014235
1,deam_3,0.082083,0.348482,0.211289,0.378784,0.543954,0.676631,0.672442,0.907102,0.118704,...,0.425407,0.569125,0.142183,0.581319,0.206415,0.329843,0.008219,0.024446,0.074093,0.056789
2,deam_4,0.018062,0.213582,0.084731,0.622282,0.554446,0.593308,0.566277,0.686407,0.209877,...,0.663616,0.326450,0.285985,0.606355,0.349895,0.314678,0.009789,0.018589,0.038871,0.018047
3,deam_5,0.016018,0.181322,0.041096,0.612692,0.395875,0.899957,0.893002,0.274781,0.429967,...,0.700060,0.271162,0.581293,0.384170,0.256759,0.041739,0.081478,0.149317,0.027343,0.018206
4,deam_7,0.107675,0.349349,0.197274,0.171114,0.602087,0.450332,0.428595,0.938413,0.080032,...,0.345188,0.642261,0.277318,0.575046,0.520714,0.060622,0.057195,0.142060,0.029537,0.021297
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.019068,0.184507,0.069237,0.609474,0.543563,0.680168,0.722960,0.494951,0.308598,...,0.746214,0.257190,0.342444,0.548901,0.314837,0.528696,0.003797,0.032170,0.075363,0.053774
2507,pmemo_996,0.037488,0.213255,0.067479,0.606481,0.597607,0.701782,0.725393,0.163664,0.267462,...,0.733512,0.323705,0.354431,0.644519,0.393553,0.600105,0.002546,0.007657,0.088765,0.068585
2508,pmemo_997,0.015505,0.192911,0.072763,0.247924,0.537197,0.632447,0.661167,0.486882,0.110735,...,0.766320,0.278264,0.313242,0.601849,0.338673,0.501873,0.004152,0.014067,0.080065,0.136122
2509,pmemo_999,0.020577,0.215383,0.087517,0.241649,0.482534,0.568029,0.606221,0.634485,0.020851,...,0.745425,0.294157,0.269544,0.611973,0.256421,0.604354,0.002132,0.013309,0.099848,0.200839


### Essentia Best Overall + openSMILE eGeMAPS

#### Standardised

In [12]:
df_combined_standardised, df_combined_normalised = combine_featuresets(
  deam_df_path='../data/DEAM/processed/features/integrated/essentia_best_overall_opensmile_egemaps_features.csv',
  pmemo_df_path= '../data/PMEmo/PMEmo2019/processed/features/integrated/essentia_best_overall_opensmile_egemaps_features.csv',
  output_path="../data/combined/features/essentia_best_overall_opensmile_egemaps_features.csv",
  standardised_output_path='../data/combined/features/standardised_essentia_best_overall_opensmile_egemaps_features.csv',
  normalised_output_path='../data/combined/features/normalised_essentia_best_overall_opensmile_egemaps_features.csv'
)

df_combined_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,deam_2,-0.098468,0.301399,-0.188070,-0.553131,0.877751,-0.174041,-0.388034,0.508848,0.163214,...,0.048631,0.736841,-0.487271,-0.595414,-1.199546,0.050806,0.668138,-0.528256,-0.493215,-0.509196
1,deam_3,1.491691,1.964323,3.026911,-0.483520,0.191593,0.148780,0.124470,2.014884,-0.733999,...,-1.633036,0.546719,-0.315073,-1.009914,0.001778,-0.317587,-0.376464,0.334998,0.266694,-0.194329
2,deam_4,-0.253373,0.159197,0.425925,1.247970,0.274049,-0.374255,-0.524174,0.999264,-0.184880,...,-0.538296,0.793107,0.488975,0.020517,-0.063150,-0.308189,-0.420158,-0.301602,-0.425140,0.520806
3,deam_5,-0.309106,-0.272476,-0.470838,1.179777,-0.972127,1.550632,1.472049,-0.895004,1.140672,...,1.709836,-1.393514,-0.231324,-0.648359,-1.231729,0.121023,0.554996,-0.509953,-0.422295,0.105572
4,deam_7,2.189271,1.975926,2.738867,-1.960243,0.648447,-1.271741,-1.365386,2.158976,-0.966910,...,-0.604276,0.484982,0.511068,1.247287,-1.150882,-0.024365,0.500863,-0.470296,-0.367104,1.068924
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,-0.225960,-0.229863,0.107514,1.156892,0.188517,0.170979,0.433127,0.118200,0.409695,...,-0.108483,0.227677,1.172888,-0.231259,0.853161,-0.344064,-0.318848,0.357957,0.212864,1.245824
2507,pmemo_996,0.276125,0.154828,0.071388,1.135611,0.613240,0.306657,0.447987,-1.406353,0.161938,...,-0.017226,1.168695,1.136856,0.334053,1.158894,-0.351551,-0.501699,0.600170,0.477358,1.380734
2508,pmemo_997,-0.323078,-0.117410,0.179979,-1.414053,0.138488,-0.128571,0.055583,0.081068,-0.781994,...,-0.330795,0.748766,0.470301,-0.060077,0.738318,-0.341934,-0.453887,0.442936,1.683393,1.249229
2509,pmemo_999,-0.184841,0.183294,0.483195,-1.458675,-0.291097,-0.532937,-0.280126,0.760324,-1.323348,...,-0.663460,0.848394,1.236183,-0.650787,1.177086,-0.354033,-0.459541,0.800492,2.839080,1.547665


#### Normalised

In [13]:
df_combined_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
0,deam_2,0.023745,0.224209,0.054855,0.368995,0.631265,0.625203,0.588560,0.579839,0.267674,...,0.363082,0.600638,0.181705,0.264131,0.049256,0.069750,0.164484,0.026330,0.014235,0.594429
1,deam_3,0.082083,0.348482,0.211289,0.378784,0.543954,0.676631,0.672442,0.907102,0.118704,...,0.142183,0.581319,0.212856,0.206415,0.329843,0.008219,0.024446,0.074093,0.056789,0.639348
2,deam_4,0.018062,0.213582,0.084731,0.622282,0.554446,0.593308,0.566277,0.686407,0.209877,...,0.285985,0.606355,0.358312,0.349895,0.314678,0.009789,0.018589,0.038871,0.018047,0.741370
3,deam_5,0.016018,0.181322,0.041096,0.612692,0.395875,0.899957,0.893002,0.274781,0.429967,...,0.581293,0.384170,0.228007,0.256759,0.041739,0.081478,0.149317,0.027343,0.018206,0.682132
4,deam_7,0.107675,0.349349,0.197274,0.171114,0.602087,0.450332,0.428595,0.938413,0.080032,...,0.277318,0.575046,0.362308,0.520714,0.060622,0.057195,0.142060,0.029537,0.021297,0.819566
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.019068,0.184507,0.069237,0.609474,0.543563,0.680168,0.722960,0.494951,0.308598,...,0.342444,0.548901,0.482034,0.314837,0.528696,0.003797,0.032170,0.075363,0.053774,0.844803
2507,pmemo_996,0.037488,0.213255,0.067479,0.606481,0.597607,0.701782,0.725393,0.163664,0.267462,...,0.354431,0.644519,0.475516,0.393553,0.600105,0.002546,0.007657,0.088765,0.068585,0.864049
2508,pmemo_997,0.015505,0.192911,0.072763,0.247924,0.537197,0.632447,0.661167,0.486882,0.110735,...,0.313242,0.601849,0.354933,0.338673,0.501873,0.004152,0.014067,0.080065,0.136122,0.845288
2509,pmemo_999,0.020577,0.215383,0.087517,0.241649,0.482534,0.568029,0.606221,0.634485,0.020851,...,0.269544,0.611973,0.493484,0.256421,0.604354,0.002132,0.013309,0.099848,0.200839,0.887864


### Essentia Best Overall Mean

#### Standardised

In [14]:
df_combined_standardised, df_combined_normalised = combine_featuresets(
  deam_df_path='../data/DEAM/processed/features/essentia_best_overall_features.csv',
  pmemo_df_path= '../data/PMEmo/PMEmo2019/processed/features/essentia_best_overall_features.csv',
  output_path="../data/combined/features/essentia_best_overall_features.csv",
  standardised_output_path='../data/combined/features/standardised_essentia_best_overall_features.csv',
  normalised_output_path='../data/combined/features/normalised_essentia_best_overall_features.csv'
)

df_combined_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,-0.098468,0.301399,-0.188070,-0.553131,0.877751,-0.174041,-0.388034,0.508848,0.163214,...,0.213031,-0.173802,0.631082,-0.190598,0.214819,-0.361391,-0.366158,0.279939,-0.497938,-0.658303
1,deam_3,1.491691,1.964323,3.026911,-0.483520,0.191593,0.148780,0.124470,2.014884,-0.733999,...,-0.211639,-0.312197,0.264776,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.594454,-0.457276
2,deam_4,-0.253373,0.159197,0.425925,1.247970,0.274049,-0.374255,-0.524174,0.999264,-0.184880,...,-0.211639,-0.292406,-0.224862,-0.468323,0.255212,-0.415891,-0.366158,-0.502457,-0.777307,-0.758648
3,deam_5,-0.309106,-0.272476,-0.470838,1.179777,-0.972127,1.550632,1.472049,-0.895004,1.140672,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,0.047837,-0.366158,-0.455465,0.212247,-0.575998
4,deam_7,2.189271,1.975926,2.738867,-1.960243,0.648447,-1.271741,-1.365386,2.158976,-0.966910,...,-0.211639,-0.312197,-0.265665,0.259052,-0.342085,1.096486,2.050168,0.671137,-0.777307,-0.630934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,-0.225960,-0.229863,0.107514,1.156892,0.188517,0.170979,0.433127,0.118200,0.409695,...,-0.211639,-0.312197,-0.265665,-0.196598,-0.342085,-0.415891,-0.366158,-0.403532,0.513345,4.271633
2507,pmemo_996,0.276125,0.154828,0.071388,1.135611,0.613240,0.306657,0.447987,-1.406353,0.161938,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.409489,-0.669160
2508,pmemo_997,-0.323078,-0.117410,0.179979,-1.414053,0.138488,-0.128571,0.055583,0.081068,-0.781994,...,-0.101652,-0.004968,-0.265665,-0.005924,-0.342085,-0.225337,-0.366158,0.616088,-0.485106,0.104098
2509,pmemo_999,-0.184841,0.183294,0.483195,-1.458675,-0.291097,-0.532937,-0.280126,0.760324,-1.323348,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.296164,-0.415891,0.130583,-0.502457,0.152987,-0.651932


#### Normalised

In [15]:
df_combined_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,tonal.key_temperley.strength,rhythm.beats_loudness_band_ratio.mean_0,rhythm.beats_loudness_band_ratio.mean_1,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,0.023745,0.224209,0.054855,0.368995,0.631265,0.625203,0.588560,0.579839,0.267674,...,0.023256,0.010375,0.064544,0.026229,0.046262,0.005850,0.000000,0.083526,0.033769,0.012672
1,deam_3,0.082083,0.348482,0.211289,0.378784,0.543954,0.676631,0.672442,0.907102,0.118704,...,0.000000,0.000000,0.038179,0.000000,0.000000,0.000000,0.000000,0.000000,0.022102,0.035942
2,deam_4,0.018062,0.213582,0.084731,0.622282,0.554446,0.593308,0.566277,0.686407,0.209877,...,0.000000,0.001484,0.002937,0.000000,0.049618,0.000000,0.000000,0.000000,0.000000,0.001057
3,deam_5,0.016018,0.181322,0.041096,0.612692,0.395875,0.899957,0.893002,0.274781,0.429967,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.049780,0.000000,0.005017,0.119613,0.022199
4,deam_7,0.107675,0.349349,0.197274,0.171114,0.602087,0.450332,0.428595,0.938413,0.080032,...,0.000000,0.000000,0.000000,0.068694,0.000000,0.162351,0.183040,0.125289,0.000000,0.015840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.019068,0.184507,0.069237,0.609474,0.543563,0.680168,0.722960,0.494951,0.308598,...,0.000000,0.000000,0.000000,0.025662,0.000000,0.000000,0.000000,0.010561,0.156009,0.583327
2507,pmemo_996,0.037488,0.213255,0.067479,0.606481,0.597607,0.701782,0.725393,0.163664,0.267462,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.011416
2508,pmemo_997,0.015505,0.192911,0.072763,0.247924,0.537197,0.632447,0.661167,0.486882,0.110735,...,0.006023,0.023032,0.000000,0.043670,0.000000,0.020456,0.000000,0.119412,0.035320,0.100922
2509,pmemo_999,0.020577,0.215383,0.087517,0.241649,0.482534,0.568029,0.606221,0.634485,0.020851,...,0.000000,0.000000,0.000000,0.000000,0.003815,0.000000,0.037629,0.000000,0.112450,0.013410


### Essentia Best Valence Mean

#### Standardised

In [16]:
df_combined_standardised, df_combined_normalised = combine_featuresets(
  deam_df_path='../data/DEAM/processed/features/essentia_best_valence_features.csv',
  pmemo_df_path= '../data/PMEmo/PMEmo2019/processed/features/essentia_best_valence_features.csv',
  output_path="../data/combined/features/essentia_best_valence_features.csv",
  standardised_output_path='../data/combined/features/standardised_essentia_best_valence_features.csv',
  normalised_output_path='../data/combined/features/normalised_essentia_best_valence_features.csv'
)

df_combined_standardised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,-0.098468,0.301399,-0.188070,-0.588019,-0.425567,-0.478385,-0.553131,0.877751,-0.174041,...,0.213031,-0.173802,0.631082,-0.190598,0.214819,-0.361391,-0.366158,0.279939,-0.497938,-0.658303
1,deam_3,1.491691,1.964323,3.026911,-1.542890,3.002110,-0.588507,-0.483520,0.191593,0.148780,...,-0.211639,-0.312197,0.264776,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.594454,-0.457276
2,deam_4,-0.253373,0.159197,0.425925,-0.552890,0.415961,0.514522,1.247970,0.274049,-0.374255,...,-0.211639,-0.292406,-0.224862,-0.468323,0.255212,-0.415891,-0.366158,-0.502457,-0.777307,-0.758648
3,deam_5,-0.309106,-0.272476,-0.470838,-0.598020,-0.499747,-0.438943,1.179777,-0.972127,1.550632,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,0.047837,-0.366158,-0.455465,0.212247,-0.575998
4,deam_7,2.189271,1.975926,2.738867,-1.568225,2.010294,1.389710,-1.960243,0.648447,-1.271741,...,-0.211639,-0.312197,-0.265665,0.259052,-0.342085,1.096486,2.050168,0.671137,-0.777307,-0.630934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,-0.225960,-0.229863,0.107514,1.727178,0.295302,-0.105583,1.156892,0.188517,0.170979,...,-0.211639,-0.312197,-0.265665,-0.196598,-0.342085,-0.415891,-0.366158,-0.403532,0.513345,4.271633
2507,pmemo_996,0.276125,0.154828,0.071388,2.326991,-0.308241,0.502555,1.135611,0.613240,0.306657,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.409489,-0.669160
2508,pmemo_997,-0.323078,-0.117410,0.179979,2.250773,-0.017804,-0.259145,-1.414053,0.138488,-0.128571,...,-0.101652,-0.004968,-0.265665,-0.005924,-0.342085,-0.225337,-0.366158,0.616088,-0.485106,0.104098
2509,pmemo_999,-0.184841,0.183294,0.483195,2.485869,0.491131,-0.693730,-1.458675,-0.291097,-0.532937,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.296164,-0.415891,0.130583,-0.502457,0.152987,-0.651932


#### Normalised

In [17]:
df_combined_normalised

Unnamed: 0,song_id,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.spectral_energy.mean,lowlevel.zerocrossingrate.mean,rhythm.beats_loudness.mean,rhythm.onset_rate,tonal.chords_strength.mean,tonal.hpcp_entropy.mean,tonal.key_edma.strength,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,0.023745,0.224209,0.054855,0.087567,0.069568,0.373016,0.368995,0.631265,0.625203,...,0.023256,0.010375,0.064544,0.026229,0.046262,0.005850,0.000000,0.083526,0.033769,0.012672
1,deam_3,0.082083,0.348482,0.211289,0.019437,0.338074,0.357421,0.378784,0.543954,0.676631,...,0.000000,0.000000,0.038179,0.000000,0.000000,0.000000,0.000000,0.000000,0.022102,0.035942
2,deam_4,0.018062,0.213582,0.084731,0.090074,0.135489,0.513626,0.622282,0.554446,0.593308,...,0.000000,0.001484,0.002937,0.000000,0.049618,0.000000,0.000000,0.000000,0.000000,0.001057
3,deam_5,0.016018,0.181322,0.041096,0.086854,0.063757,0.378601,0.612692,0.395875,0.899957,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.049780,0.000000,0.005017,0.119613,0.022199
4,deam_7,0.107675,0.349349,0.197274,0.017629,0.260381,0.637566,0.171114,0.602087,0.450332,...,0.000000,0.000000,0.000000,0.068694,0.000000,0.162351,0.183040,0.125289,0.000000,0.015840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.019068,0.184507,0.069237,0.252758,0.126037,0.425810,0.609474,0.543563,0.680168,...,0.000000,0.000000,0.000000,0.025662,0.000000,0.000000,0.000000,0.010561,0.156009,0.583327
2507,pmemo_996,0.037488,0.213255,0.067479,0.295555,0.078758,0.511932,0.606481,0.597607,0.701782,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.011416
2508,pmemo_997,0.015505,0.192911,0.072763,0.290117,0.101510,0.404064,0.247924,0.537197,0.632447,...,0.006023,0.023032,0.000000,0.043670,0.000000,0.020456,0.000000,0.119412,0.035320,0.100922
2509,pmemo_999,0.020577,0.215383,0.087517,0.306891,0.141377,0.342520,0.241649,0.482534,0.568029,...,0.000000,0.000000,0.000000,0.000000,0.003815,0.000000,0.037629,0.000000,0.112450,0.013410


### Essentia Best Arousal Mean

#### Standardised

In [18]:
df_combined_standardised, df_combined_normalised = combine_featuresets(
  deam_df_path='../data/DEAM/processed/features/essentia_best_arousal_features.csv',
  pmemo_df_path= '../data/PMEmo/PMEmo2019/processed/features/essentia_best_arousal_features.csv',
  output_path="../data/combined/features/essentia_best_arousal_features.csv",
  standardised_output_path='../data/combined/features/standardised_essentia_best_arousal_features.csv',
  normalised_output_path='../data/combined/features/normalised_essentia_best_arousal_features.csv'
)

df_combined_standardised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,0.494413,-0.208130,0.433610,-0.220156,-0.098468,0.301399,-0.400270,-0.188070,0.228591,...,0.213031,-0.173802,0.631082,-0.190598,0.214819,-0.361391,-0.366158,0.279939,-0.497938,-0.658303
1,deam_3,-1.472472,-1.492353,0.921871,1.488434,1.491691,1.964323,-1.541326,3.026911,-0.759719,...,-0.211639,-0.312197,0.264776,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.594454,-0.457276
2,deam_4,0.586518,-0.205112,0.162375,-0.075284,-0.253373,0.159197,-0.314164,0.425925,0.032908,...,-0.211639,-0.292406,-0.224862,-0.468323,0.255212,-0.415891,-0.366158,-0.502457,-0.777307,-0.758648
3,deam_5,0.199557,-0.919556,-0.537061,0.069318,-0.309106,-0.272476,-0.795840,-0.470838,-0.609154,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,0.047837,-0.366158,-0.455465,0.212247,-0.575998
4,deam_7,0.587664,-1.579328,1.066337,1.518569,2.189271,1.975926,-1.608834,2.738867,-0.851648,...,-0.211639,-0.312197,-0.265665,0.259052,-0.342085,1.096486,2.050168,0.671137,-0.777307,-0.630934
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.189093,0.634746,0.142877,-0.751096,-0.225960,-0.229863,0.955886,0.107514,0.800623,...,-0.211639,-0.312197,-0.265665,-0.196598,-0.342085,-0.415891,-0.366158,-0.403532,0.513345,4.271633
2507,pmemo_996,0.306610,1.275736,-0.336808,-1.193907,0.276125,0.154828,1.398826,0.071388,1.362682,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.342085,-0.415891,-0.366158,-0.502457,-0.409489,-0.669160
2508,pmemo_997,0.253913,2.284652,0.514619,-1.053463,-0.323078,-0.117410,1.792309,0.179979,1.304989,...,-0.101652,-0.004968,-0.265665,-0.005924,-0.342085,-0.225337,-0.366158,0.616088,-0.485106,0.104098
2509,pmemo_999,0.469505,2.838357,0.825544,-1.121387,-0.184841,0.183294,1.602657,0.483195,1.546130,...,-0.211639,-0.312197,-0.265665,-0.468323,-0.296164,-0.415891,0.130583,-0.502457,0.152987,-0.651932


#### Normalised

In [19]:
df_combined_normalised

Unnamed: 0,song_id,lowlevel.average_loudness,lowlevel.barkbands_spread.mean,lowlevel.melbands_crest.mean,lowlevel.melbands_flatness_db.mean,lowlevel.melbands_kurtosis.mean,lowlevel.melbands_skewness.mean,lowlevel.melbands_spread.mean,lowlevel.spectral_energy.mean,lowlevel.spectral_entropy.mean,...,tonal.chords_histogram_14,tonal.chords_histogram_15,tonal.chords_histogram_16,tonal.chords_histogram_17,tonal.chords_histogram_18,tonal.chords_histogram_19,tonal.chords_histogram_20,tonal.chords_histogram_21,tonal.chords_histogram_22,tonal.chords_histogram_23
0,deam_2,0.972818,0.252048,0.353876,0.238736,0.023745,0.224209,0.173672,0.054855,0.788542,...,0.023256,0.010375,0.064544,0.026229,0.046262,0.005850,0.000000,0.083526,0.033769,0.012672
1,deam_3,0.584843,0.053319,0.419870,0.463740,0.082083,0.348482,0.026271,0.211289,0.641965,...,0.000000,0.000000,0.038179,0.000000,0.000000,0.000000,0.000000,0.000000,0.022102,0.035942
2,deam_4,0.990986,0.252515,0.317216,0.257814,0.018062,0.213582,0.184795,0.084731,0.759520,...,0.000000,0.001484,0.002937,0.000000,0.049618,0.000000,0.000000,0.000000,0.000000,0.001057
3,deam_5,0.914656,0.141958,0.222680,0.276857,0.016018,0.181322,0.122572,0.041096,0.664296,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.049780,0.000000,0.005017,0.119613,0.022199
4,deam_7,0.991212,0.039860,0.439396,0.467708,0.107675,0.349349,0.017550,0.197274,0.628331,...,0.000000,0.000000,0.000000,0.068694,0.000000,0.162351,0.183040,0.125289,0.000000,0.015840
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2506,pmemo_993,0.912592,0.382480,0.314580,0.168817,0.019068,0.184507,0.348860,0.069237,0.873380,...,0.000000,0.000000,0.000000,0.025662,0.000000,0.000000,0.000000,0.010561,0.156009,0.583327
2507,pmemo_996,0.935773,0.481671,0.249746,0.110503,0.037488,0.213255,0.406079,0.067479,0.956739,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.044460,0.011416
2508,pmemo_997,0.925378,0.637797,0.364825,0.128998,0.015505,0.192911,0.456910,0.072763,0.948182,...,0.006023,0.023032,0.000000,0.043670,0.000000,0.020456,0.000000,0.119412,0.035320,0.100922
2509,pmemo_999,0.967904,0.723481,0.406850,0.120053,0.020577,0.215383,0.432410,0.087517,0.983946,...,0.000000,0.000000,0.000000,0.000000,0.003815,0.000000,0.037629,0.000000,0.112450,0.013410
