In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

In [2]:
# Project helpers: config loading, annotation loading and train/test splitting.
from mer.utils.const import get_config_from_json, setup_global_config
from mer.utils.utils import load_metadata, split_train_test

# Notebook configuration (stands in for command-line argument parsing).
config_path = "../configs/config.json"
config = get_config_from_json(config_path)
# Directory holding the static feature CSVs read by the cells below.
feat_data_dir = '../data/PMEmo/PMEmo2019/PMEmo2019/features'

##### Workaround to setup global config ############
# GLOBAL_CONFIG is deliberately imported only AFTER setup_global_config() ran;
# presumably the module-level object is bound by that call -- TODO confirm.
# Do not move this import to the top of the notebook without checking.
setup_global_config(config, verbose=True)
from mer.utils.const import GLOBAL_CONFIG
##### End of Workaround #####

Munch({'DEFAULT_FREQ': 44100, 'DEFAULT_TIME': 40, 'WINDOW_TIME': 5, 'TRAIN_RATIO': 0.8, 'BATCH_SIZE': 8, 'FREQUENCY_LENGTH': 129, 'N_CHANNEL': 1, 'N_CHANNEL_SEP': 4, 'SPECTROGRAM_TIME_LENGTH': 15502, 'SPECTROGRAM_HALF_SECOND_LENGTH': 171, 'SPECTROGRAM_5_SECOND_LENGTH': 1721, 'MFCCS_TIME_LENGTH': 3876, 'LEARNING_RATE': 0.0001, 'SOUND_EXTENSION': '.wav', 'MP3_EXTENSION': '.mp3', 'MIN_TIME_END_POINT': 15, 'AUDIO_FOLDER': '../data/PMEmo/PMEmo2019/PMEmo2019/chorus_wav', 'SEP_AUDIO_FOLDER': '../data/PMEmo/PMEmo2019/PMEmo2019/separation_16', 'ANNOTATION_SONG_LEVEL': ['../data/PMEmo/PMEmo2019/PMEmo2019/annotations/static_annotations.csv', '../data/PMEmo/PMEmo2019/PMEmo2019/annotations/static_annotations_std.csv'], 'K_FOLD_ANNOTATION_FOLDER': '../data/PMEmo/PMEmo2019/PMEmo2019/annotations/k_folds', 'WAVE_ARRAY_LENGTH': 1764000, 'WINDOW_SIZE': 220500})


In [3]:
def get_xy_train_test(df):
    """Split ``df`` into train/test parts and separate features from targets.

    The split is delegated to ``split_train_test`` with
    ``GLOBAL_CONFIG.TRAIN_RATIO``.

    Returns:
        ``(x_train, y_train, x_test, y_test)`` where both ``x`` frames have
        the four ``gt_*`` columns and ``song_id`` removed, ``y_train`` holds
        only the four ``gt_*`` columns, and ``y_test`` holds ``song_id``
        followed by the four ``gt_*`` columns.
    """
    target_cols = ['gt_valence_mean', 'gt_arousal_mean', 'gt_valence_std', 'gt_arousal_std']
    non_feature_cols = target_cols + ['song_id']

    train_part, test_part = split_train_test(df, GLOBAL_CONFIG.TRAIN_RATIO)

    # Everything that is neither a target nor the id is treated as a feature.
    x_train, x_test = (
        part.drop(columns=non_feature_cols) for part in (train_part, test_part)
    )
    y_train = train_part[target_cols]
    # The test targets keep the id so callers can tell which song each row is.
    y_test = test_part[['song_id'] + target_cols]

    return (x_train, y_train, x_test, y_test)

In [4]:
def train_mixed_dataset(df: pd.DataFrame) -> pd.DataFrame:
    """Fit a random forest on the mixed-audio features and predict the test split.

    Reads ``mixed_wav_static_feat.csv`` from ``feat_data_dir``, right-merges it
    with the annotation table ``df`` on ``song_id``, splits it with
    ``get_xy_train_test`` and fits a ``RandomForestRegressor(random_state=42)``
    on the training part.

    Args:
        df: annotation table containing ``song_id`` and the four ``gt_*``
            target columns.

    Returns:
        Predictions for the test rows, one row per test sample in split order,
        with columns ``mixed_valence_mean``, ``mixed_arousal_mean``,
        ``mixed_valence_std`` and ``mixed_arousal_std``. ``song_id`` is not
        part of the result.
    """
    feature_csv = os.path.join(feat_data_dir, 'mixed_wav_static_feat.csv')
    merged = (
        pd.read_csv(feature_csv)
        .astype({'musicID': np.int64})
        .rename(columns={'musicID': 'song_id'})
        # how='right': keep exactly the annotated songs
        .merge(df, how='right', on='song_id')
    )

    x_train, y_train, x_test, _ = get_xy_train_test(merged)

    # model fit -- 2m 30s
    forest = RandomForestRegressor(random_state=42)
    forest.fit(x_train, y_train)

    output_cols = ['mixed_valence_mean','mixed_arousal_mean', 'mixed_valence_std', 'mixed_arousal_std']
    return pd.DataFrame(forest.predict(x_test), columns=output_cols)

In [5]:
def train_sep_dataset(df: pd.DataFrame) -> pd.DataFrame:
    """Fit a random forest on the source-separated stem features and predict the test split.

    The four per-stem feature tables (bass, drums, other, vocals) found in
    ``feat_data_dir`` are joined on ``song_id``. Joining on the key, instead of
    gluing the tables together by row position, means the result does not
    depend on every CSV listing the songs in the same order. Feature columns
    are prefixed with their stem name so that column names stay unique across
    stems (they are otherwise repeated once per stem).

    Args:
        df: annotation table containing ``song_id`` and the four ``gt_*``
            target columns.

    Returns:
        Predictions for the test rows, one row per test sample in split order,
        with columns ``sep_valence_mean``, ``sep_arousal_mean``,
        ``sep_valence_std`` and ``sep_arousal_std``. ``song_id`` is not part
        of the result.

    Raises:
        pandas.errors.MergeError: if a ``song_id`` appears more than once in a
            stem table (the join is validated as one-to-one).
    """
    stems = ('bass', 'drums', 'other', 'vocals')

    # prepare sep_df_
    sep_df = None
    for stem in stems:
        stem_df = pd.read_csv(os.path.join(feat_data_dir, f'sep_{stem}_static_feat.csv'))
        stem_df['musicID'] = stem_df['musicID'].astype(np.int64)

        # Prefix every feature with its stem; the key column becomes song_id.
        new_names = {col: f'{stem}_{col}' for col in stem_df.columns if col != 'musicID'}
        new_names['musicID'] = 'song_id'
        stem_df = stem_df.rename(columns=new_names)

        if sep_df is None:
            sep_df = stem_df
        else:
            # An inner merge keeps the row order of the first stem table, so when
            # all tables are already aligned the feature matrix is unchanged.
            sep_df = sep_df.merge(stem_df, how='inner', on='song_id', validate='one_to_one')

    sep_df_ = sep_df.merge(df, how='right', on='song_id')

    # train-test split
    x_train_df, y_train_df, x_test_df, y_test_df = get_xy_train_test(sep_df_)

    # model fit -- 10m 30s
    rf_reg = RandomForestRegressor(random_state=42)
    rf_reg.fit(x_train_df, y_train_df)

    # predict
    y_hat = rf_reg.predict(x_test_df)
    sep_result_df = pd.DataFrame(y_hat, columns=['sep_valence_mean','sep_arousal_mean', 'sep_valence_std', 'sep_arousal_std'])
    return sep_result_df

In [6]:
# NOTE(review): `fold` is not used yet -- every iteration would train on the
# same song-level annotations, and the trailing `break` stops after the first.
for i, fold in enumerate(os.listdir(GLOBAL_CONFIG.K_FOLD_ANNOTATION_FOLDER)):
    df = load_metadata(GLOBAL_CONFIG.ANNOTATION_SONG_LEVEL)
    new_col_name = {'musicId':          'song_id',
                    'Arousal(mean)':    'gt_arousal_mean',
                    'Valence(mean)':    'gt_valence_mean',
                    'Arousal(std)':     'gt_arousal_std',
                    'Valence(std)':     'gt_valence_std'
                    }
    df.rename(columns=new_col_name, inplace=True)

    mixed_result_df = train_mixed_dataset(df)
    sep_result_df = train_sep_dataset(df)

    # The predictions cover only the held-out rows and are indexed 0..n-1.
    # Concatenating them with the full annotation table would attach them, by
    # position, to the FIRST n songs instead of the test songs. Pair them with
    # the ground truth of the test rows instead.
    # Assumes split_train_test is deterministic for a given row order, so this
    # split selects the same songs as the splits inside the train_* helpers
    # (the right-merge there keeps the row order of `df`) -- TODO confirm.
    _, gt_test_df = split_train_test(df, GLOBAL_CONFIG.TRAIN_RATIO)
    gt_test_df = gt_test_df.reset_index(drop=True)
    if not (len(gt_test_df) == len(mixed_result_df) == len(sep_result_df)):
        raise ValueError(
            'test split and predictions differ in length: '
            f'{len(gt_test_df)}, {len(mixed_result_df)}, {len(sep_result_df)}'
        )

    # save result
    result_df_ = pd.concat([gt_test_df, mixed_result_df, sep_result_df], axis=1)
    result_df_.to_csv(f'./rf_result_fold_{i}.csv', index=False)
    break

In [36]:
# Preview the first rows of the mixed-audio predictions.
mixed_result_df.head()
# print(mixed_result_df.tail())

Unnamed: 0,mixed_valence_mean,mixed_arousal_mean,mixed_valence_std,mixed_arousal_std
0,0.512083,0.580439,0.173189,0.163381
1,0.697375,0.741205,0.160144,0.139102
2,0.434966,0.327364,0.179993,0.160171
3,0.719841,0.734114,0.141695,0.129596
4,0.745205,0.78612,0.146482,0.122007


In [None]:
# Inline (un-factored) version of the per-fold training loop.
# NOTE(review): `fold` is not used yet and the trailing `break` stops after the
# first iteration.
for i, fold in enumerate(os.listdir(GLOBAL_CONFIG.K_FOLD_ANNOTATION_FOLDER)):
    df = load_metadata(GLOBAL_CONFIG.ANNOTATION_SONG_LEVEL)
    new_col_name = {'musicId':          'song_id',
                    'Arousal(mean)':    'gt_arousal_mean',
                    'Valence(mean)':    'gt_valence_mean',
                    'Arousal(std)':     'gt_arousal_std',
                    'Valence(std)':     'gt_valence_std'
                    }
    df.rename(columns=new_col_name, inplace=True)
    print(df.head())

    # MIXED DATASET
    mixed_csv_path = os.path.join(feat_data_dir, 'mixed_wav_static_feat.csv')
    mix_df = pd.read_csv(mixed_csv_path)
    mix_df.musicID = mix_df.musicID.astype(np.int64)
    mix_df.rename(columns={'musicID': 'song_id'}, inplace=True)
    mix_df_ = mix_df.merge(df, how='right', on='song_id')
    # train-test split
    train_df, test_df = split_train_test(mix_df_, GLOBAL_CONFIG.TRAIN_RATIO)
    labels = ['gt_valence_mean', 'gt_arousal_mean', 'gt_valence_std', 'gt_arousal_std']
    x_train_df = train_df.drop(columns=labels + ['song_id'])
    y_train_df = train_df[labels]
    x_test_df = test_df.drop(columns=labels + ['song_id'])
    y_test_df = test_df[['song_id'] + labels]
    # model fit -- 2m 30s
    rf_reg = RandomForestRegressor(random_state=42)
    rf_reg.fit(x_train_df, y_train_df)
    # predict
    y_hat = rf_reg.predict(x_test_df)
    pred_labels = ['mixed_valence_mean','mixed_arousal_mean', 'mixed_valence_std', 'mixed_arousal_std']
    mixed_result_df = pd.DataFrame(y_hat, columns=pred_labels)

    # SEPARATED DATASET
    # NOTE(review): sep_df_ is assembled here but no model is trained on it in
    # this cell.
    sep_csv_paths = [os.path.join(feat_data_dir, 'sep_bass_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_drums_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_other_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_vocals_static_feat.csv')]
    sep_df = pd.read_csv(sep_csv_paths[0])
    sep_df.musicID = sep_df.musicID.astype(np.int64)
    sep_df.rename(columns={'musicID': 'song_id'}, inplace=True)

    # Iterate over the paths directly: reusing `i` as the inner index would
    # overwrite the fold index used in the output file name below.
    for stem_csv_path in sep_csv_paths[1:]:
        local_df = pd.read_csv(stem_csv_path)
        local_df.drop(columns=['musicID'], inplace=True)
        # positional concat: assumes every stem CSV lists songs in the same order
        sep_df = pd.concat([sep_df, local_df], axis=1)

    sep_df_ = sep_df.merge(df, how='right', on='song_id')


    # save result
    # Predictions exist only for the test rows, so place them next to the test
    # ground truth (same rows, same order) rather than the full annotation
    # table, which would line them up with unrelated songs.
    result_df_ = pd.concat([y_test_df.reset_index(drop=True), mixed_result_df], axis=1)
    result_df_.to_csv(f'rf_result_fold_{i}.csv', index=False)
    break

# Old (earlier cell-by-cell version of the pipeline above)

In [13]:
# This section works with the ORIGINAL annotation column names ('musicId',
# 'Arousal(mean)', ...). The global `df` is renamed to 'song_id' / 'gt_*' by
# earlier cells, so merging against it on 'musicId' fails when the notebook is
# run top to bottom. Load a separate, un-renamed copy instead.
# Assumes load_metadata returns a fresh frame on every call -- TODO confirm.
annot_df = load_metadata(GLOBAL_CONFIG.ANNOTATION_SONG_LEVEL)

feat_data_dir = '../data/PMEmo/PMEmo2019/PMEmo2019/features'
mixed_csv_path = os.path.join(feat_data_dir, 'mixed_wav_static_feat.csv')
sep_csv_paths = [os.path.join(feat_data_dir, 'sep_bass_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_drums_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_other_static_feat.csv'),
                os.path.join(feat_data_dir, 'sep_vocals_static_feat.csv')]

# Mixed-audio features, keyed like the annotations.
mix_df = pd.read_csv(mixed_csv_path)
mix_df.musicID = mix_df.musicID.astype(np.int64)
mix_df.rename(columns={'musicID': 'musicId'}, inplace=True)
mix_df_ = mix_df.merge(annot_df, how='right', on='musicId')

# Separated-stem features: first stem keeps the id, the others are appended
# column-wise.
sep_df = pd.read_csv(sep_csv_paths[0])
sep_df.musicID = sep_df.musicID.astype(np.int64)
sep_df.rename(columns={'musicID': 'musicId'}, inplace=True)

for i in range(1, len(sep_csv_paths)):
    local_df = pd.read_csv(sep_csv_paths[i])
    local_df.drop(columns=['musicID'], inplace=True)
    # positional concat: assumes every stem CSV lists songs in the same order
    sep_df = pd.concat([sep_df, local_df], axis=1)

sep_df_ = sep_df.merge(annot_df, how='right', on='musicId')

mix_df.shape, mix_df_.shape, sep_df.shape, sep_df_.shape

((794, 6374), (767, 6378), (794, 25493), (767, 25497))

## Mixed data

In [42]:
# Split the merged mixed-audio table into train and test parts using the
# configured ratio, then show both shapes.
train_df, test_df = split_train_test(mix_df_, GLOBAL_CONFIG.TRAIN_RATIO)

train_df.shape, test_df.shape

((613, 6378), (154, 6378))

In [75]:
# Targets to predict; every column that is neither a target nor the id is a feature.
labels = ['Arousal(mean)', 'Valence(mean)', 'Arousal(std)', 'Valence(std)']
x_train_df, x_test_df = (
    part.drop(columns=labels+['musicId']) for part in (train_df, test_df)
)
# The test targets keep musicId so predictions can be matched to songs later.
y_train_df, y_test_df = train_df[labels], test_df[['musicId'] + labels]

x_train_df.shape, y_train_df.shape, x_test_df.shape, y_test_df.shape

((613, 6373), (613, 4), (154, 6373), (154, 5))

In [90]:
# Preview the training feature matrix.
# NOTE(review): the recorded output below lists musicId and the label columns,
# which are dropped from x_train_df when it is built -- the output looks stale.
x_train_df.head()

Unnamed: 0,musicId,Arousal(mean),Valence(mean),Arousal(std),Valence(std)
0,786,0.725,0.45,0.183712,0.238485
1,788,0.6125,0.525,0.103833,0.145774
2,789,0.35,0.125,0.261008,0.125
3,790,0.6875,0.725,0.115244,0.108972
4,791,0.7875,0.7125,0.097628,0.148429


In [None]:
# Fit a single random forest on the mixed-audio features, predicting all
# label columns at once; the seed is fixed for repeatability.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(x_train_df, y_train_df)
# 2m 30s (fit time noted by the author)

In [68]:
# Pair each feature name with its importance and show the ten largest.
# feat_importances is a one-shot iterator: it is exhausted by sorted().
feat_importances = zip(x_train_df.columns, rf_reg.feature_importances_)
most_important = sorted(feat_importances, key=lambda x: x[1], reverse=True)
most_important[:10]

[('pcm_fftMag_fband1000-4000_sma_quartile2', 0.2881807638912245),
 ('audspec_lengthL1norm_sma_rqmean', 0.061561973266041105),
 ('audspec_lengthL1norm_sma_quartile3', 0.030311662282134357),
 ('pcm_fftMag_spectralVariance_sma_quartile2', 0.02032532865043051),
 ('audspec_lengthL1norm_sma_amean', 0.017887715136378997),
 ('audspec_lengthL1norm_sma_quartile2', 0.017691544330695426),
 ('pcm_fftMag_spectralVariance_sma_quartile1', 0.015388884960989288),
 ('pcm_fftMag_spectralRollOff90.0_sma_de_quartile3', 0.014109480590190907),
 ('audspec_lengthL1norm_sma_peakMeanAbs', 0.004378854818775705),
 ('pcm_fftMag_fband1000-4000_sma_amean', 0.004177847251408741)]

In [83]:
# Test-set song ids as an (n, 1) column so they can be stacked next to the predictions.
music_id = y_test_df['musicId'].to_numpy()[:, np.newaxis]
music_id.shape

(154, 1)

In [85]:
# Predict the label columns for the test rows, then append the song id as an
# extra last column. The ids end up as floats in the combined array; they are
# cast back to integers when the result frame is built.
pred = rf_reg.predict(x_test_df)
print(pred.shape)
pred = np.concatenate([pred, music_id], axis=1)
print(pred.shape)

(154, 4)
(154, 5)


In [93]:
# Wrap the prediction array (label columns followed by musicId) in a frame.
result_df = pd.DataFrame(pred, columns=labels+['musicId'])

# Move musicId to the front and restore its integer dtype in one step.
# Assigning the column on the re-ordered subset afterwards is the
# chained-assignment pattern that can trigger SettingWithCopyWarning.
result_df_ = result_df[['musicId'] + labels].astype({'musicId': np.int64})

result_df_

Unnamed: 0,Arousal(mean),Valence(mean),Arousal(std),Valence(std),musicId
0,0.584693,0.513472,0.164255,0.172140,786.0
1,0.733562,0.699583,0.134712,0.154967,788.0
2,0.314830,0.447091,0.154803,0.179296,789.0
3,0.727511,0.711545,0.123717,0.139222,790.0
4,0.790500,0.744625,0.121062,0.150913,791.0
...,...,...,...,...,...
149,0.725449,0.670844,0.130238,0.154735,993.0
150,0.817500,0.738242,0.114314,0.157283,996.0
151,0.765699,0.709375,0.126174,0.158029,997.0
152,0.832519,0.756966,0.113994,0.155294,999.0


In [None]:
# Ground-truth labels (with musicId) of the test rows, for comparison with the predictions.
y_test_df

In [96]:
# Write result_df_ to sample_.csv in the working directory, without the index column.
result_df_.to_csv('sample_.csv', index=False)

## Sep data

In [14]:
# Split the merged separated-stem table into train and test parts using the
# configured ratio, then show both shapes.
train_sep_df, test_sep_df = split_train_test(sep_df_, GLOBAL_CONFIG.TRAIN_RATIO)

train_sep_df.shape, test_sep_df.shape

((613, 25497), (154, 25497))

In [15]:
# MIXED
labels = ['Arousal(mean)', 'Valence(mean)', 'Arousal(std)', 'Valence(std)']
x_train_df = train_df.drop(columns=labels+['musicId'])
y_train_df = train_df[labels]
x_test_df = test_df.drop(columns=labels+['musicId'])
y_test_df = test_df[['musicId'] + labels]

x_train_df.shape, y_train_df.shape, x_test_df.shape, y_test_df.shape

# SEP
labels = ['Arousal(mean)', 'Valence(mean)', 'Arousal(std)', 'Valence(std)']
x_train_sep_df = train_sep_df.drop(columns=labels+['musicId'])
y_train_sep_df = train_sep_df[labels]
x_test_sep_df = test_sep_df.drop(columns=labels+['musicId'])
y_test_sep_df = test_sep_df[['musicId'] + labels]

x_train_sep_df.shape, y_train_sep_df.shape, x_test_sep_df.shape, y_test_sep_df.shape

((613, 25492), (613, 4), (154, 25492), (154, 5))

In [17]:
# Preview the separated-stem training feature matrix.
x_train_sep_df.head()

Unnamed: 0,audspec_lengthL1norm_sma_range,audspec_lengthL1norm_sma_maxPos,audspec_lengthL1norm_sma_minPos,audspec_lengthL1norm_sma_quartile1,audspec_lengthL1norm_sma_quartile2,audspec_lengthL1norm_sma_quartile3,audspec_lengthL1norm_sma_iqr1-2,audspec_lengthL1norm_sma_iqr2-3,audspec_lengthL1norm_sma_iqr1-3,audspec_lengthL1norm_sma_percentile1.0,...,mfcc_sma_de[14]_peakRangeAbs,mfcc_sma_de[14]_peakRangeRel,mfcc_sma_de[14]_peakMeanAbs,mfcc_sma_de[14]_peakMeanMeanDist,mfcc_sma_de[14]_peakMeanRel,mfcc_sma_de[14]_minRangeRel,mfcc_sma_de[14]_meanRisingSlope,mfcc_sma_de[14]_stddevRisingSlope,mfcc_sma_de[14]_meanFallingSlope,mfcc_sma_de[14]_stddevFallingSlope
0,0.873262,0.304538,0.0,0.287148,0.357663,0.420637,0.070515,0.062974,0.133489,0.10849,...,11.634295,0.662197,2.691464,2.694624,-19.99973,0.520069,114.8165,59.405704,115.61979,67.23852
1,0.882073,0.74669,0.000716,0.392807,0.487217,0.59434,0.09441,0.107123,0.201533,0.0342,...,8.197068,0.551893,2.608493,2.614641,-19.9914,0.547826,110.87151,55.06114,105.7935,51.18563
2,0.897701,0.143402,0.000119,0.329629,0.500462,0.594339,0.170832,0.093877,0.264709,0.071325,...,8.670522,0.554185,2.506416,2.508491,-19.999971,0.578112,111.02978,56.393402,111.927734,56.0108
3,0.952561,0.629126,0.00034,0.407225,0.58522,0.676815,0.177994,0.091595,0.26959,0.04931,...,8.124354,0.587444,2.197115,2.202384,-19.99076,0.574385,97.39957,45.132236,101.394745,51.956814
4,0.826184,0.545392,0.0,0.403486,0.490401,0.588558,0.086915,0.098157,0.185072,0.148559,...,8.096084,0.505727,2.358759,2.368806,-19.92707,0.636681,95.59255,51.23665,101.15352,48.31584


In [18]:
# Fit a single random forest on the separated-stem features, predicting all
# label columns at once; the seed is fixed for repeatability.
rf_reg_sep = RandomForestRegressor(random_state=42)
rf_reg_sep.fit(x_train_sep_df, y_train_sep_df)
# 10m 40s (fit time noted by the author)

RandomForestRegressor(random_state=42)

In [19]:
# Pair each feature name with its importance and show the ten largest.
# NOTE(review): the stem tables are concatenated without renaming, so the
# names listed here presumably repeat once per stem and do not say which
# stem a feature came from -- confirm against the CSV headers.
feat_importances_sep = zip(x_train_sep_df.columns, rf_reg_sep.feature_importances_)
most_important_sep = sorted(feat_importances_sep, key=lambda x: x[1], reverse=True)
most_important_sep[:10]

[('audspec_lengthL1norm_sma_de_quartile3', 0.16498228684442195),
 ('audspec_lengthL1norm_sma_de_iqr1-2', 0.12727626377987064),
 ('audspec_lengthL1norm_sma_de_iqr1-3', 0.049049007261107495),
 ('audspec_lengthL1norm_sma_de_iqr2-3', 0.026083850133731976),
 ('audspec_lengthL1norm_sma_de_meanRisingSlope', 0.024918455977204924),
 ('audspec_lengthL1norm_sma_de_lpgain', 0.014513989666733276),
 ('audspec_lengthL1norm_sma_de_quartile1', 0.013675454565001733),
 ('audspec_lengthL1norm_sma_lpgain', 0.013475432268053375),
 ('audspec_lengthL1norm_sma_meanFallingSlope', 0.0076900161307178585),
 ('pcm_fftMag_spectralVariance_sma_quartile2', 0.006502985697883883)]

In [20]:
# Test-set song ids as an (n, 1) column so they can be stacked next to the predictions.
music_id = y_test_sep_df['musicId'].to_numpy()[:, np.newaxis]
music_id.shape

(154, 1)

In [22]:
# Predict the label columns for the test rows, then append the song id as an
# extra last column (cast back to integers when the result frame is built).
pred_sep = rf_reg_sep.predict(x_test_sep_df)
print(pred_sep.shape)
pred_sep = np.concatenate([pred_sep, music_id], axis=1)
print(pred_sep.shape)

(154, 4)
(154, 5)


In [25]:
# Wrap the prediction array (label columns followed by musicId) in a frame.
result_sep_df = pd.DataFrame(pred_sep, columns=labels+['musicId'])

# switch musicId column to the front and restore its integer dtype in one
# step; assigning the column on the re-ordered subset afterwards is the
# chained-assignment pattern that can trigger SettingWithCopyWarning.
result_sep_df_ = result_sep_df[['musicId'] + labels].astype({'musicId': np.int64})

result_sep_df_

Unnamed: 0,musicId,Arousal(mean),Valence(mean),Arousal(std),Valence(std)
0,786,0.720125,0.662344,0.136611,0.160604
1,788,0.639756,0.590909,0.150121,0.156348
2,789,0.321000,0.463875,0.143470,0.177056
3,790,0.685170,0.658864,0.143549,0.156017
4,791,0.793920,0.730250,0.119959,0.151313
...,...,...,...,...,...
149,993,0.738949,0.705727,0.130667,0.156617
150,996,0.794068,0.717095,0.127050,0.159902
151,997,0.650068,0.598159,0.156493,0.159256
152,999,0.812886,0.719034,0.122360,0.163643


In [26]:
# Write the separated-stem predictions to CSV in the working directory, without the index column.
result_sep_df_.to_csv('rf_sep_all-feats_result.csv', index=False)

In [27]:
# Ground-truth labels (with musicId) of the test rows, for comparison with the predictions.
y_test_sep_df

Unnamed: 0,musicId,Arousal(mean),Valence(mean),Arousal(std),Valence(std)
0,786,0.7250,0.4500,0.183712,0.238485
1,788,0.6125,0.5250,0.103833,0.145774
2,789,0.3500,0.1250,0.261008,0.125000
3,790,0.6875,0.7250,0.115244,0.108972
4,791,0.7875,0.7125,0.097628,0.148429
...,...,...,...,...,...
149,993,0.8625,0.7625,0.103833,0.152582
150,996,0.8750,0.5625,0.111803,0.245268
151,997,0.7125,0.6625,0.177218,0.112500
152,999,0.8750,0.7750,0.096825,0.122474
