In [124]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import statsmodels.api as sm
import logging

from sklearn.preprocessing import StandardScaler

from scipy.stats import skew, kurtosis

from utils.general import *

import json

import warnings

from sklearn.feature_selection import mutual_info_classif, mutual_info_regression

In [125]:
warnings.filterwarnings('ignore')

In [126]:
random_state = 42
rng = np.random.default_rng(random_state)

# Data Loading And Preprocessing

In [127]:
billboard_df = pd.read_csv("../data/billboard/hot-100_all.csv")
spotify_df = pd.read_csv("../data/spofity/songs.csv")
audio_analysis_df = pd.read_csv("../data/audio/audio_features_full.csv")

In [128]:
billboard_df.drop(axis=1, inplace=True, labels=['image', 'artist'])
billboard_df['date'] = pd.to_datetime(billboard_df['date'])
spotify_df.drop(axis=1, inplace=True, labels=['spotify_name',
                                              'artist',
                                              'artist_genres',
                                              'spotify_id',
                                              'spotify_id',
                                              'spotify_uri',
                                              'spotify_external_url',
                                              'spotify_artist_popularity',
                                              'preview_url',
                                              'preview_url_audio',
                                              'full_audio',
                                              'full_audio_duration_s'
                                              ])
audio_analysis_df.drop(axis=1, inplace=True, labels=['name', 'tempo'])

In [129]:
songs_df = spotify_df.merge(audio_analysis_df, how='inner', on='billboard_name')
songs_df

Unnamed: 0,billboard_name,duration_ms,spotify_popularity,spotify_artist_popularity_mean,explicit,danceability,energy,key,loudness,mode,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,All I Want For Christmas Is You,241106.0,69.0,81.000000,False,0.336,0.627,7.0,-7.463,1.0,...,1.619618,99.735660,-3.865835,99.024666,2.729783,112.219580,-7.488522,122.569650,2.323859,141.572560
1,Rockin' Around The Christmas Tree,126266.0,62.0,59.000000,False,0.589,0.472,8.0,-8.749,1.0,...,-1.039626,78.420586,-4.437555,55.536427,3.890496,70.359543,0.014326,77.899239,6.889563,93.610161
2,Jingle Bell Rock,130973.0,62.0,50.000000,False,0.754,0.424,2.0,-8.463,1.0,...,1.430321,58.685158,-4.030815,67.332291,1.802275,58.469532,-5.335912,53.423290,0.133941,58.774597
3,A Holly Jolly Christmas,135533.0,54.0,48.000000,False,0.683,0.375,0.0,-13.056,1.0,...,-1.355817,60.197350,-6.695084,52.782772,-4.325858,66.221947,-3.533713,50.849602,-1.266797,90.991325
4,Circles,215280.0,86.0,91.000000,False,0.695,0.762,0.0,-3.497,1.0,...,2.563944,78.141319,-12.359889,83.661438,4.207565,65.643173,-5.280680,54.441185,-0.751733,59.799530
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,Christmas Isn't Canceled (Just You),231549.0,41.0,77.000000,False,0.580,0.789,1.0,-4.918,0.0,...,5.321839,70.412506,1.213545,77.698616,4.693950,85.284431,-2.604682,76.687698,5.805956,77.433144
1419,Moved To Miami,222225.0,66.0,88.500000,True,0.717,0.444,1.0,-11.126,1.0,...,4.598643,180.801086,-4.373017,110.878738,4.545245,111.550697,4.918246,63.780304,8.114554,75.806396
1420,Hibachi,170413.0,69.0,86.333333,True,0.681,0.522,5.0,-8.740,0.0,...,8.091636,79.854568,-1.646704,123.382797,5.309008,98.507568,0.213412,80.767159,2.804790,73.490234
1421,Thailand,200958.0,70.0,84.000000,True,0.875,0.478,7.0,-10.562,1.0,...,10.420262,91.743813,-2.071233,75.112267,8.341298,97.730263,-0.260812,69.875168,1.959964,62.722679


In [130]:
# print(songs_df.apply(lambda x: x.nunique()))
songs_df.describe()

Unnamed: 0,duration_ms,spotify_popularity,spotify_artist_popularity_mean,danceability,energy,key,loudness,mode,speechiness,acousticness,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
count,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,...,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0,1423.0
mean,200696.557976,67.404076,83.302977,0.666045,0.622016,5.153197,-6.747289,0.622628,0.139837,0.223816,...,2.398114,84.513799,-3.647905,82.115186,2.668189,80.81822,-2.539943,79.075432,1.677588,80.37177
std,50770.975125,17.661345,12.018877,0.151579,0.162841,3.60034,2.57996,0.4849,0.125819,0.253148,...,3.631675,24.471115,3.398574,24.63778,3.253561,24.665541,3.023733,24.767819,3.186344,26.740148
min,32000.0,0.0,0.0,0.15,0.0076,0.0,-33.663,0.0,0.0232,3e-06,...,-13.240079,32.670311,-18.392536,31.161884,-10.105947,31.325922,-14.229393,28.560005,-13.992401,28.597084
25%,170322.0,64.0,78.0,0.57,0.525,1.0,-7.8415,0.0,0.0428,0.03155,...,0.246056,68.009289,-5.805181,65.208897,0.716595,64.160015,-4.468847,62.009989,-0.239844,61.936447
50%,195428.0,70.0,86.0,0.68,0.633,5.0,-6.36,1.0,0.0798,0.121,...,2.442511,81.295982,-3.595455,78.402687,2.753472,77.774918,-2.510375,75.437561,1.633557,75.101379
75%,223599.0,77.0,91.0,0.776,0.7335,8.0,-5.0775,1.0,0.218,0.3215,...,4.722763,98.500587,-1.416037,94.838615,4.810408,93.11882,-0.625247,91.067223,3.652482,93.510078
max,613026.0,95.0,100.0,0.965,0.984,11.0,-1.321,1.0,0.699,0.995,...,13.629806,207.025589,6.79554,219.371109,12.521308,229.869766,7.686097,229.040588,14.772246,226.710175


In [131]:
songs_df.isna().sum()

billboard_name                    0
duration_ms                       0
spotify_popularity                0
spotify_artist_popularity_mean    0
explicit                          0
                                 ..
mfcc18_var                        0
mfcc19_mean                       0
mfcc19_var                        0
mfcc20_mean                       0
mfcc20_var                        0
Length: 75, dtype: int64

In [132]:
songs_df = pd.get_dummies(songs_df, prefix=['explicit'], columns=['explicit'])

# Feature Engineering

### I don't know what made me think we need *MORE* features, but here's more features...
Some extra feature engineering for spotify's audio analysis object that did't happen during dataset construction.

In [133]:
# this code is so slow it hurts me...

def spotify_audio_analysis(filename):
    file = open(filename)
    analysis_json = json.load(file)
    file.close()
    timbre_coefs = np.arange(13, 1, -1)
    bars = analysis_json['bars']
    beats = analysis_json['beats']
    sections = analysis_json['sections']
    segments = analysis_json['segments']
    tatums = analysis_json['tatums']

    return pd.DataFrame(data={
        # "time_signature": [analysis_json['track']['time_signature']],
        # "mode": [analysis_json['track']['mode']],
        "num_bars": len(bars),
        "mean_bar_duration": np.mean([b['duration'] for b in bars]),
        "var_bar_duration": np.var([b['duration'] for b in bars]),
        "num_beats": len(beats),
        "mean_beat_duration": np.mean([b['duration'] for b in beats]),
        "var_beat_duration": np.var([b['duration'] for b in beats]),
        "num_sections": len(sections),
        "mean_section_duration": np.mean([s['duration'] for s in sections]),
        "var_section_duration": np.var([s['duration'] for s in sections]),
        "mean_section_tempo": np.mean([s['tempo'] for s in sections]),
        "var_section_tempo": np.var([s['tempo'] for s in sections]),
        "mean_section_loudness": np.mean([s['loudness'] for s in sections]),
        "var_section_loudness": np.var([s['loudness'] for s in sections]),
        # other section stuff
        "num_segments": len(segments),
        "mean_segment_duration": np.mean([s['duration'] for s in segments]),
        "var_segment_duration": np.var([s['duration'] for s in segments]),
        "mean_segment_loudness_start": np.mean([s['loudness_start'] for s in segments]),
        "var_segment_loudness_start": np.var([s['loudness_start'] for s in segments]),
        "mean_segment_loudness_max": np.mean([s['loudness_max'] for s in segments]),
        "var_segment_loudness_max": np.var([s['loudness_max'] for s in segments]),
        "mean_segment_loudness_max_time": np.mean([s['loudness_max_time'] for s in segments]),
        "var_segment_loudness_max_time": np.var([s['loudness_max_time'] for s in segments]),
        "mean_segment_loudness_end": np.mean([s['loudness_end'] for s in segments]),
        "var_segment_loudness_end": np.var([s['loudness_end'] for s in segments]),
        "mean_segment_loudness_start_max_diff": np.mean([abs(s['loudness_start'] - s['loudness_max']) for s in segments]),
        "var_segment_loudness_start_max_diff": np.var([abs(s['loudness_start'] - s['loudness_max']) for s in segments]),
        "mean_segment_loudness_start_end_diff": np.mean([abs(s['loudness_start'] - s['loudness_end']) for s in segments]),
        "var_segment_loudness_start_end_diff": np.var([abs(s['loudness_start'] - s['loudness_end']) for s in segments]),
        "mean_segment_loudness_max_end_diff": np.mean([abs(s['loudness_max'] - s['loudness_end']) for s in segments]),
        "var_segment_loudness_max_end_diff": np.var([abs(s['loudness_max'] - s['loudness_end']) for s in segments]),
        "mean_segment_num_pitches": np.mean([len(s['pitches']) for s in segments]),
        "var_segment_num_pitches": np.var([len(s['pitches']) for s in segments]),
        "mean_segment_num_pure_pitches": np.mean([len(np.array(s['pitches'])[np.array(s['pitches']) > 0.5]) for s in segments]),
        "var_segment_num_pure_pitches": np.var([len(np.array(s['pitches'])[np.array(s['pitches']) > 0.5]) for s in segments]),
        "mean_segment_timbre": np.mean([np.dot(s['timbre'], timbre_coefs) for s in segments]),
        "var_segment_timbre": np.var([np.dot(s['timbre'], timbre_coefs) for s in segments]),
        "num_tatums": len(tatums),
        "mean_tatum_duration": np.mean([t['duration'] for t in tatums]),
        "var_tatum_duration": np.var([t['duration'] for t in tatums]),
    }, index=[0])

##### This will take a little bit to run

In [134]:
aa = pd.DataFrame()
for i in range(len(songs_df)):
    aa = pd.concat([aa, spotify_audio_analysis("../data/spofity/audio_analysis/" + spotify_df['audio_analysis_file'].iloc[0])], axis=0)
aa.reset_index(inplace=True)
songs_df = pd.concat([songs_df, aa], axis=1)
songs_df.drop(labels='index', axis=1, inplace=True)

In [135]:
songs_df

Unnamed: 0,billboard_name,duration_ms,spotify_popularity,spotify_artist_popularity_mean,danceability,energy,key,loudness,mode,speechiness,...,var_segment_loudness_max_end_diff,mean_segment_num_pitches,var_segment_num_pitches,mean_segment_num_pure_pitches,var_segment_num_pure_pitches,mean_segment_timbre,var_segment_timbre,num_tatums,mean_tatum_duration,var_tatum_duration
0,All I Want For Christmas Is You,241106.0,69.0,81.000000,0.336,0.627,7.0,-7.463,1.0,0.0384,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
1,Rockin' Around The Christmas Tree,126266.0,62.0,59.000000,0.589,0.472,8.0,-8.749,1.0,0.0502,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
2,Jingle Bell Rock,130973.0,62.0,50.000000,0.754,0.424,2.0,-8.463,1.0,0.0363,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
3,A Holly Jolly Christmas,135533.0,54.0,48.000000,0.683,0.375,0.0,-13.056,1.0,0.0303,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
4,Circles,215280.0,86.0,91.000000,0.695,0.762,0.0,-3.497,1.0,0.0395,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1418,Christmas Isn't Canceled (Just You),231549.0,41.0,77.000000,0.580,0.789,1.0,-4.918,0.0,0.0457,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
1419,Moved To Miami,222225.0,66.0,88.500000,0.717,0.444,1.0,-11.126,1.0,0.1800,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
1420,Hibachi,170413.0,69.0,86.333333,0.681,0.522,5.0,-8.740,0.0,0.3030,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616
1421,Thailand,200958.0,70.0,84.000000,0.875,0.478,7.0,-10.562,1.0,0.2180,...,45.33173,12.0,0.0,2.263092,2.966942,1864.665905,1.180701e+06,1106,0.215763,0.00616


### Testing Different Popularity Metrics

In [136]:
def popularity_metrics(df, score_type='basic'):
    metrics = ['peak_rank', 'debut_rank', 'lifetime_peak_rank', 'sensationality', 'avg_rank_score', 'std_rank_score', 'time_on_chart', 'num_occurrences', 'rank_sum', 'skewness', 'kurtosis', 'spotify_popularity']
    metric_vals = []

    for i in range(len(df)):
        song = df.iloc[i]
        billboard_entries = billboard_df[billboard_df['title'] == song['billboard_name']].sort_values(by='date')
        value_counts = billboard_entries['rank'].value_counts()
        ranks = value_counts.index
        rank_counts = value_counts.values

        # max rank
        peak = min(ranks)
        if score_type == 'classic':
            ranks = [rank_score_classic(peak, ra) for ra in ranks]
            scaled = False
        elif score_type == 'score_01':
            ranks = [rank_score_01(peak, ra) for ra in ranks]
            scaled = True
        elif score_type == 'score_02':
            ranks = [rank_score_02(peak, ra) for ra in ranks]
            scaled = True
        else:
            ranks = [rank_score_basic(ra) for ra in ranks]
            scaled = True

        # sensationality
        sensation = squiggle(rank_counts, ranks, scaled=scaled)

        # mean rank
        avg_rank = np.mean(ranks)

        # rank std
        std_rank = np.std(ranks)

        # length
        time_on_chart = np.max(billboard_entries['weeks'].values)

        # lifetime_peak
        lifetime_peak = np.min(billboard_entries['peakPos'].values)

        # debut rank
        debut_rank = billboard_entries['rank'].iloc[0]

        # number of occurrences
        num_occurrences = len(billboard_entries)

        rank_sum = sum(ranks)

        skewness = skew(ranks)

        kurt = kurtosis(ranks)

        metric_vals.append([peak, debut_rank, lifetime_peak, sensation, avg_rank, std_rank, time_on_chart, num_occurrences, rank_sum, skewness, kurt, song['spotify_popularity']])

    return pd.DataFrame(data=metric_vals, columns=metrics)

In [137]:
pop_metrics_basic = popularity_metrics(songs_df, 'basic')
pop_metrics_basic

Unnamed: 0,peak_rank,debut_rank,lifetime_peak_rank,sensationality,avg_rank_score,std_rank_score,time_on_chart,num_occurrences,rank_sum,skewness,kurtosis,spotify_popularity
0,1,1,1,0.999999,0.270183,0.316913,50,14,2.161467,1.395650,0.673814,69.0
1,2,2,2,0.998958,0.123908,0.156764,44,17,1.362990,1.352697,0.450922,62.0
2,3,3,3,0.981122,0.151254,0.099182,41,12,1.210036,0.477332,-0.985764,62.0
3,4,4,4,0.931350,0.121060,0.075093,25,11,0.968480,0.378995,-1.307941,54.0
4,1,5,1,0.999999,0.153094,0.215097,61,47,2.908793,3.182407,9.711951,86.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1418,79,79,79,0.012658,0.012658,0.000000,1,1,0.012658,0.000000,-3.000000,41.0
1419,85,85,85,0.011764,0.011765,0.000000,1,1,0.011765,0.000000,-3.000000,66.0
1420,91,91,91,0.010989,0.010989,0.000000,1,1,0.010989,0.000000,-3.000000,69.0
1421,95,95,95,0.010526,0.010526,0.000000,1,1,0.010526,0.000000,-3.000000,70.0


In [174]:
data_df = songs_df.copy()
song_billboard_names = data_df.pop('billboard_name')
song_audio_analysis_files = data_df.pop('audio_analysis_file')
data_df.drop(labels=['spotify_popularity', 'spotify_artist_popularity_mean'], axis=1, inplace=True)
data_df[data_df.columns] = StandardScaler().fit_transform(data_df)
# data_df = data_df.filter(regex='^((?!mfcc).)*$')
# data_df.to_csv("../data/data.csv")
# pop_metrics_basic.to_csv("../data/popularity_metrics.csv")

# Reducing Initial Feature Set
To reduce the number of features used going forward, we estimate mutual info for each variable in relation to the chosen predictor variable

In [175]:
from IPython.display import display
def mutual_information_feature_reduction(X, y, mi_type='regression', threshold=0.0, display_mi=False):
    assert mi_type == "regression" or mi_type == "classification", "valid mi_type value are: 'regression' or 'classification'"
    if mi_type == 'regression':
        mi = mutual_info_regression(X, y, random_state=random_state)
    else:
        mi = mutual_info_classif(X, y, random_state=random_state)
    if display_mi:
        display(pd.Series(mi, index=X.columns).sort_values(ascending=False).to_frame().style.bar(color='red'))
    return X.loc[:, mi > threshold]

In [176]:
predictors = []
for dep in pop_metrics_basic.columns:
    reduced = mutual_information_feature_reduction(data_df, pop_metrics_basic[dep], mi_type='regression', display_mi=False, threshold=0.005)
    predictors.append(list(reduced.columns))
predictors_df = pd.DataFrame(data=[predictors], columns=pop_metrics_basic.columns)
predictors_df.to_csv("./regression_features.csv", index=False)

In [177]:
data_df['title'] = songs_df['billboard_name']
bb_df = billboard_df.merge(data_df, how='inner', on='title')
bb_df['top_50'] = bb_df.apply(lambda x: 1 if x['rank'] >= 50 else 0, axis=1)
bb_df['top_25'] = bb_df.apply(lambda x: 1 if x['rank'] >= 20 else 0, axis=1)
bb_df['top_10'] = bb_df.apply(lambda x: 1 if x['rank'] >= 10 else 0, axis=1)
top_50 = bb_df.pop('top_50')
top_25 = bb_df.pop('top_25')
top_10 = bb_df.pop('top_10')
clf_metrics = pd.concat([top_50, top_25, top_10], axis=1)
names = bb_df.pop('title')
bb_df.drop(labels=['peakPos', 'lastPos', 'weeks', 'isNew', 'date', 'rank'], axis=1, inplace=True)

In [179]:
predictors = []
for dep in clf_metrics.columns:
    reduced = mutual_information_feature_reduction(bb_df, clf_metrics[dep], mi_type='classification', display_mi=False, threshold=0.05)
    predictors.append(list(reduced.columns))
predictors_df = pd.DataFrame(data=[predictors], columns=clf_metrics.columns)
predictors_df.to_csv("./classification_features.csv", index=False)

In [117]:
reduced_cols = mutual_information_feature_reduction(bb_df, top_25, mi_type='classification', display_mi=True, threshold=0.05)
reduced_cols

Unnamed: 0,0
mfcc6_mean,0.230659
mfcc5_mean,0.228393
mfcc8_var,0.227125
mfcc20_mean,0.227116
harmony_mean,0.226519
mfcc16_var,0.226294
spectral_bandwidth_var,0.226157
mfcc8_mean,0.225888
mfcc11_var,0.225502
mfcc3_mean,0.22549


Unnamed: 0,duration_ms,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
1,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
2,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
3,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
4,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10420,0.607892,-0.567858,1.025804,0.709287,-0.748454,-0.714917,-0.153169,-0.258879,0.420927,0.569736,...,0.805345,-0.576445,1.430941,-0.179323,0.622848,0.181135,-0.021418,-0.096439,1.296100,-0.109934
10421,0.424180,0.336280,-1.093574,-1.697798,0.319326,-0.806990,-0.149733,-0.505803,-1.640216,0.067860,...,0.606140,3.936116,-0.213433,1.167868,0.577126,1.246406,2.467417,-0.617757,2.020883,-0.170791
10422,-0.596684,0.098696,-0.614410,-0.772652,1.297263,-0.808966,-0.153169,1.816830,-1.174651,1.659673,...,1.568291,-0.190464,0.589043,1.675562,0.811956,0.717421,0.910901,0.068327,0.353885,-0.257439
10423,0.005151,1.379008,-0.884708,-1.479113,0.621453,-0.856109,-0.153169,-0.251162,-0.315841,0.209854,...,2.209715,0.295555,0.464085,-0.284335,1.744274,0.685896,0.754013,-0.371591,0.088652,-0.660254


In [118]:
reduced_cols = mutual_information_feature_reduction(bb_df, top_10, mi_type='classification', display_mi=True, threshold=0.05)
reduced_cols

Unnamed: 0,0
mfcc13_var,0.152976
rolloff_var,0.152548
mfcc4_mean,0.151955
mfcc17_mean,0.151779
mfcc7_mean,0.151481
mfcc19_mean,0.151007
mfcc8_mean,0.150888
rolloff_mean,0.150713
mfcc6_mean,0.150646
mfcc17_var,0.15041


Unnamed: 0,duration_ms,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo,...,mfcc16_mean,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var
0,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
1,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
2,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
3,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
4,0.796196,-2.178146,0.030618,-0.277509,-0.806495,-0.236372,-0.153169,-0.839151,-0.582524,0.911840,...,-0.214438,0.622253,-0.064147,0.686564,0.018938,1.273534,-1.637154,1.756695,0.202897,2.289528
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10420,0.607892,-0.567858,1.025804,0.709287,-0.748454,-0.714917,-0.153169,-0.258879,0.420927,0.569736,...,0.805345,-0.576445,1.430941,-0.179323,0.622848,0.181135,-0.021418,-0.096439,1.296100,-0.109934
10421,0.424180,0.336280,-1.093574,-1.697798,0.319326,-0.806990,-0.149733,-0.505803,-1.640216,0.067860,...,0.606140,3.936116,-0.213433,1.167868,0.577126,1.246406,2.467417,-0.617757,2.020883,-0.170791
10422,-0.596684,0.098696,-0.614410,-0.772652,1.297263,-0.808966,-0.153169,1.816830,-1.174651,1.659673,...,1.568291,-0.190464,0.589043,1.675562,0.811956,0.717421,0.910901,0.068327,0.353885,-0.257439
10423,0.005151,1.379008,-0.884708,-1.479113,0.621453,-0.856109,-0.153169,-0.251162,-0.315841,0.209854,...,2.209715,0.295555,0.464085,-0.284335,1.744274,0.685896,0.754013,-0.371591,0.088652,-0.660254
