In [1]:
import pandas as pd
import numpy as np

In [2]:
import processing
sp = processing.sp

# Getting The Data

A list of playlists of representative popular songs

In [3]:
playlist_urls = [
'https://open.spotify.com/playlist/37i9dQZEVXbMDoHDwVN2tF?si=nFnIsrGQSdm3_L1HFlot5w', 
'https://open.spotify.com/playlist/37i9dQZF1DWSs8ya2jFSgv?si=xDRgEl0gQtCEBVI_cKtYrw',
'https://open.spotify.com/playlist/37i9dQZF1DX4o1oenSJRJd?si=JOUw8S39QNCZiiReqrME5A',
'https://open.spotify.com/playlist/37i9dQZF1DXbTxeAdrVG2l?si=5P5aDR0PQNenSK37hkNhqQ',
'https://open.spotify.com/playlist/37i9dQZF1DX4UtSsGT1Sbe?si=JYcGVIgKRzmKKO5s-me4VQ',
'https://open.spotify.com/playlist/37i9dQZF1DWTwnEm1IYyoj?si=B8RXdxzATySmquQT1jBFSQ',
'https://open.spotify.com/playlist/37i9dQZF1DWSfMe9z89s9B?si=kS_8w-uwQWuOtP-E9m0l5w',
'https://open.spotify.com/playlist/37i9dQZF1DWSfMe9z89s9B?si=kS_8w-uwQWuOtP-E9m0l5w',
'https://open.spotify.com/playlist/37i9dQZF1DX2UgsUIg75Vg?si=iAH2_vqCSR-t9VAbe7kVCw',
'https://open.spotify.com/playlist/37i9dQZF1DWXRqgorJj26U?si=GxqiD7rEQEic2nM-WkIlZw']

Compute audio features for each song in the above playlists and create dataframe

In [4]:
music_features = pd.DataFrame()

pl_feat_dfs = []

for url in playlist_urls:
    pl = sp.playlist(url)
    
    pl_name = pl['name']
    
    pl_feat = processing.get_playlist_audio_features(pl)
    pl_feat_df = pd.DataFrame(pl_feat)
    pl_feat_df['playlist'] = pl_name
    
    pl_feat_dfs.append(pl_feat_df)

music_features = pd.concat(pl_feat_dfs, ignore_index=True)

In [5]:
music_features['duration'] = music_features['duration_ms'] / 1e3
music_features['mode'] = music_features['mode'].apply(lambda x: {0: 'Minor', 1: 'Major'}[x])

key_dict = {-1: 'No Key Detected', 0: 'C', 1:'C#/D♭', 2: 'D', 3: 'D#/E♭', 4: 'E', 5: 'F', 6: 'F#/G♭', 7: 'G', 8: 'G#/A♭', 9: 'A', 10: 'A#/B♭', 11: 'B'}
music_features['key'] = music_features['key'].apply(lambda x: key_dict[x])

In [6]:
music_features

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,playlist,duration
0,0.746,0.690,B,-7.956,Major,0.1640,0.247000,0.000000,0.1010,0.497,89.977,181733,4,Global Top 50,181.733
1,0.514,0.730,C#/D♭,-5.934,Major,0.0598,0.001460,0.000095,0.0897,0.334,171.005,200040,4,Global Top 50,200.040
2,0.770,0.724,G#/A♭,-5.484,Major,0.0495,0.016700,0.010700,0.3530,0.898,121.975,176840,4,Global Top 50,176.840
3,0.672,0.855,A,-3.764,Major,0.0397,0.021000,0.000000,0.3230,0.646,123.056,182200,4,Global Top 50,182.200
4,0.830,0.490,C#/D♭,-8.820,Minor,0.2090,0.289000,0.000003,0.1130,0.845,81.604,247059,4,Global Top 50,247.059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1084,0.702,0.578,D,-13.415,Major,0.0310,0.382000,0.000006,0.0425,0.975,111.617,363640,4,Rock Classics,363.640
1085,0.398,0.952,E,-6.042,Major,0.0610,0.000373,0.000077,0.1000,0.584,122.093,278680,4,Rock Classics,278.680
1086,0.623,0.910,A,-8.448,Major,0.0384,0.319000,0.000448,0.0810,0.745,145.624,195307,4,Rock Classics,195.307
1087,0.673,0.814,A,-5.411,Major,0.0367,0.013300,0.002930,0.3430,0.728,147.193,252200,4,Rock Classics,252.200


In [7]:
set(music_features['playlist'])

{'All Out 00s',
 'All Out 80s',
 'All Out 90s',
 'Alternative R&B',
 'Chilled R&B',
 'Global Top 50',
 'Rock Classics',
 'Soft Pop Hits',
 'Today’s Top Hits Presents… Best Pop Songs of the 2010s'}

# Analysis

In [8]:
import plotly.express as px
import plotly.figure_factory as ff
import seaborn as sns
import matplotlib.pyplot as plt

In [9]:
fig = px.scatter_3d(music_features, x='loudness', y='liveness', z='energy', opacity=0.5, width=800, height=800)
fig.update_layout(margin=dict(l=10, r=10, b=100, t=0))
fig.write_image('charts/loudness_liveness_energy.svg')
fig.show()

In [10]:
fig = px.scatter_matrix(music_features, dimensions=processing.feature_names,width=1920, height=1080)
fig.write_image('charts/scatter_matrix.svg')
fig.show()

In [11]:
def plot_feature_dist(feature, bin_size=0.05, show=True):
    fig = ff.create_distplot([music_features[feature]], [feature], bin_size=bin_size, curve_type='kde', histnorm='probability density', show_rug=False)

    fig.update_layout(
        showlegend=False,
        title=feature.capitalize() + " Distribution",
        xaxis_title=feature.capitalize(),
        yaxis_title="Probability Density",
        font=dict(
            #family="Courier New, monospace",
            size=12,
            color="#7f7f7f"
        )
    )

    fig.write_image('charts/' + feature + '_dist.svg')

    return fig

In [12]:
plot_feature_dist('energy')

In [13]:
plot_feature_dist('danceability')

In [14]:
plot_feature_dist('loudness', bin_size=1)

In [15]:
plot_feature_dist('speechiness', bin_size=0.004)

In [16]:
plot_feature_dist('acousticness', bin_size=0.025)

In [17]:
plot_feature_dist('instrumentalness', bin_size=0.025)

In [18]:
plot_feature_dist('liveness', bin_size=0.01)

In [19]:
plot_feature_dist('valence', bin_size=0.025)

In [20]:
plot_feature_dist('tempo', bin_size=5)

In [21]:
fig = plot_feature_dist('duration', bin_size=10, show=False)
fig.update_layout(xaxis_title='Duration (seconds)')
fig.write_image('charts/duration_dist.svg')
fig.show()

In [22]:
fig = px.histogram(music_features, y='mode', histnorm='probability', title='Mode Distribution', labels={'mode':'Mode'}, orientation='h')
fig.update_layout(xaxis_title='Frequency')
fig.write_image('charts/mode_dist.svg')
fig.show()

In [23]:
fig = px.histogram(music_features, y='key', histnorm='probability', title='Mode Distribution', labels={'key':'Key'}, orientation='h')
fig.update_yaxes(categoryorder='array', categoryarray=['B', 'A#/B♭', 'A', 'G#/A♭', 'G', 'F#/G♭', 'F', 'E', 'D#/E♭', 'D', 'C#/D♭', 'C', 'No Key Detected'])
fig.update_layout(xaxis_title='Frequency')
fig.write_image('charts/key_dist.svg')
fig.show()