In [2]:
import pandas as pd
import os
import json
import sys
sys.path.append('..')
from spotify_api import SpotifyAPI

In [2]:
# Load the songs table (the file name suggests Billboard Hot 100 #1 songs with
# de-duplicated Spotify IDs -- confirm) and preview the first rows.
# The 'Track ID' column is what the later API calls are keyed on.
songs_csv_path = '../datasets/hot_100_rank_1_with_id_unique.csv'
songs_df = pd.read_csv(songs_csv_path)
songs_df.head()

Unnamed: 0,Song,Artist,Track ID
0,Poor Little Fool,Ricky Nelson,5ayybTSXNwcarDtxQKqvWX
1,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,006Ndmw2hHxvnLbJsBFnPx
2,Little Star,The Elegants,6xupOaBWORbDmakCdQwMRG
3,It's All In The Game,Tommy Edwards,2tvt5K7y1gndmCgtIoLo1f
4,It's Only Make Believe,Conway Twitty,1xVOttVNT27FBTD8iHjOfU


In [3]:
# Client used by every fetch cell below. The secrets file is read from the
# repository root, one level above this notebook, so it stays out of the notebook.
secrets_file = '../secrets.json'
api = SpotifyAPI(secrets_path=secrets_file)

In [11]:
# Number of songs loaded; each fetch below is expected to return this many entries.
len(songs_df.index)

1175

---

## Track Data

In [10]:
track_data_json = await api.get_tracks_data(songs_df['Track ID'], retries=3, delay=3)
len(track_data_json)

Generated Access Token
1175


In [7]:
# Persist the raw track data so later work can read it from disk
# instead of calling the API again.
output_file = "../assets/track_data.json"

# Create the target directory on first run; a no-op if it already exists.
output_dir = os.path.dirname(output_file)
os.makedirs(output_dir, exist_ok=True)

with open(output_file, "w") as json_out:
    json.dump(track_data_json, json_out, indent=4)

---

## Audio Features

In [8]:
audio_features_json = await api.get_tracks_audio_features(songs_df['Track ID'], retries=3, delay=3)
len(audio_features_json)

Generated Access Token


1175

In [28]:
# Tabulate the audio-features response (one row per entry), save a copy
# next to the input dataset, and preview it.
audio_features_df = pd.DataFrame(audio_features_json)

audio_features_csv = '../datasets/audio_features.csv'
# NOTE(review): to_csv keeps its default index=True here, so the row index is
# written as an unnamed first column -- readers of this file need to account for it.
audio_features_df.to_csv(audio_features_csv)

audio_features_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.474,0.338,0,-11.528,1,0.0299,0.648,0.0,0.13,0.81,154.596,audio_features,5ayybTSXNwcarDtxQKqvWX,spotify:track:5ayybTSXNwcarDtxQKqvWX,https://api.spotify.com/v1/tracks/5ayybTSXNwca...,https://api.spotify.com/v1/audio-analysis/5ayy...,153933,4
1,0.518,0.06,10,-14.887,1,0.0441,0.987,8e-06,0.161,0.336,127.87,audio_features,006Ndmw2hHxvnLbJsBFnPx,spotify:track:006Ndmw2hHxvnLbJsBFnPx,https://api.spotify.com/v1/tracks/006Ndmw2hHxv...,https://api.spotify.com/v1/audio-analysis/006N...,216373,4
2,0.417,0.463,9,-8.027,1,0.03,0.762,0.0,0.223,0.627,72.568,audio_features,6xupOaBWORbDmakCdQwMRG,spotify:track:6xupOaBWORbDmakCdQwMRG,https://api.spotify.com/v1/tracks/6xupOaBWORbD...,https://api.spotify.com/v1/audio-analysis/6xup...,160240,4
3,0.471,0.337,3,-11.964,1,0.0267,0.509,6.7e-05,0.216,0.634,70.49,audio_features,2tvt5K7y1gndmCgtIoLo1f,spotify:track:2tvt5K7y1gndmCgtIoLo1f,https://api.spotify.com/v1/tracks/2tvt5K7y1gnd...,https://api.spotify.com/v1/audio-analysis/2tvt...,158520,4
4,0.461,0.466,11,-9.627,1,0.0598,0.86,2.8e-05,0.135,0.251,128.537,audio_features,1xVOttVNT27FBTD8iHjOfU,spotify:track:1xVOttVNT27FBTD8iHjOfU,https://api.spotify.com/v1/tracks/1xVOttVNT27F...,https://api.spotify.com/v1/audio-analysis/1xVO...,132027,4


---

## Audio Analysis

In [12]:
audio_analysis_json = await api.get_tracks_audio_analysis(songs_df['Track ID'], retries=3, delay=3)
len(audio_analysis_json)

1175

In [23]:
# Track-level summary fields to keep from each audio-analysis payload.
# Only the payload's 'track' sub-dict is read; everything else in the payload is dropped.
track_keys = [
    'num_samples', 'duration', 'analysis_sample_rate',
    'analysis_channels', 'end_of_fade_in', 'start_of_fade_out',
    'loudness', 'tempo', 'tempo_confidence',
    'time_signature', 'time_signature_confidence', 'key',
    'key_confidence', 'mode', 'mode_confidence'
]

# Build one flat dict per fetched track, in the same order as audio_analysis_json.
# Order matters: the resulting table carries no track id and is later joined to
# the audio features by row position.
audio_analysis_clean_json = []
for track in audio_analysis_json:
    # A missing payload (None, or no 'track' section) becomes an empty row rather
    # than an exception, so every input track keeps its position in the output.
    track_summary = (track or {}).get('track') or {}
    # Look the sub-dict up once and copy only the keys that are actually present.
    clean_json = {key: track_summary[key] for key in track_keys if key in track_summary}
    audio_analysis_clean_json.append(clean_json)

# Make skipped payloads visible instead of silently producing empty rows.
missing_analysis_count = sum(1 for row in audio_analysis_clean_json if not row)
if missing_analysis_count:
    print(f"{missing_analysis_count} track(s) had no usable audio analysis; their rows will be empty")

In [31]:
# Tabulate the flattened audio-analysis summaries (one row per track) and preview them.
audio_analysis_df = pd.DataFrame(data=audio_analysis_clean_json)
audio_analysis_df.head(n=5)

Unnamed: 0,num_samples,duration,analysis_sample_rate,analysis_channels,end_of_fade_in,start_of_fade_out,loudness,tempo,tempo_confidence,time_signature,time_signature_confidence,key,key_confidence,mode,mode_confidence
0,3394230,153.93333,22050,1,0.38998,146.40181,-11.528,154.596,0.621,4,0.189,0,0.594,1,0.669
1,4771032,216.37334,22050,1,0.47016,198.64091,-14.887,127.87,0.143,4,0.842,10,0.552,1,0.549
2,3533292,160.24,22050,1,0.34304,156.64761,-8.027,72.568,0.092,4,0.95,9,0.297,1,0.438
3,3495366,158.52,22050,1,1.00426,153.68707,-11.964,70.49,0.266,4,0.923,3,0.345,1,0.467
4,2911188,132.02667,22050,1,1.0327,120.33161,-9.627,128.537,0.206,4,0.13,11,0.563,1,0.458


In [39]:
# Build one table per fetched track: audio features + audio-analysis summary +
# song/artist names, with 'uri', 'Song', 'Artist' as the leading columns.

# The analysis summary has no id column, so it can only be attached to the
# features by row position. Fail loudly if the two tables are not the same length
# instead of letting concat pad the shorter one with NaN rows.
assert len(audio_features_df) == len(audio_analysis_df), (
    "audio_features_df and audio_analysis_df have different row counts; "
    "they cannot be aligned by position"
)

# Both tables report some of the same fields (key, loudness, mode, tempo,
# time_signature). Keep the audio-features copy and take only the extra columns
# from the analysis, so no duplicate column labels ever enter the merge or the
# column reordering below.
analysis_only_cols = [
    col for col in audio_analysis_df.columns
    if col not in audio_features_df.columns
]
combined_df = pd.concat(
    [audio_features_df, audio_analysis_df[analysis_only_cols]],
    axis=1,
)

# Song/artist lookup keyed on the Spotify id. It reuses the already-loaded songs
# table (the same ids that were sent to the API) instead of re-reading the CSV.
song_artist_df = songs_df.rename(columns={'Track ID': 'id'})
# A left join against a lookup with repeated ids would multiply rows; keep one
# lookup row per id so the result has exactly one row per fetched track.
song_lookup_df = song_artist_df.drop_duplicates(subset='id')
combined_df = pd.merge(combined_df, song_lookup_df, on='id', how='left')

# Put the identifying columns first, keep every other column in its existing
# order, and drop the constant 'type' field from the API response.
leading_cols = ['uri', 'Song', 'Artist']
columns = leading_cols + [col for col in combined_df.columns if col not in leading_cols]
combined_df = combined_df[columns].drop(columns=['type'])

# The name lookup must not add or remove tracks.
assert len(combined_df) == len(audio_features_df), "merge changed the number of rows"
combined_df.head()

Unnamed: 0,uri,Song,Artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,track_href,analysis_url,duration_ms,time_signature,num_samples,duration,analysis_sample_rate,analysis_channels,end_of_fade_in,start_of_fade_out,tempo_confidence,time_signature_confidence,key_confidence,mode_confidence
0,spotify:track:5ayybTSXNwcarDtxQKqvWX,Poor Little Fool,Ricky Nelson,0.474,0.338,0,-11.528,1,0.0299,0.648,0.0,0.13,0.81,154.596,5ayybTSXNwcarDtxQKqvWX,https://api.spotify.com/v1/tracks/5ayybTSXNwca...,https://api.spotify.com/v1/audio-analysis/5ayy...,153933,4,3394230,153.93333,22050,1,0.38998,146.40181,0.621,0.189,0.594,0.669
1,spotify:track:006Ndmw2hHxvnLbJsBFnPx,Nel Blu Dipinto Di Blu (Volare),Domenico Modugno,0.518,0.06,10,-14.887,1,0.0441,0.987,8e-06,0.161,0.336,127.87,006Ndmw2hHxvnLbJsBFnPx,https://api.spotify.com/v1/tracks/006Ndmw2hHxv...,https://api.spotify.com/v1/audio-analysis/006N...,216373,4,4771032,216.37334,22050,1,0.47016,198.64091,0.143,0.842,0.552,0.549
2,spotify:track:6xupOaBWORbDmakCdQwMRG,Little Star,The Elegants,0.417,0.463,9,-8.027,1,0.03,0.762,0.0,0.223,0.627,72.568,6xupOaBWORbDmakCdQwMRG,https://api.spotify.com/v1/tracks/6xupOaBWORbD...,https://api.spotify.com/v1/audio-analysis/6xup...,160240,4,3533292,160.24,22050,1,0.34304,156.64761,0.092,0.95,0.297,0.438
3,spotify:track:2tvt5K7y1gndmCgtIoLo1f,It's All In The Game,Tommy Edwards,0.471,0.337,3,-11.964,1,0.0267,0.509,6.7e-05,0.216,0.634,70.49,2tvt5K7y1gndmCgtIoLo1f,https://api.spotify.com/v1/tracks/2tvt5K7y1gnd...,https://api.spotify.com/v1/audio-analysis/2tvt...,158520,4,3495366,158.52,22050,1,1.00426,153.68707,0.266,0.923,0.345,0.467
4,spotify:track:1xVOttVNT27FBTD8iHjOfU,It's Only Make Believe,Conway Twitty,0.461,0.466,11,-9.627,1,0.0598,0.86,2.8e-05,0.135,0.251,128.537,1xVOttVNT27FBTD8iHjOfU,https://api.spotify.com/v1/tracks/1xVOttVNT27F...,https://api.spotify.com/v1/audio-analysis/1xVO...,132027,4,2911188,132.02667,22050,1,1.0327,120.33161,0.206,0.13,0.563,0.458
