# Getting data 3 - audio features of top100/top20 hip hop tracks

Now that I got the cleanded weekly top100 tracks for 2010-2021, I need to get the audio features of said tracks.

I got a csv with the following data:
- artist
- track title
- label
- rank
- week

<br/>

Next steps are:
- getting audio features from tracks (top100 & top20 hip hop)

In [2]:
# importing needed libraries

import pandas as pd
import numpy as np
import getpass
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
# importing data csv

top100 = pd.read_csv('../data/top100_tracks_V3.csv')

# Getting audio features

To get the audio features I will use audio_features(tracks=[]) function. To use this function I need the track ids.

In [4]:
len(top100)

59400

In [5]:
top100_remove_dups = top100.sort_values('title', ascending=False)
top100_remove_dups = top100_remove_dups.drop_duplicates(subset='title', keep='first')

len(top100_remove_dups)

5564

In [6]:
# setting passwords

client_id = getpass.getpass('client_id?')
client_secret = getpass.getpass('client_secret?')

client_id?········
client_secret?········


In [7]:
# connection to spotify API

sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id ,
                                                           client_secret=client_secret))

In [21]:
# getting the id from the track title for 1 example

track = 'drei uhr nachts'
artist = 'mark forster'

track_id = sp.search(q='artist:' + artist + ' track:' + track, type='track')

#### preparing for looping to get info from all tracks

In [8]:
# removing duplicates in title

track_names = list(top100_remove_dups['title'])

In [9]:
# removing duplicates in artist

track_artists = list(top100_remove_dups['artist'])

In [10]:
# zpping both lists together to dict

track_dict = dict(zip(track_names,track_artists))

In [11]:
# looping over dict to get all track info I need from Spotify

track_id_dict = []

for key, value in track_dict.items():
    track_id_dict.append(sp.search(q='artist:' + value + ' track:' + key, type='track'))

In [150]:
# extract the id for 1 example

track_id_dict[4]['tracks']['items'][0]['id']

'7ue6QuZluV20e3lKVhsAy4'

In [119]:
# extract the id for 1 example

track_id_dict[4]['tracks']['items'][0]['name']

'Über uns ist nur der Himmel - Remastered 2017'

In [123]:
# extract the artist for 1 example

track_id_dict[4]['tracks']['items'][0]['artists'][0]['name']

'Jürgen Drews'

In [12]:
# gettin the ids for all tracks

track_ids = []
track_title = []
track_explicit = []
track_artist = []

for dic in range(len(track_id_dict)):
    for track in track_id_dict[dic]['tracks']['items']:
        track_ids.append(track['id'])
        track_title.append(track['name'])
        track_explicit.append(track['explicit'])
        track_artist.append(track['artists'][0]['name'])

In [13]:
len(track_ids), len(track_title), len(track_artist), len(track_explicit)

(17663, 17663, 17663, 17663)

In [15]:
# creating a df from track id, title and artist to add later to the audio feautres again

top100_trackid_title_artist_df = pd.DataFrame({'track_ids' : track_ids,
                                'track_title' : track_title,
                                'track_explicit' : track_explicit,
                                'track_artist' : track_artist }, 
                                columns=['track_ids','track_title','track_explicit','track_artist'])
len(top100_trackid_title_artist_df)

17663

In [16]:
# renaming the column name track_ids to id, so its matches the id from spotify

top100_trackid_title_artist_df.columns = ['id','track_title','track_explicit','track_artist']

In [17]:
# saving df as csv

top100_trackid_title_artist_df.to_csv('../data/top100_trackid_title_artist.csv')

#### now getting the auio features

In [18]:
# getting audio features for 1 track

audio_features_test = sp.audio_features('7ue6QuZluV20e3lKVhsAy4')

In [19]:
# building a function to put the song ids in track_ids into chunks
# the audio feature function can only process 50 tracks at the time

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

In [21]:
# getting the audio features from the top100 track ids
# iterating over the top100 track list with the chunk function

top100_track_audiofeatures = []

for chunk in list(chunks(track_ids, 50)):
    top100_track_audiofeatures.append(sp.audio_features(chunk))

In [22]:
# turning the audio feature list into a df

pd.DataFrame(top100_track_audiofeatures)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,40,41,42,43,44,45,46,47,48,49
0,"{'danceability': 0.66, 'energy': 0.695, 'key':...","{'danceability': 0.366, 'energy': 0.552, 'key'...","{'danceability': 0.497, 'energy': 0.93, 'key':...","{'danceability': 0.5, 'energy': 0.954, 'key': ...","{'danceability': 0.699, 'energy': 0.579, 'key'...","{'danceability': 0.499, 'energy': 0.727, 'key'...","{'danceability': 0.385, 'energy': 0.868, 'key'...","{'danceability': 0.366, 'energy': 0.552, 'key'...","{'danceability': 0.583, 'energy': 0.829, 'key'...","{'danceability': 0.546, 'energy': 0.695, 'key'...",...,"{'danceability': 0.7, 'energy': 0.694, 'key': ...","{'danceability': 0.7, 'energy': 0.694, 'key': ...","{'danceability': 0.768, 'energy': 0.499, 'key'...","{'danceability': 0.753, 'energy': 0.406, 'key'...","{'danceability': 0.727, 'energy': 0.478, 'key'...","{'danceability': 0.798, 'energy': 0.711, 'key'...","{'danceability': 0.798, 'energy': 0.711, 'key'...","{'danceability': 0.798, 'energy': 0.711, 'key'...","{'danceability': 0.497, 'energy': 0.589, 'key'...","{'danceability': 0.489, 'energy': 0.589, 'key'..."
1,"{'danceability': 0.45, 'energy': 0.801, 'key':...","{'danceability': 0.266, 'energy': 0.755, 'key'...","{'danceability': 0.576, 'energy': 0.884, 'key'...","{'danceability': 0.443, 'energy': 0.807, 'key'...","{'danceability': 0.598, 'energy': 0.43, 'key':...","{'danceability': 0.49, 'energy': 0.761, 'key':...","{'danceability': 0.678, 'energy': 0.761, 'key'...","{'danceability': 0.727, 'energy': 0.869, 'key'...","{'danceability': 0.685, 'energy': 0.89, 'key':...","{'danceability': 0.638, 'energy': 0.896, 'key'...",...,"{'danceability': 0.793, 'energy': 0.651, 'key'...","{'danceability': 0.594, 'energy': 0.829, 'key'...","{'danceability': 0.543, 'energy': 0.948, 'key'...","{'danceability': 0.714, 'energy': 0.747, 'key'...","{'danceability': 0.452, 'energy': 0.685, 'key'...","{'danceability': 0.725, 'energy': 0.749, 'key'...","{'danceability': 0.654, 'energy': 0.872, 'key'...","{'danceability': 0.725, 'energy': 0.749, 'key'...","{'danceability': 0.725, 'energy': 0.749, 'key'...","{'danceability': 0.725, 'energy': 0.749, 'key'..."
2,"{'danceability': 0.725, 'energy': 0.749, 'key'...","{'danceability': 0.726, 'energy': 0.749, 'key'...","{'danceability': 0.726, 'energy': 0.749, 'key'...","{'danceability': 0.725, 'energy': 0.749, 'key'...","{'danceability': 0.726, 'energy': 0.749, 'key'...","{'danceability': 0.545, 'energy': 0.942, 'key'...","{'danceability': 0.665, 'energy': 0.867, 'key'...","{'danceability': 0.423, 'energy': 0.936, 'key'...","{'danceability': 0.87, 'energy': 0.544, 'key':...","{'danceability': 0.692, 'energy': 0.816, 'key'...",...,"{'danceability': 0.559, 'energy': 0.67, 'key':...","{'danceability': 0.541, 'energy': 0.186, 'key'...","{'danceability': 0.747, 'energy': 0.948, 'key'...","{'danceability': 0.631, 'energy': 0.704, 'key'...","{'danceability': 0.615, 'energy': 0.694, 'key'...","{'danceability': 0.621, 'energy': 0.729, 'key'...","{'danceability': 0.543, 'energy': 0.176, 'key'...","{'danceability': 0.628, 'energy': 0.737, 'key'...","{'danceability': 0.541, 'energy': 0.186, 'key'...","{'danceability': 0.603, 'energy': 0.845, 'key'..."
3,"{'danceability': 0.623, 'energy': 0.914, 'key'...","{'danceability': 0.63, 'energy': 0.706, 'key':...","{'danceability': 0.649, 'energy': 0.925, 'key'...","{'danceability': 0.627, 'energy': 0.722, 'key'...","{'danceability': 0.813, 'energy': 0.678, 'key'...","{'danceability': 0.617, 'energy': 0.846, 'key'...","{'danceability': 0.582, 'energy': 0.719, 'key'...","{'danceability': 0.832, 'energy': 0.409, 'key'...","{'danceability': 0.499, 'energy': 0.724, 'key'...","{'danceability': 0.61, 'energy': 0.653, 'key':...",...,"{'danceability': 0.521, 'energy': 0.946, 'key'...","{'danceability': 0.628, 'energy': 0.853, 'key'...","{'danceability': 0.397, 'energy': 0.784, 'key'...","{'danceability': 0.596, 'energy': 0.814, 'key'...","{'danceability': 0.607, 'energy': 0.589, 'key'...","{'danceability': 0.606, 'energy': 0.854, 'key'...","{'danceability': 0.715, 'energy': 0.655, 'key'...","{'danceability': 0.553, 'energy': 0.652, 'key'...","{'danceability': 0.715, 'energy': 0.655, 'key'...","{'danceability': 0.553, 'energy': 0.652, 'key'..."
4,"{'danceability': 0.799, 'energy': 0.539, 'key'...","{'danceability': 0.799, 'energy': 0.539, 'key'...","{'danceability': 0.799, 'energy': 0.539, 'key'...","{'danceability': 0.795, 'energy': 0.56, 'key':...","{'danceability': 0.802, 'energy': 0.546, 'key'...","{'danceability': 0.798, 'energy': 0.564, 'key'...","{'danceability': 0.799, 'energy': 0.538, 'key'...","{'danceability': 0.579, 'energy': 0.865, 'key'...","{'danceability': 0.317, 'energy': 0.636, 'key'...","{'danceability': 0.395, 'energy': 0.265, 'key'...",...,"{'danceability': 0.687, 'energy': 0.688, 'key'...","{'danceability': 0.591, 'energy': 0.585, 'key'...","{'danceability': 0.663, 'energy': 0.791, 'key'...","{'danceability': 0.386, 'energy': 0.529, 'key'...","{'danceability': 0.23, 'energy': 0.357, 'key':...","{'danceability': 0.236, 'energy': 0.352, 'key'...","{'danceability': 0.102, 'energy': 0.531, 'key'...","{'danceability': 0.255, 'energy': 0.364, 'key'...","{'danceability': 0.234, 'energy': 0.334, 'key'...","{'danceability': 0.771, 'energy': 0.671, 'key'..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349,"{'danceability': 0.538, 'energy': 0.549, 'key'...","{'danceability': 0.531, 'energy': 0.637, 'key'...","{'danceability': 0.747, 'energy': 0.705, 'key'...","{'danceability': 0.587, 'energy': 0.732, 'key'...","{'danceability': 0.747, 'energy': 0.705, 'key'...","{'danceability': 0.59, 'energy': 0.709, 'key':...","{'danceability': 0.765, 'energy': 0.473, 'key'...","{'danceability': 0.718, 'energy': 0.544, 'key'...","{'danceability': 0.75, 'energy': 0.477, 'key':...","{'danceability': 0.765, 'energy': 0.473, 'key'...",...,"{'danceability': 0.809, 'energy': 0.62, 'key':...","{'danceability': 0.71, 'energy': 0.85, 'key': ...","{'danceability': 0.71, 'energy': 0.85, 'key': ...","{'danceability': 0.697, 'energy': 0.649, 'key'...","{'danceability': 0.876, 'energy': 0.902, 'key'...","{'danceability': 0.677, 'energy': 0.726, 'key'...","{'danceability': 0.523, 'energy': 0.742, 'key'...","{'danceability': 0.528, 'energy': 0.746, 'key'...","{'danceability': 0.395, 'energy': 0.845, 'key'...","{'danceability': 0.406, 'energy': 0.838, 'key'..."
350,"{'danceability': 0.754, 'energy': 0.704, 'key'...","{'danceability': 0.36, 'energy': 0.49, 'key': ...","{'danceability': 0.66, 'energy': 0.458, 'key':...","{'danceability': 0.541, 'energy': 0.518, 'key'...","{'danceability': 0.652, 'energy': 0.86, 'key':...","{'danceability': 0.652, 'energy': 0.86, 'key':...","{'danceability': 0.63, 'energy': 0.694, 'key':...","{'danceability': 0.746, 'energy': 0.587, 'key'...","{'danceability': 0.746, 'energy': 0.587, 'key'...","{'danceability': 0.784, 'energy': 0.313, 'key'...",...,"{'danceability': 0.5, 'energy': 0.647, 'key': ...","{'danceability': 0.639, 'energy': 0.735, 'key'...","{'danceability': 0.484, 'energy': 0.673, 'key'...","{'danceability': 0.665, 'energy': 0.792, 'key'...","{'danceability': 0.354, 'energy': 0.307, 'key'...","{'danceability': 0.643, 'energy': 0.851, 'key'...","{'danceability': 0.856, 'energy': 0.54, 'key':...","{'danceability': 0.756, 'energy': 0.978, 'key'...","{'danceability': 0.471, 'energy': 0.853, 'key'...","{'danceability': 0.81, 'energy': 0.552, 'key':..."
351,"{'danceability': 0.731, 'energy': 0.771, 'key'...","{'danceability': 0.818, 'energy': 0.803, 'key'...","{'danceability': 0.763, 'energy': 0.708, 'key'...","{'danceability': 0.809, 'energy': 0.807, 'key'...","{'danceability': 0.818, 'energy': 0.804, 'key'...","{'danceability': 0.818, 'energy': 0.804, 'key'...","{'danceability': 0.818, 'energy': 0.804, 'key'...","{'danceability': 0.818, 'energy': 0.804, 'key'...","{'danceability': 0.818, 'energy': 0.804, 'key'...","{'danceability': 0.564, 'energy': 0.745, 'key'...",...,"{'danceability': 0.625, 'energy': 0.691, 'key'...","{'danceability': 0.625, 'energy': 0.691, 'key'...","{'danceability': 0.864, 'energy': 0.826, 'key'...","{'danceability': 0.874, 'energy': 0.717, 'key'...","{'danceability': 0.468, 'energy': 0.178, 'key'...","{'danceability': 0.881, 'energy': 0.562, 'key'...","{'danceability': 0.864, 'energy': 0.826, 'key'...","{'danceability': 0.322, 'energy': 0.814, 'key'...","{'danceability': 0.881, 'energy': 0.562, 'key'...","{'danceability': 0.684, 'energy': 0.836, 'key'..."
352,"{'danceability': 0.881, 'energy': 0.562, 'key'...","{'danceability': 0.881, 'energy': 0.562, 'key'...","{'danceability': 0.691, 'energy': 0.691, 'key'...","{'danceability': 0.577, 'energy': 0.756, 'key'...","{'danceability': 0.787, 'energy': 0.818, 'key'...","{'danceability': 0.562, 'energy': 0.761, 'key'...","{'danceability': 0.525, 'energy': 0.323, 'key'...","{'danceability': 0.635, 'energy': 0.319, 'key'...","{'danceability': 0.462, 'energy': 0.695, 'key'...","{'danceability': 0.495, 'energy': 0.702, 'key'...",...,"{'danceability': 0.741, 'energy': 0.347, 'key'...","{'danceability': 0.582, 'energy': 0.835, 'key'...","{'danceability': 0.606, 'energy': 0.814, 'key'...","{'danceability': 0.489, 'energy': 0.76, 'key':...","{'danceability': 0.555, 'energy': 0.745, 'key'...","{'danceability': 0.56, 'energy': 0.765, 'key':...","{'danceability': 0.56, 'energy': 0.757, 'key':...","{'danceability': 0.56, 'energy': 0.765, 'key':...","{'danceability': 0.457, 'energy': 0.877, 'key'...","{'danceability': 0.545, 'energy': 0.953, 'key'..."


In [23]:
# we need to unpack those dicts

top100_track_audiofeatures_df = pd.DataFrame()

for i in range(len(top100_track_audiofeatures)):
    try:
        top100_track_audiofeatures_df = top100_track_audiofeatures_df.append(pd.DataFrame(top100_track_audiofeatures[i]))
    except:
        continue
        
top100_track_audiofeatures_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,0.66,0.695,9,-6.896,0,0.029,0.0466,0.145,0.0854,0.506,129.972,audio_features,3Vn9oCZbdI1EMO7jxdz2Rc,spotify:track:3Vn9oCZbdI1EMO7jxdz2Rc,https://api.spotify.com/v1/tracks/3Vn9oCZbdI1E...,https://api.spotify.com/v1/audio-analysis/3Vn9...,382387,4
1,0.366,0.552,6,-8.35,1,0.0317,0.206,0.0595,0.178,0.114,97.98,audio_features,0Fao855T3klV3REFRFHRF3,spotify:track:0Fao855T3klV3REFRFHRF3,https://api.spotify.com/v1/tracks/0Fao855T3klV...,https://api.spotify.com/v1/audio-analysis/0Fao...,597933,4
2,0.497,0.93,5,-7.024,1,0.0652,0.109,0.0756,0.327,0.403,120.007,audio_features,42bQZnfDGmejgB5elWY3QA,spotify:track:42bQZnfDGmejgB5elWY3QA,https://api.spotify.com/v1/tracks/42bQZnfDGmej...,https://api.spotify.com/v1/audio-analysis/42bQ...,347280,4
3,0.5,0.954,5,-4.514,1,0.115,0.00409,0.0578,0.0947,0.466,132.04,audio_features,0v60rM2HLSatgUyVZDrLwm,spotify:track:0v60rM2HLSatgUyVZDrLwm,https://api.spotify.com/v1/tracks/0v60rM2HLSat...,https://api.spotify.com/v1/audio-analysis/0v60...,292627,4
4,0.699,0.579,10,-7.633,0,0.0536,0.28,0.000222,0.285,0.339,137.059,audio_features,6N9PAf91qP6aJIzT8bVoof,spotify:track:6N9PAf91qP6aJIzT8bVoof,https://api.spotify.com/v1/tracks/6N9PAf91qP6a...,https://api.spotify.com/v1/audio-analysis/6N9P...,291947,4


In [24]:
len(top100_track_audiofeatures_df), len(top100_trackid_title_artist_df)

(17513, 17663)

In [None]:
# add columns to df:
# genre
# artist name
# track title

In [25]:
top100_trackid_title_artist_df.columns

Index(['id', 'track_title', 'track_explicit', 'track_artist'], dtype='object')

In [26]:
top100_track_audiofeatures_df_merged = pd.merge(top100_trackid_title_artist_df, top100_track_audiofeatures_df, how='left', on=['id'])
top100_track_audiofeatures_df_merged.tail()

Unnamed: 0,id,track_title,track_explicit,track_artist,danceability,energy,key,loudness,mode,speechiness,...,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature
17730,27QQvaTZKDprt8HcQzVXJU,#SELFIE,False,The Chainsmokers,0.805,0.892,0.0,-3.295,1.0,0.244,...,1.7e-05,0.06,0.718,127.995,audio_features,spotify:track:27QQvaTZKDprt8HcQzVXJU,https://api.spotify.com/v1/tracks/27QQvaTZKDpr...,https://api.spotify.com/v1/audio-analysis/27QQ...,184880.0,4.0
17731,6slV7KFNT8BC784wnRWwAB,#SELFIE,False,The Chainsmokers,0.789,0.863,5.0,-4.47,1.0,0.252,...,0.0002,0.414,0.615,127.984,audio_features,spotify:track:6slV7KFNT8BC784wnRWwAB,https://api.spotify.com/v1/tracks/6slV7KFNT8BC...,https://api.spotify.com/v1/audio-analysis/6slV...,188907.0,4.0
17732,2zaCP2x8Ok3cLAQkxcFBMi,#SELFIE,False,The Chainsmokers,0.805,0.892,0.0,-3.295,1.0,0.244,...,1.7e-05,0.06,0.718,127.995,audio_features,spotify:track:2zaCP2x8Ok3cLAQkxcFBMi,https://api.spotify.com/v1/tracks/2zaCP2x8Ok3c...,https://api.spotify.com/v1/audio-analysis/2zaC...,184880.0,4.0
17733,4mI0sGmXHviRF9QbY4bfq7,#niemalsantäuschen,True,Farid Bang,0.886,0.596,1.0,-4.926,1.0,0.0619,...,0.0,0.0637,0.474,135.967,audio_features,spotify:track:4mI0sGmXHviRF9QbY4bfq7,https://api.spotify.com/v1/tracks/4mI0sGmXHviR...,https://api.spotify.com/v1/audio-analysis/4mI0...,197347.0,4.0
17734,0hzcLCW6F1F0jauTFAhOQP,#niemalsantäuschen,True,Farid Bang,0.886,0.596,1.0,-4.926,1.0,0.0619,...,0.0,0.0637,0.474,135.967,audio_features,spotify:track:0hzcLCW6F1F0jauTFAhOQP,https://api.spotify.com/v1/tracks/0hzcLCW6F1F0...,https://api.spotify.com/v1/audio-analysis/0hzc...,197347.0,4.0


In [27]:
len(top100_track_audiofeatures_df_merged)

17735

In [28]:
# saving df as csv

top100_track_audiofeatures_df_merged.to_csv('../data/top100_audio_features.csv')