# Lab - API Wrappers

Creating a collection of songs with their audio features - as large as possible.

In [1]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from pandas import json_normalize
import pandas as pd
from random import randint
from time import sleep

In [2]:
secrets_file = open("C:\\Users\\Acer\Desktop\\Ironhack Berlin\\ZAJECIA\\week6\\secrets.txt","r") 

string = secrets_file.read()

secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0:
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

In [3]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# initializing SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['clientid'],
                                                           client_secret=secrets_dict['clientsecret']))

In [4]:
### FUNCTIONS 


def get_spotify_playlist_data(playlist_url):
    # function to fetch tracks from Spotify playlist
    def fetch_playlist_tracks(playlist_id):
        results = sp.user_playlist_tracks("spotify", playlist_id)
        tracks = results['items']
        while results['next'] is not None:
            results = sp.next(results)
            tracks.extend(results['items'])
            sleep(randint(1, 3000) / 1000)  # Adding a delay to avoid rate limiting
        return tracks

    # fetching all tracks from the specified URL
    all_playlist_tracks = fetch_playlist_tracks(playlist_url)

    # creating DataFrame for tracks
    tracks_df = pd.json_normalize(all_playlist_tracks)

    # function to expand artist information
    def expand_artists_info(row):
        artists_info_df = pd.json_normalize(row['track.artists'])
        artists_info_df['song_id'] = row['track.id']
        return artists_info_df

    # creating a new column artists_info
    tracks_df['artists_info'] = tracks_df.apply(expand_artists_info, axis=1)

    # combining the artists_info DataFrames into one
    artists_df = pd.concat(tracks_df['artists_info'].tolist(), ignore_index=True)

    # merging main DataFrame with the artists DataFrame
    merged_df = pd.merge(left=tracks_df,
                         right=artists_df,
                         how='inner',
                         left_on='track.id',
                         right_on='song_id')

    # selecting final columns for the DataFrame
    final_df = merged_df[['track.name', 'name', 'song_id']]

    # splitting the DataFrame into chunks of 100 for audio features retrieval
    chunks = [(i, i + 100) for i in range(0, len(final_df), 100)]

    # initializing a list to store audio features
    audio_features_list = []
    for chunk in chunks:
        id_list100 = final_df['song_id'][chunk[0]:chunk[1]]
        audio_features_list.extend(sp.audio_features(id_list100))
        sleep(randint(1, 3000) / 1000)  # Adding a delay to avoid rate limiting

    # creating DataFrame for audio features
    audio_features_df = pd.json_normalize(audio_features_list)

    # Merging the final DataFrame with audio features
    result_df = pd.merge(left=final_df,
                         right=audio_features_df,
                         how='inner',
                         left_on='song_id',
                         right_on='id')

    return result_df


######


def process_dataframes(*dataframes):
    # concatenating dataframes if there are two or more
    if len(dataframes) > 1:
        df_concatenated = pd.concat(dataframes, ignore_index=True)
    else:
        df_concatenated = dataframes[0]

    # dropping unnecessary columns
    df_concatenated = df_concatenated.drop(columns=['type', 'analysis_url', 'id', 'uri', 'track_href'])

    # removing duplicates based on song_id
    df_concatenated = df_concatenated.drop_duplicates(subset=['song_id', 'name'])

    # creating artist column 
    df_concatenated['artist'] = df_concatenated.groupby('song_id')['name'].transform(lambda x: ', '.join(x))

    # removing duplicates based on song_id
    df_no_duplicates = df_concatenated.drop_duplicates(subset='song_id', keep='first')

    # dropping the original name column
    df_no_duplicates = df_no_duplicates.drop(columns=['name'])

    # resetting index for the final dataframe
    df_no_duplicates = df_no_duplicates.reset_index(drop=True)

    # defining new order
    new_order = ['song_id', 'track.name', 'artist', 'danceability', 'energy', 'key',
                 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness',
                 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

    df_no_duplicates = df_no_duplicates[new_order]

    # renaming track.name column
    df_no_duplicates = df_no_duplicates.rename(columns={'track.name': 'song_name'})

    return df_no_duplicates


######


def merge_and_remove_duplicates(df1, df2):
    # concatenating dfs
    df_combined = pd.concat([df1, df2], ignore_index=True)

    # removing duplicates based on song_id
    df_no_duplicates = df_combined.drop_duplicates(subset='song_id', keep='first')

    # resetting index
    df_no_duplicates = df_no_duplicates.reset_index(drop=True)

    return df_no_duplicates


In [5]:
# first playlist
df1 = get_spotify_playlist_data("5xqpyfZyS1DVysoevdVyEn")

print(df1.shape)
df1.head(3)

(16749, 21)


Unnamed: 0,track.name,name,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,If I Ever Feel Better,Phoenix,3AA8xNhDC0MpqwkGX3EP5V,0.755,0.553,6,-9.449,0,0.107,0.0492,...,0.0646,0.836,119.7,audio_features,3AA8xNhDC0MpqwkGX3EP5V,spotify:track:3AA8xNhDC0MpqwkGX3EP5V,https://api.spotify.com/v1/tracks/3AA8xNhDC0Mp...,https://api.spotify.com/v1/audio-analysis/3AA8...,266600,4
1,Dance To The Underground,Radio 4,2i8lRhmZNKaM1ypHwRybuD,0.667,0.667,7,-6.267,1,0.0388,0.00013,...,0.0711,0.843,130.321,audio_features,2i8lRhmZNKaM1ypHwRybuD,spotify:track:2i8lRhmZNKaM1ypHwRybuD,https://api.spotify.com/v1/tracks/2i8lRhmZNKaM...,https://api.spotify.com/v1/audio-analysis/2i8l...,243213,4
2,How We Do,Mount Sims,4flxgPC0426CEeSrzQIic0,0.826,0.601,10,-3.966,0,0.0515,0.195,...,0.07,0.948,126.971,audio_features,4flxgPC0426CEeSrzQIic0,spotify:track:4flxgPC0426CEeSrzQIic0,https://api.spotify.com/v1/tracks/4flxgPC0426C...,https://api.spotify.com/v1/audio-analysis/4flx...,234000,4


In [6]:
# second playlist
df2 = get_spotify_playlist_data("1QXdIdYRAkzwo3UsEYoTAs")

print(df2.shape)
df2.head(3)

(11863, 21)


Unnamed: 0,track.name,name,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Cigarette Ahegao,Penelope Scott,6UYJYgY8VRo6mKrhgTe1CF,0.651,0.286,5,-13.399,0,0.0365,0.095,...,0.101,0.165,83.007,audio_features,6UYJYgY8VRo6mKrhgTe1CF,spotify:track:6UYJYgY8VRo6mKrhgTe1CF,https://api.spotify.com/v1/tracks/6UYJYgY8VRo6...,https://api.spotify.com/v1/audio-analysis/6UYJ...,183992,4
1,Lotta True Crime,Penelope Scott,6Dqs6JddZc16yl27MDzIWY,0.519,0.413,8,-14.462,1,0.21,0.331,...,0.106,0.404,159.398,audio_features,6Dqs6JddZc16yl27MDzIWY,spotify:track:6Dqs6JddZc16yl27MDzIWY,https://api.spotify.com/v1/tracks/6Dqs6JddZc16...,https://api.spotify.com/v1/audio-analysis/6Dqs...,202320,4
2,American Healthcare (Glitzy),Penelope Scott,3GNTTTs9CVmr8RTrilIBOS,0.548,0.508,5,-11.994,1,0.0939,0.531,...,0.211,0.301,119.434,audio_features,3GNTTTs9CVmr8RTrilIBOS,spotify:track:3GNTTTs9CVmr8RTrilIBOS,https://api.spotify.com/v1/tracks/3GNTTTs9CVmr...,https://api.spotify.com/v1/audio-analysis/3GNT...,269752,4


In [7]:
final_dataframe = process_dataframes(df1, df2)

In [8]:
final_dataframe.shape

(18845, 16)

In [9]:
final_dataframe.head(10)

Unnamed: 0,song_id,song_name,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,3AA8xNhDC0MpqwkGX3EP5V,If I Ever Feel Better,Phoenix,0.755,0.553,6,-9.449,0,0.107,0.0492,0.00607,0.0646,0.836,119.7,266600,4
1,2i8lRhmZNKaM1ypHwRybuD,Dance To The Underground,Radio 4,0.667,0.667,7,-6.267,1,0.0388,0.00013,0.00385,0.0711,0.843,130.321,243213,4
2,4flxgPC0426CEeSrzQIic0,How We Do,Mount Sims,0.826,0.601,10,-3.966,0,0.0515,0.195,3e-06,0.07,0.948,126.971,234000,4
3,1XHFob24QklIXtLRopKirJ,Fuck the Pain Away,Peaches,0.905,0.945,4,-2.63,1,0.137,0.0354,0.00529,0.0707,0.944,131.597,250693,4
4,4JB847zlgViLq8tJIzRsZy,Street Surgery 2,Zongamin,0.797,0.536,11,-8.267,0,0.266,0.0315,2.9e-05,0.17,0.704,102.881,224800,4
5,3vyKSb9sAdXl0kQ1KnS9fY,Emerge,Fischerspooner,0.607,0.734,10,-7.622,1,0.0445,0.0906,0.812,0.0925,0.569,147.941,288307,4
6,3iDK8BAaBUatPR84gdfa9g,Such Great Heights,The Postal Service,0.653,0.818,5,-8.125,1,0.047,0.00907,0.23,0.0703,0.198,174.984,266347,4
7,5Bf5C7RkyXTqE1h3Q8KZJ7,The District Sleeps Alone Tonight,The Postal Service,0.621,0.775,0,-7.662,1,0.0513,0.129,0.0106,0.115,0.216,153.963,284293,4
8,0LEsGGRUrZZhLPFoxeioHr,Sleepin In,The Postal Service,0.715,0.635,0,-6.345,1,0.0384,0.234,0.000542,0.157,0.679,128.997,261253,4
9,6LtIW2Y5NbZnMlUJbiviQ6,Danger! High Voltage - Soulchild Radio Mix,Electric Six,0.66,0.698,11,-4.722,0,0.0302,1.9e-05,0.166,0.0517,0.511,123.005,214600,4


In [10]:
# third playlist
df3 = get_spotify_playlist_data("4rnleEAOdmFAbRcNCgZMpY")

print(df3.shape)
df3.head(3)

(14771, 21)


Unnamed: 0,track.name,name,song_id,danceability,energy,key,loudness,mode,speechiness,acousticness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Take Me To Church,Hozier,7dS5EaCoMnN7DzlpT6aRn2,0.566,0.664,4,-5.303,0,0.0464,0.634,...,0.116,0.437,128.945,audio_features,7dS5EaCoMnN7DzlpT6aRn2,spotify:track:7dS5EaCoMnN7DzlpT6aRn2,https://api.spotify.com/v1/tracks/7dS5EaCoMnN7...,https://api.spotify.com/v1/audio-analysis/7dS5...,241688,4
1,Cooler Than Me - Single Mix,Mike Posner,2V4bv1fNWfTcyRJKmej6Sj,0.768,0.82,7,-4.63,0,0.0474,0.179,...,0.689,0.625,129.965,audio_features,2V4bv1fNWfTcyRJKmej6Sj,spotify:track:2V4bv1fNWfTcyRJKmej6Sj,https://api.spotify.com/v1/tracks/2V4bv1fNWfTc...,https://api.spotify.com/v1/audio-analysis/2V4b...,213293,4
2,Cooler Than Me - Single Mix,Mike Posner,2V4bv1fNWfTcyRJKmej6Sj,0.768,0.82,7,-4.63,0,0.0474,0.179,...,0.689,0.625,129.965,audio_features,2V4bv1fNWfTcyRJKmej6Sj,spotify:track:2V4bv1fNWfTcyRJKmej6Sj,https://api.spotify.com/v1/tracks/2V4bv1fNWfTc...,https://api.spotify.com/v1/audio-analysis/2V4b...,213293,4


In [11]:
df_processed3 = process_dataframes(df3)

In [12]:
final_dataframe = merge_and_remove_duplicates(final_dataframe, df_processed3)

final_dataframe.shape

(23766, 16)

In [13]:
final_dataframe.head(10)

Unnamed: 0,song_id,song_name,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,3AA8xNhDC0MpqwkGX3EP5V,If I Ever Feel Better,Phoenix,0.755,0.553,6,-9.449,0,0.107,0.0492,0.00607,0.0646,0.836,119.7,266600,4
1,2i8lRhmZNKaM1ypHwRybuD,Dance To The Underground,Radio 4,0.667,0.667,7,-6.267,1,0.0388,0.00013,0.00385,0.0711,0.843,130.321,243213,4
2,4flxgPC0426CEeSrzQIic0,How We Do,Mount Sims,0.826,0.601,10,-3.966,0,0.0515,0.195,3e-06,0.07,0.948,126.971,234000,4
3,1XHFob24QklIXtLRopKirJ,Fuck the Pain Away,Peaches,0.905,0.945,4,-2.63,1,0.137,0.0354,0.00529,0.0707,0.944,131.597,250693,4
4,4JB847zlgViLq8tJIzRsZy,Street Surgery 2,Zongamin,0.797,0.536,11,-8.267,0,0.266,0.0315,2.9e-05,0.17,0.704,102.881,224800,4
5,3vyKSb9sAdXl0kQ1KnS9fY,Emerge,Fischerspooner,0.607,0.734,10,-7.622,1,0.0445,0.0906,0.812,0.0925,0.569,147.941,288307,4
6,3iDK8BAaBUatPR84gdfa9g,Such Great Heights,The Postal Service,0.653,0.818,5,-8.125,1,0.047,0.00907,0.23,0.0703,0.198,174.984,266347,4
7,5Bf5C7RkyXTqE1h3Q8KZJ7,The District Sleeps Alone Tonight,The Postal Service,0.621,0.775,0,-7.662,1,0.0513,0.129,0.0106,0.115,0.216,153.963,284293,4
8,0LEsGGRUrZZhLPFoxeioHr,Sleepin In,The Postal Service,0.715,0.635,0,-6.345,1,0.0384,0.234,0.000542,0.157,0.679,128.997,261253,4
9,6LtIW2Y5NbZnMlUJbiviQ6,Danger! High Voltage - Soulchild Radio Mix,Electric Six,0.66,0.698,11,-4.722,0,0.0302,1.9e-05,0.166,0.0517,0.511,123.005,214600,4


In [14]:
final_dataframe.to_csv('biggest_playlist.csv', index=False)