## Extracting Spotify Data

In [1]:
import os

import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
# Spotify API credentials.
# They are read from the environment so that real secrets never have to be saved in
# the notebook; the placeholders below are used only when a variable is not set.
# SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET are the variable names spotipy itself uses.
client_id = os.environ.get('SPOTIPY_CLIENT_ID', 'ID here')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET', 'Key here')

In [3]:
# App-level authentication (no user login involved): build the credentials manager
# from the client id/secret, then hand it to the API client used by the later cells.
client_credentials_manager = SpotifyClientCredentials(
    client_id=client_id,
    client_secret=client_secret,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

In [4]:
# Insert your own playlist here
playlist_link = "https://open.spotify.com/playlist/1inW0f6Fp6qmtYo9MJSHwn"

# The playlist identifier is whatever follows the last "/" of the link,
# with any "?..." query suffix cut off.
playlist_URI = playlist_link.rpartition("/")[2].partition("?")[0]

# Track URIs returned by a single playlist request (no paging is done here).
# NOTE(review): `track_uris` is not referenced by any of the later cells visible here.
track_uris = [
    playlist_item["track"]["uri"]
    for playlist_item in sp.playlist_tracks(playlist_URI)["items"]
]

In [5]:
# Column-oriented store for the extracted playlist: one independent, initially
# empty list per output column, filled row by row by the download loop.
data = {
    column_name: []
    for column_name in (
        "Track URI",
        "Track Name",
        "Artist Name",
        "Artist Popularity",
        "Artist Genres",
        "Album",
        "Track Popularity",
        "Danceability",
        "Energy",
        "Key",
        "Loudness",
        "Mode",
        "Speechiness",
    )
}


In [6]:
# Paging state for the playlist download: begin at the first playlist item and
# request `limit` items per API call, so the whole playlist is fetched over
# several calls.
offset, limit = 0, 100

In [7]:
# Download the playlist page by page and append one row per track to `data`.
#
# Start from a clean slate so this cell can be re-run safely: paging restarts at the
# first item and the column lists are emptied, instead of appending duplicate rows
# (or fetching nothing because `offset` was left past the end by an earlier run).
offset = 0
for column_values in data.values():
    column_values.clear()

# `data` column -> key of the audio-features payload that fills it.
audio_feature_columns = {
    "Danceability": "danceability",
    "Energy": "energy",
    "Key": "key",
    "Loudness": "loudness",
    "Mode": "mode",
    "Speechiness": "speechiness",
}

# Stand-in values used when no audio features are returned for a track.
# NOTE: 0 is also a legitimate "key"/"mode" value, so such rows cannot be told
# apart from real measurements afterwards.
missing_audio_features = dict.fromkeys(audio_feature_columns.values(), 0)

# Artist lookups keyed by artist URI, so each artist is requested only once even
# when several playlist tracks share the same main artist.
artist_cache = {}

while True:
    playlist_tracks = sp.playlist_tracks(playlist_URI, offset=offset, limit=limit)

    # If there are no more tracks, break the loop
    if not playlist_tracks["items"]:
        break

    for track in playlist_tracks["items"]:
        track_info = track["track"]

        # A playlist entry can come back without track details (null "track");
        # there is nothing to record for it, so skip it instead of crashing.
        if not track_info:
            continue

        # URI
        track_uri = track_info["uri"]

        # Track name
        track_name = track_info["name"]

        # Main artist = first listed artist; fetch its details once and reuse them
        main_artist = track_info["artists"][0]
        artist_uri = main_artist["uri"]
        if artist_uri not in artist_cache:
            artist_cache[artist_uri] = sp.artist(artist_uri)
        artist_info = artist_cache[artist_uri]

        # Name, popularity, genre
        artist_name = main_artist["name"]
        artist_pop = artist_info["popularity"]
        artist_genres = artist_info["genres"]

        # Album
        album = track_info["album"]["name"]

        # Popularity of the track
        track_pop = track_info["popularity"]

        # Audio features: the call returns a list, the first element belongs to this track
        audio_features = sp.audio_features(track_uri)[0]

        # No audio features available: keep the row and record the zero stand-ins
        if audio_features is None:
            audio_features = missing_audio_features

        # Append data to lists
        data["Track URI"].append(track_uri)
        data["Track Name"].append(track_name)
        data["Artist Name"].append(artist_name)
        data["Artist Popularity"].append(artist_pop)
        data["Artist Genres"].append(artist_genres)
        data["Album"].append(album)
        data["Track Popularity"].append(track_pop)

        # Extract specific audio features
        for column_name, feature_key in audio_feature_columns.items():
            data[column_name].append(audio_features[feature_key])

    # Increment the offset to fetch the next batch of tracks
    offset += limit

In [8]:
# Append one placeholder value (0) to each audio-feature column.
# NOTE: only these six columns grow here, so they end up one entry longer than the
# track/artist columns until those receive a placeholder as well.
for feature_column in (
    "Danceability",
    "Energy",
    "Key",
    "Loudness",
    "Mode",
    "Speechiness",
):
    data[feature_column].append(0)

In [10]:
# Number of entries currently held in the "Track URI" column.
# NOTE(review): `list_length` is not used by any of the later cells visible here.
list_length = len(data["Track URI"])


In [11]:
# Sanity check: report how many values each column holds, since every column
# must have the same length before the dict can be turned into a DataFrame.
for feature_name in data:
    feature_length = len(data[feature_name])
    print(f"Length of '{feature_name}': {feature_length}")

Length of 'Track URI': 1000
Length of 'Track Name': 1000
Length of 'Artist Name': 1000
Length of 'Artist Popularity': 1000
Length of 'Artist Genres': 1000
Length of 'Album': 1000
Length of 'Track Popularity': 1000
Length of 'Danceability': 1001
Length of 'Energy': 1001
Length of 'Key': 1001
Length of 'Loudness': 1001
Length of 'Mode': 1001
Length of 'Speechiness': 1001


In [12]:
# Bring every column up to the length of the longest one by padding with
# placeholder 0s, because pd.DataFrame(data) needs all columns to be equally long.
# Padding up to the target length (rather than always appending exactly one 0)
# gives the same result on a first run and makes re-running this cell a no-op
# instead of pushing the columns out of sync again.
target_length = max((len(column_values) for column_values in data.values()), default=0)
for column_values in data.values():
    column_values.extend([0] * (target_length - len(column_values)))

In [13]:
# Re-check the column lengths after padding; they should all be equal now.
for feature_name in data:
    feature_length = len(data[feature_name])
    print(f"Length of '{feature_name}': {feature_length}")

Length of 'Track URI': 1001
Length of 'Track Name': 1001
Length of 'Artist Name': 1001
Length of 'Artist Popularity': 1001
Length of 'Artist Genres': 1001
Length of 'Album': 1001
Length of 'Track Popularity': 1001
Length of 'Danceability': 1001
Length of 'Energy': 1001
Length of 'Key': 1001
Length of 'Loudness': 1001
Length of 'Mode': 1001
Length of 'Speechiness': 1001


In [14]:
# Build the table from the column lists, then keep only the rows in which at least
# one column is non-zero, i.e. discard rows made up entirely of 0 placeholders.
playlist_table = pd.DataFrame(data)
has_non_zero_value = (playlist_table != 0).any(axis=1)
df = playlist_table[has_non_zero_value]

# Export without the index column.
df.to_csv("Playlist_2023.csv", index=False)

In [19]:
# Location of the playlist CSV to analyse, relative to the working directory.
# NOTE(review): the export step above writes "Playlist_2023.csv" into the working
# directory, while this path points one directory up; confirm it is the intended file.
path = "../Playlist_2023.csv"

In [20]:
# Load the playlist CSV found at `path` (this rebinds `df`) and preview its first row.
df = pd.read_csv(filepath_or_buffer=path)
df.head(n=1)

Unnamed: 0,Track URI,Track Name,Artist Name,Artist Popularity,Artist Genres,Album,Track Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness
0,spotify:track:2qxmye6gAegTMjLKEBoR3d,Let Me Down Slowly,Alec Benjamin,74,"['alt z', 'pop', 'pov: indie']",Narrated For You,86,0.652,0.557,1,-5.714,0,0.0318


In [21]:
# Columns to leave out of the preview below.
drop = [
    'Track Name',
    'Track URI',
    'Artist Name',
]
# Display a copy without those columns; the result is not assigned, so `df`
# itself still contains them afterwards.
df.drop(columns=drop)

Unnamed: 0,Artist Popularity,Artist Genres,Album,Track Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness
0,74,"['alt z', 'pop', 'pov: indie']",Narrated For You,86,0.652,0.557,1,-5.714,0,0.0318
1,59,['sad lo-fi'],"Living Life, In The Night",78,0.624,0.499,0,-9.962,0,0.0495
2,49,[],The Very Very Very Strongest! (Epic Version),36,0.529,0.806,7,-6.772,1,0.0346
3,65,"['irish pop', 'singer-songwriter pop']",Overwhelmed (Ryan Mack Remix),76,0.658,0.445,0,-8.749,0,0.1540
4,62,"['anthem worship', 'bossbeat']",Cinematic Songs (Vol. 6),69,0.581,0.803,2,-4.935,0,0.1520
...,...,...,...,...,...,...,...,...,...,...
995,38,"['brostep', 'electro house']",Signals (Christian Reindl Remix),34,0.558,0.867,4,-7.193,0,0.0775
996,26,[],Music Inspired By The Witcher 3: Wild Hunt,37,0.498,0.443,10,-15.061,1,0.0334
997,48,['rebel blues'],Eye of the Storm,38,0.537,0.878,5,-7.861,1,0.0926
998,49,['bass trap'],Solo,60,0.442,0.847,10,-4.111,0,0.1070


In [None]:
# Some basic cleaning
import ast

def extract_main_genre(genres_str):
    """Return the first genre of an artist's genre list, or None if there is none.

    Parameters
    ----------
    genres_str : str, list or tuple
        Either the textual form of a Python list as stored in the CSV
        (e.g. "['alt z', 'pop']"), or an already-parsed list/tuple of genres.
        Anything else (NaN, None, ...) is treated as "no genre".

    Returns
    -------
    The first element of the genre list, or None when the list is empty, the
    text cannot be parsed, or the parsed value is not a list/tuple.
    """
    if isinstance(genres_str, (list, tuple)):
        # Already parsed (e.g. the frame was built in memory rather than read from CSV).
        genres_list = genres_str
    else:
        try:
            # literal_eval only evaluates Python literals; it never executes code.
            genres_list = ast.literal_eval(genres_str)
        except (SyntaxError, ValueError, TypeError, MemoryError, RecursionError):
            # Not parseable as a literal (malformed text, NaN, None, ...).
            return None
        if not isinstance(genres_list, (list, tuple)):
            # A valid literal that is not a genre list (number, plain string, dict, ...):
            # indexing it would either fail or return something meaningless.
            return None

    if genres_list:
        return genres_list[0]
    return None

# Add a "Main Genre" column holding the first genre listed for each row's artist
# (None where no genre can be extracted).
df['Main Genre'] = df['Artist Genres'].apply(extract_main_genre)

In [23]:
df.head()

Unnamed: 0,Track URI,Track Name,Artist Name,Artist Popularity,Artist Genres,Album,Track Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness,Main Genre
0,spotify:track:2qxmye6gAegTMjLKEBoR3d,Let Me Down Slowly,Alec Benjamin,74,"['alt z', 'pop', 'pov: indie']",Narrated For You,86,0.652,0.557,1,-5.714,0,0.0318,alt z
1,spotify:track:2kx5sQgmjYug0GiDh1a4sU,"Living Life, In The Night",Cheriimoya,59,['sad lo-fi'],"Living Life, In The Night",78,0.624,0.499,0,-9.962,0,0.0495,sad lo-fi
2,spotify:track:6KDLF8dSIrhW0snVS4KK1m,The Very Very Very Strongest! - Epic Version,Carameii,49,[],The Very Very Very Strongest! (Epic Version),36,0.529,0.806,7,-6.772,1,0.0346,
3,spotify:track:03eyNjBM2mpx28H6kdaufN,Overwhelmed (Ryan Mack Remix),Ryan Mack,65,"['irish pop', 'singer-songwriter pop']",Overwhelmed (Ryan Mack Remix),76,0.658,0.445,0,-8.749,0,0.154,irish pop
4,spotify:track:5X3T8jHNIJSXviiDmiBdz0,Enemy,Tommee Profitt,62,"['anthem worship', 'bossbeat']",Cinematic Songs (Vol. 6),69,0.581,0.803,2,-4.935,0,0.152,anthem worship
