# An Introduction to Spotipy

This notebook is supplementary material for my Medium article, "How to Create Large Music Datasets in 3 Steps Using Spotipy".

## 1. Imports and Authorization

Imports

In [1]:
import os

import pandas as pd # for later
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials

Authorization

In [2]:
# Read the Spotify API credentials from the environment instead of embedding
# them in the notebook, so they are never committed or shared with the file.
# Set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting the kernel.
# NOTE(review): credentials that were previously stored in this cell should be
# treated as leaked and rotated in the Spotify developer dashboard.
CLIENT_ID = os.environ.get("SPOTIPY_CLIENT_ID")
CLIENT_SECRET = os.environ.get("SPOTIPY_CLIENT_SECRET")

In [3]:

# Build the credentials manager from the app's client ID/secret, then create
# the API client that the rest of the notebook uses as `sp`.
client_credentials_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID,
    client_secret=CLIENT_SECRET,
)
sp = spotipy.Spotify(
    client_credentials_manager=client_credentials_manager,
)

## 2. Find Playlist to Analyze

In [4]:
# Owner and Spotify ID of the playlist analyzed in section 3.1.
playlist_creator, playlist_id = "spotify", "4mHW0hBLWGaqnq77Ps5dr0"

## 3. Analyze Playlist Tracks

### 3.1 Analyze a Single Playlist

Define the analysis function.

In [5]:
def analyze_playlist(creator, playlist_id, client=None):
    """Build a DataFrame of metadata and audio features for a playlist's tracks.

    Parameters
    ----------
    creator : str
        User name of the playlist owner, passed to ``user_playlist_tracks``.
    playlist_id : str
        Spotify ID of the playlist.
    client : object, optional
        Object exposing ``user_playlist_tracks``, ``next`` and
        ``audio_features`` (for example a ``spotipy.Spotify`` instance).
        Defaults to the notebook-level client ``sp``.

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns listed in
        ``playlist_features_list`` (in that order). Audio-feature columns are
        null for tracks whose audio features are unavailable. An empty
        playlist yields an empty frame with the same columns.

    Notes
    -----
    Playlist entries without a track object or without a track id are skipped,
    because no audio features can be requested for them.
    """
    if client is None:
        # Fall back to the client created in the authorization cell.
        client = sp

    playlist_features_list = ["track_name", "artist", "album", "track_id",
                              "danceability", "energy", "key", "loudness", "mode", "speechiness",
                              "instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]
    # Everything after the four metadata columns is read from the audio features.
    audio_feature_names = playlist_features_list[4:]

    # Walk every page of the playlist; a single response only holds one page,
    # so stopping after the first call would silently truncate long playlists.
    tracks = []
    page = client.user_playlist_tracks(creator, playlist_id)
    while page:
        for item in page["items"]:
            track = item.get("track")
            if track and track.get("id"):
                tracks.append(track)
        page = client.next(page) if page.get("next") else None

    # Request audio features in batches instead of one call per track.
    batch_size = 100  # maximum number of ids per audio-features request
    rows = []
    for start in range(0, len(tracks), batch_size):
        batch = tracks[start:start + batch_size]
        batch_features = list(client.audio_features([t["id"] for t in batch]) or [])
        # Pad so that zip() never drops a track if fewer results come back.
        batch_features += [None] * (len(batch) - len(batch_features))

        for track, audio_features in zip(batch, batch_features):
            # NOTE(review): "artist" is the album's first artist, so tracks on
            # compilations show up as "Various Artists" — confirm whether the
            # track's own artist list was intended.
            row = {
                "track_name": track["name"],
                "artist": track["album"]["artists"][0]["name"],
                "album": track["album"]["name"],
                "track_id": track["id"],
            }
            for feature in audio_feature_names:
                row[feature] = audio_features[feature] if audio_features else None
            rows.append(row)

    # Build the frame once; growing it with concat inside the loop is quadratic.
    return pd.DataFrame(rows, columns=playlist_features_list)

In [6]:
# Run the single-playlist analysis for the playlist chosen in section 2.
playlist_df = analyze_playlist(creator=playlist_creator, playlist_id=playlist_id)

In [7]:
# Preview the result; head() limits the display to at most the first 100 rows.
playlist_df.head(n=100)

Unnamed: 0,track_name,artist,album,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,"Aankh Marey (From ""Simmba"")",Various Artists,"Aankh Marey (From ""Simmba"")",63MvWd6T6yoS7h4AJ4Hjrm,0.796,0.978,5,-1.266,0,0.0955,0.002540,0.3460,0.800,129.999,212741,4
1,"Ghungroo (From ""War"")",Arijit Singh,"Ghungroo (From ""War"")",0WdbnNKO0Jt4BZACSDQh44,0.695,0.727,6,-6.274,0,0.0323,0.000175,0.5330,0.864,118.034,302935,4
2,"Illegal Weapon 2.0 (From ""Street Dancer 3D"")",Various Artists,"Illegal Weapon 2.0 (From ""Street Dancer 3D"")",5fXslGZPI5Cco6PKHzlSL3,0.805,0.919,1,-1.294,1,0.0938,0.003430,0.0598,0.494,94.993,188606,4
3,"Muqabla (From ""Street Dancer 3D"")",Yash Narvekar,"Muqabla (From ""Street Dancer 3D"")",28veUNu4veN0LOBVa0nFw8,0.804,0.913,0,-3.135,1,0.0933,0.000028,0.3910,0.544,119.993,176125,4
4,Sauda Khara Khara,Various Artists,"Sauda Khara Khara (From ""Good Newwz"")",4LLvxxkWtt818FNO3cbsdo,0.687,0.919,10,-3.280,1,0.0635,0.000004,0.1050,0.877,145.037,211034,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,"Lift Teri Bandh Hai (From ""Judwaa 2"")",Anu Malik,"Oonchi Hai Building 2.0 (From ""Judwaa 2"")",3ElJ4RSdrweHUb1jsPEOF8,0.829,0.760,1,-5.552,1,0.1060,0.000010,0.3360,0.773,107.010,204550,4
96,The Wakhra Song,Various Artists,"The Wakhra Song (From ""Judgementall Hai Kya"")",6N6mJlsjYRV0yoMSPS5JiQ,0.788,0.894,5,-2.661,0,0.1980,0.000253,0.0649,0.588,89.945,196000,4
97,Dilbar,Various Artists,Satyameva Jayate,0Ms1V2flsPzr2bVqImelCB,0.725,0.912,9,-3.665,0,0.0851,0.000077,0.1070,0.674,104.054,184433,4
98,The Jawaani Song,R. D. Burman,The Jawaani Song (Student of the Year 2),5beP0jhGEjYjd485eBczvZ,0.715,0.799,7,-4.897,1,0.0794,0.000000,0.0651,0.851,145.021,253000,4


In [None]:
# Save the single-playlist table as CSV without the index column.
# NOTE(review): the export cell at the end of section 3.2 writes to the same
# "filename.csv"; if both cells run, the later one overwrites this file.
playlist_df.to_csv(path_or_buf="filename.csv", index=False)

### 3.2 Analyze Multiple Playlists

In [8]:
# Playlists to analyze. Each key is the label written to the "playlist" column
# and each value is a (creator, playlist_id) pair unpacked into analyze_playlist.
#
# Keys must be unique: a repeated key in a dict literal silently replaces the
# earlier entry, so that playlist would never be analyzed. "sonu", "rock" and
# "2020" were each listed twice; the entries that used to be overwritten now
# carry an "_alt" suffix, and the plain keys keep the values they resolved to.
playlist_dict = {
    "warm_fuzzy_": ("spotify", "37i9dQZF1DX5IDTimEWoTd"),
    "love_songs": ("indiemono", "5KbTzqKBqxQRD8OBtJTZrS"),
    "romance_songs": ("Susan Doles", "7sAUK3XK8NHH1s5vGcTBkF"),
    "bollywood_songs": ("spotify", "4mHW0hBLWGaqnq77Ps5dr0"),
    "sad_songs": ("spotify", "37i9dQZF1DX8xfQRRX1PDm"),
    "ar_rahman": ("spotify", "7B1y9FLBW2ksYziqrrtBxo"),
    "sukh": ("spotify", "3qZOyF7ChQdvjmU6qiwMNu"),
    "sonu_alt": ("spotify", "37i9dQZF1E4EdUG3VYVONS"),
    "arjit": ("spotify", "5NiKo8Js2fBlzIdtLz2TN2"),
    "shreya": ("spotify", "37i9dQZF1DXc0BeAqkMlJ0"),
    "2000s": ("spotify", "5vPQOVcCzEuHkrkmNTmPfI"),
    "2010": ("spotify", "3hTXmx14jU1FkOx8CAfeth"),
    "2018": ("spotify", "49g8Aqji0Lw9rTeI5Av4Va"),
    "2020_alt": ("spotify", "5g8vHznbv4PXihJDyWjMAo"),
    "shankar": ("spotify", "37i9dQZF1DWYztMONFqfvX"),
    "amit": ("spotify", "37i9dQZF1E4Fb8vTae2C4x"),
    "rock_alt": ("spotify", "37i9dQZF1DZ06evO4yULNR"),
    "kk": ("spotify", "37i9dQZF1DZ06evO2uagOI"),
    "himesh": ("spotify", "37i9dQZF1DZ06evO0cTZQZ"),
    "sonu": ("spotify", "37i9dQZF1DXcQT2feNAtRw"),
    "neha": ("spotify", "37i9dQZF1DX0F7Z7ZkOInS"),
    "rap": ("spotify", "3IHyDX6NyD5ytThYKHIE2s"),
    "wed": ("spotify", "1o1RakJXDw6WtHNmaCLn4e"),
    "indian": ("spotify", "2VHdhD77ZP0A3GuJ1MVEfQ"),
    "romance": ("spotify", "09nWEs2d4zbyjGXMaA8oiQ"),
    "top eng": ("spotify", "16pwrq517Pxka7tzjOgcbv"),
    "pritam": ("spotify", "37i9dQZF1DX654XADbS8bL"),
    "pop": ("spotify", "5KRBZT2yL1PclwlhEYiMzM"),
    "rock": ("spotify", "6QL5elniCSP080758ZVLZi"),
    "fusion": ("spotify", "4ZdnhaXYwqfaM9mFCWFpgy"),
    "top 100": ("spotify", "5ABHKGoOzxkaa28ttQV9sE"),
    "2019": ("spotify", "28L4AgPkL6b98m8aO16tTG"),
    "2020": ("spotify", "4lln8bUrz3GfQo9i5bDiXh"),
    "top 50": ("spotify", "37i9dQZEVXbMDoHDwVN2tF"),
}

In [10]:
def analyze_playlist_dict(playlist_dict):
    """Analyze every playlist in ``playlist_dict`` and stack the results.

    Parameters
    ----------
    playlist_dict : dict
        Maps a playlist label to the positional arguments of
        ``analyze_playlist`` (a ``(creator, playlist_id)`` tuple).

    Returns
    -------
    pandas.DataFrame
        The rows of every analyzed playlist with a fresh 0..n-1 index, plus a
        "playlist" column holding the label of the playlist each track came
        from. An empty ``playlist_dict`` yields an empty frame that only has
        the "playlist" column.
    """
    frames = []
    for key, val in playlist_dict.items():
        playlist_df = analyze_playlist(*val)
        # Add a playlist column so that we can see which playlist a track belongs to
        frames.append(playlist_df.assign(playlist=key))

    if not frames:
        # Nothing to analyze: return an empty frame instead of failing on an
        # unassigned result variable.
        return pd.DataFrame(columns=["playlist"])

    # Concatenate once at the end rather than re-copying the frame per playlist.
    return pd.concat(frames, ignore_index=True)

In [None]:
# Analyze every playlist listed in playlist_dict and combine the results.
multiple_playlist_df = analyze_playlist_dict(playlist_dict=playlist_dict)

In [None]:
# Preview the first five rows of the combined table.
multiple_playlist_df.head(n=5)

In [None]:
# Count how many rows each playlist label contributed.
multiple_playlist_df.loc[:, "playlist"].value_counts()

In [113]:
# Save the combined table as CSV without the index column.
# NOTE(review): the single-playlist export in section 3.1 also targets
# "filename.csv"; running this cell overwrites that file.
multiple_playlist_df.to_csv(path_or_buf="filename.csv", index=False)