# An Introduction to Spotipy

This notebook serves as supplementary material for my medium article called "How to Create Large Music Datasets in 3 Steps Using Spotipy".

## 1. Imports and Authorization

Imports

In [12]:
import spotipy
import spotipy.util as util
import pandas as pd # for later

Authorization

In [13]:
CLIENT_ID = "ClientID" # enter your own here
CLIENT_SECRET = "SecretClientID" # enter your own here

In [14]:
token = util.oauth2.SpotifyClientCredentials(client_id=CLIENT_ID, client_secret=CLIENT_SECRET)
cache_token = token.get_access_token()
sp = spotipy.Spotify(cache_token)

## 2. Find Playlist to Analyze

In [15]:
playlist_creator = "spotify"
playlist_id = "37i9dQZF1DX5IDTimEWoTd"

## 3. Analyze Playlist Tracks

### 3.1 Analyze a Single Playlist

Use the sp.user_playlist_tracks function.

In [16]:
sp.user_playlist_tracks(playlist_creator, playlist_id)

{'href': 'https://api.spotify.com/v1/playlists/37i9dQZF1DX5IDTimEWoTd/tracks?offset=0&limit=100',
 'items': [{'added_at': '2020-04-22T15:52:26Z',
   'added_by': {'external_urls': {'spotify': 'https://open.spotify.com/user/'},
    'href': 'https://api.spotify.com/v1/users/',
    'id': '',
    'type': 'user',
    'uri': 'spotify:user:'},
   'is_local': False,
   'primary_color': None,
   'track': {'album': {'album_type': 'single',
     'artists': [{'external_urls': {'spotify': 'https://open.spotify.com/artist/4IWBUUAFIplrNtaOHcJPRM'},
       'href': 'https://api.spotify.com/v1/artists/4IWBUUAFIplrNtaOHcJPRM',
       'id': '4IWBUUAFIplrNtaOHcJPRM',
       'name': 'James Arthur',
       'type': 'artist',
       'uri': 'spotify:artist:4IWBUUAFIplrNtaOHcJPRM'}],
     'available_markets': ['AD',
      'AE',
      'AR',
      'AT',
      'AU',
      'BE',
      'BG',
      'BH',
      'BO',
      'BR',
      'CA',
      'CH',
      'CL',
      'CO',
      'CR',
      'CY',
      'CZ',
      'D

Define the analysis function.

In [17]:
def analyze_playlist(creator, playlist_id):
    
    # Create empty dataframe
    playlist_features_list = ["artist", "album", "track_name", "track_id", 
                             "danceability", "energy", "key", "loudness", "mode", "speechiness",
                             "instrumentalness", "liveness", "valence", "tempo", "duration_ms", "time_signature"]
    playlist_df = pd.DataFrame(columns = playlist_features_list)
    
    # Create empty dict
    playlist_features = {}
    
    # Loop through every track in the playlist, extract features and append the features to the playlist df
    playlist = sp.user_playlist_tracks(creator, playlist_id)["items"]
    for track in playlist:
        # Get metadata
        playlist_features["artist"] = track["track"]["album"]["artists"][0]["name"]
        playlist_features["album"] = track["track"]["album"]["name"]
        playlist_features["track_name"] = track["track"]["name"]
        playlist_features["track_id"] = track["track"]["id"]
        # Get audio features
        audio_features = sp.audio_features(playlist_features["track_id"])[0]
        for feature in playlist_features_list[4:]:
            playlist_features[feature] = audio_features[feature]
        
        # Concat the dfs
        track_df = pd.DataFrame(playlist_features, index = [0])
        playlist_df = pd.concat([playlist_df, track_df], ignore_index = True)
        
    return playlist_df

In [18]:
playlist_df = analyze_playlist(playlist_creator, playlist_id)

In [24]:
playlist_df.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,James Arthur,Falling Like The Stars,Falling Like The Stars,15xWRPHQMeqgdkGzInx3PY,0.296,0.351,6,-9.538,1,0.0351,2e-06,0.11,0.19,177.596,212720,3
1,Lauv,Spotify Singles,I Like Me Better - Recorded at Spotify Studios...,2uQNcZYa0SEbfW4cj0d9OS,0.454,0.151,9,-13.444,1,0.06,0.0,0.0792,0.362,90.187,202685,4
2,Taylor Swift,Lover (Remix) [feat. Shawn Mendes],Lover (Remix) [feat. Shawn Mendes],3i9UVldZOE0aD0JnyfAZZ0,0.448,0.603,7,-7.176,1,0.064,0.0,0.0862,0.422,205.272,221307,3
3,Ed Sheeran,Best Part of Me (feat. YEBBA),Best Part of Me (feat. YEBBA),5ug4vqGZ3eisGhY1IsziNX,0.565,0.242,2,-8.367,1,0.0318,9e-06,0.285,0.237,81.657,243267,4
4,Niall Horan,Put A Little Love On Me,Put A Little Love On Me,1hCTxutVVYvggAQcUUUZAT,0.584,0.465,10,-4.88,1,0.0262,0.0,0.106,0.299,95.969,224586,4


In [None]:
playlist_df.to_csv("filename.csv", index = False)

### 3.2 Analyze Multiple Playlists

In [25]:
playlist_dict = {
    "warm_fuzzy_feeling" : ("spotify", "37i9dQZF1DX5IDTimEWoTd"), 
    "love_songs_heart" : ("indiemono", "5KbTzqKBqxQRD8OBtJTZrS"),
    "romance_songs" : ("Susan Doles", "7sAUK3XK8NHH1s5vGcTBkF")
}

In [26]:
def analyze_playlist_dict(playlist_dict):
    
    # Loop through every playlist in the dict and analyze it
    for i, (key, val) in enumerate(playlist_dict.items()):
        playlist_df = analyze_playlist(*val)
        # Add a playlist column so that we can see which playlist a track belongs too
        playlist_df["playlist"] = key
        # Create or concat df
        if i == 0:
            playlist_dict_df = playlist_df
        else:
            playlist_dict_df = pd.concat([playlist_dict_df, playlist_df], ignore_index = True)
            
    return playlist_dict_df

In [23]:
multiple_playlist_df = analyze_playlist_dict(playlist_dict)

In [27]:
multiple_playlist_df.head()

Unnamed: 0,artist,album,track_name,track_id,danceability,energy,key,loudness,mode,speechiness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,playlist
0,James Arthur,Falling Like The Stars,Falling Like The Stars,15xWRPHQMeqgdkGzInx3PY,0.296,0.351,6,-9.538,1,0.0351,1.5e-06,0.11,0.19,177.596,212720,3,warm_fuzzy_feeling
1,Lauv,Spotify Singles,I Like Me Better - Recorded at Spotify Studios...,2uQNcZYa0SEbfW4cj0d9OS,0.454,0.151,9,-13.444,1,0.06,0.0,0.0792,0.362,90.187,202685,4,warm_fuzzy_feeling
2,Taylor Swift,Lover (Remix) [feat. Shawn Mendes],Lover (Remix) [feat. Shawn Mendes],3i9UVldZOE0aD0JnyfAZZ0,0.448,0.603,7,-7.176,1,0.064,0.0,0.0862,0.422,205.272,221307,3,warm_fuzzy_feeling
3,Ed Sheeran,Best Part of Me (feat. YEBBA),Best Part of Me (feat. YEBBA),5ug4vqGZ3eisGhY1IsziNX,0.565,0.242,2,-8.367,1,0.0318,9.3e-06,0.285,0.237,81.657,243267,4,warm_fuzzy_feeling
4,Niall Horan,Put A Little Love On Me,Put A Little Love On Me,1hCTxutVVYvggAQcUUUZAT,0.584,0.465,10,-4.88,1,0.0262,0.0,0.106,0.299,95.969,224586,4,warm_fuzzy_feeling


In [28]:
multiple_playlist_df["playlist"].value_counts()

love_songs_heart      100
romance_songs          77
warm_fuzzy_feeling     70
Name: playlist, dtype: int64

In [None]:
playlist_df.to_csv("filename.csv", index = False)