# Spotify Functions

This notebook uses the Spotify API and the Python library spotipy to generate a dataset for an individual playlist. To access the Spotify API you will need client credentials. More info on how to get access to the API can be found <a href="https://developer.spotify.com/documentation/web-api/quick-start/"> here</a> in the API documentation. 

For more info on how to use spotipy, check out the documentation <a href="https://spotipy.readthedocs.io/"> here</a> or take a look at the examples in <a href="https://github.com/plamere/spotipy/tree/master/examples" > this</a> GitHub repository. 

This script lets you generate all audio features for a playlist by inserting its Spotify playlist URI (which you can find in the Spotify app) and saves the resulting dataset to a .csv file. The audio features are metrics generated by the Spotify API, and a full explanation of what they mean can be found in the documentation <a href="https://developer.spotify.com/documentation/web-api/reference/tracks/get-audio-features/"> here</a>. The capabilities of this script may be limited by restrictions imposed by the API and by Spotify. 

In [3]:
# Import relevant libraries 
import numpy as np
import pandas as pd 

import requests 
import json 
from pprint import pprint

import datetime
import os

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [4]:
# Set environmental variables
# Credentials must never be stored in the notebook itself. Export
# SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET before starting Jupyter, or type
# them at the hidden prompt below.
# NOTE(review): a client id/secret pair that was ever committed in a notebook
# should be treated as leaked and rotated in the Spotify developer dashboard.
import getpass

for _credential in ('SPOTIPY_CLIENT_ID', 'SPOTIPY_CLIENT_SECRET'):
    # Only prompt for values that are not already present in the environment.
    if not os.environ.get(_credential):
        os.environ[_credential] = getpass.getpass('{}: '.format(_credential))

In [5]:
# Build the shared Spotify client. SpotifyClientCredentials() is constructed
# without arguments, so it picks the client id/secret up by itself.
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(),
)

In [6]:
# Get track id                                      
def get_top_50_id(playlist_id):
    """Collect the Spotify id of every track in a playlist.

    The playlist is read page by page through sp.playlist_tracks until all
    entries have been seen.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to sp.playlist_tracks (the notebook
        passes a playlist URI).

    Returns
    -------
    pandas.Series
        One entry per playlist item, in playlist order. Items whose track
        object is null in the response yield a missing value instead of
        raising, so the row count still matches the playlist.
    """
    ids = []
    offset = 0
    while True:
        response = sp.playlist_tracks(playlist_id,
                                      offset=offset,
                                      fields='items.track.id,total')
        items = response['items']
        if not items:
            break

        for item in items:
            if item is None:
                continue
            track = item.get('track')
            ids.append(track.get('id') if track is not None else None)

        offset += len(items)

        # 'total' is part of the requested fields; once every entry has been
        # read there is no need for one more request that returns an empty page.
        total = response.get('total')
        if total is not None and offset >= total:
            break

    return pd.Series(ids)

In [7]:
# Get track names
def get_top_50_name(playlist_id):
    """Collect the title of every track in a playlist.

    The playlist is read page by page through sp.playlist_tracks until all
    entries have been seen.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to sp.playlist_tracks (the notebook
        passes a playlist URI).

    Returns
    -------
    pandas.Series
        One title per playlist item, in playlist order. Items whose track
        object is null in the response yield a missing value instead of
        raising, so the row count still matches the playlist.
    """
    names = []
    offset = 0
    while True:
        response = sp.playlist_tracks(playlist_id,
                                      offset=offset,
                                      fields='items.track.name,total')
        items = response['items']
        if not items:
            break

        for item in items:
            if item is None:
                continue
            track = item.get('track')
            names.append(track.get('name') if track is not None else None)

        offset += len(items)

        # 'total' is part of the requested fields; once every entry has been
        # read there is no need for one more request that returns an empty page.
        total = response.get('total')
        if total is not None and offset >= total:
            break

    return pd.Series(names)

In [8]:
# Get artist name 
def get_artist(playlist_id): 
    """Collect the artist name(s) of every track in a playlist.

    The playlist is read page by page, so the result has one row per
    playlist item even when the playlist does not fit in a single response.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to sp.playlist_tracks (the notebook
        passes a playlist URI).

    Returns
    -------
    pandas.Series
        For each item, in playlist order: the artist name as a string when
        the track has exactly one artist, otherwise a list of names. Items
        whose track object is null in the response yield a missing value.
    """
    artists_per_track = []
    offset = 0
    while True:
        response = sp.playlist_tracks(playlist_id,
                                      offset=offset,
                                      fields='items.track.artists,total')
        items = response['items']
        if not items:
            break

        for item in items:
            if item is None:
                continue
            track = item.get('track')
            if track is None:
                # Keep a placeholder so rows stay aligned with the other columns.
                artists_per_track.append(None)
                continue
            artist_names = [artist['name'] for artist in track.get('artists') or []]
            if len(artist_names) == 1:
                artists_per_track.append(artist_names[0])
            else:
                artists_per_track.append(artist_names)

        offset += len(items)

        # Stop as soon as every entry has been read instead of asking for an
        # extra, empty page.
        total = response.get('total')
        if total is not None and offset >= total:
            break

    return pd.Series(artists_per_track) 

In [9]:
# Get track length 
def get_track_length(track_ids): 
    """Return the length of each track in seconds.

    The value is the 'duration_ms' audio feature divided by 1000.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    lengths = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                lengths[i] = features['duration_ms'] / 1000

    return pd.Series(lengths)

In [10]:
# Get 'danceability' 
def get_danceability(track_ids): 
    """Return the 'danceability' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    dance_ratings = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                dance_ratings[i] = features['danceability']

    return pd.Series(dance_ratings)

In [11]:
# Get 'energy'
def get_energy(track_ids): 
    """Return the 'energy' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    energies = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                energies[i] = features['energy']

    return pd.Series(energies)

In [12]:
# Get key 
def get_key(track_ids): 
    """Return the 'key' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    keys = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                keys[i] = features['key']

    return pd.Series(keys)

In [13]:
# Get loudness
def get_loudness(track_ids): 
    """Return the 'loudness' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    how_loud = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                how_loud[i] = features['loudness']

    return pd.Series(how_loud)

In [14]:
# Get mode 
def get_mode(track_ids): 
    """Return the 'mode' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    modes = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                modes[i] = features['mode']

    return pd.Series(modes)

In [15]:
# Get 'speechiness'
def get_speechiness(track_ids): 
    """Return the 'speechiness' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    speechy_ratings = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                speechy_ratings[i] = features['speechiness']

    return pd.Series(speechy_ratings)

In [16]:
# Get 'acousticness'
def get_acousticness(track_ids): 
    """Return the 'acousticness' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    acoustic_ratings = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                acoustic_ratings[i] = features['acousticness']

    return pd.Series(acoustic_ratings)

In [17]:
# Get 'instrumentalness'
def get_instrumentalness(track_ids): 
    """Return the 'instrumentalness' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    instrumental_ratings = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                instrumental_ratings[i] = features['instrumentalness']

    return pd.Series(instrumental_ratings)

In [18]:
# Get liveness
def get_liveness(track_ids): 
    """Return the 'liveness' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    live_ratings = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                live_ratings[i] = features['liveness']

    return pd.Series(live_ratings)

In [19]:
# Get moods/valence
def get_valence(track_ids): 
    """Return the 'valence' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    moods = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                moods[i] = features['valence']

    return pd.Series(moods)

In [20]:
# Get tempo 
def get_tempos(track_ids): 
    """Return the 'tempo' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    tempos = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                tempos[i] = features['tempo']

    return pd.Series(tempos)

In [21]:
# Get time signature 
def get_time_signature(track_ids): 
    """Return the 'time_signature' audio feature for each track id.

    Parameters
    ----------
    track_ids : iterable of str
        Spotify track ids; entries that are not strings (e.g. None) are
        never sent to the API.

    Returns
    -------
    pandas.Series
        One value per entry of track_ids, in the same order. Entries with
        no usable id or no audio features are left missing instead of being
        dropped, so the column stays aligned with the other per-track columns.
    """
    track_ids = list(track_ids)
    time_sigs = [None] * len(track_ids)

    # Remember where each real id belongs so results can be written back in place.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, features in zip(chunk, results):
            if features is not None:
                time_sigs[i] = features['time_signature']

    return pd.Series(time_sigs)

In [22]:
# Get popularity
def get_top_50_popularity(playlist_id):
    """Collect the 'popularity' value of every track in a playlist.

    The playlist is read page by page through sp.playlist_tracks until all
    entries have been seen.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to sp.playlist_tracks (the notebook
        passes a playlist URI).

    Returns
    -------
    pandas.Series
        One value per playlist item, in playlist order. Items whose track
        object is null in the response yield a missing value instead of
        raising, so the row count still matches the playlist.
    """
    rankings = []
    offset = 0
    while True:
        response = sp.playlist_tracks(playlist_id,
                                      offset=offset,
                                      fields='items.track.popularity,total')
        items = response['items']
        if not items:
            break

        for item in items:
            if item is None:
                continue
            track = item.get('track')
            rankings.append(track.get('popularity') if track is not None else None)

        offset += len(items)

        # 'total' is part of the requested fields; once every entry has been
        # read there is no need for one more request that returns an empty page.
        total = response.get('total')
        if total is not None and offset >= total:
            break

    return pd.Series(rankings)

In [23]:
# Get playlist name 
def get_name(playlist_id, track_ids): 
    """Build a column that repeats the playlist's name once per track.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to sp.playlist.
    track_ids : sized collection
        Only its length is used, to decide how many rows to produce.

    Returns
    -------
    pandas.Series
        The playlist name repeated len(track_ids) times, so every row of the
        combined dataset carries the name (not just the first one).
    """
    playlist = sp.playlist(playlist_id)
    name = playlist['name']
    return pd.Series([name] * len(track_ids))

In [24]:
# Add date column         
def get_date(track_ids): 
    """Build a column holding today's date once per track.

    Only the length of track_ids is used; the date is read a single time
    from datetime.date.today() and repeated for every row.
    """
    today = datetime.date.today()
    row_count = len(track_ids)
    return pd.Series([today for _ in range(row_count)])

In [25]:
# Combine all features in one DataFrame 
def _fetch_audio_features(track_ids):
    """Return one audio-features dict (or None) per entry of track_ids, in order."""
    track_ids = list(track_ids)
    features = [None] * len(track_ids)

    # Entries that are not strings (e.g. None) cannot be sent to the API.
    positions = [i for i, track_id in enumerate(track_ids) if isinstance(track_id, str)]

    # One request per 100 ids (the endpoint's maximum) instead of one per track.
    for start in range(0, len(positions), 100):
        chunk = positions[start:start + 100]
        results = sp.audio_features([track_ids[i] for i in chunk]) or []
        for i, item in zip(chunk, results):
            features[i] = item

    return features


def create_dataset(playlist_id):
    """Combine all per-track columns of a playlist into one DataFrame.

    The audio features are downloaded once and shared by every feature
    column, rather than being requested again for each column.

    Parameters
    ----------
    playlist_id : str
        Playlist identifier handed to the playlist helper functions (the
        notebook passes a playlist URI).

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns 'id', 'song_title', 'artist/s',
        'duration' (seconds), 'danceability', 'energy', 'key', 'loudness',
        'mode', 'speechiness', 'acousticness', 'instrumentalness',
        'liveness', 'mood' (valence), 'tempo', 'time_signature',
        'popularity', 'playlist_name' and 'date'. Tracks without audio
        features keep their row and have missing values in the feature
        columns.
    """
    track_ids = get_top_50_id(playlist_id)
    track_names = get_top_50_name(playlist_id)
    artists = get_artist(playlist_id)
    popularity = get_top_50_popularity(playlist_id)
    name = get_name(playlist_id, track_ids)
    date = get_date(track_ids)

    features = _fetch_audio_features(track_ids)

    def feature_column(key, divisor=None):
        # A None placeholder keeps the row aligned with the id/title columns.
        values = []
        for item in features:
            if item is None:
                values.append(None)
            elif divisor is None:
                values.append(item[key])
            else:
                values.append(item[key] / divisor)
        return pd.Series(values)

    columns = [
        ('id', track_ids),
        ('song_title', track_names),
        ('artist/s', artists),
        ('duration', feature_column('duration_ms', divisor=1000)),  # ms -> s
        ('danceability', feature_column('danceability')),
        ('energy', feature_column('energy')),
        ('key', feature_column('key')),
        ('loudness', feature_column('loudness')),
        ('mode', feature_column('mode')),
        ('speechiness', feature_column('speechiness')),
        ('acousticness', feature_column('acousticness')),
        ('instrumentalness', feature_column('instrumentalness')),
        ('liveness', feature_column('liveness')),
        ('mood', feature_column('valence')),
        ('tempo', feature_column('tempo')),
        ('time_signature', feature_column('time_signature')),
        ('popularity', popularity),
        ('playlist_name', name),
        ('date', date),
    ]

    # `keys` names the columns directly, so no positional rename is needed.
    dataset = pd.concat([series for _, series in columns], axis=1,
                        keys=[label for label, _ in columns])

    return dataset

In [28]:
# Save new DataFrame to .csv
def save_csv(playlist_df, path=r'D:\_aaPractice', encoding='ISO 8859-1'): 
    """Write the playlist DataFrame to '<playlist name>.csv' inside `path`.

    Parameters
    ----------
    playlist_df : pandas.DataFrame
        Dataset with a 'playlist_name' column; the name in its first row is
        used as the file name.
    path : str, optional
        Directory to write into. It must already exist. Change the default
        to the folder where you want to save your file.
    encoding : str, optional
        Text encoding of the .csv. The default cannot represent characters
        outside Latin-1; pass e.g. 'utf-8' for playlists with such titles.

    Returns
    -------
    str
        Full path of the file that was written.

    Raises
    ------
    ValueError
        If playlist_df has no rows, so no playlist name is available.
    """
    if len(playlist_df) == 0:
        raise ValueError('playlist_df is empty; cannot derive a file name')

    playlist_name = str(playlist_df['playlist_name'].iloc[0])

    # Playlist names are free text; replace characters that are not allowed
    # in file names (or would be read as a path separator).
    safe_name = ''.join('_' if ch in '<>:"/\\|?*' else ch for ch in playlist_name)

    # Build the full path instead of os.chdir(), which would silently change
    # the working directory for the rest of the notebook.
    file_name = os.path.join(path, '{}.csv'.format(safe_name))
    playlist_df.to_csv(file_name, encoding=encoding)
    return file_name

In [29]:
# Execute the functions here to generate a new dataset and save it to .csv
# Change this URI depending on the playlist you want to analyse
playlist_uri = 'spotify:playlist:37i9dQZF1DWWhBhYl3ZMvY'
# Builds one DataFrame row per track of the playlist
new_df = create_dataset(playlist_uri)                      
# Writes the DataFrame to a .csv file named after the playlist
save_csv(new_df)

retrying after...3secs
retrying after...1secs
retrying after...3secs
