In [2]:
import ast
import base64
import json
import time
import webbrowser
from urllib.parse import urlencode

import pandas as pd
import requests
import yaml

In [3]:
# Read the Spotify application credentials from a YAML file kept outside the
# notebook, so the secrets themselves never appear in a cell.
with open('../../config/secret.yml') as secret_file:
    private = yaml.safe_load(secret_file)

spotify_credentials = private['Spotify']
CLIENT_ID = spotify_credentials['CLIENT_ID']
CLIENT_SECRET = spotify_credentials['CLIENT_SECRET']

The cell below "unfolds" (flattens) the results of a previous API scrape of playlists, which did not use the method shown in this notebook, into one row per playlist track.
The resulting track_ids are used as inputs for the subsequent API calls.

In [5]:
with open('../raw/playlist_tracks.json') as f:
    playlist_tracks_json = json.load(f)

# Flatten the nested scrape into one row per (playlist, track) pair.
playlist_tracks_data = []
for playlist in playlist_tracks_json:
    playlist_id = playlist['playlist_id']
    for item in playlist['items']:
        track = item['track']
        # A playlist item can carry a null `track` (e.g. the track is no longer
        # available); such an item has no id/popularity to record, so skip it
        # instead of failing with a TypeError on the subscript below.
        if track is None:
            continue
        playlist_tracks_data.append([playlist_id, track['id'], item['added_at'], track['popularity']])

playlist_tracks = pd.DataFrame(playlist_tracks_data, columns=['playlist_id', 'track_id', 'added_at', 'popularity'])

The snippet below opens Spotify's authorization page in a browser. The `code` parameter in the URL that the browser is redirected to is needed to generate an access token.

In [6]:
# Despite its name, `auth_header` is the URL-encoded query string appended to
# the authorization endpoint, not an HTTP header.
auth_header = urlencode(dict(
    client_id=CLIENT_ID,
    response_type='code',
    redirect_uri='http://localhost:3000',
))
# Kept as the cell's last expression so the cell still displays whether a
# browser could be launched.
webbrowser.open(f'https://accounts.spotify.com/authorize?{auth_header}')

True

In [19]:
# Pasted from browser URL (the full address the browser was redirected to).
REDIRECTED_URL = '''
http://localhost:3000/?code=AQBb_G2cpAF4JjW0Am0qXZcnrMadQFJ4MMgWjurZqwkTgQcTEtBEW-xqPP_-tOsWyPP2NXb1GsXKezEWGVhLQCasvQWJzqvaWkT1SNTL0J_TPIXPJ3g64hnj0mUlDYFn0I0KN7o7YoKItGtzdr1fEe4uhOtVz986Fw
'''
# Extract the `code` query parameter by name instead of by a fixed character
# offset: a different redirect URI, stray whitespace around the pasted URL, or
# extra parameters after the code would otherwise silently corrupt the value.
# Raises IndexError if the pasted URL contains no `code=` parameter.
CODE = REDIRECTED_URL.strip().split('code=', 1)[1].split('&', 1)[0]

In [21]:
# HTTP Basic credentials for the token endpoint: base64("<client_id>:<client_secret>").
encoded_credentials = base64.b64encode(CLIENT_ID.encode() + b':' + CLIENT_SECRET.encode()).decode("utf-8")
token_headers = {
    "Authorization": "Basic " + encoded_credentials,
    "Content-Type": "application/x-www-form-urlencoded"
}
# Exchange the authorization code for an access token; the redirect URI is the
# same one that was sent with the authorization request.
token_data = {
    "grant_type": "authorization_code",
    "code": CODE,
    "redirect_uri": "http://localhost:3000"
}

# The timeout keeps the cell from hanging indefinitely on a stalled connection.
r = requests.post("https://accounts.spotify.com/api/token", data=token_data, headers=token_headers, timeout=10)
# Surface a rejected request (e.g. a stale or mistyped CODE) as an HTTP error
# here instead of as an opaque KeyError on 'access_token' below.
r.raise_for_status()
ACCESS_TOKEN = r.json()['access_token']

In [22]:
class Accumulator:
    """Collects the results of many GET requests made against one endpoint.

    State is kept in three plain lists so that a run can be inspected, or
    resumed by calling ``bundled_requests`` again with the same arguments:

    * ``successes`` -- args whose request and transform both succeeded
    * ``failures``  -- ``[index, arg, error message]`` for args that gave up
    * ``output``    -- transformed responses, in the order they succeeded
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Discard every recorded success, failure and output row."""
        self.successes = []
        self.failures = []
        self.output = []

    def bundled_requests(self, access_token, base_url, api_args, transformer=lambda x: x,  max_retries=3,
                         timeout=10, backoff=1.0):
        """GET ``base_url + arg`` for every arg not already in ``self.successes``.

        Args:
            access_token: bearer token sent in the Authorization header.
            base_url: endpoint prefix; each arg is appended to it verbatim.
            api_args: iterable of string path suffixes (e.g. ids).
            transformer: callable applied to the decoded JSON body; its return
                value is what gets appended to ``self.output``.
            max_retries: maximum number of attempts per arg.
            timeout: seconds to wait for each HTTP request.
            backoff: base delay in seconds between attempts; it doubles on
                every retry and is raised to a numeric ``Retry-After`` header
                when the server sends one.

        Returns:
            None. Results are recorded on ``successes``, ``failures`` and
            ``output``. An arg is recorded as a failure when its last attempt
            fails or when the failure is one that a retry cannot fix.
        """
        headers = {
            "Authorization": "Bearer " + access_token,
            "Content-Type": "application/json"
        }
        # Set mirror of self.successes so the "already fetched?" check stays
        # O(1) instead of scanning the list once per arg.
        done = set(self.successes)
        for i, arg in enumerate(api_args):
            if arg in done:
                continue
            for attempt in range(max_retries):
                print(f'[{i}] Accessing endpoint with arg {arg}... (Attempt {attempt+1})', end='\r')
                response = None
                try:
                    response = requests.get(base_url+arg, headers=headers, timeout=timeout)
                    # Without this check an error payload (401/404/429...) would be
                    # handed to the transformer and, with the default identity
                    # transformer, silently stored as a successful result.
                    response.raise_for_status()
                    record = transformer(response.json())
                except Exception as e:
                    if attempt == max_retries-1 or not self._is_retryable(response):
                        self.failures.append([i, arg, str(e)])
                        break
                    time.sleep(self._retry_delay(response, attempt, backoff))
                else:
                    self.output.append(record)
                    self.successes.append(arg)
                    done.add(arg)
                    break

    @staticmethod
    def _is_retryable(response):
        """Return True when repeating the request could plausibly succeed."""
        if response is None:
            # No response at all: transport error or timeout.
            return True
        status = response.status_code
        # Rate limiting and server-side errors are transient; any other status
        # (including a 2xx whose body the transformer rejected) would fail the
        # same way again.
        return status == 429 or status >= 500

    @staticmethod
    def _retry_delay(response, attempt, backoff):
        """Seconds to wait before the next attempt (exponential back-off)."""
        delay = backoff * 2 ** attempt
        response_headers = getattr(response, 'headers', None) or {}
        try:
            return max(delay, float(response_headers.get('Retry-After', 0)))
        except (TypeError, ValueError):
            # Retry-After was not a plain number of seconds; use our own delay.
            return delay


In [25]:
def transform_track_json(response):
    """Flatten one track payload into a row.

    Returns ``[id, name, {artist_id: artist_name}, popularity, release_date]``,
    where the release date is read from the nested ``album`` entry.
    """
    return [
        response['id'],
        response['name'],
        dict((artist['id'], artist['name']) for artist in response['artists']),
        response['popularity'],
        response['album']['release_date'],
    ]

def transform_track_audio_json(response):
    """Pick the audio-feature fields out of a payload, in a fixed column order.

    Returns a list holding the track id followed by the thirteen feature values
    named below; any other key in ``response`` is ignored.
    """
    wanted_keys = (
        'id', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness',
        'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo',
        'duration_ms', 'time_signature',
    )
    row = []
    for key in wanted_keys:
        row.append(response[key])
    return row

def transform_artist_json(response):
    """Flatten one artist payload into a row.

    Returns ``[id, name, genres, follower total, popularity]``; the follower
    count is read from the nested ``followers`` entry.
    """
    artist_id, artist_name = response['id'], response['name']
    follower_total = response['followers']['total']
    genre_list, popularity = response['genres'], response['popularity']
    return [artist_id, artist_name, genre_list, follower_total, popularity]

The next two cells show a sample use of the Accumulator object to pull Artist information. The "track_master.csv" file was built by calling the Tracks API and supplying the transform_track_json function to the Accumulator.bundled_requests method.

In [None]:
tm = pd.read_csv('track_master.csv')

# Each value in the `artists` column is the text form of an
# {artist_id: artist_name} dict (presumably as written out from
# transform_track_json's rows -- confirm against how the CSV was saved).
# ast.literal_eval only accepts Python literals, so unlike eval() it cannot
# execute arbitrary code if the CSV has been edited or comes from elsewhere.
artists = {}
for row in tm.artists:
    artists.update(ast.literal_eval(row))

accum = Accumulator()

In [26]:

# Request every artist id gathered from the track master; results accumulate
# on `accum` (successes / failures / output).
artist_ids = list(artists)
accum.bundled_requests(
    access_token=ACCESS_TOKEN,
    base_url='https://api.spotify.com/v1/artists/',
    api_args=artist_ids,
    transformer=transform_artist_json,
    max_retries=5,
)

[2079] Accessing endpoint with arg 0Yy9u86cq66Se2pB9fYaiW... (Attempt 1)