# Fetch Playlists from Spotify API

In [1]:
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm
from multiprocessing.pool import ThreadPool

## Load Data

In [2]:
# Load data from spotify links
with open('../data/spotify_links.json') as f:
    data = json.load(f)
    links = list(data.items())

# Links consists of tuples of (genre, link)
print('Example:', links[0])

Example: ('italian violin', 'spotify:playlist:6Vf6P26sYyUqljb9e8Jy5b')


In [3]:
SPOTIPY_CLIENT_ID = "f96a1f0e67bf41afa9c09b2da29d8c7c"
SPOTIPY_CLIENT_SECRET = "3c726b5c74c643de83771882afc67d2f"

spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(SPOTIPY_CLIENT_ID,SPOTIPY_CLIENT_SECRET),
    requests_timeout=5)

In [4]:
# Fetch playlists by batch

def getPlaylist(links, playlists, failures, threadName):
    with tqdm(total=len(links), position=1, desc=threadName) as pbar:
        for i in range(len(links)):
            try:
                playlist = spotify.playlist(links[i][1])
                playlists.append(playlist)
            except:
                failures.append(links[i][0])
            if(i > 0 and (i+1) % 200 == 0):
                pbar.update(500)
            elif(i == len(links) - 1):
                pbar.update(len(links) % 200)

In [5]:
#Divide links into 6 chunks
chunk_indices = []

for i in range(0, len(links) - len(links) % 1000, 1000):
    chunk_indices.append((i, i + 1000))

chunk_indices.append((len(links) - len(links) % 1000, len(links)))

In [9]:
#Create a list of the playlists
playlists = []
failures = []

#Begin thread pool
pool = ThreadPool(len(chunk_indices))

#Thread creations, each thread runs on a chunk of links
for t in range(len(chunk_indices)):
    pool.apply_async(getPlaylist, args=(links[chunk_indices[t][0]: chunk_indices[t][1]], playlists, failures, "Thread {}".format(t+1)))

#Start the threads in pool
pool.close()
pool.join()

#Takes about 8 minutes


Thread 1:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 2:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 5:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 3:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 6:   0%|          | 0/71 [00:00<?, ?it/s][A
Thread 4:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 6: 100%|██████████| 71/71 [00:35<00:00,  2.02it/s]

Thread 1:  50%|█████     | 500/1000 [01:36<01:36,  5.19it/s][A
Thread 2:  50%|█████     | 500/1000 [01:37<01:37,  5.14it/s][A
Thread 4:  50%|█████     | 500/1000 [01:37<01:37,  5.13it/s][A
Thread 3:  50%|█████     | 500/1000 [01:37<01:37,  5.12it/s][A
Thread 5:  50%|█████     | 500/1000 [01:43<01:43,  4.85it/s][A
Thread 2: 100%|██████████| 1000/1000 [03:15<00:00,  5.13it/s][A
Thread 1: 100%|██████████| 1000/1000 [03:16<00:00,  5.14it/s][A
Thread 3: 100%|██████████| 1000/1000 [03:16<00:00,  5.11it/s][A
Thread 4: 100%|██████████| 1000/1000 [03:16<00:00,  5.10it/s][A
Thread 5: 100%|██████████| 1000/1000 [

In [10]:
## Format and save playlists to json
newPlaylists = []
failedPlaylists = []

for i, pl in enumerate(playlists):
    try:
        newData = dict()
        newData['name'] = pl['name']
        newData['genre'] = links[i][0]
        newData['uri'] = links[i][1]
        newData['num_tracks'] = len(pl['tracks']['items'])
        newData['tracks'] =  []
        for item in pl['tracks']['items']:
            newTrack = dict()
            newTrack['artists'] = item['track']['artists']
            newTrack['name'] = item['track']['name']
            newTrack['uri'] = item['track']['uri']
            newData['tracks'].append(newTrack)
        newPlaylists.append(newData)      
        if (i + 1) % 200 == 0:
            begin = (i + 1) - 200
            with open('../data/genre_playlists/playlists.{}-{}.json'.format(begin, i), 'w') as outfile:
                json.dump(newPlaylists, outfile, indent=4)
            newPlaylists = []
        elif i == len(playlists) - 1:
            begin = i - (i + 1) % 200
            with open('../data/genre_playlists/playlists.{}-{}.json'.format(begin, i), 'w') as outfile:
                json.dump(newPlaylists, outfile, indent=4)
            newPlaylists = []
    except:
        failedPlaylists.append(i)

In [11]:
print('Fetch failures:', failures)
print('Playlists that failed to format:', failedPlaylists)
print('Total playlists failed to format:', len(failedPlaylists))

Fetch failures: []
Playlists that failed to format: [49, 332, 386, 1148, 1210, 1266, 1477, 1638, 1695, 1704, 1912, 2072, 2158, 2330, 2461, 2597, 3081, 3177, 3338, 3433, 3558, 3603, 3668, 3858, 4111, 4118, 4223, 4252, 4311, 4608, 4618, 4708, 4755, 5012, 5028, 5033]
Total playlists failed to format: 36
