# Fetch Playlists from Spotify API

In [25]:
import json
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from tqdm import tqdm
from multiprocessing.pool import ThreadPool

In [26]:
# Load data from spotify links
with open('../data/spotify_links.json') as f:
    data = json.load(f)
    links = list(data.items())

# Links consists of tuples of (genre, link)
print('Example:', links[0])

Example: ('italian violin', 'spotify:playlist:6Vf6P26sYyUqljb9e8Jy5b')


## Connect to Spotify API using Spotipy

In [27]:
SPOTIPY_CLIENT_ID = "f96a1f0e67bf41afa9c09b2da29d8c7c"
SPOTIPY_CLIENT_SECRET = "3c726b5c74c643de83771882afc67d2f"

spotify = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(SPOTIPY_CLIENT_ID,SPOTIPY_CLIENT_SECRET),
    requests_timeout=5)

In [28]:
# Fetch playlists by batch

def getPlaylist(links, playlists, failures, threadName):
    with tqdm(total=len(links), position=1, desc=threadName) as pbar:
        for i in range(len(links)):
            try:
                playlist = spotify.playlist(links[i][1])
                playlists.append(playlist)
            except:
                failures.append(links[i][0])
            if(i > 0 and (i+1) % 50 == 0):
                pbar.update(50)
            elif(i == len(links) - 1):
                pbar.update(len(links) % 50)

In [29]:
#Divide links into 6 chunks
chunk_indices = []

for i in range(0, len(links) - len(links) % 1000, 1000):
    chunk_indices.append((i, i + 1000))

chunk_indices.append((len(links) - len(links) % 1000, len(links)))

In [30]:
#Create a list of the playlists
playlists = []
failures = []

#Begin thread pool
pool = ThreadPool(len(chunk_indices))

#Thread creations, each thread runs on a chunk of links
for t in range(len(chunk_indices)):
    pool.apply_async(getPlaylist, args=(links[chunk_indices[t][0]: chunk_indices[t][1]], playlists, failures, "Thread {}".format(t+1)))

#Start the threads in pool
pool.close()
pool.join()


Thread 1:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 2:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 6:   0%|          | 0/71 [00:00<?, ?it/s][A
Thread 4:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 5:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 3:   0%|          | 0/1000 [00:00<?, ?it/s][A
Thread 4:   5%|▌         | 50/1000 [00:16<05:21,  2.96it/s][A
Thread 3:   5%|▌         | 50/1000 [00:16<05:21,  2.96it/s][A
Thread 6:  70%|███████   | 50/71 [00:17<00:07,  2.89it/s][A
Thread 1:   5%|▌         | 50/1000 [00:18<05:42,  2.78it/s][A
Thread 5:   5%|▌         | 50/1000 [00:24<07:44,  2.04it/s][A
Thread 2:   5%|▌         | 50/1000 [00:25<08:02,  1.97it/s][A
Thread 6: 100%|██████████| 71/71 [00:27<00:00,  2.54it/s]

Thread 3:  10%|█         | 100/1000 [00:46<06:13,  2.41it/s][A
Thread 5:  10%|█         | 100/1000 [00:46<07:09,  2.10it/s][A
Thread 4:  10%|█         | 100/1000 [00:46<06:15,  2.40it/s][A
Thread 1:  10%|█         | 100/1000 [00:47<06:24, 

In [31]:
## Format and save playlists to json
newPlaylists = []
failedPlaylists = []

for i, pl in enumerate(playlists):
    try:
        newData = dict()
        newData['name'] = pl['name']
        newData['genre'] = links[i][0]
        newData['uri'] = links[i][1]
        newData['num_tracks'] = len(pl['tracks']['items'])
        newData['tracks'] =  []
        for item in pl['tracks']['items']:
            newTrack = dict()
            newTrack['artists'] = item['track']['artists']
            newTrack['name'] = item['track']['name']
            newTrack['uri'] = item['track']['uri']
            newData['tracks'].append(newTrack)
        newPlaylists.append(newData)      
        if (i + 1) % 200 == 0:
            begin = (i + 1) - 200
            with open('../data/genre_playlists/playlists.{}-{}.json'.format(begin, i), 'w') as outfile:
                json.dump(newPlaylists, outfile, indent=4)
            newPlaylists = []
        elif i == len(playlists) - 1:
            begin = i - (i + 1) % 200
            with open('../data/genre_playlists/playlists.{}-{}.json'.format(begin, i), 'w') as outfile:
                json.dump(newPlaylists, outfile, indent=4)
            newPlaylists = []
    except:
        failedPlaylists.append(i)

In [33]:
print('Fetch failures:', failures)
print('Playlists that failed to format:', failedPlaylists)
print('Total playlists failed to format:', len(failedPlaylists))

[]
[50, 320, 380, 1118, 1242, 1295, 1479, 1647, 1706, 1707, 1920, 2063, 2165, 2335, 2448, 2625, 3088, 3188, 3382, 3414, 3511, 3558, 3643, 3836, 4102, 4115, 4198, 4277, 4332, 4581, 4604, 4807, 4852, 4976, 4996, 5015]
36
