In [2]:
# Imports
import spotipy
from spotipy import util
from spotipy import oauth2
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
from os import path

In [8]:
# Shared Variables and Functions
# User token if necessary
scope = 'user-follow-read playlist-modify-private playlist-modify-public user-follow-modify'
user_id = '1241528689'
token = util.prompt_for_user_token(
    user_id,
    scope=scope,
    client_id='9b41900f606c4e55855524f448917d64',
    client_secret='3277c16b708548369ce1f42deed974ea',
    redirect_uri='http://localhost/')

# Authorized spotipy object
sp = spotipy.Spotify(auth=token)

# Get followed Artists
def get_followed_artists():
    lim = 20
    response = sp.current_user_followed_artists(limit=lim, after=None)
    next_batch = response['artists']['items'][-1]['id']
    more_artists = True

    while more_artists:

        batch = sp.current_user_followed_artists(limit=lim, after=next_batch)
        response['artists']['items'].extend(batch['artists']['items'])
        next_batch = batch['artists']['items'][-1]['id']

        if len(batch['artists']['items']) < lim:
            more_artists = False

    artists = pd.DataFrame(response['artists']['items'])
    return artists

def get_artist_albums(date, artist_ids):
    # Get relevant albums - Modern (singles and albums)
    if path.exists('artist_temp.csv')
        scraped = pd.read_csv('album_temp.csv')
        artist_ids = [aid if aid in scraped]
    chunks = np.array_split(artist_ids, 100)
    lim = 50
    albums = pd.DataFrame(columns = ['album_group', 'album_type', 'artists', 'available_markets',
               'external_urls', 'href', 'id', 'images', 'name', 'release_date',
               'release_date_precision', 'total_tracks', 'type', 'uri'])
    
    for chunk in tqdm(chunks):

        for artist_id in tqdm(chunk, leave=False):
            response = sp.artist_albums(artist_id, limit=lim, album_type='album')
            offset = 0
            more_albums = True

            while more_albums:

                batch = sp.artist_albums(artist_id, limit=lim, offset=offset, album_type='album')
                response['items'].extend(batch['items'])
                offset += lim

                if len(batch['items']) < lim:
                        more_albums = False

            response_df = pd.DataFrame(response['items'])
            response_df = response_df
            albums = pd.concat([albums, response_df], axis=0)
        albums

    os.remove('album_temp.csv')
    return albums[albums.release_date >= date]

# Get Modern Tracks
def get_album_tracks(album_ids):
    lim = 50
    tracks = pd.DataFrame(columns = ['artists', 'available_markets', 'disc_number', 'duration_ms',
           'explicit', 'external_urls', 'href', 'id', 'is_local', 'name',
           'preview_url', 'track_number', 'type', 'uri'])

    for album_id in tqdm(album_ids):
        response = sp.album_tracks(album_id, limit=lim)
        offset = 0
        more_tracks = True
        if len(response['items']) < lim:
                more_tracks = False

        while more_tracks:

            batch = sp.album_tracks(album_id, limit=lim, offset=offset)
            response['items'].extend(batch['items'])
            offset += lim

            if len(batch['items']) < lim:
                more_tracks = False

        response_df = pd.DataFrame(response['items'])
        tracks = pd.concat([tracks, response_df], axis=0)

    return(tracks)

def add_tracks_to_playlist(user_id, playlist_id, track_ids, replace=True):
    lim = 50
    split_tracks = np.array_split(track_ids, np.ceil(len(track_ids)/lim))
    
    if replace:
        sp.user_playlist_replace_tracks(user_id, playlist_id, split_tracks[0])
        for track_list in tqdm(split_tracks[1:]):
            sp.user_playlist_add_tracks(user_id, playlist_id, track_list)
    else:
        for track_list in tqdm(split_tracks):
            sp.user_playlist_add_tracks(user_id, playlist_id, track_list)
    
        
# Get Playlist Tracks
def get_playlist_tracks(playlist_id, user_id):
    lim = 50
    more_tracks = True
    offset=0

    playlist_results = sp.user_playlist_tracks(user_id, playlist_id, limit=lim, offset=offset)
    if len(playlist_results['items']) < lim:
            more_tracks = False

    while more_tracks:

        offset += lim
        batch = sp.user_playlist_tracks(user_id, playlist_id, limit=lim, offset=offset)
        playlist_results['items'].extend(batch['items'])

        if len(batch['items']) < lim:
            more_tracks = False

    response_df = pd.DataFrame(playlist_results['items'])
    return response_df

# Get all Tracks in the playlist
def get_track_artists(track_df):
    
    artist_ids = []
    for track in track_df:
        try:
            artists = dict(track)['artists']
        except:
            continue

        for artist in artists:
            if artist['id'] not in artist_ids:
                artist_ids.append(artist['id'])
    return(artist_ids)

def filter_classical(artist_ids):
    
    output_list = []
    for artist in tqdm(artist_ids):
        artist_info = sp.artist(artist)
        
        if 'classical' not in artist_info['genres']:
            output_list.append(artist)
            
    return output_list

def generateStorm(input_playlist, output_playlist, user_id, date, classical=False, stash=False):
    
    print("Obtaining a list of Tracks from Playlist . . .")
    playlist_df = get_playlist_tracks(input_playlist, user_id)
    
    print("Finding Artists . . .")
    artist_ids = get_track_artists(playlist_df['track'])
                  
    if not classical:
        print("Removing Classical Artists.")
        artist_ids = filter_classical(artist_ids)
    
    print("Finding new album releases for artists . . .")
    album_df = get_artist_albums(date, artist_ids).reset_index()
    new_tracks = get_album_tracks(album_df['id']).reset_index()
    
    print("Writing back to playlist.")
    if len(new_tracks) < 1:
        print("Playlist Empty, no new releases. Try again later.")
    else:
        add_tracks_to_playlist(user_id, output_playlist, new_tracks['id'].unique())
        
    if stash:
        print("Stashing batch to input playlist as well.")
        add_tracks_to_playlist(user_id, input_playlist, new_tracks['id'].unique())
        
def generateTheStorm(user_id, date):
    
    # Ouput
    output = '7fnvajjUoWBQDo8iFNMH3s'
    
    # Inputs
    inputs = {'Much Needed':'7N3pwZE1N38wcdiuLxiPvq',
              'Room on the Boat':'1SZS16UcW0XOzgh6UWXA9S',
              'Refuge':'3K9no6AflSDYiiMzignAm7',
              'Safety':'0R1gw1JbcOFD0r8IzrbtYP',
              'Shelter from the Storm':'2yueH0i9C2daBRawYIc9P8',
              'Soundtracked':'37i9dQZF1DWW7gj0FcGEx6',
              'Refuge':'3K9no6AflSDYiiMzignAm7',
              'Soundtrack for Study':'0hZNf3tcMT4x03FyjKYJ3M',
              'Film Music - Movie Scores':'5GhatXsZVNYxrhqEAfZPLR',
              'Video Game Music':'37i9dQZF1DXdfOcg1fm0VG',
              'Video Game Soundtracks':'3Iwd2RiXCzmm1AMUpRAaHO',
              'Video Game Music Unofficial':'3aI7ztMmDhMHhYe1KOPFLG',
              'Fantasy - Music Writing':'0zjGLygoBmpafEsGtk5n1a',
              'OST Game Playlist':'3Iwd2RiXCzmm1AMUpRAaHO'}
              
    # Artists
    artist_ids = []
    print("Obtaining a track list.")
    if path.exists('artistids_'+date+'.csv'):
        print("Artists already scraped, reading in . . .")
        artist_ids = pd.read_csv('artistids_'+date+'.csv')['artists'].values
    else:
        print("Artists not scraped, scraping now . . .")
        for pl in inputs.keys():
            print("Obtaining a list of Tracks from Playlist . . ." + pl)
            playlist_df = get_playlist_tracks(inputs[pl], user_id)

            print("Finding Artists . . .")
            artist_ids.extend(get_track_artists(playlist_df['track']))

        stashed = pd.read_csv('Storm Artists.csv')
        artist_ids.extend(stashed.artists.values.tolist())
        
        print("Removing Classical Artists.")
        artist_ids = filter_classical(artist_ids)
        
        print("Stashing artists")
        pd.DataFrame(artist_ids, columns=['artists']).to_csv('artistids_'+date+'.csv', index=False)
        pd.DataFrame(artist_ids, columns=['artists']).to_csv('Storm Artists.csv', index=False)

    # Album Releases
    album_df = pd.DataFrame()
    print("Finding new album releases for artists . . .")
    if path.exists('albumdf_'+date+'.csv'):
        print("Albums already scraped, reading in . . .")
        album_df = pd.read_csv('albumdf_'+date+'.csv')
    else:
        print("Albums not scraped, scraping now . . .")
        album_df = get_artist_albums(date, artist_ids).reset_index()
        album_df.to_csv('albumdf_'+date+'.csv', index=False)
    
    # Tracks
    new_tracks = pd.DataFrame()
    print("Obtaining a track list.")
    if path.exists('newtracks_'+date+'.csv'):
        print("Tracks already scraped, reading in . . .")
        new_tracks = pd.read_csv('newtracks_'+date+'.csv')
    else:
        print("Tracks not scraped, scraping now . . .")
        new_tracks = get_album_tracks(album_df['id']).reset_index()
        new_tracks.to_csv('newtracks_'+date+'.csv', index=False)
        
    # Blacklists
    print("Removing Blacklisted Artists.")
    blacklist = pd.read_csv('blacklist.csv').reset_index()
    max_artist = 0
    
    # Remove tracks with bad artists
    new_tracks['bl'] = 0
    for index in tqdm(new_tracks.index):
        for bla in blacklist.artist:
            if str(bla) in str(new_tracks.loc[index, 'artists']):
                new_tracks.loc[index, 'bl'] = 1
    new_tracks = new_tracks[new_tracks.bl == 0]
    new_tracks.reset_index(inplace=True)
    
    # Stash artists
    print("Writing back to playlist.")
    if len(new_tracks) < 1:
        print("Playlist Empty, no new releases. Try again later.")
    else:
        add_tracks_to_playlist(user_id, output, new_tracks['id'].unique())

In [4]:
# Params - date from which to look
# Last did this on 2020-04-15
date = '2020-03-11'

# Input Playlists
archive = '0f6FAq46WiTMvhixj6wwoo'
good_stuff = '7ijVxjm7HU4x9r242Y8U3V'
tiaptp_archive = '3Rr0eHGWeCO2uOuVld6RpD'
much_needed = '7N3pwZE1N38wcdiuLxiPvq'
classica_buena = '5uEns0vv1dU4rVsrTNV6j5'
instrumental = '1Q8WS7Xj51WCHZctXGDsrp'

# Output Playlists
playlist_modern_id = '75lIJUDbzWFyIG6FIaJeJe'
playlist_classical_id = '4NUw6Lt9XLI4dAVjwaLhVx'
output_tiaptp_archive = '5wRSpTyhBd3xovasGyZH6l'
output_much_needed = '350FAK7sVjaaWx47AAmMiD'
output_instrumental = '7jAmYtUazkJGFXBCjTBBMh'

In [9]:
# Generate the Storm
generateTheStorm(user_id, date)

Obtaining a track list.
Artists already scraped, reading in . . .
Finding new album releases for artists . . .
Albums not scraped, scraping now . . .


HBox(children=(IntProgress(value=0, max=10259), HTML(value='')))

SpotifyException: http status: 401, code:-1 - https://api.spotify.com/v1/artists/3XPC0v9mtsGYMsdfkxkyFL/albums?album_type=album&limit=50&offset=0:
 The access token expired

In [11]:
# Generate Classical and Lyrical Storms
generateStorm(archive, playlist_modern_id, user_id, date)
generateStorm(classica_buena, playlist_classical_id, user_id, date, classical=True)

Obtaining a list of Tracks from Playlist . . .
Finding Artists . . .
Removing Classical Artists.


HBox(children=(IntProgress(value=0, max=867), HTML(value='')))

Finding new album releases for artists . . .


HBox(children=(IntProgress(value=0, max=867), HTML(value='')))

HBox(children=(IntProgress(value=0, max=18), HTML(value='')))

Writing back to playlist.


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))

Obtaining a list of Tracks from Playlist . . .
Finding Artists . . .
Finding new album releases for artists . . .


HBox(children=(IntProgress(value=0, max=388), HTML(value='')))

HBox(children=(IntProgress(value=0, max=108), HTML(value='')))

Writing back to playlist.


HBox(children=(IntProgress(value=0, max=20), HTML(value='')))