In [1]:
import requests
import json
import pandas as pd
import numpy as np

#### Create a playlist

In [2]:
# create a new playlist to store those recommendations
def create_plst(user_id, token, uris, name, description, public = False):
    endpoint_url = f"https://api.spotify.com/v1/users/{user_id}/playlists"
    request_body = json.dumps({
        "name": name,
        "description": description,
        "public": public })
    # create an empty new playlist
    response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                        "Authorization":f"Bearer {token}"})
    url = response.json()['external_urls']['spotify']
    if response.status_code == 201:
        print('Playlist {} is successfully created!'.format(name))
        
    # fill the new playlist with the recommendations
    playlist_id = response.json()['id']
    endpoint_url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"
    request_body = json.dumps({ "uris" : uris })
    response = requests.post(url = endpoint_url, data = request_body, headers={"Content-Type":"application/json", 
                        "Authorization":f"Bearer {token}"})
    if response.status_code == 201:
        print('Playlist {} is successfully filled with recommendations!'.format(name))
        print(f'Your playlist is ready at {url}')

### Get a user's recently played songs
- `limit`: The maximum number of items to return. Default: `20`. Minimum: `1`. Maximum: `50`.
- `after`: A Unix timestamp in milliseconds. Returns all items after (but not including) this cursor position. If `after` is specified, `before` must not be specified.
- `before`: A Unix timestamp in milliseconds. Returns all items before (but not including) this cursor position. If `before` is specified, `after` must not be specified.

In [3]:
def get_recent_songs(user_id, token, **kwargs):
    query = "https://api.spotify.com/v1/me/player/recently-played"
    
    if len(kwargs.keys()) > 0:
        query += '?'
        lst = [str(x[0]) + '=' + str(x[1]) for x in zip(kwargs.keys(), kwargs.values())]
        query += '&'.join(lst) ## add all parameters from kwargs 
    #print(query)

    response = requests.get(query, 
                   headers={"Accept": "application/json",
                            "Content-Type":"application/json", 
                            "Authorization":f"Bearer {token}"})
    json_response = response.json()
    print("status:", response.status_code)
    song_ids = [json_response['items'][i]['track']['id'] for i in range(len(json_response['items']))]
    return song_ids

### Get a User's Top Artists and Tracks
- `time_range`: Over what time frame the affinities are computed. Valid values: `long_term` (calculated from several years of data and including all new data as it becomes available), `medium_term` (approximately last 6 months), `short_term` (approximately last 4 weeks). Default: `medium_term`
- `limit`: The maximum number of items to return. Default: `20`. Minimum: `1`. Maximum: `50`.
- `offset`: The index of the first entity to return. Default: `0` (i.e., the first track). Use with limit to get the next set of entities.

**Note: songs that have been played for once will not be considered as a top song.**

In [4]:
def get_top_songs(user_id, token, **kwargs):
    query = "https://api.spotify.com/v1/me/top/tracks?"
    
    if len(kwargs.keys()) > 0:
        lst = [str(x[0]) + '=' + str(x[1]) for x in zip(kwargs.keys(), kwargs.values())]
        query += '&'.join(lst) ## add all parameters from kwargs 
    #print(query)

    response = requests.get(query, 
                   headers={"Accept": "application/json",
                            "Content-Type":"application/json", 
                            "Authorization":f"Bearer {token}"})
    json_response = response.json()
    print("status:", response.status_code)
    song_ids = [json_response['items'][i]['id'] for i in range(len(json_response['items']))]
    return song_ids

### Get information about a song

- `market`: The market you’d like to request.

In [5]:
def get_song(user_id, token, song_id, market):
    
    ## get audio features of the song/track
    song_url = "https://api.spotify.com/v1/audio-features/"
    query = f'{song_url}{song_id}'
    success = False
    while not success:
        try:
            response = requests.get(query, 
                           headers={"Content-Type":"application/json", 
                                    "Authorization":f"Bearer {token}"})
            json_response = response.json()
            df_response = pd.json_normalize(json_response)
            success = True
        except Exception as e:
            pass
            
    ## get album information and the first artist
    base_url = "https://api.spotify.com/v1/tracks/"
    query = f'{base_url}{song_id}?market={market}'
    success = False
    while not success:
        try:
            response = requests.get(query, 
                           headers={"Accept": "application/json",
                                    "Content-Type":"application/json", 
                                    "Authorization":f"Bearer {token}"})
            json_response = response.json()
            album_id = json_response['album']['id'] if 'album' in json_response.keys() else None
            artist_id = json_response['artists'][0]['id'] if 'artists' in json_response.keys() else None
            success = True
        except Exception as e:
            pass
            
    if 'genre' in df_response.columns and df_response['genre'] is not None and len(df_response['genre']) > 0:
        return df_response.drop(['type', 'uri', 'track_href', 'analysis_url'], axis=1).set_index('id')
    else:            
        ## get genre of the album
        base_url = "https://api.spotify.com/v1/albums/"
        query = f'{base_url}{album_id}?market={market}'
        success = False
        while not success:
            try:
                response = requests.get(query, 
                               headers={"Accept": "application/json",
                                        "Content-Type":"application/json", 
                                        "Authorization":f"Bearer {token}"})
                json_response = response.json()
                genre = json_response['genres'] if 'genres' in json_response.keys() else None
                success = True
            except Exception as e:
                pass

        if genre is None or len(genre) < 1:
            ## get genre of the artist
            base_url = "https://api.spotify.com/v1/artists/"
            query = f'{base_url}{artist_id}'
            success = False
            while not success:
                try:
                    response = requests.get(query, 
                                   headers={"Accept": "application/json",
                                            "Content-Type":"application/json", 
                                            "Authorization":f"Bearer {token}"})
                    json_response = response.json()
                    genre = json_response['genres'] if 'genres' in json_response.keys() else None
                    success = True
                except Exception as e:
                    pass

    df_response['artist_id'] = artist_id if artist_id is not None and len(artist_id) >0 else None
    df_response['albuma_id'] = album_id if album_id is not None and len(album_id) >0 else None
    df_response['genre'] = [genre] if genre is not None and len(genre) > 0 else None
    try:
        return df_response.drop(['type', 'uri', 'track_href', 'analysis_url'], axis=1).set_index('id')
    except Exception:
        print("Cannot find features about this song: ", song_id)

## Generate a target playlist

- `workout_plan`: Workout plan. For example, `workout_plan=[(0, 0.2), (8, 0.9), (20, 0.4)]` means that in the first 8 minutes, to increase energy from 0.2 to 0.9 and then decrease energy to 0.4 in the next 12 minutes.
- `history_args`: Parameters used to extract recently played songs. For example, `history_args = {'limit': 15, 'after': 28885117}`.
- `start_song`: The first song in the target playlist. You can input the id of a song.
- `end_song`: The last song in the target playlist. You can input the id of a song.
- `pars_weight`: When calculating the difference between songs, we use a weighted average of the difference. 
- `genre`: If it is not None, we will filter songs based on genre.
- `recently`: If it is True, we will use recently played songs; otherwise, we will use top songs.

**Note 1. if `recently=True`, `history_args` can accept "limit", "after" (or "before"); otherwise, `history_args` can accept "limit", "time_range", "offset".** 

**Note 2. The required scopes of token is different for "top" and "recently played" songs.**

In [78]:
def generate_target(user_id, token, market, workout_plan, history_args = {}, start_song = None, end_song = None,
                    pars_weight = {'energy': 0.6, 'duration_ms': 0.4},
                    genre = None, recently = True, MAX_songs = 100):
    
    convert = lambda x: 60 * 1000 * x ## convert to ms
    reconvert = lambda x: x / 60 / 1000 ## convert to minute
    intersection = lambda lst1, lst2: [value for value in lst1 if value in lst2]
    def get_songs(my_offset):
        ## Note: for recently played songs, it cannot iterate to get more songs right now, it can be improved later.
        if recently:
            song_ids = get_recent_songs(user_id, token, **history_args)
        else:
            offset = max(my_offset, history_args.get('offset', 0))
            history_args['offset'] = offset
            song_ids = get_top_songs(user_id, token, **history_args)
        if len(song_ids) > 0:
            song_ids = set(song_ids) ## remove duplicate songs
            song_df = pd.concat([get_song(user_id, token, id, market) for id in song_ids])
            return song_df
        else:
            return None
    
    def filter_genre(song_df):
        if genre is not None:
            i = 0
            while i < song_df.shape[0]:
                if song_df.genre[i] is None or len(intersection(genre, song_df.genre[i])) < 1:
                    song_df = song_df.drop(index = song_df.index[i], axis = 0)
                else:
                    i += 1
        return song_df[pars_weight.keys()]

    my_offset = history_args.get('offset', 0)
    song_df = get_songs(my_offset)
    if song_df is None:
        print(" There is no avaliable top songs, please change `history_args`.")
    else:
        song_df = filter_genre(song_df)
    print("originally", song_df.shape[0], "song candidates", )
    while song_df.duration_ms.sum() / 60000 < workout_plan[-1][0] or song_df.shape[0] < MAX_songs:
        ## check total duration, if the sum of the duration of songs is smaller than the duration of the work out plan, 
        ## get more songs
        my_offset += history_args.get('limit', 20)
        new_song = get_songs(my_offset)
        if new_song is None:
            break
        else:
            new_song = filter_genre(new_song)
            candidate_df = pd.concat([song_df, new_song])
            candidate_df = candidate_df[~candidate_df.index.duplicated(keep='first')]
        if song_df.shape[0] == candidate_df.shape[0]:
            break
        else:
            print("add", candidate_df.shape[0]-song_df.shape[0], "song candidates")
            song_df = candidate_df
    if song_df.shape[0] > MAX_songs:
        song_df = song_df.iloc[range(MAX_songs), :]
    print("finally", song_df.shape[0], "song candidates")

    if song_df.shape[0] < 2:
        print(" There are no enough top songs, please change `history_args`.")
    else:
        songs = []
        total_durations = 0
        
        if isinstance(start_song, str) and start_song in song_df.index:
            song_df = song_df.drop(index = start_song, axis = 0)
        elif start_song is None:
            max_duration = workout_plan[0][0] * 60000
            df = song_df[song_df['duration_ms'] < max_duration]
            if df.shape[0] > 1:
                start_song = df.index[np.argmin((df['energy'] - workout_plan[0][1]).apply(abs))]
            else:
                start_song = song_df.index[np.argmin((song_df['energy'] - workout_plan[0][1]).apply(abs))]
            song_df = song_df.drop(index = start_song, axis = 0)
        
        if isinstance(end_song, str) and end_song in song_df.index:
            song_df = song_df.drop(index = end_song, axis = 0)  
        elif end_song is None:
            max_duration = (workout_plan[-1][0] - workout_plan[-2][0]) * 60000
            
            df = song_df[song_df['duration_ms'] < max_duration]
            if df.shape[0] > 1:
                end_song = df.index[np.argmin((df['energy'] - workout_plan[-1][1]).apply(abs))]
            else:
                end_song = song_df.index[np.argmin((song_df['energy'] - workout_plan[-1][1]).apply(abs))]
            song_df = song_df.drop(index = end_song, axis = 0)

        
        
        print('Start!')
        
        songs.append(start_song)
        current_energy = get_song(user_id, token, start_song, market)['energy'][0]
        total_durations += get_song(user_id, token, start_song, market)['duration_ms'][0]
        print('Add start song:', start_song, total_durations/60000)
        
        for i in range(1, len(workout_plan)):
            next_position = convert(workout_plan[i][0])
            if i == len(workout_plan)-1:
                next_position = next_position - get_song(user_id, token, end_song, market)['duration_ms'][0]
            next_energy = workout_plan[i][1]
            trend = '+' if next_energy - current_energy > 0 else '-'
            
            while total_durations < next_position:
                ### filter based on energy curve
                df = song_df[song_df.energy.between(current_energy, next_energy)] if trend == '+' else song_df[song_df.energy.between(next_energy, current_energy)]
                
                ### add a filter based on duration
                #target_duration = next_position - total_durations
                #df = df[df['duration_ms'].between(target_duration - time_interval*1000, target_duration + time_interval*1000)]
                
                print(df.shape[0], "songs that satisfy the energy trend", )
                if df.shape[0] < 1: ##### if no song satisfies the energy and duration requirement, what to do?
                    print(" There are no enough song candidates, please try to enlarge `MAX_songs`.")
                    break
                if any(next_position - total_durations >= df.duration_ms):
                    energy_weights = [0.5, 0.5]
                else:
                    energy_weights = [0.2, 0.8]

                energy_weights = np.array(energy_weights) * pars_weight['energy']
                weight = list(energy_weights) + [pars_weight['duration_ms']]
                diff_current = df['energy'] - current_energy
                diff_next = df['energy'] - next_energy
                diff_duration = reconvert(df['duration_ms'] - (next_position - total_durations))
                diff = ((pd.DataFrame(np.c_[np.array(diff_current), np.array(diff_next), np.array(diff_duration)], index = df.index).apply(abs))*weight).sum(axis = 1).sort_values()
                song_id = diff.index[0]
                total_durations += get_song(user_id, token, song_id, market)['duration_ms'][0]
                songs.append(song_id)
                print('Add song:', song_id, total_durations/60000)
                current_energy = get_song(user_id, token, song_id, market)['energy'][0]
                song_df = song_df.drop(index = song_id, axis = 0)
        
        durations_with_last = total_durations+get_song(user_id, token, end_song, market)['duration_ms'][0]
        if song_id is not None:
            durations_without_last = durations_with_last - get_song(user_id, token, song_id, market)['duration_ms'][0]
            if abs(durations_without_last-convert(workout_plan[i][0])) < abs(durations_with_last-convert(workout_plan[i][0])):
                songs.pop(-1)
            songs.append(end_song)
            total_durations += get_song(user_id, token, end_song, market)['duration_ms'][0]
            print('Add end song:', end_song, total_durations/60000)
        return songs

# Test

In [8]:
# settings
user_id = "pbwppse1hilahmk43ls424ao4" # "2272ss2bp3tznxg6k3v63kqxq"  

#### Test recently played songs without genre

In [9]:
token = "BQC7ERke6mhHX9C_-OnMGgDkMl4cGwomSWB2L9ENErXgnwAHJDpjCf12vH_ZIJ3mJIdyLt1z8_vmqFTgmFQCdedB0Nx5uqNL-vkYmv3J5eJqfUgABRkFPssOpQMzPenpYNki5vEEXLb3aB2HEGnOBOGNgBEK11N0h0ALB8ZU8EP18ZmDfLprSrVlbldSCt-ir6K9VplOSkI5rDktXFtj-MX7WGkgiBA-LIaYa0XOFsL1qyBPZSE8Uhn4dA"

In [56]:
market = 'US'
workout_plan = [(0, 0.6), (15, 0.95), (30, 0.8), (35, 0.25)]
pars_weight = {'energy': 0.65, 'duration_ms': 0.35}

history_args = {'limit': 7, 'after': 0} ## {} means using default values

In [None]:
songs = generate_target(user_id, token, market, workout_plan, history_args, pars_weight = pars_weight, 
                        recently = True, MAX_songs = 50)
songs

See the duration for each song and the total duration

In [None]:
durations = pd.concat([get_song(user_id, token, id, market)['duration_ms'] for id in songs])
durations / 60/ 1000, durations.sum() / 60/ 1000

See the energy for each song

In [None]:
pd.concat([get_song(user_id, token, id, market)['energy'] for id in songs])

Create a playlist with those songs

In [None]:
#create_plst(user_id, token, ['spotify:track:'+ str(x) for x in songs], 
#            name='Recently no genre', description='', public=False)

#### Test recently played songs with genre

You can go to https://developer.spotify.com/console/get-available-genre-seeds/ to find all avaliable genres.

In [None]:
#pars_weight = {'energy': 0.8, 'duration_ms': 0.2}

#history_args = {'limit': 15, 'after': 28885117} ## {} means using default values

genre = ['disco', 'deep house', 'disco house', 'funky house', 'groove room']

In [None]:
songs = generate_target(user_id, token, market, workout_plan, history_args, 
                    pars_weight = pars_weight, genre = genre)
songs

In [None]:
durations = pd.concat([get_song(user_id, token, id, market)['duration_ms'] for id in songs])
durations/ 60/ 1000, durations.sum() / 60/ 1000

In [None]:
pd.concat([get_song(user_id, token, id, market)['energy'] for id in songs])

Create a playlist with those songs

In [None]:
#create_plst(user_id, token, ['spotify:track:'+ str(x) for x in songs], 
#            name='Recently with a genre', description='', public=False)

#### Test top songs without genre

In [27]:
#token = "BQCoCfROM70s8JQFbbcCOAKdQmc90Ejf6KSsWlbZauDIqYDQk_WrD7X-Zxi-o2lnUDeTveTCS_0obQdU8Ysri4xi0IrqNiq46eY5uMKgpXqgVGU0YTZQ5Rl0cYDWf_AvZCuUqvCTdYXKM4lDlxDNn4k0MtsqhkknTZBY9q253Vy2uFnPUfm7ctrVoXFX83hE97XwUgaGIFVQX4PagPqkyPNbyk4LBjPm3jnRyiCEXu_I-GVo_z712A"

In [79]:
#pars_weight = {'energy': 0.8, 'duration_ms': 0.2}

history_args = {'limit': 15, 'time_range': 'medium_term', 'offset': 0}

In [80]:
songs = generate_target(user_id, token, market, workout_plan, history_args, 
                    pars_weight = pars_weight, recently = False, genre = None, MAX_songs = 25)
songs

status: 200
originally 15 song candidates
status: 200
add 13 song candidates
finally 25 song candidates
Start!
Add start song: 70M59uUigQFmXYImTnqjGp 4.9664166666666665
16 songs that satisfy the energy trend
Add song: 7nBR4Tt431p1MTgv3lVsmX 11.792383333333333
11 songs that satisfy the energy trend
Add song: 1uU95PBinoqgQVn6VVpF6q 14.960383333333333
2 songs that satisfy the energy trend
Add song: 0MjeQ5T5E50q0d3i72854N 17.602066666666666
5 songs that satisfy the energy trend
Add song: 1oWNMCtrpGV71z6dWRQAWV 24.061083333333332
0 songs that satisfy the energy trend
 There are no enough song candidates, please try to enlarge `MAX_songs`.
12 songs that satisfy the energy trend
Add song: 18MlLrfCkYOnKutil99GwY 31.475716666666667
2 songs that satisfy the energy trend
Add song: 0HDudjyoG3UpWIMAJrXIaB 35.395066666666665
Add end song: 29Q7DaDXdIibBLCivJwIqA 38.111783333333335


['70M59uUigQFmXYImTnqjGp',
 '7nBR4Tt431p1MTgv3lVsmX',
 '1uU95PBinoqgQVn6VVpF6q',
 '0MjeQ5T5E50q0d3i72854N',
 '1oWNMCtrpGV71z6dWRQAWV',
 '18MlLrfCkYOnKutil99GwY',
 '29Q7DaDXdIibBLCivJwIqA']

In [81]:
durations = pd.concat([get_song(user_id, token, id, market)['duration_ms'] for id in songs])
durations/ 60/ 1000, durations.sum() / 60/ 1000

(id
 70M59uUigQFmXYImTnqjGp    4.966417
 7nBR4Tt431p1MTgv3lVsmX    6.825967
 1uU95PBinoqgQVn6VVpF6q    3.168000
 0MjeQ5T5E50q0d3i72854N    2.641683
 1oWNMCtrpGV71z6dWRQAWV    6.459017
 18MlLrfCkYOnKutil99GwY    7.414633
 29Q7DaDXdIibBLCivJwIqA    2.716717
 Name: duration_ms, dtype: float64,
 34.192433333333334)

In [82]:
pd.concat([get_song(user_id, token, id, market)['energy'] for id in songs])

id
70M59uUigQFmXYImTnqjGp    0.584
7nBR4Tt431p1MTgv3lVsmX    0.685
1uU95PBinoqgQVn6VVpF6q    0.923
0MjeQ5T5E50q0d3i72854N    0.929
1oWNMCtrpGV71z6dWRQAWV    0.805
18MlLrfCkYOnKutil99GwY    0.534
29Q7DaDXdIibBLCivJwIqA    0.321
Name: energy, dtype: float64

Create a playlist with those songs

In [63]:
create_plst(user_id, token, ['spotify:track:'+ str(x) for x in songs], 
            name='Top no genre', description='', public=False)

Playlist Top no genre is successfully created!
Playlist Top no genre is successfully filled with recommendations!
Your playlist is ready at https://open.spotify.com/playlist/76cVJo7Hnkz58NESXL2tEu


#### Test top songs with genre

In [83]:
#pars_weight = {'energy': 0.8, 'duration_ms': 0.2}

history_args = {'limit': 15, 'time_range': 'medium_term', 'offset': 0}

genre = ['new french touch', 'trip hop', 'funky house']

In [84]:
songs = generate_target(user_id, token, market, workout_plan, history_args, 
                    pars_weight = pars_weight, recently = False, genre = genre, MAX_songs = 25)
songs

status: 200
originally 9 song candidates
status: 200
add 4 song candidates
status: 200
finally 13 song candidates
Start!
Add start song: 6ldJYmlEQYPPBmoxZjpMvr 4.784566666666667
7 songs that satisfy the energy trend
Add song: 7nBR4Tt431p1MTgv3lVsmX 11.610533333333333
5 songs that satisfy the energy trend
Add song: 4jHB8cLwEtbFzcbwymr0Do 14.862566666666666
0 songs that satisfy the energy trend
 There are no enough song candidates, please try to enlarge `MAX_songs`.
2 songs that satisfy the energy trend
Add song: 1oWNMCtrpGV71z6dWRQAWV 21.321583333333333
0 songs that satisfy the energy trend
 There are no enough song candidates, please try to enlarge `MAX_songs`.
6 songs that satisfy the energy trend
Add song: 1uXUZfJykefWuwj9VtyxIq 31.323616666666666
0 songs that satisfy the energy trend
 There are no enough song candidates, please try to enlarge `MAX_songs`.
Add end song: 2KklXplRtxMsBYo474Es0w 34.01231666666666


['6ldJYmlEQYPPBmoxZjpMvr',
 '7nBR4Tt431p1MTgv3lVsmX',
 '4jHB8cLwEtbFzcbwymr0Do',
 '1oWNMCtrpGV71z6dWRQAWV',
 '1uXUZfJykefWuwj9VtyxIq',
 '2KklXplRtxMsBYo474Es0w']

In [85]:
durations = pd.concat([get_song(user_id, token, id, market)['duration_ms'] for id in songs])
durations/ 60/ 1000, durations.sum() / 60/ 1000

(id
 6ldJYmlEQYPPBmoxZjpMvr     4.784567
 7nBR4Tt431p1MTgv3lVsmX     6.825967
 4jHB8cLwEtbFzcbwymr0Do     3.252033
 1oWNMCtrpGV71z6dWRQAWV     6.459017
 1uXUZfJykefWuwj9VtyxIq    10.002033
 2KklXplRtxMsBYo474Es0w     2.688700
 Name: duration_ms, dtype: float64,
 34.01231666666666)

In [86]:
pd.concat([get_song(user_id, token, id, market)['energy'] for id in songs])

id
6ldJYmlEQYPPBmoxZjpMvr    0.552
7nBR4Tt431p1MTgv3lVsmX    0.685
4jHB8cLwEtbFzcbwymr0Do    0.914
1oWNMCtrpGV71z6dWRQAWV    0.805
1uXUZfJykefWuwj9VtyxIq    0.484
2KklXplRtxMsBYo474Es0w    0.666
Name: energy, dtype: float64

Create a playlist with those songs

In [None]:
create_plst(user_id, token, ['spotify:track:'+ str(x) for x in songs], 
            name='Top with a genre', description='', public=False)