# Spotify Linear Programming Playlist

Let's use linear programming to solve an important business problem.. Making a banging 00's playlist using the Spotify API..

In [45]:
#import libraries
import spotipy
import pandas as pd
import pulp as pl
import re
from pulp import lpSum, LpStatus, LpStatusOptimal
from spotipy.oauth2 import SpotifyClientCredentials
import config

In [46]:
#register your project with Spotify to get these
cid = config.cid
secret = config.secret

In [47]:
#access the Spotify API
client_credentials_manager = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

In [48]:
#spotify all out 00's Playlist
playlist = sp.user_playlist_tracks(playlist_id='37i9dQZF1DX4o1oenSJRJd')

In [49]:
all_out_00 = {'name':[], 'id':[]}

#get our playlist data
for x in range (1, 100):
    all_out_00['name'].append(playlist['items'][x]['track']['name'])
    all_out_00['id'].append(playlist['items'][x]['track']['id'])
    

all_out_00df = pd.DataFrame.from_dict(all_out_00)
#we have the playlist of tracks & IDs
all_out_00df

Unnamed: 0,name,id
0,Umbrella,49FYlytm3dAAraYgpoJZux
1,Just Dance,5vQXxfGn1bjv5WESrYVVpw
2,Party In The U.S.A.,3E7dfMvvCLUddWissuqMwr
3,Hips Don't Lie (feat. Wyclef Jean),3ZFTkvIE7kyPt6Nu3PEa7V
4,Yeah!,2PkeVPcL32LA96cK5ySC3c
...,...,...
94,Rude Boy,60jzFy6Nn4M0iD1d94oteF
95,Breakeven,4bCIt1XfHHGWo1UHKM11y9
96,"One, Two Step (feat. Missy Elliott)",7uKcScNXuO3MWw6LowBjW1
97,Let Me Love You,3ibKnFDaa3GhpPGlOUj7ff


In [50]:
#get our artist refs
def get_artists(column_ref):
    artists = []
    for x in column_ref:
        artist = sp.search(x)['tracks']['items'][0]['artists'][0]['name']
        artists.append(artist)
    return artists

artists = get_artists(all_out_00df['name'])
all_out_00df['artist'] = artists

#get extra features from track id
danceability = []
energy = []
key = []
loudness = []
mode = []
speechiness = []
acousticness = []
instrumentalness = []
liveness = []
valence = []
tempo = []
duration_ms = []
time_signature = []

def audio_features(df):
    for x in df['id']:
        track = sp.audio_features(x)
        danceability.append(track[0]['danceability'])
        energy.append(track[0]['energy'])
        key.append(track[0]['key'])
        loudness.append(track[0]['loudness'])
        mode.append(track[0]['mode'])
        speechiness.append(track[0]['speechiness'])
        acousticness.append(track[0]['acousticness'])
        instrumentalness.append(track[0]['instrumentalness'])
        liveness.append(track[0]['liveness'])
        valence.append(track[0]['valence'])
        tempo.append(track[0]['tempo'])
        duration_ms.append(track[0]['duration_ms'])
        time_signature.append(track[0]['time_signature'])
    return 

#create our lists of features
audio_features(all_out_00df)


features = [danceability,energy,key,loudness,mode,
            speechiness,acousticness,instrumentalness,liveness,
            valence,tempo,duration_ms,time_signature]

columns = ['danceability','energy','key','loudness','mode',
            'speechiness','acousticness','instrumentalness','liveness',
            'valence','tempo','duration_ms','time_signature']


#create feature df
feat_dict = dict(zip(columns, features))
feat_df = pd.DataFrame.from_dict(feat_dict)


In [51]:
#join our frames
playlist_df = pd.concat([all_out_00df, feat_df], axis=1)

In [52]:
#we have our playlist
playlist_df

Unnamed: 0,name,id,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Umbrella,49FYlytm3dAAraYgpoJZux,Rihanna,0.583,0.829,1,-4.603,1,0.1340,0.00864,0.000000,0.0426,0.575,174.028,275987,4
1,Just Dance,5vQXxfGn1bjv5WESrYVVpw,Lady Gaga,0.821,0.741,1,-4.570,0,0.0313,0.02780,0.000025,0.1900,0.741,118.984,244440,4
2,Party In The U.S.A.,3E7dfMvvCLUddWissuqMwr,Miley Cyrus,0.652,0.698,10,-4.667,0,0.0420,0.00112,0.000115,0.0886,0.470,96.021,202067,4
3,Hips Don't Lie (feat. Wyclef Jean),3ZFTkvIE7kyPt6Nu3PEa7V,Shakira,0.778,0.824,10,-5.892,0,0.0712,0.28400,0.000000,0.4050,0.756,100.024,218093,4
4,Yeah!,2PkeVPcL32LA96cK5ySC3c,Usher,0.895,0.795,2,-4.693,1,0.0977,0.02190,0.000000,0.0403,0.574,105.004,250067,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Rude Boy,60jzFy6Nn4M0iD1d94oteF,Rihanna,0.563,0.750,11,-4.496,1,0.1270,0.11300,0.000000,0.0788,0.812,173.906,222920,4
95,Breakeven,4bCIt1XfHHGWo1UHKM11y9,The Script,0.630,0.698,10,-4.510,1,0.0244,0.17300,0.000000,0.0789,0.529,94.023,261427,4
96,"One, Two Step (feat. Missy Elliott)",7uKcScNXuO3MWw6LowBjW1,Ciara,0.944,0.511,5,-10.960,0,0.1610,0.04200,0.001070,0.0379,0.842,113.046,203787,4
97,Let Me Love You,3ibKnFDaa3GhpPGlOUj7ff,Mario,0.656,0.578,7,-8.970,0,0.0922,0.23500,0.000000,0.1180,0.556,94.514,256733,4


In [53]:
#clean up a little
playlist_df = playlist_df.drop_duplicates(subset=['name'])
playlist_df = playlist_df.reset_index()
playlist_df = playlist_df.drop(['index'], axis=1)

In [54]:
#we have our playlist df ready to work with
playlist_df

Unnamed: 0,name,id,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,Umbrella,49FYlytm3dAAraYgpoJZux,Rihanna,0.583,0.829,1,-4.603,1,0.1340,0.00864,0.000000,0.0426,0.575,174.028,275987,4
1,Just Dance,5vQXxfGn1bjv5WESrYVVpw,Lady Gaga,0.821,0.741,1,-4.570,0,0.0313,0.02780,0.000025,0.1900,0.741,118.984,244440,4
2,Party In The U.S.A.,3E7dfMvvCLUddWissuqMwr,Miley Cyrus,0.652,0.698,10,-4.667,0,0.0420,0.00112,0.000115,0.0886,0.470,96.021,202067,4
3,Hips Don't Lie (feat. Wyclef Jean),3ZFTkvIE7kyPt6Nu3PEa7V,Shakira,0.778,0.824,10,-5.892,0,0.0712,0.28400,0.000000,0.4050,0.756,100.024,218093,4
4,Yeah!,2PkeVPcL32LA96cK5ySC3c,Usher,0.895,0.795,2,-4.693,1,0.0977,0.02190,0.000000,0.0403,0.574,105.004,250067,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Rude Boy,60jzFy6Nn4M0iD1d94oteF,Rihanna,0.563,0.750,11,-4.496,1,0.1270,0.11300,0.000000,0.0788,0.812,173.906,222920,4
95,Breakeven,4bCIt1XfHHGWo1UHKM11y9,The Script,0.630,0.698,10,-4.510,1,0.0244,0.17300,0.000000,0.0789,0.529,94.023,261427,4
96,"One, Two Step (feat. Missy Elliott)",7uKcScNXuO3MWw6LowBjW1,Ciara,0.944,0.511,5,-10.960,0,0.1610,0.04200,0.001070,0.0379,0.842,113.046,203787,4
97,Let Me Love You,3ibKnFDaa3GhpPGlOUj7ff,Mario,0.656,0.578,7,-8.970,0,0.0922,0.23500,0.000000,0.1180,0.556,94.514,256733,4


In [55]:
#Problem: Create a Noughties playlists most songs in 90 mins
prob = pl.LpProblem('NoughtiesPlaylist', pl.LpMaximize)

In [56]:
#create decision variables - 
decision_variables = []
for rownum, row in playlist_df.iterrows():
    variable = str('x' + str(rownum))
    variable = pl.LpVariable(str(variable), lowBound = 0, upBound = 1, cat= 'Integer') #make variables binary
    decision_variables.append(variable)

print ("Total number of decision_variables: " + str(len(decision_variables)))
print(decision_variables)

Total number of decision_variables: 99
[x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16, x17, x18, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, x29, x30, x31, x32, x33, x34, x35, x36, x37, x38, x39, x40, x41, x42, x43, x44, x45, x46, x47, x48, x49, x50, x51, x52, x53, x54, x55, x56, x57, x58, x59, x60, x61, x62, x63, x64, x65, x66, x67, x68, x69, x70, x71, x72, x73, x74, x75, x76, x77, x78, x79, x80, x81, x82, x83, x84, x85, x86, x87, x88, x89, x90, x91, x92, x93, x94, x95, x96, x97, x98]


In [57]:
#create optimization function
total_tracks = ""
for i, track in enumerate(decision_variables):
    total_tracks += track

prob += total_tracks
print ("Optimization function: " + str(total_tracks))

Optimization function: x0 + x1 + x10 + x11 + x12 + x13 + x14 + x15 + x16 + x17 + x18 + x19 + x2 + x20 + x21 + x22 + x23 + x24 + x25 + x26 + x27 + x28 + x29 + x3 + x30 + x31 + x32 + x33 + x34 + x35 + x36 + x37 + x38 + x39 + x4 + x40 + x41 + x42 + x43 + x44 + x45 + x46 + x47 + x48 + x49 + x5 + x50 + x51 + x52 + x53 + x54 + x55 + x56 + x57 + x58 + x59 + x6 + x60 + x61 + x62 + x63 + x64 + x65 + x66 + x67 + x68 + x69 + x7 + x70 + x71 + x72 + x73 + x74 + x75 + x76 + x77 + x78 + x79 + x8 + x80 + x81 + x82 + x83 + x84 + x85 + x86 + x87 + x88 + x89 + x9 + x90 + x91 + x92 + x93 + x94 + x95 + x96 + x97 + x98


In [58]:
#total duration of our playlist in milliseconds
playlist_df['duration_ms'].sum()

22742571

In [59]:
#create lists for additional data to create constraints for our maximisation problem
tracks_duration = []
danceability = []
loudness =[]
mode = []
tempo = []

for i in range(0,99):
    track_total_dur = decision_variables[i]*playlist_df['duration_ms'][i]
    tracks_duration.append(track_total_dur)

for i in range(0,99):
    dability = decision_variables[i]*playlist_df['danceability'][i]
    danceability.append(dability)

for i in range(0,99):
    ldness = decision_variables[i]*playlist_df['loudness'][i]
    loudness.append(ldness)
    
for i in range(0,99):    
    mde = decision_variables[i]*playlist_df['mode'][i]
    mode.append(mde)
    
for i in range(0,99):    
    tpo = decision_variables[i]*playlist_df['tempo'][i]
    tempo.append(tpo)

In [60]:
#define our constraints
prob += lpSum(tracks_duration) <= 7200000, "PartyLengthMax"
prob += lpSum(tracks_duration) >= 5400000, "PartyLengthMin"
prob += lpSum(danceability) >= 15, "Danceability"
prob += lpSum(tempo) >= 900, "Tempo"

In [61]:
#our final problem with constraints
print(prob)
#write the problem to an LP file
prob.writeLP("SpotifyPlaylist.lp" )

NoughtiesPlaylist:
MAXIMIZE
1*x0 + 1*x1 + 1*x10 + 1*x11 + 1*x12 + 1*x13 + 1*x14 + 1*x15 + 1*x16 + 1*x17 + 1*x18 + 1*x19 + 1*x2 + 1*x20 + 1*x21 + 1*x22 + 1*x23 + 1*x24 + 1*x25 + 1*x26 + 1*x27 + 1*x28 + 1*x29 + 1*x3 + 1*x30 + 1*x31 + 1*x32 + 1*x33 + 1*x34 + 1*x35 + 1*x36 + 1*x37 + 1*x38 + 1*x39 + 1*x4 + 1*x40 + 1*x41 + 1*x42 + 1*x43 + 1*x44 + 1*x45 + 1*x46 + 1*x47 + 1*x48 + 1*x49 + 1*x5 + 1*x50 + 1*x51 + 1*x52 + 1*x53 + 1*x54 + 1*x55 + 1*x56 + 1*x57 + 1*x58 + 1*x59 + 1*x6 + 1*x60 + 1*x61 + 1*x62 + 1*x63 + 1*x64 + 1*x65 + 1*x66 + 1*x67 + 1*x68 + 1*x69 + 1*x7 + 1*x70 + 1*x71 + 1*x72 + 1*x73 + 1*x74 + 1*x75 + 1*x76 + 1*x77 + 1*x78 + 1*x79 + 1*x8 + 1*x80 + 1*x81 + 1*x82 + 1*x83 + 1*x84 + 1*x85 + 1*x86 + 1*x87 + 1*x88 + 1*x89 + 1*x9 + 1*x90 + 1*x91 + 1*x92 + 1*x93 + 1*x94 + 1*x95 + 1*x96 + 1*x97 + 1*x98 + 0
SUBJECT TO
PartyLengthMax: 275987 x0 + 244440 x1 + 204587 x10 + 199640 x11 + 211160 x12
 + 242293 x13 + 242373 x14 + 219720 x15 + 179640 x16 + 238627 x17 + 199693 x18
 + 188960 x19 + 20206

[x0,
 x1,
 x10,
 x11,
 x12,
 x13,
 x14,
 x15,
 x16,
 x17,
 x18,
 x19,
 x2,
 x20,
 x21,
 x22,
 x23,
 x24,
 x25,
 x26,
 x27,
 x28,
 x29,
 x3,
 x30,
 x31,
 x32,
 x33,
 x34,
 x35,
 x36,
 x37,
 x38,
 x39,
 x4,
 x40,
 x41,
 x42,
 x43,
 x44,
 x45,
 x46,
 x47,
 x48,
 x49,
 x5,
 x50,
 x51,
 x52,
 x53,
 x54,
 x55,
 x56,
 x57,
 x58,
 x59,
 x6,
 x60,
 x61,
 x62,
 x63,
 x64,
 x65,
 x66,
 x67,
 x68,
 x69,
 x7,
 x70,
 x71,
 x72,
 x73,
 x74,
 x75,
 x76,
 x77,
 x78,
 x79,
 x8,
 x80,
 x81,
 x82,
 x83,
 x84,
 x85,
 x86,
 x87,
 x88,
 x89,
 x9,
 x90,
 x91,
 x92,
 x93,
 x94,
 x95,
 x96,
 x97,
 x98]

In [62]:
#the problem is solved using PuLP's choice of Solver
prob.solve()

#the status of the solution is printed to the screen
print("status:", LpStatus[prob.status])

#each of the variables is printed with it's resolved optimum value
print ("decision_variables: ")
for v in prob.variables():
    print(v.name, "=", v.varValue)

status: Optimal
decision_variables: 
x0 = 0.0
x1 = 0.0
x10 = 1.0
x11 = 1.0
x12 = 1.0
x13 = 0.0
x14 = 0.0
x15 = 0.0
x16 = 1.0
x17 = 0.0
x18 = 1.0
x19 = 1.0
x2 = 1.0
x20 = 1.0
x21 = 0.0
x22 = 1.0
x23 = 1.0
x24 = 1.0
x25 = 0.0
x26 = 0.0
x27 = 0.0
x28 = 1.0
x29 = 0.0
x3 = 0.0
x30 = 1.0
x31 = 1.0
x32 = 0.0
x33 = 0.0
x34 = 0.0
x35 = 0.0
x36 = 0.0
x37 = 0.0
x38 = 0.0
x39 = 0.0
x4 = 0.0
x40 = 1.0
x41 = 0.0
x42 = 0.0
x43 = 0.0
x44 = 0.0
x45 = 1.0
x46 = 0.0
x47 = 0.0
x48 = 0.0
x49 = 1.0
x5 = 0.0
x50 = 0.0
x51 = 0.0
x52 = 1.0
x53 = 1.0
x54 = 1.0
x55 = 1.0
x56 = 0.0
x57 = 0.0
x58 = 1.0
x59 = 0.0
x6 = 0.0
x60 = 0.0
x61 = 0.0
x62 = 0.0
x63 = 0.0
x64 = 0.0
x65 = 1.0
x66 = 0.0
x67 = 0.0
x68 = 0.0
x69 = 1.0
x7 = 0.0
x70 = 0.0
x71 = 0.0
x72 = 0.0
x73 = 0.0
x74 = 0.0
x75 = 0.0
x76 = 0.0
x77 = 1.0
x78 = 0.0
x79 = 0.0
x8 = 0.0
x80 = 1.0
x81 = 1.0
x82 = 0.0
x83 = 1.0
x84 = 1.0
x85 = 0.0
x86 = 0.0
x87 = 1.0
x88 = 0.0
x89 = 0.0
x9 = 1.0
x90 = 0.0
x91 = 1.0
x92 = 1.0
x93 = 1.0
x94 = 0.0
x95 = 0.0
x96 = 1.0
x97

In [63]:
#reorder results
variable_name = []
variable_value = []

for v in prob.variables():
    variable_name.append(v.name)
    variable_value.append(v.varValue)

df = pd.DataFrame({'variable': variable_name, 'value': variable_value})
for rownum, row in df.iterrows():
    value = re.findall(r'(\d+)', row['variable'])
    df.loc[rownum, 'variable'] = int(value[0])

df = df.sort_index()


#append results
for rownum, row in playlist_df.iterrows():
     for results_rownum, results_row in df.iterrows():
        if rownum == results_row['variable']:
            playlist_df.loc[rownum, 'decision'] = results_row['value']

In [64]:
#function to convert milliseconds to mins:secs per standard song duration formats
def song_duration(millis):
    seconds=(millis/1000)%60
    minutes=(millis/(1000*60))%60
    return "%d:%d" % (minutes, seconds)

In [65]:
#Apply time conversion to the dataframe
playlist_df['duration'] = playlist_df['duration_ms'].apply(song_duration)
playlist_df

Unnamed: 0,name,id,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,decision,duration
0,Umbrella,49FYlytm3dAAraYgpoJZux,Rihanna,0.583,0.829,1,-4.603,1,0.1340,0.00864,0.000000,0.0426,0.575,174.028,275987,4,0.0,4:35
1,Just Dance,5vQXxfGn1bjv5WESrYVVpw,Lady Gaga,0.821,0.741,1,-4.570,0,0.0313,0.02780,0.000025,0.1900,0.741,118.984,244440,4,0.0,4:4
2,Party In The U.S.A.,3E7dfMvvCLUddWissuqMwr,Miley Cyrus,0.652,0.698,10,-4.667,0,0.0420,0.00112,0.000115,0.0886,0.470,96.021,202067,4,1.0,3:22
3,Hips Don't Lie (feat. Wyclef Jean),3ZFTkvIE7kyPt6Nu3PEa7V,Shakira,0.778,0.824,10,-5.892,0,0.0712,0.28400,0.000000,0.4050,0.756,100.024,218093,4,0.0,3:38
4,Yeah!,2PkeVPcL32LA96cK5ySC3c,Usher,0.895,0.795,2,-4.693,1,0.0977,0.02190,0.000000,0.0403,0.574,105.004,250067,4,0.0,4:10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,Rude Boy,60jzFy6Nn4M0iD1d94oteF,Rihanna,0.563,0.750,11,-4.496,1,0.1270,0.11300,0.000000,0.0788,0.812,173.906,222920,4,0.0,3:42
95,Breakeven,4bCIt1XfHHGWo1UHKM11y9,The Script,0.630,0.698,10,-4.510,1,0.0244,0.17300,0.000000,0.0789,0.529,94.023,261427,4,0.0,4:21
96,"One, Two Step (feat. Missy Elliott)",7uKcScNXuO3MWw6LowBjW1,Ciara,0.944,0.511,5,-10.960,0,0.1610,0.04200,0.001070,0.0379,0.842,113.046,203787,4,1.0,3:23
97,Let Me Love You,3ibKnFDaa3GhpPGlOUj7ff,Mario,0.656,0.578,7,-8.970,0,0.0922,0.23500,0.000000,0.1180,0.556,94.514,256733,4,0.0,4:16


In [66]:
#selecting only tracks that made the final playlist
playlist_df[playlist_df['decision'] == 1]

Unnamed: 0,name,id,artist,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,decision,duration
2,Party In The U.S.A.,3E7dfMvvCLUddWissuqMwr,Miley Cyrus,0.652,0.698,10,-4.667,0,0.042,0.00112,0.000115,0.0886,0.47,96.021,202067,4,1.0,3:22
9,Down,7LP4Es66zdY7CyjepqmvAg,Jay Sean,0.657,0.695,2,-4.493,1,0.0321,0.0108,0.0,0.0822,0.683,65.997,212107,4,1.0,3:32
10,Sex on Fire,395C2pn0PdOYPzM4B1jLoO,Kings of Leon,0.544,0.903,9,-5.651,1,0.0545,0.00166,0.00632,0.1,0.432,153.629,204587,4,1.0,3:24
11,Bye Bye Bye,2yiZyjMEByt9sJBZWnWaDR,*NSYNC,0.571,0.87,8,-3.877,0,0.0641,0.0198,7e-06,0.0564,0.813,172.716,199640,4,1.0,3:19
12,Oops!...I Did It Again,6naxalmIoLFWR0siv8dnQQ,Britney Spears,0.751,0.834,1,-5.444,0,0.0437,0.3,1.8e-05,0.355,0.894,95.053,211160,4,1.0,3:31
16,I Kissed A Girl,14iN3o8ptQ8cFVZTEmyQRV,Katy Perry,0.699,0.76,5,-3.173,1,0.0677,0.00223,0.0,0.132,0.696,129.996,179640,4,1.0,2:59
18,TiK ToK,0HPD5WQqrq7wPWR7P7Dw1i,Kesha,0.755,0.837,2,-2.718,0,0.142,0.0991,0.0,0.289,0.714,120.028,199693,4,1.0,3:19
19,Since U Been Gone,3iJTbZDSnZ676y5qObWDm0,Kelly Clarkson,0.664,0.741,0,-5.391,1,0.0343,0.00169,0.0502,0.113,0.409,131.006,188960,4,1.0,3:8
20,Replay,4E5P1XyAFtrjpiIxkydly4,Iyaz,0.706,0.751,9,-6.323,1,0.0708,0.173,0.0,0.168,0.195,91.031,182307,4,1.0,3:2
22,Toxic,4fbaKWFRghusXd4bSBvvfN,Britney Spears,0.774,0.838,5,-3.914,0,0.114,0.0249,0.025,0.242,0.924,143.04,198800,4,1.0,3:18


Acknowledgements:
Anna Nicanorova: Optimizing Life Everyday Problems Solved with Linear Programing in Python
https://www.youtube.com/watch?v=7yZ5xxdkTb8
https://github.com/AnnaNican

PULP Doc:
https://coin-or.github.io/pulp/
Some great starter examples in this lot

M373 Optimization:
http://www.open.ac.uk/courses/modules/m373
Great course from the OU that I undertook in my 3rd year degree, goes through many applications of linear/integer programming.