# Spotify Mood Session

## 1. Algorithm

a. **First Selection**. User selects a song $s_0$ from the set of all songs $S$ using some kind of search method (by name / artist) and plays that song.

b. **Played List**.  Let $P$ be a new list of played songs and let $P \gets s_0$   

c. **Cold Start**. Create a bootstrap random selection of $n$ songs from $S$ called $R$. Calculate the distance between each song and the first selection and select the closest to the chosen song . Formally:

$$
\forall s \in R \text{ dist}(s_i, s_0)
$$

*potential parameter: use a value to add the closest n songs to the initial bootstrap played list*

d. **Rating**. For every song that a user plays for more than 30 seconds, the song is rated $\text{pos}$, songs that are skipped within this time period are rated $\text{neg}$.  Songs that have not been played are rated $\text{non}$

e. **Recommending**.  Take the current set of vectors and select the next song based on some sort of agregated mean or nearest neighbour system.

repeat (d) and (e) ad infinitum.

Formally:

$$
\text{let: } s_0 \gets \text{ user}(S) \\
\text{let: } P = \{ s_0 \} \\
\text{let: } R \subset S \\
\text{let: } D = \emptyset \\ 
\text{for } s_i \text{ in } R:\\
D_i = \text{dist}(s_i, s_0) \\
\text{end for }\\
\text{select}(\text{max}(D))\\
$$

In [1]:
import json
import pandas as pd
import os
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials as cc
import numpy as np
import pandas as pd
from IPython.display import display
import warnings
import time

# create list of playlists
playlists = []
path = 'data'
count = 5
for filename in os.listdir(path):
    count -= 1
    if count < 0:
        break
    f = os.path.join(path, filename)
    if os.path.isfile(f):
        d = json.load(open(f))
        playlists.append(pd.DataFrame(d['playlists']))

playlists = pd.concat(playlists)
playlists = playlists.reset_index(drop=True)
print(playlists.shape)

(5000, 12)


In [2]:
# Add all songs from the playlists into a single list of songs
songs = [] 
for i in range(len(playlists)):
    tracks = playlists.iloc[i].loc['tracks']
    for track in tracks:
        songs.append(track)

songs_df = pd.DataFrame(songs)
print(songs_df.shape)
songs_df.head()

(334487, 8)


Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name
0,0,Missy Elliott,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,226863,The Cookbook
1,1,Britney Spears,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,198800,In The Zone
2,2,Beyoncé,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,235933,Dangerously In Love (Alben für die Ewigkeit)
3,3,Justin Timberlake,spotify:track:1AWQoqb9bSvzTjaLralEkT,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,267266,Justified
4,4,Shaggy,spotify:track:1lzr43nnXAijIGYnCT8M8H,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,227600,Hot Shot


In [3]:
# Remove duplicate songs by URI
songs_df = songs_df.drop_duplicates('track_uri')

In [4]:
# Add score placeholder to the songs
classification = [0]*len(songs_df)
songs_df.insert(8, "class", classification) # Append data frame by one column

In [5]:
# Print song data to ensure the score was added
print(songs_df.iloc[0])

pos                                                     0
artist_name                                 Missy Elliott
track_uri            spotify:track:0UaMYEvWZi0ZqiDOoHU3YI
artist_uri          spotify:artist:2wIVse2owClT7go1WT98tk
track_name     Lose Control (feat. Ciara & Fat Man Scoop)
album_uri            spotify:album:6vV5UrXcfyQD1wu4Qo2I9K
duration_ms                                        226863
album_name                                   The Cookbook
class                                                   0
Name: 0, dtype: object


In [24]:
#DO NOT RUN THE AUDIO FEATURES HAVE ALREADY BEEN STORED IN THE AUDIO_FEATURES FILE
def run_features():
    #CREATE DATASET WITH SONG AUDIO FEATURES FROM THE URIS OF songs_df
    client_id = '898dd71dc932407e85921f0ac79f0127'
    client_secret = '16d994f070064371beb8758f32d64180'
    #https://developer.spotify.com/dashboard/applications/898dd71dc932407e85921f0ac79f0127
    client_cc = cc(client_id=client_id,client_secret=client_secret)
    sp = spotipy.Spotify(client_credentials_manager=client_cc)


    sleep_min = 1
    sleep_max = 2
    start_time = time.time()
    request_count = 0
    tracks_w_features = pd.DataFrame()
    for i in range(0,3344):
        if request_count % 13 == 0:
            time.sleep(np.random.uniform(sleep_min, sleep_max))
        else:
            tracks_to_get = songs_df.iloc[request_count:request_count+100]["track_uri"] #get the uri of these songs to get audio features
            tracks = pd.DataFrame(sp.audio_features(tracks_to_get))
            tracks_w_features = tracks_w_features.append(tracks)
        request_count+=100

    #remove useless-categorical columns, can use index to get song information later
    tracks_w_features = tracks_w_features.drop(columns=['mode','key','time_signature', 'duration_ms','analysis_url','track_href','id','type']).copy()
    warnings.filterwarnings("ignore")
    warnings.filterwarnings(action='once')

    from pathlib import Path
    filepath = Path('recommender-systems/audio_features.csv')
    filepath.parent.mkdir(parents=True, exist_ok=True)
    tracks_w_features.to_csv(filepath)

  and should_run_async(code)


Unnamed: 0,0
0,


KeyError: "['mode' 'key' 'time_signature' 'duration_ms' 'analysis_url' 'track_href'\n 'id' 'type'] not found in axis"

In [7]:
# Search for a particular song 
print("Search for a song name")
s = input()
pattern = [s]

# filter for rows that contain the partial string inputted by the user
results = songs_df.copy()
results[songs_df.track_name.str.contains('|'.join(pattern))]

  and should_run_async(code)


Search for a song name


Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,class
10459,6,The Mamas & The Papas,spotify:track:09jDQcg0LkTWH9NEVtYB43,spotify:artist:1bs7HoMkSyQwcobCpE9KpN,Snowqueen Of Texas,spotify:album:5w0UgI5gekErM0j3Y79sCg,156760,People Like Us,0


In [14]:
import max_marg_relevance

# Ask the user to enter a track URI for the first song they want to play
print("Select your first song by copying the track_URI from above (i.e. spotify:track:2wSAWEYUHkt92X4SBAPqZE)")
#s = input()
s = "spotify:track:6bLopGnirdrilrpdVB6Um1"

# Mark this song as "Played" with a positive rating (as the user chose it)
index = tracks_w_features.index[tracks_w_features['uri'] == s]     # This returns the song with the uri of 's'

def getRecommendation(songs_df, liked_song_features):

    liked_songs_nofeatures = songs_df.loc[songs_df['class'] == 1]
    display(liked_songs_nofeatures)
    liked_songs_uris = liked_songs_nofeatures['one'].to_numpy()

    average_session_songs = liked_song_features[['danceability','energy','loudness','speechiness',
                                                      'acousticness','instrumentalness','liveness','valence','tempo']].mean()
    # TODO song 'id' not working in max_marg_relevance.py

    return max_marg_relevance.recommend(average_session_songs, tracks_w_features)

continue_ = True
current_song_index = songs_df.index[songs_df['track_uri'] == s].tolist()[0]

while continue_:
    current_song_artist = songs_df.iloc[current_song_index]["artist_name"]
    current_song_name = songs_df.iloc[current_song_index]["track_name"]
    s = input(current_song_artist + "-" + current_song_name + "\n" + "Skip (y) or listen (n) or quit (q)?")
    if s == "y":
        print("skipped")
        songs_df.at[current_song_index, "class"] = -1 # negative rating
    elif s == "n":
        print("not skipped")
        songs_df.at[current_song_index, "class"] = 1  # positive rating
    elif s == "q":
        continue_ = False
    else:
        print("Invalid input")
    current_song_index = getRecommendation(songs_df,tracks_w_features)  # TODO: Get next song from the recommender system

  and should_run_async(code)


Select your first song by copying the track_URI from above (i.e. spotify:track:2wSAWEYUHkt92X4SBAPqZE)
skipped


Unnamed: 0,pos,artist_name,track_uri,artist_uri,track_name,album_uri,duration_ms,album_name,class


KeyError: 'one'