## Deployment Ready Recommender Code

After dataset curation and model training, we obtained deployment-ready code optimized for the MuseX web application. The deployment version is available as a .py file.

In [2]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity

from surprise import SVD
from surprise import accuracy
from surprise import dump

import time

# Package Dependencies:
# numpy==1.20.3
# scikit-surprise==1.1.1
# scipy==1.5.4
# File dependencies:
# songs_pool.csv
# SPF_user_song_score.csv
# recommender_model_final.pkl

def get_user_song_df(saved_songs):
    """
    saved_songs: a list of Spotify song ids for user saved songs
    return: a user song preference pd.DataFrame for find_sim_user
    """
    user_data = pd.DataFrame.from_dict({'user_id': ['TEMP' for i in range(len(saved_songs))], 'song_id_SPF': saved_songs, 
                                        'score': [10 for i in range(len(saved_songs))]})
    return user_data
    
def get_new_songs(saved_songs):
    """
    get a list of tracks the user hasn't liked as the pool of tracks for recommendation
    saved_songs: a list of Spotify song ids for user saved songs
    return: a list of Spotify song ID
    """
    songs_pool = pd.read_csv('songs_pool.csv').iloc[:,0]
    all_songs = set(songs_pool.to_list())
    new_songs = list(all_songs - set(saved_songs))
    
    return new_songs
    

def get_sim_user(user_data, song_id_user_csv='SPF_user_song_score.csv'):
    """
    user_data: pd.DataFrame with weighted user saved songs
    song_id_user_csv: filemane for curated user id - song id - rating dataset
    returns: user_id of most similar user in the training set
    """
    num_entries = 800000 # dataset to large to be used in full, has to cap # of entries
    df_song_id_user = pd.read_csv(song_id_user_csv).iloc[:num_entries,:10]
    df_song_id_user.append(user_data)

    song_user = df_song_id_user.pivot(index='user_id', columns='song_id_SPF', values='score').fillna(0)

    # obtain a sparse matrix
    song_user_mat = csr_matrix(song_user.values)
    # calculate pairwise cosine similarity between users based on listening preference
    cos_sim = cosine_similarity(song_user_mat)
    # get most similar user
    sim_score = cos_sim[-1,:-1].max()
    user_index = cos_sim[-1,:-1].argmax()
    
    return song_user.index[user_index], sim_score
    

def generate_rec_songs(user_id=None, top=20, pool=None, model='recommender_model_final.pkl'):
    """
    user_id: MSD user_id in training set
    top: number of songs to recommend
    pool: a list of Spotify song ID to recommend from
    return: (top_songs, top_scores) a list of top song_ids and a list of predicted ratings(preference) to these songs
    """
    
    # load trained collaborative filtering model
    (predictions, final_algorithm) = dump.load(model)

    top_ten = []
    pred_scores = []
    for song in pool:
        pred = final_algorithm.predict(user_id, song)
        pred_scores.append((pred.iid, pred.est))

    pred_scores.sort(key=lambda tup: tup[1], reverse=True)
    top_songs = [song for song, score in pred_scores[:top]]
    top_scores = [score for song, score in pred_scores[:top]]
    
    return top_songs, top_scores
    

In [None]:
import spotipy
from spotify_interface import *
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials

%env SPOTIPY_CLIENT_ID='69cdb7d01ce249819049f54f9fcd6d04'
%env SPOTIPY_CLIENT_SECRET='a83bbffc2a724df485e77c9798b65e3a'
%env SPOTIPY_REDIRECT_URI='http://127.0.0.1:8090/'

username = '3162mvuk5iwnol4qwjc6voyrrpae'
scope = "user-library-read"
redirect_url = 'http://127.0.0.1:5000'
client_id = '69cdb7d01ce249819049f54f9fcd6d04'
client_secret = 'a83bbffc2a724df485e77c9798b65e3a'

# sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials())
sp = get_spotify_token(username, scope, redirect_url, client_id, client_secret)

sp.track('4puHMOaNsxXs6s45g7NkJI')

In [None]:
sp.track('4puHMOaNsxXs6s45g7NkJI')

In [2]:
# create dummy user data: 20 saved song IDs
saved_songs = ['4puHMOaNsxXs6s45g7NkJI', '1EGIeHEvMZh3MNkIn2itUy', '1oZYaztSjkVQ7PeKwUbdab', '5YuXkLn4gXk9JYD1del0qR',
               '5pWBLvVsEhKiWba3wvYIXB', '2xrNC0YeIdMjXItS1V0QnS', '47XIBIhxwEUX6o0gPMZ2mN', '5yZzLyY9SMI3ZjdqpXWYDp',
               '4ZpXHlV2vQVfPXUvbDSZ92', '2LIUQiR8QfZbWT0qOTzBVD', '5h3M1h279xrxl1WtMS71Jb', '5vn6pVxzLeWJweGa9zqxGD',
               '2UjGyCvayHjGihV3oLhD8a', '0R8Danl4L4Tq00OiYFygSM', '0AFZnXDUT5qbJboJMZ6zlp', '34tIBWjEV2F27FRz34HhHy',
               '3rheINeddqah49dElHlqbZ', '0gOdikUSCMDS90TypUuNbT', '4IZxal0c4bUnY5vn2yR580', '0rT9f5BSZrZ6DvzfBEckG0']
user_data = get_user_song_df(saved_songs)

# Find most similar user in train datset
start = time.perf_counter()

sim_user_id = get_sim_user(user_data) 

end = time.perf_counter()
print('Most similar user in trainset: ', sim_user_id, '\nTime_lapsed: ', end-start)


Most similar user in trainset:  ('0d60bef01b8a0b962f0c962212023cce2ee73ac8', 0.48559712229005425) 
Time_lapsed:  39.0654726177454


In [5]:
# Get top songs recommendations
start = time.perf_counter()

new_songs = get_new_songs(saved_songs)
top_songs, top_scores = generate_rec_songs(user_id=sim_user_id, top=20, pool=new_songs)
print(top_songs)
print(top_scores)

end = time.perf_counter()
print('Songs recommendation finished. \nTime_lapsed: ', end-start)

['3JpzqRcjxif901JZKAPLn6', '5RqKo9P4DLA0bVyY70cWkr', '2hXgZdNNnIOOfXmWDzF05n', '3ExouBnPEYYinN7pS3imZK', '53I7CviVHhNYEKth717RLG', '4T2KvPdG4BZ7Zmw2MpHTcy', '15F4r63NiDNsqFETgtGzoo', '3afeZ81GDy59tATovKyH6Z', '1LPVD9574cuaAmL70FfEqa', '7sKHpszyRFBIAT378e6Tkr', '38YgZVHPWOWsKrsCXz6JyP', '3jCLGbY1yiTKGyDztuHc2j', '2uGDqQsfkKgBH4IsTu23I3', '2mCoPoJKtIoUuqkvTsJs3Z', '5LbLX9SlqtEOnvaIZsiUqd', '4m2RLN7imvsaLL1ZUGfFFw', '6m3Rrv81DPaSPw4azvVUid', '4vqp9GaO7RVkinyrYY5W6R', '7xAoWHiFihACP9v2kJyMxm', '3xpSbo5aiJUhcD1dpWYQ3A']
[6.818022442936565, 6.778378636849805, 6.053737396578466, 5.8803146616925, 5.153653336473598, 5.1400221957066705, 5.007889239706664, 4.952775674861218, 4.869618630496209, 4.854786169747419, 4.801219443137677, 4.752101745377255, 4.72915737384187, 4.703328873902105, 4.689196121560688, 4.673797568152525, 4.657363878653149, 4.656706326677075, 4.620231044880466, 4.563310456443027]
Songs recommendation finished. 
Time_lapsed:  17.02789306640625
