SONG RECOMMENDER - WORKFLOW

In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import time
import pickle
from credentials import *
# Module-level Spotify client shared by the helper functions defined later in
# this notebook. It authenticates through SpotifyClientCredentials using
# `client_id` and `client_secret`.
# NOTE(review): those two names are presumably provided by the star-import of
# `credentials` above — confirm, and keep that file out of version control.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

In [2]:
# Load the song catalogue. The path is relative, so the CSV must sit in the
# kernel's current working directory.
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for
# 'song_recommender.csv' — make sure the file is present before running.
songs = pd.read_csv('song_recommender.csv')
songs

FileNotFoundError: [Errno 2] No such file or directory: 'song_recommender.csv'

In [None]:
# Show how many values are missing in each column. display() is required here:
# a notebook only renders the last expression of a cell automatically, and this
# expression is not the last one.
display(songs.isnull().sum())

# Drop every row that contains at least one missing value, then preview the
# cleaned frame.
songs = songs.dropna()
songs

In [None]:
# Lowercase the string values of the 'Artist' and 'Title' columns only; values
# that are not strings are left untouched. Series.map is applied per column
# because DataFrame.applymap is deprecated in recent pandas releases.
songs[['Artist', 'Title']] = songs[['Artist', 'Title']].apply(
    lambda column: column.map(lambda s: s.lower() if isinstance(s, str) else s)
)
songs

In [None]:
import pandas as pd

# Helper functions for the recommender. Wherever a function takes `df`, it is the songs DataFrame loaded above.
def search_song(title, artist):
    """Look up a track on Spotify and return its track id.

    Builds a search query from the title and the artist and asks the
    module-level ``sp`` client for the single best match.

    Parameters
    ----------
    title : str
        Song title to search for.
    artist : str
        Artist name to search for.

    Returns
    -------
    str
        The id of the first matching track, or the sentinel string
        ``"not_found"`` when the search fails or returns no tracks.
    """
    # Spotify's field filters are spelled `track:` and `artist:` and must be
    # attached directly to their value; a misspelled filter such as `tracks: `
    # is not recognised as a filter at all.
    query = f"track:{title} artist:{artist}"

    try:
        result = sp.search(q=query, limit=1)
        song_id = result['tracks']['items'][0]['id']
    except Exception:
        # Best-effort lookup: API errors and empty result lists both map to the
        # sentinel. `Exception` (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        song_id = "not_found"

    return song_id

def song_is_hot(df: pd.DataFrame, song_id: str):
    """Tell whether a song belongs to the hot songs of the catalogue.

    Parameters
    ----------
    df : pd.DataFrame
        Song catalogue with an 'id' column. When it also has a 'set' column,
        only rows whose 'set' value is "H" count as hot.
    song_id : str
        Spotify track id to look up.

    Returns
    -------
    str
        "Yes" when the song is found among the hot rows, otherwise "No".
    """
    is_match = df['id'] == song_id

    # The catalogue mixes hot ("H") and not-hot ("N") rows, so being present in
    # the frame is not enough: the matching row must also be flagged as hot.
    # Frames without a 'set' column keep the plain membership test.
    if 'set' in df.columns:
        is_match = is_match & (df['set'] == "H")

    return "Yes" if is_match.any() else "No"

def get_audio_features(song_id):
    """Fetch the Spotify audio features of a song as a one-row DataFrame.

    Parameters
    ----------
    song_id : str or list of str
        Track id (or list of ids) forwarded to ``sp.audio_features``. Only the
        first entry of the response is used.

    Returns
    -------
    pd.DataFrame
        A single-row frame with one column per audio-feature key.

    Raises
    ------
    ValueError
        If the API returns nothing usable for the requested track.
    """
    results = sp.audio_features(song_id)

    # Fail with a clear message instead of an opaque AttributeError/IndexError
    # when the response is empty or its first entry is None.
    if not results or results[0] is None:
        raise ValueError(f"No audio features available for {song_id!r}")

    audio_features = results[0]

    # Wrap every value in a list so that each key becomes a one-row column.
    return pd.DataFrame({key: [value] for key, value in audio_features.items()})

def scale_audio_features(df: pd.DataFrame, filename="scaler.pickle"):
    """Scale the clustering features of a song with a pickled scaler.

    Parameters
    ----------
    df : pd.DataFrame
        Audio features; must contain the columns 'energy', 'key',
        'acousticness', 'instrumentalness', 'liveness', 'tempo' and
        'duration_ms'. Any other column is ignored.
    filename : str, optional
        Path of the pickle file holding an object with a ``transform`` method.

    Returns
    -------
    pd.DataFrame
        The transformed values, with the seven feature names as columns.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist.
    """
    feature_columns = ['energy', 'key', 'acousticness', 'instrumentalness',
                       'liveness', 'tempo', 'duration_ms']

    # NOTE(security): pickle.load can execute arbitrary code; only load scaler
    # files that you created yourself.
    try:
        with open(filename, "rb") as file:
            scaler = pickle.load(file)
    except FileNotFoundError as exc:
        # Stop here with a clear error instead of printing a message and then
        # failing on an unbound `scaler` variable.
        raise FileNotFoundError(f"Scaler not found: {filename!r}") from exc

    scaled_audio_features_np = scaler.transform(df[feature_columns])

    return pd.DataFrame(scaled_audio_features_np, columns=feature_columns)


def get_user_song_cluster(scaled_audio_features_df: pd.DataFrame, filename="kmeans_12.pickle"):
    """Predict the cluster of a song with a pickled model.

    Parameters
    ----------
    scaled_audio_features_df : pd.DataFrame
        Scaled audio features passed unchanged to the model's ``predict``.
    filename : str, optional
        Path of the pickle file holding an object with a ``predict`` method.

    Returns
    -------
    The first element of the model's prediction.

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only load model
    # files that you created yourself.
    try:
        with open(filename, "rb") as file:
            model = pickle.load(file)
    except FileNotFoundError as exc:
        # The exception class is FileNotFoundError (`FileNotFound` is not a
        # defined name). Re-raise with a clear message rather than continuing
        # with an unbound `model` variable.
        raise FileNotFoundError(f"Model not found: {filename!r}") from exc

    user_song_cluster = model.predict(scaled_audio_features_df)[0]

    return user_song_cluster
    
def recommend_song(df, n_recommendations=5):
    """Ask for a song and display recommendations from the same cluster.

    The user is prompted for a title and an artist. The song is looked up on
    Spotify, its audio features are scaled and assigned to a cluster, and
    songs from that cluster are sampled from the catalogue: hot songs ("H")
    when the user's song is hot, not-hot songs ("N") otherwise.

    Parameters
    ----------
    df : pd.DataFrame
        Song catalogue with the columns 'id', 'Title', 'Artist', 'set'
        ("H" or "N") and 'K12' (cluster label).
    n_recommendations : int, optional
        Maximum number of songs to recommend (default 5). Fewer are shown when
        the cluster does not contain enough candidates.

    Returns
    -------
    None
        The recommendations are displayed, not returned.
    """
    song_title = input("Enter the song title: ")
    artist_name = input("Enter the artist name: ")

    # Get user's song id; stop early when the lookup failed instead of sending
    # the "not_found" sentinel to the audio-features request.
    song_id = search_song(song_title, artist_name)
    if song_id == "not_found":
        print("Sorry, we could not find that song on Spotify.")
        return

    # Determine if the user's song is hot
    is_hot = song_is_hot(df, song_id)

    # Get, scale and cluster the audio features of the user's song
    user_song_audio_features_df = get_audio_features([song_id])
    user_song_scaled_audio_features_df = scale_audio_features(user_song_audio_features_df)
    user_song_cluster = get_user_song_cluster(user_song_scaled_audio_features_df)

    # Recommend from the same set as the user's song. The candidate pool and the
    # sample size come from the SAME filter, so the sample can never be larger
    # than the pool. The user's own song is excluded so that a different song is
    # always recommended.
    target_set = "H" if is_hot == "Yes" else "N"
    candidates = df[(df['set'] == target_set)
                    & (df['K12'] == user_song_cluster)
                    & (df['id'] != song_id)]

    if candidates.empty:
        print("Sorry, we have no songs to recommend for this one.")
        return

    # Work on a copy so that adding the link column does not write into a slice
    # of the catalogue.
    rec_song = candidates.sample(min(n_recommendations, len(candidates))).copy()
    rec_song['Listen to Your Songs Now'] = 'https://open.spotify.com/track/' + rec_song['id']

    print("We recommend you the following songs")

    # Lift the column-width limit so that the links are not truncated.
    with pd.option_context('display.max_colwidth', None):
        display(rec_song[['Title', 'Artist', 'Listen to Your Songs Now']])
              

def ask_user_for_another_song():
    """Keep prompting until the user answers yes or no.

    The reply is lowercased before it is checked, so any capitalisation of
    "yes" / "no" is accepted.

    Returns
    -------
    str
        Either "yes" or "no".
    """
    reply = input("Do you want another recommendation? (yes/no): ").lower()

    while reply != "yes" and reply != "no":
        # Explain what went wrong, leave a blank line, then ask again.
        print("Invalid input. Please enter 'yes' or 'no'.")
        print()
        reply = input("Do you want another recommendation? (yes/no): ").lower()

    return reply

            
def song_recommender(df):
    """Run the interactive recommendation loop.

    A recommendation is always made at least once; after each one the user is
    asked whether to continue, and the loop stops on any answer but "yes".

    Parameters
    ----------
    df : pd.DataFrame
        Song catalogue handed unchanged to ``recommend_song``.
    """
    keep_going = True
    while keep_going:
        recommend_song(df)
        keep_going = ask_user_for_another_song() == "yes"


In [None]:
# Start the interactive session on the cleaned catalogue: a recommendation is
# made, then the user is asked whether to continue, until the answer is "no".
song_recommender(songs)