In [1]:
from credentials import *
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials
import numpy as np
import pandas as pd
from time import sleep
# Module-level Spotify client used by the functions below. It authenticates with
# the client-credentials flow; Client_ID and Client_secret are presumably supplied
# by the star import from `credentials` above (verify against that module).
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=Client_ID,
                                                           client_secret=Client_secret))

In [204]:
def search_song(artist_name, track_title):
    '''
    Search Spotify for tracks matching an artist name and a track title.

    Inputs:
    artist_name: str, placed in the `artist:` field filter of the query.
    track_title: str, placed in the `track:` field filter of the query.

    Output:
    DataFrame with one row per match (at most 10) and the columns
    "Title", "Artist", "Album" and "ID". When a track has several artists
    their names are joined with ", ".

    Raises:
    LookupError when the search returns no tracks. The same message is also
    printed, so interactive callers that swallow the exception still tell the
    user what happened.
    '''
    search_query = f"artist:{artist_name} track:{track_title}"
    results = sp.search(q=search_query, type='track', limit=10)
    tracks = results['tracks']['items']

    if not tracks:
        message = f"No results found for '{track_title}' by '{artist_name}'."
        print(message)
        raise LookupError(message)

    # One record per track; only the fields that end up in the result are read.
    rows = [
        {
            "Title": track['name'],
            "Artist": ", ".join(artist['name'] for artist in track['artists']),
            "Album": track['album']['name'],
            "ID": track['id'],
        }
        for track in tracks
    ]
    return pd.DataFrame(rows, columns=["Title", "Artist", "Album", "ID"])

In [7]:
# Example lookup: list the candidate tracks Spotify returns for one artist/title pair.
search_song("Vasco Rossi", "Albachiara")

Unnamed: 0,Title,Artist,Album,ID
0,Albachiara - Remastered 2019,Vasco Rossi,Non siamo mica gli americani! 40° RPLAY Specia...,4P5Z3iEngFfaVe0qkv4Pdl
1,Albachiara,Vasco Rossi,Albachiara,53MYtv4tzLYg1hbu13Qbc0
2,Albachiara,Vasco Rossi,L'Alba di Vasco,5xBZz1dSUm1gMjl3UravF1
3,Albachiara - Live,Vasco Rossi,Vasco Modena Park,2dsMwXaVHJBCxT3Uwj37Yr
4,Albachiara,Vasco Rossi,Vasco Rossi,7MNfDKGSgISSb26uAzf4ZJ
5,Albachiara,Vasco Rossi,Il meglio della musica italiana anni '70,6BzHVaOQHOI6p5bkO2Kqkl
6,Albachiara - Live,Vasco Rossi,VASCO LIVE Roma Circo Massimo,429a0dLY37hZE578PSwtJD
7,"Albachiara - Live From Milan,Italy/1989",Vasco Rossi,Fronte Del Palco: Live,3EzwHOi0zBR6OlJwMy7DzH
8,Albachiara - Live,Vasco Rossi,"Va bene, va bene così (Live) (Original Master)",0P6R6jKTu0eix5GyQiTlLT
9,Albachiara - Live,Vasco Rossi,VASCO NONSTOP LIVE (Live),3TJ1OEbInhYuxfHfOyLM81


In [None]:
def songs_ids(df, chunk_size=50, pause_seconds=20):
    '''
    This function gets a dataframe that contains song titles and artists as input,
    and returns a dataframe with a new column with the songs IDs obtained from Spotify.

    Inputs:
    df: pd.DataFrame with the columns "Song" and "Artist".
    chunk_size: int, number of rows searched before pausing (default 50).
    pause_seconds: number of seconds to wait between two chunks (default 20).

    Output:
    The same DataFrame object, modified in place, with a new column 'ids'
    containing the song ids. Rows whose lookup failed hold np.nan and a
    message is printed for each of them.
    '''

    id_song = []
    n_rows = len(df)

    for start in range(0, n_rows, chunk_size):
        chunk = df.iloc[start:start + chunk_size]

        for _, row in chunk.iterrows():
            title = row["Song"]
            artist = row["Artist"]
            # Field filters are written without a space after the colon and
            # separated by a space, matching the query built in search_song.
            query = f"track:{title} artist:{artist}"

            try:
                results = sp.search(q=query, type='track', limit=1)
                song_id = results["tracks"]["items"][0]["id"]
            except Exception:
                # Best effort: a failed lookup (no match, API error) must not
                # abort the whole run. KeyboardInterrupt still propagates.
                song_id = np.nan
                print(f"ID not found for {title} by {artist}")

            id_song.append(song_id)

        # Pause between chunks only; waiting after the last one is wasted time.
        if start + chunk_size < n_rows:
            sleep(pause_seconds)

    df['ids'] = id_song

    return df

In [None]:
# Look up a Spotify ID for every row. songs_ids adds the 'ids' column to
# billboard_top_100 in place, which is why the return value is not assigned
# and later cells can read billboard_top_100['ids'].
# NOTE(review): billboard_top_100 is not defined in any cell visible here —
# confirm it is created before this cell on a fresh kernel.
songs_ids(billboard_top_100)

In [18]:
def get_audio_features(list_of_songs_ids, batch_size=100):
    '''
    This function gets a list that contains song ids as input,
    and returns a dataframe with all the audio features of each song id.

    Inputs:
    list_of_songs_ids: list of Spotify track ids. Entries that are not
        non-empty strings (e.g. np.nan for songs whose id was not found)
        are not sent to the API.
    batch_size: int, number of ids sent per request (default 100).

    Output:
    DataFrame with one row per input id, in input order, indexed 0..n-1.
    Ids that were skipped, or for which the API returned no features, get a
    row holding only the 'id' value and NaN elsewhere, so the rows stay
    positionally aligned with the input list.
    '''
    song_ids = list(list_of_songs_ids)

    # Remember each usable id together with its position in the input.
    requestable = [
        (position, song_id)
        for position, song_id in enumerate(song_ids)
        if isinstance(song_id, str) and song_id
    ]

    features_by_position = {}
    for start in range(0, len(requestable), batch_size):
        batch = requestable[start:start + batch_size]
        # One request per batch instead of one request per song.
        response = sp.audio_features([song_id for _, song_id in batch]) or []
        for (position, _), features in zip(batch, response):
            if features:  # entries may be None when nothing is available
                features_by_position[position] = features

    records = [
        features_by_position.get(position, {"id": song_id})
        for position, song_id in enumerate(song_ids)
    ]

    # Take the column order from the first real result so a leading
    # placeholder row cannot reorder the feature columns.
    first_found = next(iter(features_by_position.values()), None)
    columns = list(first_found) if first_found else None

    # Build the frame once instead of concatenating inside the loop.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Fetch the audio features for every looked-up ID, then renumber the rows
# 0..n-1 so they can be joined column-wise with billboard_top_100 afterwards.
# NOTE(review): songs_ids stores np.nan in 'ids' for songs it could not find —
# confirm get_audio_features copes with those entries.
audio_features_df = get_audio_features(list(billboard_top_100['ids']))
audio_features_df = audio_features_df.reset_index(drop=True)
audio_features_df.head()

In [23]:
def add_audio_features(df, audio_features_df):
    '''
    Join a songs dataframe with its audio features, side by side.

    Inputs:
    df: pd.DataFrame placed on the left of the result.
    audio_features_df: pd.DataFrame placed on the right of the result.

    Output:
    DataFrame holding the columns of `df` followed by the columns of
    `audio_features_df`. Rows are matched on their index labels.
    '''
    frames = (df, audio_features_df)
    return pd.concat(frames, axis="columns")

In [None]:
# Put the Billboard rows and their audio features side by side (column-wise concat).
billboard_top_100_features = add_audio_features(billboard_top_100,audio_features_df)
billboard_top_100_features

In [43]:
# Load the songs together with their audio features, the 'dataset' label
# (e.g. "Hot") and the cluster assignments ('kmeans', 'dbscan' columns).
# Presumably written by an earlier clustering step — verify where the CSV comes from.
songs_clusters = pd.read_csv('songs_clusters.csv')
songs_clusters.head()

Unnamed: 0,Song,Artist,track_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,dataset,kmeans,dbscan
0,Vampire,Olivia Rodrigo,4sSKTQcX6pQIncWcNJdkiD,0.675,0.609,1,-7.905,1,0.094,0.284,0.268,0.112,0.057,119.973,Hot,5,0
1,Last Night,Morgan Wallen,59uQI0PADDKeE6UZDTJEe8,0.517,0.675,6,-5.382,1,0.0357,0.459,0.0,0.151,0.518,203.853,Hot,7,0
2,Fast Car,Luke Combs,1Lo0QY9cvc8sUB2vnIOxDT,0.712,0.603,8,-5.52,1,0.0262,0.186,0.0,0.115,0.67,97.994,Hot,2,0
3,Calm Down,Rema & Selena Gomez,0WtM2NBVQNNJLh6scP13H8,0.801,0.806,11,-5.206,1,0.0381,0.382,0.000669,0.114,0.802,106.999,Hot,2,0
4,Flowers,Miley Cyrus,3MnewZrZDqej6thgEx3OB1,0.672,0.159,9,-8.76,0,0.0343,0.962,1.8e-05,0.106,0.117,113.218,Hot,6,0


In [None]:
import pickle

def load_scaler(path='./scaler.pickle'):
    '''
    Load the pickled scaler object from disk.

    Inputs:
    path: location of the pickle file (default './scaler.pickle').

    Output:
    The unpickled object.
    '''
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point this at files produced by a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)

def load_kmean(path='./kmeans_8.pickle'):
    '''
    Load the pickled k-means model from disk.

    Inputs:
    path: location of the pickle file (default './kmeans_8.pickle').

    Output:
    The unpickled object.
    '''
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point this at files produced by a trusted source.
    with open(path, 'rb') as file:
        return pickle.load(file)

In [213]:
def hot_select_same_cluster(df: pd.DataFrame, user_song_cluster: int):
    df['Spotify Link'] = "https://open.spotify.com/track/" + df['track_id']
    selected_rows = df[(df['dataset'] == 'Hot') & (df['kmeans'] == user_song_cluster)]
    if (len(selected_rows) <5):
        selected_rows = selected_rows.sample(len(selected_rows))
        display(selected_rows[["Song","Artist","Spotify Link"]].style.hide(axis="index"))
    else:
        selected_rows = selected_rows.sample(5)
        display(selected_rows[["Song","Artist","Spotify Link"]].style.hide(axis="index"))

In [214]:
def not_hot_select_same_cluster(df,user_song_cluster: int):
    df['Spotify Link'] = "https://open.spotify.com/track/" + df['track_id']
    selected_rows = df[(df['dataset'] == 'Not Hot') & (df['kmeans'] == user_song_cluster)]
    if (len(selected_rows) <5):
        selected_rows = selected_rows.sample(len(selected_rows))
        display(selected_rows[["Song","Artist","Spotify Link"]].style.hide(axis="index"))
    else:
        selected_rows = selected_rows.sample(5)
        display(selected_rows[["Song","Artist","Spotify Link"]].style.hide(axis="index"))

In [220]:
def _ask_song_number(option_count):
    '''Prompt until the user types a whole number in the range 0 .. option_count - 1.'''
    while True:
        answer = input("Choose the song number: ")
        try:
            choice = int(answer)
        except ValueError:
            # Non-numeric input is rejected like an out-of-range number
            # instead of crashing the recommender.
            print(f"{answer} is not a valid number")
            continue
        if 0 <= choice < option_count:
            return choice
        print(f"{choice} is not a valid number")


def song_recommender():
    '''
    Interactive recommender loop.

    For every round the user types a song and an artist, picks one of the
    Spotify matches by number, and is shown songs from the same k-means
    cluster: "Hot" songs when the chosen track id is among the "Hot" rows of
    songs_clusters, "Not Hot" songs otherwise. The loop repeats until the
    user answers "no".

    Inputs:
    None (everything is read with input()).

    Output:
    None (results are printed / displayed).
    '''
    # Unpickle the scaler and the model once instead of on every round.
    scaler = load_scaler()
    kmeans_8 = load_kmean()

    user_input = 'Yes'

    while user_input.lower() == 'yes':

        user_input_song = input("Enter the song: ")
        user_input_artist = input("Enter the artist: ")

        try:
            search_song_df = search_song(user_input_artist, user_input_song)
        except Exception:
            # search_song prints a message when nothing matches; either way
            # the user is asked for another song. KeyboardInterrupt is not
            # swallowed, so the loop can still be interrupted.
            continue

        song_options = search_song_df.drop('ID', axis=1)

        print()
        print(song_options)
        print()

        user_select_song = _ask_song_number(len(song_options))

        song_id = search_song_df.iloc[user_select_song, search_song_df.columns.get_loc('ID')]
        user_audio_features = get_audio_features([song_id])

        # One-row frame for the chosen track, indexed 0 so that it lines up
        # with the one-row audio features frame in the column-wise concat.
        song_row_df = search_song_df.iloc[user_select_song].to_frame().transpose().reset_index(drop=True)

        features_user_song = (
            add_audio_features(song_row_df, user_audio_features)
            .drop(['uri', 'track_href', 'type', 'id', 'duration_ms', 'time_signature', 'analysis_url'], axis=1)
            .rename(columns={'ID': 'track_id'})
        )

        hot_track_ids = songs_clusters.loc[songs_clusters["dataset"] == "Hot", 'track_id'].tolist()
        is_hot = song_id in hot_track_ids

        # Only the numeric audio features are fed to the scaler.
        # NOTE(review): presumably these are the same columns, in the same
        # order, the scaler and the model were fitted on — confirm.
        numerical = features_user_song.select_dtypes(include=np.number)

        user_song_audio_features_scaled_np = scaler.transform(numerical)
        user_song_audio_features_scaled_df = pd.DataFrame(user_song_audio_features_scaled_np, columns=numerical.columns)

        user_song_cluster = kmeans_8.predict(user_song_audio_features_scaled_df)[0]

        if is_hot:
            hot_select_same_cluster(songs_clusters, user_song_cluster)
        else:
            not_hot_select_same_cluster(songs_clusters, user_song_cluster)

        print()

        user_input = ''
        while (user_input.lower() not in ['yes', 'no']):
            user_input = input('Do you want another recommendation? (yes/no):\n')

        print()

In [221]:
# Start the interactive recommender; it keeps prompting until the user answers "no".
song_recommender()

Enter the song: vampire
Enter the artist: olivia rodrigo

     Title                            Artist    Album
0  vampire                    Olivia Rodrigo  vampire
1  vampire                    Olivia Rodrigo  vampire
2  Vampire  Olivia Eldredge, Kendall Rodrigo  Vampire

Choose the song number: 0


Song,Artist,Spotify Link
"Deceiver, Deceiver",Arch Enemy,https://open.spotify.com/track/54nsNmxvp1ZxWcbKTcJynp
Vale Vale,Alok;Zafrir,https://open.spotify.com/track/0sfgF2kM3yrZbkLmAbjSo6
100 Años,Guasones,https://open.spotify.com/track/7op5RCUvgqOJcKAs4rZw2N
Brothers Unite,Brothers of Metal,https://open.spotify.com/track/0Hg6cD0VepM1bnjl0lObQ3
Black Hole,The Browning,https://open.spotify.com/track/3ogfZo3hQDkU2oX99Od2Av



Do you want another recommendation? (yes/no):
no

