# Import

In [1]:
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import numpy as np
import re
import random
import spotify_helper_functions
import clustering_music
from difflib import get_close_matches

In [19]:
# Number of Spotify recommendations printed per request: the recommendation
# loops further down run `for _ in range(n)`.
n = 5

### Functions that are in the module

In [2]:
def import_top_songs(path="top_songs.csv"):
    """
    Loads the top songs dataset and makes every cell lowercase.
    Input: the path of the csv file (anything pandas.read_csv accepts). Its first column is used as the index.
    Output: a dataframe with the same shape, where every value has been lower-cased.
    Every cell is expected to be a string, since .lower() is called on each value.
    """
    top_df = pd.read_csv(path, index_col=0)

    # Lower-case column by column with Series.map: DataFrame.applymap is deprecated
    # since pandas 2.1, while Series.map is available on every pandas version.
    top_df = top_df.apply(lambda column: column.map(lambda value: value.lower()))

    return top_df

def import_spotify_df(path="spotify_songs.csv"):
    """
    Loads the Spotify songs dataset, using the first csv column as the index.
    Input: the path of the csv file.
    Output: the dataframe, with the "song_name" and "artist_name" columns in lowercase.
    All the other columns are returned exactly as they were read.
    """
    songs = pd.read_csv(path, index_col=0)

    # Only the two text columns that are searched by name get normalised
    for text_column in ("song_name", "artist_name"):
        songs[text_column] = songs[text_column].apply(lambda text: text.lower())

    return songs


def is_similar(user_input, name_series, n=3, cutoff=0.90):
    """
    Finds the names of a pandas series that are similar to a given name. It uses a built in algorithm that
    checks the similarity of sequences, through the function get_close_matches.
    Input: the name (song or artist) we would like to search for, and the series. Optionally, the maximum
    number of matches to return (n) and the minimum similarity score, between 0 and 1 (cutoff).
    Output: a list with the distinct names that are similar or equal, most similar first; empty list if
    there are no close matches.
    The names are compared exactly as they are stored: nothing is lower-cased or cleaned here, so the
    input and the series have to be normalised before calling this function.
    """
    # Keep every name only once, in first-seen order: a title that is repeated in the series would
    # otherwise take several of the n result slots and hide other close matches.
    # Entries that are not strings (e.g. NaN) are skipped, because difflib can only compare sequences.
    name_list = list(dict.fromkeys(
        name for name in name_series.to_list() if isinstance(name, str)
    ))

    # The function will return the closest matches in the list, or an empty list if there are none
    close_matches = get_close_matches(user_input, name_list, n=n, cutoff=cutoff)

    return close_matches

def is_top_song(user_input, df):
    """
    Looks for the song in the top songs dataframe.
    Input: the song name we would like to search for, and a dataframe with a "songs" column.
    Output: the list of close matches that is_similar finds in that column; empty list if there are none.
    """
    return is_similar(user_input, df["songs"])


def is_spotify_song(user_input, df):
    """
    Looks for the song in the Spotify songs dataframe.
    Input: the song name we would like to search for, and a dataframe with the columns
    "song_name" and "song_id".
    Output: a list with the song_id of every row whose name equals the closest match that
    is_similar finds; empty list if there are no close matches.
    """
    candidates = is_similar(user_input, df["song_name"])

    # Nothing close enough to the input: there are no ids to report
    if not candidates:
        return []

    # Only the first (closest) name is used; several songs can share that name
    closest_name = candidates[0]
    same_name = df["song_name"] == closest_name
    return list(df.loc[same_name, "song_id"])

def _ask_for_option(number_of_options):
    """Keeps asking for a selection until the user types a whole number between 1 and number_of_options."""
    while True:
        try:
            selection = int(input("Selection: "))
        except ValueError:
            # Not a whole number: ask again. Only ValueError is caught, so Ctrl-C and
            # end-of-input still get out of the loop instead of being swallowed.
            continue
        if 1 <= selection <= number_of_options:
            return selection


def choice(options, df, sp, names_or_ids="ids"):
    """
    Prints a numbered menu with the options and asks the user to select one of them.
    Input: the list of options; df (not used, kept so existing calls keep working); the Spotify
    connection, which is only needed to look up song ids; and names_or_ids, which says what the
    options are: "names" prints them as they are, anything else treats them as song ids and prints
    the song and artist names that spotify_helper_functions.get_song_info returns for each id.
    Output: the selected element of options (a name or a song id, exactly as it was given).
    Raises ValueError if options is empty, since there would be no valid selection to wait for.
    """
    if len(options) == 0:
        raise ValueError("There are no options to choose from.")

    print("Select one of the following choices:")

    for index, item in enumerate(options):
        if names_or_ids == "names":
            # First case, list with names. We show the name itself
            label = item
        else:
            # Second case, list with song_ids. We show the song behind each id
            song_name, artist_name = spotify_helper_functions.get_song_info(item, sp)
            label = f"{song_name} by {artist_name}"
        print(f"Option {index + 1} - {label}")

    # The menu is numbered from 1, the list from 0
    return options[_ask_for_option(len(options)) - 1]



def recommend_top_song(song_name, df):
    """
    Recommends a random song of the top songs dataframe, different from the one the user gave.
    Input: the name of the song the user likes, and the top songs dataframe (first column: song names,
    second column: artist names).
    Output: a dictionary with the keys "song_name" and "artist_name" of the recommended song. The
    recommendation is also printed.
    Raises ValueError if the dataframe has no song other than song_name.
    """
    # Leave the user's own song out before picking. Picking first and retrying on a clash
    # would never finish if every row of the dataframe were that same song.
    is_other_song = (df.iloc[:, 0] != song_name).to_numpy()
    other_songs = df[is_other_song]

    if other_songs.empty:
        raise ValueError(f"There is no top song other than {song_name!r} to recommend.")

    random_row = random.choice(range(len(other_songs)))
    random_song = other_songs.iloc[random_row, 0]
    random_artist = other_songs.iloc[random_row, 1]

    print(
        f"TOP recommendation! A similar song to {song_name.capitalize()} that you might "
        f"like is {random_song.capitalize()}, by {random_artist.capitalize()}."
    )

    return {"song_name": random_song, "artist_name": random_artist}


def recommend_spotify_song(song_id, df, model, sp_connection):
    """
    Recommends a random song that the model places in the same cluster as the given song.
    Input: the song_id of the song the user likes; the Spotify songs dataframe, with the columns
    "song_name", "artist_name", "artist_id" and "song_id" (every other column is passed to the model);
    the clustering model (its predict method is the only thing used); and the Spotify connection,
    which is only used when the song is not in the dataframe.
    Output: a tuple with the song_id, the capitalized song name and the capitalized artist name of
    the recommended song. The recommendation is also printed.
    Raises ValueError if no song of the dataframe is in the cluster of the given song.
    """
    modeling_df = df.drop(columns=["song_name", "artist_name", "artist_id", "song_id"])
    # NOTE: the whole dataframe goes through the model again on every call
    clusters = model.predict(modeling_df)

    data = {"song_name": df["song_name"], "song_id": df["song_id"], "artist_name": df["artist_name"],
            "artist_id": df["artist_id"], "cluster": clusters}

    names_ids_df = pd.DataFrame(data=data)

    # Compare against the values of the column: "song_id in series" would look the id up
    # among the index labels and never find it.
    is_given_song = (names_ids_df["song_id"] == song_id).to_numpy()

    if is_given_song.any():
        recommendation_cluster = names_ids_df.loc[is_given_song, "cluster"].iloc[0]
    else:
        attributes = spotify_helper_functions.get_songs_attributes(song_id, sp_connection)
        # NOTE(review): the feature names are passed as index=, not columns= - confirm that this
        # matches the shape of what get_songs_attributes returns.
        row = pd.DataFrame(data=attributes, index=modeling_df.columns)
        recommendation_cluster = model.predict(row)[0]

    same_cluster = (names_ids_df["cluster"] == recommendation_cluster).to_numpy()

    # Prefer the other songs of the cluster, so the user does not get their own song back
    recommendation_df = names_ids_df.loc[same_cluster & ~is_given_song]
    if recommendation_df.empty:
        recommendation_df = names_ids_df.loc[same_cluster]
    if recommendation_df.empty:
        raise ValueError(f"No song in the dataframe belongs to cluster {recommendation_cluster}.")

    # The random position has to be read from the cluster subset itself, and by column name,
    # so the recommendation really comes from the cluster.
    random_row = random.choice(range(len(recommendation_df)))
    recommended = recommendation_df.iloc[random_row]

    song_rec_name = recommended["song_name"].capitalize()
    song_rec_artist = recommended["artist_name"].capitalize()
    song_rec_id = recommended["song_id"]

    print(f"Spotify recommendation! A song you might like is {song_rec_name}, by {song_rec_artist}! ")

    return song_rec_id, song_rec_name, song_rec_artist



# Program flow

1. Creating the Spotify connection

In [3]:
sp = spotify_helper_functions.spotify_connection()

2. Load both datasets: the top songs and the Spotify songs

In [4]:
top_df = import_top_songs(path="top_songs.csv")
spotify_df = import_spotify_df(path="spotify_songs.csv")

In [5]:
top_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 171 entries, 0 to 170
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   songs    171 non-null    object
 1   artists  171 non-null    object
dtypes: object(2)
memory usage: 4.0+ KB


In [6]:
spotify_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 116107 entries, 0 to 137510
Data columns (total 15 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   song_name         116107 non-null  object 
 1   song_id           116107 non-null  object 
 2   artist_name       116107 non-null  object 
 3   artist_id         116107 non-null  object 
 4   danceability      116107 non-null  float64
 5   energy            116107 non-null  float64
 6   key               116107 non-null  int64  
 7   loudness          116107 non-null  float64
 8   mode              116107 non-null  int64  
 9   speechiness       116107 non-null  float64
 10  acousticness      116107 non-null  float64
 11  instrumentalness  116107 non-null  float64
 12  liveness          116107 non-null  float64
 13  valence           116107 non-null  float64
 14  tempo             116107 non-null  float64
dtypes: float64(9), int64(2), object(4)
memory usage: 14.2+ MB


3. Load the clustering model

In [7]:
model = clustering_music.load_model(path="music_model.pkl")

4. Get the user input

In [11]:
# Lower-cased so it can be compared with the song names, which both import functions store in lowercase.
user_input = input("Please insert the name of the song that you like: ").lower()

Please insert the name of the song that you like: hurricane


5. Check whether the song is in the top songs dataset

In [12]:
# is_top_song gives back an empty list when no top song resembles the input
top_song = is_top_song(user_input, top_df)

if len(top_song) == 1:
    # A single close match: recommend straight away
    top_recommended = recommend_top_song(top_song[0], top_df)
elif top_song:
    # Several close matches: the user picks the song they meant first
    song_choice = choice(top_song, spotify_df, sp, names_or_ids="names")
    top_recommended = recommend_top_song(song_choice, top_df)

TOP recommendation! A similar song to Hurricane that you might like is Don't go yet, by Camila cabello.


6. If it is not, search in the Spotify songs dataframe

In [13]:
# List with the song_id of every row whose name equals the closest match to the input;
# it is [] when no song name is close enough.
spoti_song = is_spotify_song(user_input, spotify_df)
print(spoti_song)

['0RbW8kWozrVSIGb4V13o0o', '6xHI9KjUjYT0FPtGO8Mxa1', '3KvyWiReIKqe3xJFL2qTPX', '7gQ5c5HY6zMsIck6QLY9dJ', '3s5NrHgyTkuinl0TgHCHeK', '7g0W6gCE9pHlnog8WLxr4a', '36GbZPJbjuxw4BahR1gPdM', '1tLp9KH0j1HoKdS1vnvkxO', '1pY0xt2c2NCnkEYbuWQjUG', '5XM0sOAUSCb4P7Q6G1w3jf', '3JoJjPXHPHvLXQRUiNbKfa', '2GFwwTIVLjnOrtP7m9luHC', '0fY2YiLKlxpcM3IghwmgeO']


### Possible cases:

- No matches in the database: we search possible tracks through the Spotify API. If there are any possibilities, we ask the user to make a choice. If not, we tell them there are no matches.

In [15]:
# No song of our dataframe matched: fall back to searching Spotify itself
if not spoti_song:
    print("No matches found in our songs database! Searching Spotify for options.")
    possible_tracks = spotify_helper_functions.find_possible_songs(user_input,sp)
    if not possible_tracks:
        print("Sorry, we didn't find any matches in Spotify")
    else:
        # The values of possible_tracks are offered to the user as song ids
        song_choice_id = choice(list(possible_tracks.values()), spotify_df, sp)
        for _ in range(n):
            spoti_recommended = recommend_spotify_song(song_choice_id, spotify_df, model, sp)

- If there is only one match in the database, we present the recommendations directly based on that one song.

In [23]:
# Exactly one song of our dataframe matched: show which one it is and recommend n songs based on it.
if len(spoti_song) == 1:
    song_id = spoti_song[0]
    # get_song_info is read here as [0] = song name, [1] = artist name
    song_info = spotify_helper_functions.get_song_info(song_id, sp)
    print(f"One match found in our songs database! {song_info[0].capitalize()}, by {song_info[1].capitalize()}")
    for _ in range(n):
        # Every call prints one recommendation; spoti_recommended only keeps the last one
        spoti_recommended = recommend_spotify_song(spoti_song[0], spotify_df, model, sp)


- If there is more than one match found in our database, we present the user with the options and then make the recommendation based on that.

In [20]:
# Several songs of our dataframe share the matched name: the user picks one of them by song id,
# and n recommendations are made based on that pick.
if len(spoti_song) > 1:
    print("Several matches found in our songs database!")
    song_choice_id = choice(spoti_song, spotify_df, sp)
    for _ in range(n):
        # Every call prints one recommendation; spoti_recommended only keeps the last one
        spoti_recommended = recommend_spotify_song(song_choice_id, spotify_df, model, sp)


Several matches found in our songs database!
Select one of the following choices:
Option 1 - Hurricane by Kanye West
Option 2 - Hurricane by Luke Combs
Option 3 - Hurricane by Lisa Loeb & Nine Stories
Option 4 - Hurricane by Bridgit Mendler
Option 5 - Hurricane by The Band Of Heathens
Option 6 - Hurricane by Ella Fence
Option 7 - Hurricane by Wafia
Option 8 - Hurricane by Fleurie
Option 9 - Hurricane by Sidney Charles
Option 10 - Hurricane by The Choir
Option 11 - Hurricane by Buried In Verona
Option 12 - Hurricane by I Prevail
Option 13 - Hurricane by The Asteroids Galaxy Tour
Selection: 8
Spotify recommendation! A song you might like is Sunday morning, by Nico! 
Spotify recommendation! A song you might like is I want to see the bright lights tonight, by Raissa! 
Spotify recommendation! A song you might like is Cant help falling in love with you, by Eminemmylou! 
Spotify recommendation! A song you might like is Burn rubber again - remix, by Azchike! 
Spotify recommendation! A song you