## Music Recommendation
* A recommender system is a software tool and algorithm that recommends the items
  that are most interesting to a user.

##  TYPES OF RECOMMENDATION SYSTEMS
* There are two basic types of recommendation systems:
  1. Content-based filtering.
  2. Collaborative filtering.

* Collaborative filtering: the process of predicting the interests of a user by identifying preferences and information from many users.

* Content-based filtering: gives recommendations based on the
user’s own preferences and profile.

###                       DATASET
* For our music recommendation algorithm, we use a Kaggle dataset that
contains around 600,000 songs from Spotify.

###                     ALGORITHM
* Three main clustering algorithms are implemented in this project:
1. K-Means clustering
2. Birch clustering
3. DBSCAN clustering

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns
import spotipy
import os

import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials

from collections import defaultdict
from scipy.spatial.distance import cdist
import difflib

from sklearn.cluster import KMeans
from sklearn.cluster import Birch
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# SECURITY: the Spotify API credentials below are committed in plain text.
# They should be rotated and supplied through the environment instead.
# The environment variables (the names spotipy itself recognises) take
# precedence; the literals remain only as a fallback so the notebook still runs.
client_id = os.environ.get("SPOTIPY_CLIENT_ID", "4813df40c194459085aed9c3ce7add62")
client_secret = os.environ.get("SPOTIPY_CLIENT_SECRET", "a30fdf36eca04b9186c75432d4ae27f3")

# Shared Spotify Web API client used by every lookup in this notebook.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id, client_secret))

%matplotlib inline

### Reading data and normalization

In [None]:
# Load the track table and strip date fragments from 'year' so that only the
# four-digit year remains, then store it as an integer.
spotify_data = pd.read_csv('./data/kaggle_spotify2/tracks.csv')
for date_fragment in ('[0-9]{2}/[0-9]{2}/', '-[0-9]{2}'):
    spotify_data = spotify_data.replace({'year': date_fragment}, {'year': ''}, regex=True)
spotify_data['year'] = spotify_data['year'].astype(int)

display(spotify_data.head())

# Keep the raw value ranges: normalize_song() reuses them to bring songs
# fetched from the Spotify API onto the same 0..1 scale as the dataset.
year_min, year_max = spotify_data["year"].min(), spotify_data["year"].max()
popularity_min, popularity_max = spotify_data["popularity"].min(), spotify_data["popularity"].max()
tempo_min, tempo_max = spotify_data["tempo"].min(), spotify_data["tempo"].max()
duration_ms_min, duration_ms_max = spotify_data["duration_ms"].min(), spotify_data["duration_ms"].max()
key_min, key_max = spotify_data["key"].min(), spotify_data["key"].max()
mode_min, mode_max = spotify_data["mode"].min(), spotify_data["mode"].max()
loudness_min, loudness_max = spotify_data["loudness"].min(), spotify_data["loudness"].max()

# Min-max normalize every feature that is not already in the 0..1 range.
# 'year' is intentionally left un-normalized.
for feature in ("popularity", "tempo", "duration_ms", "key", "mode", "loudness"):
    column = spotify_data[feature]
    spotify_data[feature] = (column - column.min()) / (column.max() - column.min())

display(spotify_data.head())

### Determine the columns we are going to use

In [None]:
# Feature columns that make up a song's vector for clustering and distance
# computations. The order matters: it fixes the column order of every matrix
# built from these features.
used_params = [
    'valence',
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'explicit',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'popularity',
    'speechiness',
    'tempo',
]

### Find song data on spotify and normalize it

In [None]:
def find_song_and_normalize(name, year=0, override_year=False):
    """Look a song up on Spotify and return it as a one-row, normalized DataFrame.

    Args:
        name: Track name to search for.
        year: Release year added to the search query; values <= 1 mean
            "search by name only".
        override_year: When true, the 'year' column is (re)set to ``year``
            after normalization. The column is already filled from ``year``
            beforehand, so this currently has no additional effect.

    Returns:
        A single-row ``pandas.DataFrame`` with the track metadata and audio
        features (normalized via ``normalize_song``), or ``None`` when Spotify
        returns no matching track or no audio features for it.
    """
    if year <= 1:
        query = 'track: {}'.format(name)
    else:
        query = 'track: {} year: {}'.format(name, year)

    items = sp.search(q=query, limit=1)['tracks']['items']
    if not items:
        return None

    track = items[0]
    audio_features = sp.audio_features(track['id'])[0]
    if audio_features is None:
        # The API answers with a null entry when it has no audio features for
        # the track; treat that like "song not found" instead of crashing.
        return None

    # List values make pandas build a one-row frame; the scalar audio features
    # added below are broadcast to that single row.
    song_data = {
        'name': [name],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
        'artists': [track['artists'][0]['name']],
        'year': [year],
    }
    for key, value in audio_features.items():
        song_data[key] = value

    # Bring the raw API values onto the same scale as the dataset.
    normalize_song(song_data)

    if override_year:
        song_data['year'] = [year]

    return pd.DataFrame(song_data)


def find_songs_and_normalize(songs, override_year=False):
    """Look up several songs on Spotify and stack the results into one DataFrame.

    Args:
        songs: Iterable of dicts with a 'name' key and an optional 'year' key
            (a missing year is treated as 0). The dicts are not modified.
        override_year: Forwarded to ``find_song_and_normalize``.

    Returns:
        A ``pandas.DataFrame`` with one row per song that was found, or
        ``None`` when no song could be found (including an empty ``songs``).
    """
    frames = []
    for s in songs:
        frame = find_song_and_normalize(s["name"], s.get("year", 0), override_year=override_year)
        # Songs Spotify does not know are skipped rather than stacked as None.
        if frame is not None:
            frames.append(frame)

    if not frames:
        return None
    return pd.concat(frames)

def normalize_song(song_data):
    """Min-max scale a song's raw features with the dataset-wide value ranges.

    The mapping is updated in place, using the ``*_min`` / ``*_max`` values
    saved when the dataset was loaded, and is also returned for convenience.
    'year' is deliberately left untouched.
    """
    feature_ranges = (
        ("popularity", popularity_min, popularity_max),
        ("tempo", tempo_min, tempo_max),
        ("duration_ms", duration_ms_min, duration_ms_max),
        ("key", key_min, key_max),
        ("mode", mode_min, mode_max),
        ("loudness", loudness_min, loudness_max),
    )
    for feature, lowest, highest in feature_ranges:
        song_data[feature] = (song_data[feature] - lowest) / (highest - lowest)

    return song_data

### Example using spotipy find song function

In [None]:
# Smoke test of the Spotify lookup: fetch one song by name and year and show
# the normalized one-row result.
# Alternative example input: {'name': 'I\'ll Be Waiting', 'year': 2011},
a = "Tessie"
song = find_song_and_normalize(a, year=2004)
display(song)

### Recommend songs functions

In [None]:
def get_song_data(song, spotify_data):
    """Return the feature data for one song.

    Args:
        song: Dict with a 'name' key and an optional 'year' key (a missing
            year is treated as 0). The dict is not modified.
        spotify_data: DataFrame with at least 'name' and 'year' columns.

    Returns:
        The first row of ``spotify_data`` matching both name and year, as a
        ``pandas.Series``. When the dataset has no such row, the result of
        ``find_song_and_normalize`` instead: a one-row DataFrame, or ``None``
        when Spotify does not know the song either.
    """
    year = song.get('year', 0)
    matches = spotify_data[(spotify_data['name'] == song['name'])
                           & (spotify_data['year'] == year)]
    if matches.empty:
        # Not in the local dataset: fall back to the Spotify Web API.
        return find_song_and_normalize(song['name'], year)
    return matches.iloc[0]
        

def get_mean_vector(song_list_data):
    """Return the element-wise mean of the feature vectors of a set of songs.

    Each row of ``song_list_data`` is reduced to the ``used_params`` columns;
    the rows are stacked into a matrix and averaged column by column.
    """
    song_vectors = [row[used_params].values for _, row in song_list_data.iterrows()]
    return np.mean(np.array(song_vectors), axis=0)

def flatten_dict_list(dict_list):
    """Turn a list of dicts into one dict of lists.

    Every key that occurs in any of the dicts maps to the list of values found
    for it, in input order. Keys missing from some dicts simply get shorter
    lists, and an empty ``dict_list`` yields an empty mapping.

    Returns:
        A ``defaultdict(list)`` keyed by the dict keys.
    """
    # The list factory lets keys that first appear in a later dict be
    # collected instead of raising KeyError.
    flattened_dict = defaultdict(list)
    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    return flattened_dict
       
    
def recommend_songs(song_list, spotify_data, pipeline, n_songs=10):
    """Recommend dataset songs close to the average of the given songs.

    Args:
        song_list: List of dicts with 'name' and optional 'year' keys.
        spotify_data: DataFrame holding the candidate songs, including the
            ``used_params`` feature columns and 'name', 'year', 'artists'.
        pipeline: Fitted scikit-learn pipeline whose first step is the scaler
            applied before distances are computed.
        n_songs: Number of nearest neighbours to take. Input songs are removed
            from those neighbours afterwards, so fewer may be returned.

    Returns:
        List of dicts with 'name', 'year' and 'artists' for each
        recommendation; an empty list when none of the input songs was found.
    """
    metadata_cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(song_list)

    # DataFrame.append was removed in pandas 2.0, so collect the per-song
    # results and concatenate them once.
    rows = []
    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            continue
        if isinstance(song_data, pd.Series):
            # A dataset hit is a single row; turn it into a one-row frame.
            song_data = song_data.to_frame().T
        rows.append(song_data)
    if not rows:
        return []

    # Cast to float: rows built from a transposed Series have object dtype.
    song_list_data = pd.concat(rows)[used_params].astype(float)
    song_center = get_mean_vector(song_list_data)

    scaler = pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[used_params])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))

    # Rank every dataset song by cosine distance to the mean vector.
    distances = cdist(scaled_song_center, scaled_data, 'cosine')
    index = list(np.argsort(distances)[:, :n_songs][0])

    rec_songs = spotify_data.iloc[index]
    rec_songs = rec_songs[~rec_songs['name'].isin(song_dict['name'])]
    return rec_songs[metadata_cols].to_dict(orient='records')

### Specify cluster pipeline and fit it for the scaler

In [None]:
# Fit a scaler on the full dataset. It is wrapped in a pipeline with a
# single-cluster KMeans step because recommend_songs() expects a pipeline
# whose first step is the scaler.
song_cluster_pipeline_global = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('kmeans', KMeans(n_clusters=1)),
])
X = spotify_data[used_params]
song_cluster_pipeline_global.fit(X)
scaler = song_cluster_pipeline_global.named_steps['scaler']

### Get spotify recommendation functions

In [None]:
def spotifyRecomend_extractData(tracks):
    """Reduce a Spotify recommendations response to name/artist pairs.

    Args:
        tracks: Response dict with a 'tracks' list; each entry provides 'name'
            and a non-empty 'artists' list.

    Returns:
        A list of ``{'name': ..., 'artists': ...}`` dicts, where 'artists'
        holds only the name of the first listed artist.
    """
    return [
        {'name': track['name'], 'artists': track['artists'][0]['name']}
        for track in tracks['tracks']
    ]


# Number of seed songs sent to the Spotify recommendation function per request.
NUM_SpotifySng= 5
def getSpotifyRecomendedSongs(listSongs, numOfRecc=3):
    """Ask Spotify for recommendations seeded with the given songs.

    The seed songs are sent in consecutive batches of at most
    ``NUM_SpotifySng`` rows, and the recommendations of all batches are
    concatenated.

    Args:
        listSongs: DataFrame of seed songs with an 'id' column.
        numOfRecc: Number of recommendations requested per batch.

    Returns:
        A list of ``{'name': ..., 'artists': ...}`` dicts, in batch order.
    """
    allRecomended = []
    for start in range(0, len(listSongs), NUM_SpotifySng):
        seed_batch = listSongs.iloc[start:start + NUM_SpotifySng]
        seed_ids = seed_batch['id'].values.tolist()
        response = sp.recommendations(seed_tracks=seed_ids, limit=numOfRecc)
        allRecomended.extend(spotifyRecomend_extractData(response))
    return allRecomended

### Recommend songs and vector calculation

#### 1) close to average vector recommendation

In [None]:
# Seed playlist for the "close to average vector" recommendation.
song_list = [{'name': 'Come As You Are', 'year':1991},
            {'name': 'Smells Like Teen Spirit', 'year': 1991},
            {'name': 'Lithium', 'year': 1992},
            {'name': 'All Apologies', 'year': 1993},
            {'name': 'Stay Away', 'year': 1993}]
# Feature data of the seed songs, looked up on Spotify (used for plotting).
input_songs_data = find_songs_and_normalize(song_list, override_year=True)
# Recommendations: dataset songs nearest to the seed songs' mean vector.
rec_songs = recommend_songs(song_list, spotify_data, song_cluster_pipeline_global, 5)
display(rec_songs)
# Look the recommended songs up on Spotify to get their feature data.
rec_songs_data = find_songs_and_normalize(rec_songs, override_year=True)
display(rec_songs_data[used_params])
# Mean vector of our recommendations; compared with Spotify's own
# recommendations in a later cell.
rec_song_mean = get_mean_vector(rec_songs_data)

# Grouped bar charts of selected features: seed songs first, then the
# recommended songs.
fig = px.bar(input_songs_data, x='name', y=['valence', 'energy', 'danceability', 'acousticness', 'liveness'], barmode='group')
fig.show()

fig = px.bar(rec_songs_data, x='name', y=['valence', 'energy', 'danceability', 'acousticness', 'liveness'], barmode='group')
fig.show()

#### 2) Spotify

### Example using spotipy recommendation

In [None]:
# Example: ask Spotify for recommendations seeded with a single song.
song_data = get_song_data({"name": "Euphoria", "year": 2012}, spotify_data)
# DataFrame.append was removed in pandas 2.0. A dataset hit is a Series (one
# row), so turn it into a one-row frame before building the seed table.
song_row = song_data.to_frame().T if isinstance(song_data, pd.Series) else song_data
song_list_data = pd.concat([song_row])
songs = getSpotifyRecomendedSongs(song_list_data)
display(songs)

#### get spotify recommendations and calculate average vector distance n times

In [None]:
# Build the seed table for Spotify's recommender from the current song_list.
# DataFrame.append was removed in pandas 2.0, so rows are collected and
# concatenated once; songs that cannot be found anywhere are skipped.
song_rows = []
for song in song_list:
    song_data = get_song_data(song, spotify_data)
    if song_data is None:
        continue
    if isinstance(song_data, pd.Series):
        # A dataset hit is a single row; turn it into a one-row frame.
        song_data = song_data.to_frame().T
    song_rows.append(song_data)
song_list_data = pd.concat(song_rows)

# Spotify's recommendations vary between calls, so the distance between our
# mean recommendation vector and Spotify's is averaged over 10 runs.
vec_sum = 0
for i in range(10):
    recomenddet_spot = getSpotifyRecomendedSongs(song_list_data, numOfRecc=5)
    recomenddet_spot_song_data = find_songs_and_normalize(recomenddet_spot)
    rec_song_spotify_mean = get_mean_vector(recomenddet_spot_song_data)

    # Calculate vector distance
    vec_dist = np.linalg.norm(rec_song_mean - rec_song_spotify_mean)
    print(vec_dist)
    vec_sum += vec_dist

vec_avg = vec_sum / 10
print("avg vec: " + str(vec_avg))

#### 3) Get average vector distance if we take 3 random songs from the database

In [None]:
# get random songs from data base
avg_dist = 0
for i in range(100):
    samp1 = spotify_data.sample(n = 3)
    samp2 = spotify_data.sample(n = 3)

    mean1 = get_mean_vector(samp1)
    mean2 = get_mean_vector(samp2)

    vec_dist = np.linalg.norm(mean1 - mean2)
    avg_dist += vec_dist
print(avg_dist / 100)

#### 4) Test recommended songs subjectively

In [None]:
# Show the recommended songs as a table for manual (subjective) inspection.
display(pd.DataFrame(rec_songs))

### Recommendation on list with songs from different genres

In [None]:
# Short mixed-genre playlist.
# NOTE(review): this assignment is immediately overwritten by the longer
# song_list defined right after it, so it currently has no effect.
song_list = [{'name': 'Come As You Are', 'year':1991},
            {'name': 'Smells Like Teen Spirit', 'year': 1991},
            {'name': 'Lithium', 'year': 1992},
            {'name': 'Toxicity', 'year':2001},
            {'name': 'Forest', 'year': 2001},
            {'name': 'B.Y.O.B.', 'year': 2005},
            {'name': 'Symphony No. 40 in G minor', 'year': 0},
            {'name': 'Piano Concerto No. 21', 'year': 0},
            {'name': 'Rondo for Piano in D Major, K. 485', 'year': 0},
            {'name': 'Euphoria', 'year': 2012},
            {'name': 'Hello', 'year': 2015},
            {'name': 'Someone like you', 'year': 2011}]

# Long mixed-genre playlist used as input for the clustering experiments.
# Blank lines separate groups of songs; commented-out entries are excluded.
# A year of 0 makes the lookup functions search by name only.
# NOTE(review): 'Lithium' (1991) is listed twice in the first group, so it is
# looked up twice — confirm whether the duplicate is intended.
song_list = [{'name': 'Come As You Are', 'year':1991},
            {'name': 'Smells Like Teen Spirit', 'year': 1991},
            {'name': 'Lithium', 'year': 1991},
            {'name': 'In Bloom', 'year': 1991},
            {'name': 'Breed', 'year': 1991},
            {'name': 'Polly', 'year': 1991},
            {'name': 'Drain You', 'year': 1991},
            {'name': 'Stay Away', 'year': 1991},
            {'name': 'Something In The Way', 'year': 1991},
            {'name': 'Territorial Pissings', 'year': 1991},
            {'name': 'Lithium', 'year': 1991},

            {'name': 'Toxicity', 'year':2001},
            {'name': 'Forest', 'year': 2001},
            {'name': 'Prison Song', 'year': 2001},
            {'name': 'Deer dance', 'year': 2001},
            {'name': 'X', 'year': 2001},
            {'name': 'Needles', 'year': 2001},
            {'name': 'Jet Pilot', 'year': 2001},
            {'name': 'Chop Suey!', 'year': 2001},
            {'name': 'Science', 'year': 2001},
            {'name': 'Aerials', 'year': 2001},
            {'name': 'Psycho', 'year': 2001},
            {'name': 'B.Y.O.B.', 'year': 2005},

            {'name': 'Symphony No. 40 in G minor', 'year': 0},
            {'name': 'Piano Concerto No. 21', 'year': 0},
            {'name': 'Rondo for Piano in D Major, K. 485', 'year': 0},

            {'name': 'Someone like you', 'year': 2011},
            {'name': 'Rolling in the Deep', 'year': 2011},
            {'name': 'Rumour Has It', 'year': 2011},
            {'name': 'Turning Tables', 'year': 2011},
            #{'name': 'Don\'t You Remember', 'year': 2011},
            {'name': 'Set Fire to the Rain', 'year': 2011},
            {'name': 'He Won\'t Go', 'year': 2011},
            {'name': 'Lovesong', 'year': 2011},
            {'name': 'Take It All', 'year': 2011},
            #{'name': 'I\'ll Be Waiting', 'year': 2011},
            {'name': 'Hello', 'year': 2015},

            {'name': 'Promises', 'year': 2011},
            {'name': '2808', 'year': 2011},
            {'name': 'Doomsday', 'year': 2011},
            #{'name': 'My Eyes', 'year': 2011},
            #{'name': 'Guilt', 'year': 2011},
            {'name': 'Fugue State', 'year': 2011},
            #{'name': 'Me and You', 'year': 2011},
            {'name': 'Innocence', 'year': 2011},
            {'name': 'Crush on You', 'year': 2011},
            {'name': 'Must Be the Feeling', 'year': 2011},
            {'name': 'Reaching Out', 'year': 2011},
            #{'name': 'Departure', 'year': 2011},

            {'name': 'Without Me', 'year': 2002},
            #{'name': 'White America', 'year': 2002},
            {'name': 'Business', 'year': 2002},
            {'name': 'Square Dance', 'year': 2002},
            #{'name': 'Soldier', 'year': 2002},
            {'name': 'Say Goodbye Hollywood', 'year': 2002},
            {'name': 'Sing for the Moment', 'year': 2002},
            {'name': 'Superman', 'year': 2002},
            {'name': 'Hailie\'s Song', 'year': 2002},
            {'name': 'When the Music Stops', 'year': 2002},
            {'name': 'Say What You Say', 'year': 2002},
            {'name': 'Till I Collapse', 'year': 2002},

            {'name': 'Someone You Loved', 'year': 2018},
            {'name': 'Grace', 'year': 2019},
            {'name': 'Bruises', 'year': 2017},
            {'name': 'Hold Me While You Wait', 'year': 2019},
            {'name': 'Lost on You', 'year': 2019},
            #{'name': 'Fade', 'year': 2019},

            {'name': 'No One', 'year': 2007},
            {'name': 'Go Ahead', 'year': 2007},
            {'name': 'Superwoman', 'year': 2007},
            {'name': 'Like You\'ll Never See Me Again', 'year': 2007},
            {'name': 'Lesson Learned', 'year': 2007},
            {'name': 'Wreckless Love', 'year': 2007},
            {'name': 'The Thing About Love', 'year': 2007},
            {'name': 'Teenage Love Affair', 'year': 2007},
            #{'name': 'I Need You', 'year': 2007},
            {'name': 'Where Do We Go from Here', 'year': 2007},

            #{'name': 'I\'m Shipping Up to Boston', 'year': 2006},
            {'name': 'Tessie', 'year': 2004},
            {'name': 'The Auld Triangle', 'year': 2005},
            {'name': 'The Green Fields of France (No Man\'s Land)', 'year': 2005},
            {'name': 'Your Spirit\'s Alive', 'year': 2005},

            {'name': 'Shout', 'year': 1985},
            {'name': 'The Working Hour', 'year': 1985},
            {'name': 'Everybody Wants to Rule the World', 'year': 1985},
            {'name': 'Mothers Talk', 'year': 1985},
            {'name': 'I Believe', 'year': 1985},
            #{'name': 'Broken', 'year': 1985},
            {'name': 'Head over Heels', 'year': 1985},
            #{'name': 'Listen', 'year': 1985},

            {'name': 'Euphoria', 'year': 2012}]

# Look every song up on Spotify; the resulting frame is what the clustering
# functions below label and split into clusters.
song_list_data = find_songs_and_normalize(song_list)

#### 1) Obtain artist and album genres function

In [None]:
def album_artist_genres(name, year=0):
    """Return the genres Spotify associates with a song's artist or album.

    The song is searched by name (and year, unless ``year`` is 0) to find its
    first artist. That artist's name is then searched again, and the first
    track of this second search supplies the album and artist that are
    queried for genres.

    Args:
        name: Track name.
        year: Release year; 0 means "search by name only".

    Returns:
        The album's genre list when it is non-empty, otherwise the artist's
        genre list. An empty list when either search returns no tracks.
    """
    if year == 0:
        query = 'track: {}'.format(name)
    else:
        query = 'track: {}, year: {}'.format(name, year)

    track_hits = sp.search(q=query)['tracks']['items']
    if not track_hits:
        # Nothing found: report "no genres" instead of raising IndexError.
        return []
    artist_name = track_hits[0]['artists'][0]["name"]

    artist_hits = sp.search(q="artist: {}".format(artist_name))['tracks']['items']
    if not artist_hits:
        return []
    track = artist_hits[0]

    album = sp.album(track["album"]["external_urls"]["spotify"])
    if album["genres"]:
        return album["genres"]

    # Only fetch the artist when the album itself carries no genres.
    artist = sp.artist(track["artists"][0]["external_urls"]["spotify"])
    return artist["genres"]

#### 2) Determine the number of clusters by grouping songs in genres

In [None]:
# Estimate the number of clusters by grouping the playlist's songs by genre.
# Each entry of `clusters` is a list of genre names; a song joins an existing
# entry when its genres overlap enough with it, otherwise it starts a new one.
clusters = []
for s in song_list:
    print(s["name"], s["year"])
    genres = album_artist_genres(s["name"], s["year"])
    cluster_found = 0
    for idx, c in enumerate(clusters):
        intersection_set = set.intersection(set(genres), set(c))
        
        # check if some cluster has similar genres
        # A cluster matches when at least a quarter of the song's genres are
        # already in it.
        # NOTE(review): the second condition can never be true, because the
        # intersection is a subset of c.
        # NOTE(review): an empty genre list satisfies the first condition for
        # every cluster (0 >= 0), so such a song never starts a new cluster.
        if len(intersection_set) >= len(genres) / 4 or len(intersection_set) > len(c):
            # merge lists
            # There is no break, so a song's genres are merged into every
            # matching cluster.
            clusters[idx].extend(list(set(genres) - set(c)))
            cluster_found = 1
            
    if cluster_found == 0:
        # add new list
        clusters.append(genres)
            
# The number of genre groups is used as the cluster count by the K-Means and
# Birch cells below.
clusters_num = len(clusters)
print("Clustered genres")
display(clusters)
print("number of clusters:", clusters_num)

#### 3) Recommend songs for each cluster function

In [None]:
def recommend_clusters(song_list_data, clusters_num, pipeline):
    """Recommend songs per cluster and compare them with Spotify's recommendations.

    For every cluster label ``0 .. clusters_num - 1`` the function takes the
    songs carrying that label, recommends dataset songs for them, keeps the
    five most popular, and prints the Euclidean distance between their mean
    feature vector and the mean vector of songs recommended by Spotify for
    the same seeds. The average distance over all compared clusters is
    printed at the end.

    Args:
        song_list_data: DataFrame with 'name', 'year' and 'cluster_label'.
        clusters_num: Number of cluster labels to iterate over.
        pipeline: Fitted pipeline forwarded to ``recommend_songs``.

    Returns:
        ``allRecomendetSongs`` — currently always an empty list, because the
        accumulation of per-cluster recommendations is disabled.
    """
    allRecomendetSongs = []
    vec_sum = 0
    compared_clusters = 0
    for i in range(clusters_num):
        print("### Cluster " + str(i + 1) + " ###")
        frame = song_list_data.loc[song_list_data['cluster_label'] == i]
        if frame.empty:
            # A label without members (e.g. no noise points after DBSCAN)
            # has nothing to recommend for.
            print("Cluster is empty")
            continue
        display(frame)
        song_list_cluster = [{"name": n, "year": y}
                             for n, y in zip(frame["name"], frame["year"])]

        rec_songs = recommend_songs(song_list_cluster, spotify_data, pipeline, 10)
        rec_songs_data = find_songs_and_normalize(rec_songs, override_year=True)
        # Check for "nothing found" before touching the frame.
        if rec_songs_data is None:
            print("Songs not found")
            continue
        # sort by popularity and get the most popular
        rec_songs_data = rec_songs_data.sort_values(by=['popularity'], ascending=False).head(5)
        display(rec_songs_data[["name", "artists"]])
        rec_song_mean = get_mean_vector(rec_songs_data)

        # Seed table for Spotify's recommender. DataFrame.append was removed
        # in pandas 2.0, so rows are collected and concatenated once.
        seed_rows = []
        for song in song_list_cluster:
            song_data = get_song_data(song, spotify_data)
            if song_data is None:
                continue
            if isinstance(song_data, pd.Series):
                song_data = song_data.to_frame().T
            seed_rows.append(song_data)
        if not seed_rows:
            print("Songs not found")
            continue
        song_list_spotify_data = pd.concat(seed_rows)

        # Spotify's answers vary between calls; pool ten rounds of them.
        all_spotify_rec = []
        for _ in range(10):
            all_spotify_rec += getSpotifyRecomendedSongs(song_list_spotify_data, numOfRecc=10)
        recomenddet_spot_song_data = find_songs_and_normalize(all_spotify_rec)
        if recomenddet_spot_song_data is None:
            print("Songs not found")
            continue
        rec_song_spotify_mean = get_mean_vector(recomenddet_spot_song_data)

        vec_dist = np.linalg.norm(rec_song_mean - rec_song_spotify_mean)
        print(vec_dist)
        vec_sum += vec_dist
        compared_clusters += 1

        # allRecomendetSongs += rec_songs
        print("--------------------")

    # Average only over clusters that were actually compared; this also
    # avoids dividing by zero when there are none.
    if compared_clusters:
        vec_avg = vec_sum / compared_clusters
        print("Average vector: " + str(vec_avg))
    else:
        print("No clusters could be compared")
    return allRecomendetSongs

#### 4) k-clustering

In [None]:
def K_clustering(input_songs):
    """Cluster the playlist with K-Means and recommend songs per cluster.

    Works on the module-level ``song_list_data`` (a 'cluster_label' column is
    added to it) and uses the module-level ``clusters_num`` as the number of
    clusters. The scaler of the fitted pipeline, i.e. one fitted on the
    playlist only, is the one used for the recommendations.

    Args:
        input_songs: Not used; kept for interface compatibility.

    Returns:
        The result of ``recommend_clusters``.
    """
    song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                                      ('kmeans', KMeans(n_clusters=clusters_num))])
    X2 = song_list_data[used_params]
    # Fit on the playlist only. A preceding fit on the full dataset would be
    # thrown away, because fit() re-estimates every step from scratch.
    song_cluster_pipeline.fit(X2)
    song_list_data['cluster_label'] = song_cluster_pipeline.predict(X2)

    return recommend_clusters(song_list_data, clusters_num, song_cluster_pipeline)

# Run the K-Means variant and show its result as a table.
# NOTE(review): the table is stored in `table_birch` although it holds the
# K-Means result; the Birch cell later reuses that name.
resK = K_clustering(song_list)
table_birch = pd.DataFrame.from_dict(resK)
display(table_birch)

### Birch clustering

In [None]:
# Fetch the playlist data again so the Birch run starts from a frame without
# the 'cluster_label' column added by the previous clustering run.
song_list_data = find_songs_and_normalize(song_list)
print("number of clusters:", clusters_num)

In [None]:
def Birch_clustering(input_songs):
    """Cluster the playlist with Birch and recommend songs per cluster.

    Works on the module-level ``song_list_data`` (a 'cluster_label' column is
    added to it) and uses the module-level ``clusters_num`` as the number of
    clusters. Recommendations use the scaler of
    ``song_cluster_pipeline_global``.

    Args:
        input_songs: Not used; kept for interface compatibility.

    Returns:
        The result of ``recommend_clusters``.
    """
    model = Birch(threshold=0.01, n_clusters=clusters_num)
    # The model is fitted on the playlist features only; the full dataset is
    # not needed here.
    X2 = song_list_data[used_params]
    model.fit(X2)
    song_list_data['cluster_label'] = model.predict(X2)

    return recommend_clusters(song_list_data, clusters_num, song_cluster_pipeline_global)

# Run the Birch variant and show its result as a table.
resB = Birch_clustering(song_list)
table_birch = pd.DataFrame.from_dict(resB)
display(table_birch)

### DBSCAN clustering

In [None]:
# Fetch the playlist data again so the DBSCAN run starts from a frame without
# the 'cluster_label' column added by the previous clustering run.
song_list_data = find_songs_and_normalize(song_list)
print("number of clusters:", clusters_num)

In [None]:
def DBSCAN_clustering(input_songs):
    """Cluster the playlist with DBSCAN and recommend songs per cluster.

    Works on the module-level ``song_list_data`` (a 'cluster_label' column is
    added to it). The number of clusters is derived from the labels DBSCAN
    produces rather than from ``clusters_num``. Recommendations use the
    scaler of ``song_cluster_pipeline_global``.

    Args:
        input_songs: Not used; kept for interface compatibility.

    Returns:
        The result of ``recommend_clusters``.
    """
    model = DBSCAN(eps=0.7, min_samples=2)
    # The model is fitted on the playlist features only; the full dataset is
    # not needed here.
    X2 = song_list_data[used_params]
    # DBSCAN labels noise points -1; shifting by one makes every label
    # non-negative, so label 0 holds the noise points.
    song_list_data['cluster_label'] = model.fit_predict(X2) + 1

    # Labels now run from 0 to max, so there are max + 1 of them.
    cluster_num = song_list_data['cluster_label'].max() + 1

    return recommend_clusters(song_list_data, cluster_num, song_cluster_pipeline_global)

# Run the DBSCAN variant and show its result as a table.
resDBS = DBSCAN_clustering(song_list)
table_DBSCAN = pd.DataFrame.from_dict(resDBS)
display(table_DBSCAN)