### Initialization
![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

In [1]:
import time
import json
import pprint
import config
import base64
import spotipy
import requests
import datetime
import seaborn as sns
import pandas as pd
from functools import lru_cache
from bs4 import BeautifulSoup
from urllib.parse import urlencode
from spotipy.oauth2 import SpotifyClientCredentials

from IPython.display import IFrame

import pandas as pd
import numpy as np
from plotly import express as px
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from matplotlib import pyplot as plt

import pickle

import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)

pd.set_option('display.max_columns', None)

In [2]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.clientId,
                                                           client_secret= config.clientSecret))

In [3]:
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username, playlist_id)
    tracks = results["items"]
    while results["next"]:
        results = sp.next(results)
        tracks.extend(results["items"])
    return tracks

#### Functions

In [4]:
# selective call to function
class dataProfiling:

    def moreInfo(self, data):    
        # more data info
        print(f"MORE DATA INFO :")
        print("-" * 109)

        print(f"Data shape : {data.shape}\n")

        # counts no. total values in the data
        print(f"No. of values in the dataset : {data.size:,}")

        # counts no. of rows
        print(f"Total rows in the dataset : {len(data):,}")

        # counts no. of columns
        print(f"Total columns in the dataset : {len(data.columns):,}")

        # counts no. of missing values
        print(f"\nTotal null values : {data.isnull().sum().sum():,}")

        # counts no. of duplicates
        countDuplicates = data.duplicated().sum()
        print(f"Total duplicated rows : {format(countDuplicates, ',')}")

        # compute missing/duplicates ratio
        print(f"\nRATIO OF MISSING AND DUPLICATED VALUES IN OUR DATA :")
        print("-" * 109)

        # ratio for missing values
        isnull_ratio = (data.isnull().sum().sum() / data.size) * 100

        # ratio for duplicated values
        dup_ratio = (countDuplicates / len(data)) * 100

        # return results
        print(f"\nPercentage of null values in the data : {round(isnull_ratio, 2)}%")
        print(f"Percentage of duplicates in the data : {round(dup_ratio, 2)}%")
        print("\n")

    def valueCounts(self, data):
        # count no. of values per column
        print(f"\nCOUNT VALUES FOR EACH COLUMN :")
        print("-" * 109)
        for column in data.columns:
            countValues = data[column].value_counts(dropna=False).to_frame()
            totalValues = data[column].count()
            print(f"VALUE_COUNTS for column '{column}' : {len(countValues)} rows, {totalValues} values in Total\n")
            display(countValues)
            print("-" * 109)

    def nullColumns(self, data):
        # displays no. of missing values per column
        print(f"\nTOTAL MISSING VALUES FOR EACH COLUMN :")
        display(data.isnull().sum().to_frame().rename(columns={0: "MISSING VALUES"}))
        print("\n")

    def nullGroupby(self, data, column):
        # displays no. of missing values per category
        print(f"\nTOTAL MISSING VALUES PER COLUMN FOR EACH UNIQUE VALUE :")        
        display(data.isna().groupby(data[column]).sum())

        print("\n")
    def visualizeNulls(self, data):
        # visualize missing values
        print(f"VISAULIZE MISSNG VALUES :")
        print("-" * 109)

        # method 1
        print("METHOD 1")
        sns.heatmap(data.isnull().T,
                    xticklabels=False,
                    cmap="viridis",
                    cbar_kws={"label": "Missing Values"})

        plt.tight_layout()
        plt.show()
        print("\n")

        # method 2
        print("METHOD 2")
        plt.imshow(data.isna(), aspect="auto", interpolation="nearest", cmap="gray")
        plt.xlabel("Column Number")
        plt.ylabel("Sample Number");
        print("\n")

    def nullFeature(self, data, column):
        # displays rows where missing values are found of a specific column 
        print(f"DISPLAYS ROWS WHERE MISSING VALUES ARE FOUND OF A SPECIFIC FEATURE:")
        display(data[data[column].isna()])
        print("\n")

    def nullData(self, data):
        # displays rows where missing values are found
        print(f"DISPLAYS ROWS WHERE MISSING VALUES ARE FOUND :")
        display(data[data.isnull().any(axis=1)])
        print("\n")

    def allStats(self, data):
        # overall descerptive analysis (nuemrical and categorical)
        print(f"FULL DATA DESCRIPTIVE STATISTICS :")
        print("-" * 109)
        display(data.describe(include="all"))
        print("\n")

    def descriptiveData(self, data):
        # do quick descriptive statistics
        print(f"QUICK DESCRIPTIVE ANALYSIS :")
        print("-" * 109)
        display(data.describe())
        print("\n")

    def nonNumericStats(self, data):
        # categorical statistics
        print(f"NON-NUMERICAL STATISTICS :")
        print("-" * 109)
        display(data.describe(exclude="number"))
        print("\n")

    def uniqueColumns(self, data):
        # view all unique values for each column
        print(f"UNIQUE VALUES FOR EACH COLUMN :")
        print("-" * 109)

        for column in data.columns:
            uniqueValues = data[column].unique()
            uniqueCount = data[column].nunique()
            print(f"UNIQUE VALUES for column '{column}' : {len(uniqueValues)} including NaN values, {uniqueCount} excluding Nan values\n")
            print(f"{uniqueValues}\n")
            print("-" * 109)

    def plotUniques(self, data):
        # for each numerical feature compute number of unique entries
        unique_values = data.select_dtypes(include="number").nunique().sort_values()

        # plot information with y-axis in log-scale
        unique_values.plot.bar(logy=False, title="No. of unique values per feature", figsize=(25,7))
        plt.xticks(rotation=0)
        plt.tight_layout()
        plt.show()

# instance of the dataProfiling class
profiler = dataProfiling()

## data hunting

In [5]:
playlists = {
    "Who's That Girl": "0cLPcrlTcogy15QDfsQMWH",
    "Eurovision 2023": "37i9dQZF1DWVCKO3xAlT1Q",
    "Soft 50s": "37i9dQZF1DWXcg95telZlE",
    "50s Party": "37i9dQZF1DWSwFS0Z6E1ep",
    "All Out 50s": "37i9dQZF1DWSV3Tk4GO2fq",
    "All Out 60s": "37i9dQZF1DXaKIA8E7WcJj",
    "Années 60": "37i9dQZF1DX7Uol5MpckMS",
    "All Out 70s": "37i9dQZF1DWTJ7xPn4vNaz",
    "Best of Rock 1970s": "37i9dQZF1DXe9Gx5fVy1RT",
    "Top Hits of 1970s": "37i9dQZF1DWXQyLTHGuTIz",
    "Top Global 2023 - Weekly Charts": "37i9dQZEVXbNG2KDcFcKOF",
    "All Out 2000s": "37i9dQZF1DX4o1oenSJRJd",
    "Gold School (Hip Hop)": "37i9dQZF1DWVA1Gq4XHa6U",
    "I Love My 90's Hip Hop": "37i9dQZF1DX186v583rmzp",
    "Van life": "37i9dQZF1DX2ogDiL6nZJr",
    "Good Vibes": "37i9dQZF1DWYBO1MoTDhZI",
    "On the Road": "37i9dQZF1DX1S89tRXh2OS",
    "Electro Chill": "37i9dQZF1DX9ND1QF5hZNF",
    "Summer Tubes": "37i9dQZF1DWVf5yEjHHamn",
    "Beach Vibes": "37i9dQZF1DX83I5je4W4rP",
    "Summer Love": "37i9dQZF1DWY0BsMpCzERZ",
    "Pop 2010": "37i9dQZF1DX8E06AbSENEw",
    "Internet Rewind": "37i9dQZF1DWSPMbB1kcXmo",
    "Equal K-Pop": "37i9dQZF1DX6Cy4Vr7Hu2y",
    "Door Knockers - Classics from women in Hip hop": "37i9dQZF1DX9iGsUcr0Bpa",
    "Baroque Classics": "37i9dQZF1DWXjj6kdiviS0",
    "Romantische Klassik": "37i9dQZF1DWXQD9LUA72yg",
    "Classical Essentials": "37i9dQZF1DWWEJlAGA9gs0",
    "Opera Classics": "37i9dQZF1DWVfS4Cdd3PNf",
    "Jazz Classics": "37i9dQZF1DXbITWG1ZJKYt",
    "20+20+20+20 (80s)": "4Cw5GB3gHYT4UtRLxiafxt",
    "Grrrrls": "2ZalQGM5fnEGFEAWFxnUP0",
    "This Is Ratatat": "37i9dQZF1DZ06evO30oJa0",
    "Top Throwbacks 2022": "37i9dQZF1DXdpy4ZQQMZKm",
    "This Is Suede": "37i9dQZF1DZ06evO42t81I",
    "Alternative 90s": "37i9dQZF1DXaLI8a395lse",
    "Britpop etc": "37i9dQZF1DXaVgr4Tx5kRF",
    "Légendes du Rock": "37i9dQZF1DWXTHBOfJ8aI7",
    "I Love My 90s R&B": "37i9dQZF1DX6VDO8a6cQME",
    "Flashback (Soul & Funk)": "37i9dQZF1DWXncK9DGeLh7",
    "Sad Soul": "37i9dQZF1DXchlyaSeZp0q",
    "Pop Chef": "37i9dQZF1DWXGPruVsfHKD",
    "Funk & Soul Classics": "37i9dQZF1DWWvhKV4FBciw",
    "Love Ballads": "37i9dQZF1DWYMvTygsLWlG",
    "Sad Songs": "37i9dQZF1DX7qK8ma5wgG1",
    "Indie Love Songs": "37i9dQZF1DWZIQpJDqCc10",
    "Iconic Soundtracks": "37i9dQZF1DX1tz6EDao8it",
    "Radio Ratatat": "37i9dQZF1E4qOvEr6iONpb",
    "Radio Rangleklods": "37i9dQZF1E4oIf3a2eTRNV",
    "Radio Björk": "37i9dQZF1E4Dy7CFyohSGG",
    "Radio John Adams": "37i9dQZF1E4AbPpZmpoNtA",
    "Radio Mark Morrison": "37i9dQZF1E4BjguqgbGnPZ",
    "Radio U2": "37i9dQZF1E4lGTiNNgejAf",
    "Radio Tash Sultana": "37i9dQZF1E4yvrFcE1Ebu2",
    "Radio France Joli": "37i9dQZF1E4vvvLV0VoMm7",
    "Radio Darondo": "37i9dQZF1E4nP8olYLWc7X",
    "Radio Rodriguez": "37i9dQZF1E4oc4Q8MNZjh6",
    "Radio Nina Simone": "37i9dQZF1E4nhi0B8j7Dp8",
    "Radio Beyoncé": "37i9dQZF1E4B0yOyj0F53g",
    "Radio Billie Eilish": "37i9dQZF1E4oee5QSbfqLN",
    "Radio Honne": "37i9dQZF1E4uE7XrQEJTw4",
    "Radio Nat King Cole": "37i9dQZF1E4vSoikuLfYSw",
    "Radio Shigeru Umebayashi": "37i9dQZF1E4z4GftZr48PU",
    "Radio Yo-Yo Ma": "37i9dQZF1E4jPjmMkFX0XY",
    "Radio John Williams": "37i9dQZF1E4tbkEjnlwI4U",
    "Radio Alicia Keys": "37i9dQZF1E4myiUG0z7BzX",
    "Radio Patti Smith": "37i9dQZF1E4nIsD70hZdhC",
    "Radio SIA": "37i9dQZF1E4oH2NG2UyvvJ",
    "Radio Radiohead": "37i9dQZF1E4EgTnU7Hvt0A",
    "Radio Taylor Swift": "37i9dQZF1E4AfEUiirXPyP",
    "Rock of the Bands": "37i9dQZF1E8P6ChwSMJUe1",
    "Rock Stars": "37i9dQZF1E8Ut0hWeNgNgi",
    "Attitude": "3nqQXoyQOWXiESFLlDF1hG",
    "I'm Feeling Sexy Today": "6I9VzXrHxO9rA9A5euc8Ak",
    "Most Amazing Christmas Season Ever": "37i9dQZF1E8HZl5GknRK3x",
    "Back in Time": "37i9dQZF1E8BjPYRXyTLb7",
    "That Love": "37i9dQZF1E8Odrx4F9pSag",
    "Darn You, Son of a Beat": "37i9dQZF1E8QhQJSKgG3ab",
    "Hey, Teen Me": "37i9dQZF1E8N4hfYGRochR",
    "I'm Feeling Vibing Today": "1V6gIisPpYqgFeWbMLI0bA",
    "The Kind of Day": "37i9dQZF1E8D5ipngh2Cuw",
    "Haizz": "37i9dQZF1E8BbjDDunyE9T",
    "Cute Crushes": "37i9dQZF1E8Hf8odruMtQj",
    "Radio Ariana Grande": "37i9dQZF1E4xstu1WxmJS4"}

In [6]:
# @lru_cache(maxsize=1000)
# def get_artist_info(artist_uri):
#     return sp.artist(artist_uri)

# @lru_cache(maxsize=1000)
# def get_audio_features(track_id):
#     return sp.audio_features(track_id)[0]

# # maximum number of retries for the API call
# max_retries = 5

# # list to hold track details
# track_details = []

# # iterate over each playlist in the dictionary
# for playlist_name, playlist_id in playlists.items():

#     for i in range(max_retries):
#         try:
#             # first page of playlist tracks
#             results = sp.playlist_tracks(playlist_id)

#             # While there are more pages (tracks) to be fetched
#             while results:

#                 # extract track info from the current page of the playlist
#                 for song in results["items"]:
                    
#                     track_dict = {}

#                     # add the playlist name
#                     track_dict["fromPlaylist"] = playlist_name

#                     # TRACK
#                     # track_dict["trackURI"] = song["track"]["uri"]
#                     # track_dict["trackID"] = song["track"]["id"]
#                     track_dict["trackName"] = song["track"]["name"]
#                     track_dict["trackPopularity"] = song["track"]["popularity"]
#                     # track_dict["trackDuration"] = song["track"]["duration_ms"]
#                     track_dict["trackNumber"] = song["track"]["disc_number"]
#                     track_dict["isExplicit"] = song["track"]["explicit"]

#                     # ARTIST
#                     artist_uri = song["track"]["artists"][0]["uri"]
#                     if isinstance(artist_uri, str):  # check if artist_uri is a string
#                         track_dict["artistID"] = song["track"]["artists"][0]["id"]
#                         track_dict["artistName"] = song["track"]["artists"][0]["name"]
        
#                         artist_info = get_artist_info(artist_uri)  # call the function if artist_uri is a string
                        
#                         track_dict["artistPopularity"] = artist_info["popularity"]
#                         track_dict["artistGenre"] = artist_info["genres"][0] if artist_info["genres"] else None

#                     else:
#                         print(f"Unexpected type for artist_uri: {type(artist_uri)}")

#                     # ALBUM
#                     track_dict["albumName"] = song["track"]["album"]["name"]
#                     track_dict["albumID"] = song["track"]["album"]["id"]
#                     track_dict["albumType"] = song["track"]["album"]["album_type"]
#                     release_date = song["track"]["album"]["release_date"]
#                     if release_date is not None:
#                         track_dict["releaseDate"] = pd.to_datetime(release_date).year
#                     else:
#                         track_dict["releaseDate"] = None
#                     track_dict["album_nTracks"] = song["track"]["album"].get("total_tracks", None)
#                     album_images = song["track"]["album"]["images"]
#                     if album_images:
#                         track_dict["albumCover"] = album_images[0]["url"]
#                     else:
#                         track_dict["albumCover"] = None

#                     # AUDIO FEATURES
#                     track_id = song["track"]["id"] # track_dict["trackID"]
#                     if track_id is not None:
#                         audio_features = get_audio_features(track_id)
#                         track_dict.update(audio_features)
#                     else:
#                         print(f"Unexpected type for trackID: {type(track_id)}")

#                     # append the track_dict to the list
#                     track_details.append(track_dict)

#                 # get next page of tracks
#                 results = sp.next(results)

#             # break loop if the API call was successful
#             break

#         except spotipy.exceptions.SpotifyException as e:
#             # if exception is a rate limiting error (error 429), wait and retry
#             if e.http_status == 429:
#                 print("Rate limit exceeded. Waiting...")
#                 time.sleep(int(e.headers.get("Retry-After", 10)))
#             else:
#                 # if different kind of error, we will handle it differently, let's see..
#                 # or re-raise the exception if it's not something our script can recover from
#                 raise e

# # convert the list of dictionaries to a DataFrame
# spotipyData = pd.DataFrame(track_details)
# spotipyData

## data profiling

In [7]:
# profiler.nullColumns(spotipyData)

In [8]:
# spotipyData.to_csv("FINAL.csv")

## data preparation

In [9]:
# QUEST 1: Load the Pandas data frame songs of the audio features (at least 1000 songs as different as possible)
data = pd.read_csv("FINAL.csv").iloc[:, 1:]
data.head()

Unnamed: 0,fromPlaylist,trackName,trackPopularity,trackNumber,isExplicit,artistID,artistName,artistPop,artistGenre,albumName,albumID,albumType,releaseDate,album_nTracks,albumCover,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,Who's That Girl,Violet,64,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.287,0.78,7,-6.493,1,0.0469,0.049,3.7e-05,0.184,0.383,167.278,audio_features,6CHENx8iqzMwavTIz8s0gm,spotify:track:6CHENx8iqzMwavTIz8s0gm,https://api.spotify.com/v1/tracks/6CHENx8iqzMw...,https://api.spotify.com/v1/audio-analysis/6CHE...,204933,4
1,Who's That Girl,Miss World,54,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.331,0.722,7,-8.461,1,0.0408,0.00204,5.3e-05,0.256,0.349,125.575,audio_features,3yMFBuIdPBdJkkzaPBDjKY,spotify:track:3yMFBuIdPBdJkkzaPBDjKY,https://api.spotify.com/v1/tracks/3yMFBuIdPBdJ...,https://api.spotify.com/v1/audio-analysis/3yMF...,180067,4
2,Who's That Girl,Doll Parts,61,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.245,0.542,7,-7.159,1,0.0324,0.147,4e-06,0.102,0.365,204.824,audio_features,49t1GWE6ZiEoBgN92oMDdM,spotify:track:49t1GWE6ZiEoBgN92oMDdM,https://api.spotify.com/v1/tracks/49t1GWE6ZiEo...,https://api.spotify.com/v1/audio-analysis/49t1...,211960,4
3,Who's That Girl,Rock Star,47,1,True,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.308,0.868,4,-6.44,1,0.144,0.00514,4.5e-05,0.27,0.25,125.782,audio_features,72xpgWWuEyLWFFTIKnrAbZ,spotify:track:72xpgWWuEyLWFFTIKnrAbZ,https://api.spotify.com/v1/tracks/72xpgWWuEyLW...,https://api.spotify.com/v1/audio-analysis/72xp...,162200,4
4,Who's That Girl,Lark,0,1,False,6mKqFxGMS5TGDZI3XkT5Rt,Angel Olsen,55,alternative americana,All Mirrors,0RedX0LZkGUFoRwFntAaI0,album,2019,11,https://i.scdn.co/image/ab67616d0000b2732b31fc...,0.447,0.56,7,-8.36,1,0.0316,0.00311,0.0991,0.22,0.0422,98.986,audio_features,13rEyddPoVpxXWWv2aTjqo,spotify:track:13rEyddPoVpxXWWv2aTjqo,https://api.spotify.com/v1/tracks/13rEyddPoVpx...,https://api.spotify.com/v1/audio-analysis/13rE...,378827,4


In [10]:
data["identifier"] = data.index
data.index = data["identifier"]
del data["identifier"]

data["identifier"] = data.index
data.reset_index(drop=True, inplace=True)
data.head()

Unnamed: 0,fromPlaylist,trackName,trackPopularity,trackNumber,isExplicit,artistID,artistName,artistPop,artistGenre,albumName,albumID,albumType,releaseDate,album_nTracks,albumCover,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,identifier
0,Who's That Girl,Violet,64,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.287,0.78,7,-6.493,1,0.0469,0.049,3.7e-05,0.184,0.383,167.278,audio_features,6CHENx8iqzMwavTIz8s0gm,spotify:track:6CHENx8iqzMwavTIz8s0gm,https://api.spotify.com/v1/tracks/6CHENx8iqzMw...,https://api.spotify.com/v1/audio-analysis/6CHE...,204933,4,0
1,Who's That Girl,Miss World,54,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.331,0.722,7,-8.461,1,0.0408,0.00204,5.3e-05,0.256,0.349,125.575,audio_features,3yMFBuIdPBdJkkzaPBDjKY,spotify:track:3yMFBuIdPBdJkkzaPBDjKY,https://api.spotify.com/v1/tracks/3yMFBuIdPBdJ...,https://api.spotify.com/v1/audio-analysis/3yMF...,180067,4,1
2,Who's That Girl,Doll Parts,61,1,False,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.245,0.542,7,-7.159,1,0.0324,0.147,4e-06,0.102,0.365,204.824,audio_features,49t1GWE6ZiEoBgN92oMDdM,spotify:track:49t1GWE6ZiEoBgN92oMDdM,https://api.spotify.com/v1/tracks/49t1GWE6ZiEo...,https://api.spotify.com/v1/audio-analysis/49t1...,211960,4,2
3,Who's That Girl,Rock Star,47,1,True,5SHQUMAmEK5KmuSb0aDvsn,Hole,60,alternative rock,Live Through This,2Rwf2nPYZQ9aIe4QXACTC7,album,1994,12,https://i.scdn.co/image/ab67616d0000b273be245e...,0.308,0.868,4,-6.44,1,0.144,0.00514,4.5e-05,0.27,0.25,125.782,audio_features,72xpgWWuEyLWFFTIKnrAbZ,spotify:track:72xpgWWuEyLWFFTIKnrAbZ,https://api.spotify.com/v1/tracks/72xpgWWuEyLW...,https://api.spotify.com/v1/audio-analysis/72xp...,162200,4,3
4,Who's That Girl,Lark,0,1,False,6mKqFxGMS5TGDZI3XkT5Rt,Angel Olsen,55,alternative americana,All Mirrors,0RedX0LZkGUFoRwFntAaI0,album,2019,11,https://i.scdn.co/image/ab67616d0000b2732b31fc...,0.447,0.56,7,-8.36,1,0.0316,0.00311,0.0991,0.22,0.0422,98.986,audio_features,13rEyddPoVpxXWWv2aTjqo,spotify:track:13rEyddPoVpxXWWv2aTjqo,https://api.spotify.com/v1/tracks/13rEyddPoVpx...,https://api.spotify.com/v1/audio-analysis/13rE...,378827,4,4


In [11]:
# QUEST 2: Standardize the data using Standardscaler
toNormalize = ["trackPopularity",
                  "isExplicit",
                  "artistPop",
                  "releaseDate",
                  "danceability",
                  "energy",
                  "key",
                  "loudness",
                  "mode",
                  "speechiness",
                  "acousticness",
                  "instrumentalness",
                  "liveness",
                  "valence",
                  "tempo",
                  "duration_ms",
                  "time_signature"]

scaler = StandardScaler()
X_normalized = scaler.fit_transform(data[toNormalize])

# QUEST 3
# Save the scaler for future use on the new user input song, using Pickle
with open("scaler.pickle", "wb") as f:
    pickle.dump(scaler, f)

# store into a dataframe
normalizedFeatures = pd.DataFrame(X_normalized, columns=toNormalize)

# add identifier
normalizedFeatures["identifier"] = normalizedFeatures.index
normalizedFeatures.index = normalizedFeatures["identifier"]
del normalizedFeatures["identifier"]
normalizedFeatures.head()

Unnamed: 0_level_0,trackPopularity,isExplicit,artistPop,releaseDate,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
identifier,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,0.257207,-0.323661,-0.149228,-0.19954,-1.410212,0.98086,0.494757,0.634119,0.763417,-0.313025,-1.003993,-0.495445,0.11881,-0.399453,1.75279,-0.358869,0.270473
1,-0.252119,-0.323661,-0.149228,-0.19954,-1.180206,0.755968,0.494757,0.308856,0.763417,-0.401592,-1.137671,-0.495389,0.660548,-0.523243,0.325343,-0.618968,0.270473
2,0.10441,-0.323661,-0.149228,-0.19954,-1.629763,0.058027,0.494757,0.524045,0.763417,-0.523554,-0.725022,-0.495557,-0.498169,-0.464989,3.037947,-0.285366,0.270473
3,-0.608647,3.089656,-0.149228,-0.19954,-1.300437,1.322075,-0.345353,0.642879,0.763417,1.096798,-1.128847,-0.495417,0.765886,-0.883693,0.332429,-0.805857,0.270473
4,-3.00248,-0.323661,-0.467121,1.008841,-0.573827,0.127821,0.494757,0.325549,0.763417,-0.53517,-1.134625,-0.15912,0.389679,-1.640274,-0.584768,1.460067,0.270473


In [12]:
# BONUS: Let's check whether the normalized data has a mean of zero and a standard deviation of one.
np.mean(normalizedFeatures), np.std(normalizedFeatures)

(-2.642184880531227e-17,
 trackPopularity     1.0
 isExplicit          1.0
 artistPop           1.0
 releaseDate         1.0
 danceability        1.0
 energy              1.0
 key                 1.0
 loudness            1.0
 mode                1.0
 speechiness         1.0
 acousticness        1.0
 instrumentalness    1.0
 liveness            1.0
 valence             1.0
 tempo               1.0
 duration_ms         1.0
 time_signature      1.0
 dtype: float64)

In [13]:
# # QUEST 6: Build the elbow graph to find the best k
# K = range(5, 40) #range of the clusters
# inertia = []

# for k in K: #for every data in set build a inertia
#     print("Training a K-Means model with {} clusters! ".format(k))
#     print()
#     kmeans = KMeans(n_clusters=k,
#                     random_state=1234)
#     kmeans.fit(normalizedFeatures)
#     inertia.append(kmeans.inertia_)

# import numpy as np
# import matplotlib.pyplot as plt
# %matplotlib inline

# plt.figure(figsize=(16,8))
# plt.plot(K, inertia, 'bx-')
# plt.xlabel('k')
# plt.ylabel('inertia')
# plt.xticks(np.arange(min(K), max(K)+1, 1.0)) #changing the scale
# plt.title('Elbow Method showing the optimal k') 

In [14]:
# QUEST 4: Adjust k means cluster
# QUEST 5: Select the number of clusters k
# QUEST 6: CODE ABOVE
# QUEST 7: Use the model with the best k to assign each observation in your data frame to its cluster number using model.predict (add cluster columns to the pandas data frame)
# QUEST 8: Save the model with the best k as your final model using pickle

# clustering par K-means
kmeans = KMeans(n_clusters=70, n_init=10)
kmeans.fit(normalizedFeatures)

with open("kmeans.pickle", "wb") as f:
    pickle.dump(kmeans, f)

# group sorting
clusters = np.argsort(kmeans.labels_)

# show observations and groups
clustered = pd.DataFrame(normalizedFeatures.index[clusters], kmeans.labels_[clusters])
display(clustered.head())

# add cluster column
clustered["kmeans_cluster"] = clustered.index
clustered.reset_index(drop=True, inplace=True)
display(clustered.head())

# no. of clusters
display(clustered["kmeans_cluster"].unique())

# join noGenre dataset with clustered dataset
labeled = pd.merge(clustered, data, on = "identifier")
display(labeled.head())

# keep cool columns for viewing and get essential track info
trackInfo = labeled[["id", "trackName", "artistName", "artistGenre", "isExplicit", "albumName", "album_nTracks", "releaseDate", "duration_ms", "fromPlaylist", "trackPopularity", "artistPop", "albumCover", "uri", "kmeans_cluster"]]
display(trackInfo.head())

# dataset to pickle
with open("trackInfo.pickle", "wb") as f:
    pickle.dump(trackInfo, f)

Unnamed: 0,identifier
0,589
0,3691
0,510
0,3593
0,3586


Unnamed: 0,identifier,kmeans_cluster
0,589,0
1,3691,0
2,510,0
3,3593,0
4,3586,0


array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69], dtype=int32)

Unnamed: 0,identifier,kmeans_cluster,fromPlaylist,trackName,trackPopularity,trackNumber,isExplicit,artistID,artistName,artistPop,artistGenre,albumName,albumID,albumType,releaseDate,album_nTracks,albumCover,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,589,0,50s Party,Great Balls Of Fire,71,1,False,2zyz0VJqrDXeFDIyrfVXSo,Jerry Lee Lewis,59,piano rock,Jerry Lee's Greatest,02FCCye8QsWyjHwedg9Quj,album,1961,12,https://i.scdn.co/image/ab67616d0000b27395788c...,0.533,0.729,7,-7.227,0,0.0714,0.563,0.0,0.159,0.884,78.67,audio_features,64VP3skE86iTvdOlbzuIcO,spotify:track:64VP3skE86iTvdOlbzuIcO,https://api.spotify.com/v1/tracks/64VP3skE86iT...,https://api.spotify.com/v1/audio-analysis/64VP...,111536,4
1,3691,0,Flashback (Soul & Funk),Ain't No Sunshine,82,1,False,1ThoqLcyIYvZn7iWbj8fsj,Bill Withers,72,funk,Just As I Am,6N8uPmDqbgXD3ztkCCfxoo,album,1971,12,https://i.scdn.co/image/ab67616d0000b273e1e350...,0.527,0.415,4,-11.451,0,0.122,0.457,1.7e-05,0.117,0.515,78.169,audio_features,1k1Bqnv2R0uJXQN4u6LKYt,spotify:track:1k1Bqnv2R0uJXQN4u6LKYt,https://api.spotify.com/v1/tracks/1k1Bqnv2R0uJ...,https://api.spotify.com/v1/audio-analysis/1k1B...,125093,4
2,510,0,Soft 50s,Fever,65,1,False,602DnpaSXJB4b9DZrvxbDc,Peggy Lee,58,adult standards,Things Are Swingin',0puYTmfXiL5UZLyl33nXKT,album,1959,14,https://i.scdn.co/image/ab67616d0000b2737c1a73...,0.838,0.171,4,-12.614,0,0.0729,0.23,0.0,0.11,0.397,135.925,audio_features,3aPlQWU07jGgyHaBHVS5TS,spotify:track:3aPlQWU07jGgyHaBHVS5TS,https://api.spotify.com/v1/tracks/3aPlQWU07jGg...,https://api.spotify.com/v1/audio-analysis/3aPl...,199600,4
3,3593,0,Légendes du Rock,Happy Together,77,1,False,2VIoWte1HPDbZ2WqHd2La7,The Turtles,59,bubblegum pop,Happy Together,2pMxs38Y5A0mmHrcu3twvB,album,1967,14,https://i.scdn.co/image/ab67616d0000b27372649a...,0.584,0.367,6,-9.638,0,0.0328,0.55,1.4e-05,0.0818,0.588,120.175,audio_features,1JO1xLtVc8mWhIoE3YaCL0,spotify:track:1JO1xLtVc8mWhIoE3YaCL0,https://api.spotify.com/v1/tracks/1JO1xLtVc8mW...,https://api.spotify.com/v1/audio-analysis/1JO1...,176293,4
4,3586,0,Légendes du Rock,Behind Blue Eyes,72,1,False,67ea9eGLXYMsO2eYQRui3w,The Who,68,album rock,Who's Next (Deluxe Edition),5MqyhhHbT13zsloD3uHhlQ,album,1971,29,https://i.scdn.co/image/ab67616d0000b273fe24dc...,0.394,0.622,4,-8.258,0,0.0336,0.213,4.4e-05,0.0892,0.342,126.844,audio_features,0cKk8BKEi7zXbdrYdyqBP5,spotify:track:0cKk8BKEi7zXbdrYdyqBP5,https://api.spotify.com/v1/tracks/0cKk8BKEi7zX...,https://api.spotify.com/v1/audio-analysis/0cKk...,221427,4


Unnamed: 0,id,trackName,artistName,artistGenre,isExplicit,albumName,album_nTracks,releaseDate,duration_ms,fromPlaylist,trackPopularity,artistPop,albumCover,uri,kmeans_cluster
0,64VP3skE86iTvdOlbzuIcO,Great Balls Of Fire,Jerry Lee Lewis,piano rock,False,Jerry Lee's Greatest,12,1961,111536,50s Party,71,59,https://i.scdn.co/image/ab67616d0000b27395788c...,spotify:track:64VP3skE86iTvdOlbzuIcO,0
1,1k1Bqnv2R0uJXQN4u6LKYt,Ain't No Sunshine,Bill Withers,funk,False,Just As I Am,12,1971,125093,Flashback (Soul & Funk),82,72,https://i.scdn.co/image/ab67616d0000b273e1e350...,spotify:track:1k1Bqnv2R0uJXQN4u6LKYt,0
2,3aPlQWU07jGgyHaBHVS5TS,Fever,Peggy Lee,adult standards,False,Things Are Swingin',14,1959,199600,Soft 50s,65,58,https://i.scdn.co/image/ab67616d0000b2737c1a73...,spotify:track:3aPlQWU07jGgyHaBHVS5TS,0
3,1JO1xLtVc8mWhIoE3YaCL0,Happy Together,The Turtles,bubblegum pop,False,Happy Together,14,1967,176293,Légendes du Rock,77,59,https://i.scdn.co/image/ab67616d0000b27372649a...,spotify:track:1JO1xLtVc8mWhIoE3YaCL0,0
4,0cKk8BKEi7zXbdrYdyqBP5,Behind Blue Eyes,The Who,album rock,False,Who's Next (Deluxe Edition),29,1971,221427,Légendes du Rock,72,68,https://i.scdn.co/image/ab67616d0000b273fe24dc...,spotify:track:0cKk8BKEi7zXbdrYdyqBP5,0


In [15]:
# check coherence
labeled[labeled["kmeans_cluster"] == 8].head()

Unnamed: 0,identifier,kmeans_cluster,fromPlaylist,trackName,trackPopularity,trackNumber,isExplicit,artistID,artistName,artistPop,artistGenre,albumName,albumID,albumType,releaseDate,album_nTracks,albumCover,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
799,3040,8,Jazz Classics,I'm A Fool To Want You,50,1,False,3NUsiT2JSyaWAnWaXxDzhQ,Dexter Gordon,47,bebop,Ballads,5lzhBDGZKhGKFFVlMOKR0K,album,1991,8,https://i.scdn.co/image/ab67616d0000b273f3a82e...,0.497,0.153,5,-12.65,0,0.0407,0.978,0.809,0.138,0.276,100.222,audio_features,3kYrQXSao9IXp3EzMZQt5J,spotify:track:3kYrQXSao9IXp3EzMZQt5J,https://api.spotify.com/v1/tracks/3kYrQXSao9IX...,https://api.spotify.com/v1/audio-analysis/3kYr...,405227,4
800,3054,8,Jazz Classics,Porgy (I Loves You Porgy) - Outtake,49,1,False,3VEG6gxFIMfl4Cdog26avS,Bill Evans Trio,52,bebop,Waltz For Debby [Original Jazz Classics Remast...,0MjlKhtsyax9HSWNkYaWM2,album,1961,11,https://i.scdn.co/image/ab67616d0000b273aa2fcb...,0.389,0.145,5,-22.965,1,0.0301,0.92,0.876,0.656,0.15,99.541,audio_features,3NmLBig4t7RZWbLT3U2eBb,spotify:track:3NmLBig4t7RZWbLT3U2eBb,https://api.spotify.com/v1/tracks/3NmLBig4t7RZ...,https://api.spotify.com/v1/audio-analysis/3NmL...,360307,4
801,3056,8,Jazz Classics,Young And Foolish,48,1,False,4jXfFzeP66Zy67HM2mvIIF,Bill Evans,61,bebop,Everybody Digs Bill Evans,2zSAVheEFBPMuUozd6C9gt,album,1959,10,https://i.scdn.co/image/ab67616d0000b27386ae75...,0.438,0.0172,9,-23.66,1,0.0384,0.973,0.557,0.0815,0.136,100.989,audio_features,1WygPWqmNs4s2fxtjmtI3O,spotify:track:1WygPWqmNs4s2fxtjmtI3O,https://api.spotify.com/v1/tracks/1WygPWqmNs4s...,https://api.spotify.com/v1/audio-analysis/1Wyg...,354667,4
802,3066,8,Jazz Classics,Search For Peace,48,1,False,2EsmKkHsXK0WMNGOtIhbxr,McCoy Tyner,49,contemporary post-bop,The Real McCoy (Remastered / Rudy Van Gelder E...,22HoIP0ai6Wikh4R8yM0AX,album,1967,5,https://i.scdn.co/image/ab67616d0000b27375d3c6...,0.456,0.222,0,-16.879,1,0.0354,0.938,0.806,0.24,0.15,112.725,audio_features,4qRJ7ZJyfzchEd2fjle7Bz,spotify:track:4qRJ7ZJyfzchEd2fjle7Bz,https://api.spotify.com/v1/tracks/4qRJ7ZJyfzch...,https://api.spotify.com/v1/audio-analysis/4qRJ...,392000,4
803,3058,8,Jazz Classics,I Remember Clifford - Rudy Van Gelder Edition;...,49,1,False,38C3okxv3fyyOIQUVPCdGX,Lee Morgan,46,bebop,Volume 3,63pHpN0nqkNbaeP2UEexY0,album,1957,6,https://i.scdn.co/image/ab67616d0000b273b7f776...,0.376,0.278,0,-8.597,0,0.0416,0.912,0.886,0.0728,0.199,62.055,audio_features,6KawDsALZTY6FA8CE1NLZK,spotify:track:6KawDsALZTY6FA8CE1NLZK,https://api.spotify.com/v1/tracks/6KawDsALZTY6...,https://api.spotify.com/v1/audio-analysis/6Kaw...,429040,4


In [16]:
# embedded player
from IPython.display import IFrame

def play_song(track_id):
    return IFrame(src="https://open.spotify.com/embed/track/"+track_id,
       width="320",
       height="80",
       frameborder="0",
       allowtransparency="true",
       allow="encrypted-media")

play_song("64MHnMvyOiNJU9tmUOBjeV")

## recommender

If we were to load our scaler and then print it, it would look something like this:

```python
# Use the scaler from the pickle file
with open('scaler.pkl', 'rb') as f:
    loaded_scaler = pickle.load(f)

print(loaded_scaler)
```

The output would look something like this:
```python
StandardScaler()
```

The fact that it doesn't print the internal data of the scaler (*like the **means** and **standard deviations***) doesn't mean they aren't there. <br>
This is just the standard way that sklearn prints its objects. <br>
We can access the mean and standard deviation of each feature with the `mean_` and `scale_` attributes, respectively:

```python
print(loaded_scaler.mean_)  # prints the means of each feature
print(loaded_scaler.scale_)  # prints the standard deviations of each feature
```

`loaded_scaler` is now a `StandardScaler` instance with the same state as scaler when it was pickled. <br>
We can use it to transform new data in the same way as the original data:

```python
inputNormalized = loaded_scaler.transform(newData)
```

Remember, pickle is not secure against erroneous or maliciously constructed data. <br>
Never unpickle data received from an untrusted or unauthenticated source.

We can load this model again with these lines:

```python
with open("kmeans.pickle", "rb") as f:
    loaded_kmeans = pickle.load(f)
```

Now `loaded_kmeans` is a **KMeans** model that's in the same state as `kmeans` was when it was pickled. <br>
We can use it to predict the cluster of new data points:

```python
inputCluster = loaded_kmeans.predict(newData)
```

In conclusion, the `KMeans` model, including the number of clusters and the cluster centers, is what's saved in the pickle file. <br>
This model can be used to assign new data points to the existing clusters.

In [31]:
# 1ST PART: PLUG AND PLAY

# embedded player
def play_song(track_id):
    return IFrame(src="https://open.spotify.com/embed/track/"+track_id,
                  width="320",
                  height="80",
                  frameborder="0",
                  allowtransparency="true",
                  allow="encrypted-media")

try:
    # 1. get song name from the user as input
    song_name = input("Enter the name of the song: ")
    if not song_name:
        raise ValueError("Song name cannot be empty")

    # perform search on Spotify for the song
    results = sp.search(q="track:" + song_name, type="track", limit=20)
    if not results["tracks"]["items"]:
        raise ValueError(f"No track found with the name {song_name}")
    
    # if multiple matches found, ask the user to choose the correct artist
    if len(results["tracks"]["items"]) > 1:
        print("Multiple matches found. Please choose the correct artist:")
        for i, item in enumerate(results["tracks"]["items"], 1):
            print(f"{i}. {item['artists'][0]['name']}")
        
        artist_choice = int(input("Enter the number for the correct artist: ")) - 1
        track_id = results["tracks"]["items"][artist_choice]["id"]
    else:
        # extract track ID of the first result
        track_id = results["tracks"]["items"][0]["id"]
    
    print(f"Track ID: {track_id}")

    # save track_id to a file
    with open("track_id.txt", "w") as file:
        file.write(track_id)

    # 2. play input song in the music embed player
    display(play_song(track_id))
    
except ValueError as e:
    print("Error: ", str(e))

Multiple matches found. Please choose the correct artist:
1. Taylor Swift
2. Foster The People
3. Tesla
4. Foster The People
5. rum.gold
6. Tesla
7. Taylor Swift
8. Keyrenity
9. King Lil G
10. Foster The People
11. A-Wax
12. Foster The People
13. New Kids On The Block
14. Nebu Kiniza
15. Ephemerals
16. rum.gold
17. Taylor Swift
18. Bill Summers
19. New Kids On The Block
20. Julia Sheer
Track ID: 1GwMQaZz6Au3QLDbjbMdme


In [33]:
# 2ND PART: RECOMMEND AND PLAY
# load track_id from the file
with open("track_id.txt", "r") as file:
    track_id = file.read()

try:
    # 3. get audio features for this song from the Spotify API
    audio_features = sp.audio_features(track_id)
    if not audio_features:
        raise ValueError(f"No audio features found for the track {song_name}")
    
    # get additional features
    results = sp.track(track_id)
    
    trackPopularity = results["popularity"]
    isExplicit = results["explicit"]
    artist_id = results["artists"][0]["id"]
    artistPop = sp.artist(artist_id)["popularity"]
    releaseDate = pd.to_datetime(results["album"]["release_date"]).year
    
    # create DataFrame from audio_features (which is a list of dictionaries)
    audioData = pd.DataFrame(audio_features)
    
    # add additional features to the DataFrame
    audioData["trackPopularity"] = trackPopularity
    audioData["isExplicit"] = isExplicit
    audioData["artistPop"] = artistPop
    audioData["releaseDate"] = releaseDate

    # select only the columns that were used in the original data
    features = ["trackPopularity", "isExplicit", "artistPop", "releaseDate", "danceability",
                  "energy", "key", "loudness", "mode", "speechiness", "acousticness",
                  "instrumentalness", "liveness", "valence", "tempo", "duration_ms",
                  "time_signature"]
    
    audioData = audioData[features]
    
    # 4. load StandardScaler using Pickle and use it to scale the new song
    with open("scaler.pickle", "rb") as f:
        loaded_scaler = pickle.load(f)
    scaled_features = loaded_scaler.transform(audioData)

    # transform scaled_features to a DataFrame with column names
    scaled_features_df = pd.DataFrame(scaled_features, columns=features)

    # 5. load KMeans model using Pickle and predict the cluster for the new song
    with open("kmeans.pickle", "rb") as f:
        loaded_kmeans = pickle.load(f)
    predicted_cluster = loaded_kmeans.predict(scaled_features_df)

    # 6. return random song from the same cluster
    clustered_songs = labeled[labeled["kmeans_cluster"] == predicted_cluster[0]]
    if clustered_songs.empty:
        raise ValueError(f"No songs found in the same cluster as {song_name}")
    random_song = clustered_songs.sample()

    print("WE RECOMMEND")
    print("-" * 100)
    print(f"Song: {random_song['trackName'].values[0]}")
    print(f"By: {random_song['artistName'].values[0]}")
    print(f"Genre: {random_song['artistGenre'].values[0]}")
    print(f"Explicit content: {'Yes' if random_song['isExplicit'].values[0] else 'No'}")
    print(f"Date released: {random_song['releaseDate'].values[0]}")
    print(f"Album: {random_song['albumName'].values[0]}")
    print(f"Album type: {random_song['albumType'].values[0]}")
    print(f"Popularity of the song: {random_song['trackPopularity'].values[0]}")
    print(f"Popularity of the artist: {random_song['artistPop'].values[0]}")
    print(f"Fetched from Data Playlist: {random_song['fromPlaylist'].values[0]}")
    print(f"See album cover here: {random_song['albumCover'].values[0]}")

    # 7. play recommended song using the built-in music player
    recommended_track_id = random_song["id"].values[0] # assuming "id" is the column name for track id
    display(play_song(recommended_track_id))
    
except ValueError as e:
    print("Error: ", str(e))
except FileNotFoundError:
    print("Error: Required file not found")
except pickle.UnpicklingError:
    print("Error: Failed to load data from pickle file")

WE RECOMMEND
----------------------------------------------------------------------------------------------------
Song: Pagan Poetry
By: Björk
Genre: art pop
Explicit content: No
Date released: 2001
Album: Vespertine
Album type: album
Popularity of the song: 50
Popularity of the artist: 63
Fetched from Data Playlist: Who's That Girl
See album cover here: https://i.scdn.co/image/ab67616d0000b2733d15323e148511fc307d95bd


## streamlit

In [19]:
import streamlit as st

def get_song_features(song_id):
    # your function for getting song features here, which should return a dataframe
    pass

def recommend_song(song_df):
    # your function for recommending a song based on input song_df
    pass

def play_song(track_id):
    # your function to get the embedded player for the song
    pass

# Streamlit code starts here
st.title('Spotify Song Recommender')

# Input song name
song_name = st.text_input('Enter the name of a song:')

# Search button
if st.button('Search'):
    # your code for searching the song and getting the id
    track_id = sp.search(q="track:" + song_name, type="track", limit=1)["tracks"]["items"][0]["id"]
    song_df = get_song_features(track_id)
    recommended_song = recommend_song(song_df)

    # Display recommended song
    st.write("We recommend: ", recommended_song)
    st.write("Listen to the song here:")
    st.markdown(play_song(recommended_song), unsafe_allow_html=True)

2023-06-07 06:26:39.838 
  command:

    streamlit run /Users/katrinajmd/Library/Python/3.9/lib/python/site-packages/ipykernel_launcher.py [ARGUMENTS]
