In [2]:
"""
Read dataset file using pandas.
"""

from pandas import read_csv

dataset_path = "./dataset/spotify.csv"

raw_df = read_csv(dataset_path)


In [3]:
df = raw_df.sample(n=10_000)


In [4]:
from sklearn.preprocessing import StandardScaler

features = [
    "danceability",
    "energy",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
]

X = df[features]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

for i, feature in enumerate(features):
    df[feature] = X_scaled.T[i]

df.head()


Unnamed: 0,id,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,year,release_date
1003371,11rFn8XI5vqDkwC4vb30jA,With All My Heart,Why We Sing,6QQ8BHjy3UI8gem2EAjK6w,['Dionne Warwick'],['2JSjCHK79gdaiPWdKiNUNp'],3,1,False,0.25875,...,-0.440589,0.350538,-0.766976,0.784572,-0.996593,0.351501,301880,4.0,2008,2008-02-20
896300,4Fgz0gqwN5nZFjxpRR6Inc,Talisman,Talisman,2EQpreA8NCbh75dGf3d8rr,['Adam Hurst'],['4laSA8avg7weycHmryv1fj'],3,1,False,0.825404,...,-0.384653,1.265061,1.677049,-0.468632,-0.268538,0.25575,275040,3.0,2018,2018-08-06
453668,0aESsIVFCUEvMwSLk7v2tl,Saz Nemishe Zad (Don't Play Your Axe Here),Zang Bezan Azhans (Call A Cab),6NGSzgPPkrbcacuinWD4cu,['Kiosk'],['42o3S5oDd4Q4YyMNRmN5oS'],1,1,False,0.631273,...,-0.377661,-0.727664,-0.766285,0.047067,1.546057,0.088234,233203,4.0,2014,2014-08-06
446620,1B6PGYzpixFz7e6StYTF4f,Somethin Holy Like Qur'an,Horns And Halos,6QgSbcf3iJ6lKJEOfhWJNI,"['Equipto', 'Andre Nickatina']","['4mPPxTpWqDR5rmOrTOh4nk', '7boQqq2hUcz9kHcQOe...",16,1,True,1.486501,...,1.936702,-1.066714,-0.766976,-0.241281,1.553448,-1.277477,199680,4.0,2005,2005-05-15
979035,6UNV8mflEmh8PJqeTPC1p3,Body Language,Amala (Deluxe Version),3wOMqxNHgkga91RBC7BaZU,['Doja Cat'],['5cj0lLjcoR7YOSnhnX0Po5'],12,1,True,1.182186,...,-0.270159,-0.439278,-0.766976,-0.496357,0.984308,-0.675817,245240,4.0,2019,2019-03-01


In [5]:
"""
Connect to Spotify via spotipy library.
"""

from spotipy import Spotify
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import dotenv_values

config = dotenv_values(".env")
CLIENT_ID = config["CLIENT_ID"]
CLIENT_SECRET = config["CLIENT_SECRET"]

auth_manager = SpotifyClientCredentials(
    client_id=CLIENT_ID, client_secret=CLIENT_SECRET
)
sp = Spotify(auth_manager=auth_manager)


In [6]:
from numpy.random import randint


def get_music_infos(num):
    musics_num, _ = df.shape

    music_infos = []

    while len(music_infos) < num:
        play_music_index = randint(low=0, high=musics_num)

        music = df.iloc[play_music_index, :]

        music_info = get_music_info(music)
        preview_url = music_info["preview_url"]

        if preview_url != None:
            music_infos.append(music_info)

    return music_infos


def get_music_info(music):
    music_id = music["id"]

    track = sp.track(track_id=music_id)

    name = track["name"]
    artists = track["artists"]
    artist_names = map(lambda x: x["name"], artists)
    preview_url = track["preview_url"]

    music_info = {
        "id": music_id,
        "name": name,
        "artist_names": artist_names,
        "preview_url": preview_url,
        "danceability": music["danceability"],
        "energy": music["energy"],
        "speechiness": music["speechiness"],
        "acousticness": music["acousticness"],
        "instrumentalness": music["instrumentalness"],
        "liveness": music["liveness"],
        "valence": music["valence"],
        "tempo": music["tempo"],
    }

    return music_info


In [7]:
from textwrap import dedent
from vlc import MediaPlayer


def print_song_info(no, name, artist_names):
    artist_name = " & ".join(artist_names)

    print(
        dedent(
            f"""
				---
				Song No.{no}
				Now Playing
						
				Name: {name}
				Artist: {artist_name}
				---
			"""
        )
    )


def get_user_input():
    user_input = str(
        input(
            dedent(
                f"""
					Do you enjoy this music ?

					If yes, press (y).
					If no, press (n).

					Press (s) to stop/pause the music.
					Press (p) to play/resume the music.
					Press (e) to exit.
				"""
            )
        )
    )
    user_input_lowercase = user_input.lower()

    if user_input_lowercase in ["s", "p", "y", "n", "e"]:
        return user_input_lowercase
    else:
        return get_user_input()


In [8]:
liked_music_infos = []
disliked_music_infos = []

music_infos = get_music_infos(num=10)

for index, music_info in enumerate(music_infos):
    no = index + 1
    name = music_info["name"]
    artist_names = music_info["artist_names"]
    preview_url = music_info["preview_url"]

    print_song_info(no, name, artist_names)

    media_player = MediaPlayer(preview_url)
    media_player.play()

    is_finish = False
    is_exit = False

    while is_finish != True:
        user_input = get_user_input()

        if user_input == "e":
            media_player.stop()

            is_finish = True
            is_exit = True
        elif user_input == "s":
            media_player.set_pause(True)
        elif user_input == "p":
            is_media_playing = media_player.is_playing()

            if is_media_playing:
                media_player.set_pause(False)
            else:
                media_player.play()
        elif user_input == "y":
            media_player.stop()
            is_finish = True

            liked_music_infos.append(music_info)
        elif user_input == "n":
            media_player.stop()
            is_finish = True

            disliked_music_infos.append(music_info)

    if is_exit:
        break



---
Song No.1
Now Playing

Name: Adew Dundee
Artist: Anonymous & Baltimore Consort
---


---
Song No.2
Now Playing

Name: All You Got
Artist: Tegan and Sara
---


---
Song No.3
Now Playing

Name: Prélude, choral et fugue, M. 21: I. Prelude
Artist: César Franck & Murray Perahia
---


---
Song No.4
Now Playing

Name: On n'ira jamais à Venise
Artist: Marc Lavoine
---


---
Song No.5
Now Playing

Name: Rockstar For A Day
Artist: Bumblefoot
---


---
Song No.6
Now Playing

Name: His Yoke Is Easy, His Burden Is Light
Artist: George Frederic Handel 1685-1759 & London Philharmonic Orchestra
---


---
Song No.7
Now Playing

Name: Thunderstorm 3
Artist: Peter Samuels
---


---
Song No.8
Now Playing

Name: Dance from the Outside
Artist: Barry Schrader
---


---
Song No.9
Now Playing

Name: Die schöne Müllerin, Op. 25, D. 795: No. 2, Wohin?. "Ich hört ein Bächlein rauschen"
Artist: Franz Schubert & Christoph Prégardien
---


---
Song No.10
Now Playing

Name: Girl Talk
Artist: Barbara Lusch
---



## Enter [Machine Learning] Zone.

In [9]:
from sklearn.cluster import KMeans

kmeans = KMeans(
    n_clusters=len(features), init="k-means++", n_init="auto", random_state=0
)
kmeans.fit(X_scaled)

df["cluster"] = kmeans.predict(X_scaled)


In [10]:
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px

perplexity = 30
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=0)

positions = tsne.fit_transform(X_scaled)

df.loc[:, ["position_x", "position_y"]] = positions

tsne_df = pd.DataFrame(df, columns=["position_x", "position_y", "name", "cluster"])

fig = px.scatter(
	tsne_df, x="position_x", y="position_y", color="cluster", hover_data=["position_x", "position_y", "name"]
)
fig.show()

In [11]:
user_music_infos = liked_music_infos
user_music_ids = list(map(lambda x: x["id"], user_music_infos))

user_music_df = df[df["id"].isin(user_music_ids)].copy().reset_index(drop=True)
recommended_df = df[~df["id"].isin(user_music_ids)].copy().reset_index(drop=True)

In [12]:
from math import sqrt

recommended_music_indexs = []

for _, user_music in user_music_df.iterrows():
    cluster = user_music["cluster"]
    position_x = user_music["position_x"]
    position_y = user_music["position_y"]

    recommended_music_index = None
    recommended_distance = None

    recommended_in_clusters = recommended_df[recommended_df["cluster"] == cluster]
    for index, recommended in recommended_in_clusters.iterrows():
        recommended_position_x = recommended["position_x"]
        recommended_position_y = recommended["position_y"]

        distance = sqrt(
            ((position_x - recommended_position_x) ** 2)
            + ((position_y - recommended_position_y) ** 2)
        )

        if (recommended_music_index is None) or (recommended_distance is None) or (distance < recommended_distance):
            recommended_music_index = index
            recommended_distance = distance

    recommended_df.drop([recommended_music_index], inplace=True)

    recommended_music_indexs.append(recommended_music_index)

recommended_df.iloc[recommended_music_indexs]

Unnamed: 0,id,name,album,album_id,artists,artist_ids,track_number,disc_number,explicit,danceability,...,liveness,valence,tempo,duration_ms,time_signature,year,release_date,cluster,position_x,position_y
1728,4cczHLUkIxzQdYdlo8bRW5,New Taste,Gymnastics,1fHozeTsMeD2qq3ivDY8tA,['Sneaks'],['40iUcsx6LWJ0DEPryPmU30'],3,1,False,1.712113,...,-0.590625,1.479534,0.414454,92368,4.0,2016,2016-09-09,3,-75.635605,-24.8297
8028,0UhbX4tRbBBKrH6uqWBHrx,My Heart Is Gone,The Woodsman (Original Off-Broadway Solo Recor...,10wdiKeCkxRsofOA6ncZs6,['Edward W. Hardy'],['5H0BY5ImaZwuCHtQgfe3CW'],10,1,False,-2.208819,...,-0.602824,-1.166596,-1.423552,132830,4.0,2016,2016-05-29,2,65.551605,38.816341
2649,63jtqu6kDQ989osVZO0m28,SAN SALVADOR,ESG VOL. 1,0WYh5y2RJRhiXGKryKoYOo,"['Obijuan', 'Dutchmastered']","['5ajMNymelIspkqnQ8ZRxvh', '7lPrfZtRKJtRIYBqCV...",3,1,True,0.421401,...,-0.291187,0.773653,-0.962223,97958,4.0,2020,2020-08-27,3,-80.778557,-7.280152
9555,3hAVrCZGQEctTxFXSwfObs,Mental Hostage,"Mentally Ill, Legally Sane",5mB8KZlBkTqiv5UWAmgWix,['BlackList 9'],['1Qa9wkeKDGubRkeVHpbRN4'],7,1,False,-0.323644,...,0.540585,-0.246364,0.776899,193174,4.0,2019,2019-03-22,7,-38.6437,-20.939571
6951,2yp5p4rG68qAHzVpGs4NPI,Do You Hear What I Hear - Bonus Track,True Stories,5MWKZ7FEYnj2JTiknaYjnQ,['Russell Watson'],['2hrfuB0LtVmFZ6MvPMIHTK'],13,1,False,-1.787501,...,-0.643858,-0.878331,1.676973,153493,4.0,2016,2016-11-04,7,23.665302,2.621076
3860,34BIBOdPkeuCHJkE0UI5DL,Organic,Still Strugglin',1Rep9DiumhCDDgbD0wO30u,"['Ovii', 'Danny Fantom']","['0KIGWQ9RsA8AhcptUd32pa', '5doECGz0XPq0CnbR3g...",5,1,True,0.626026,...,1.0951,0.174947,-1.303911,148829,3.0,2018,2018-04-28,3,-75.589066,-10.911073
7510,61LKjGfMKfddHAIgZXuFMy,I Like You As You Are,4TRK Mind,21Vp1ykPjMC2LPyQD3FrtL,['Exile'],['4r4XYZJUeeKCcrkvi7voDP'],15,1,True,0.578805,...,0.180151,1.047135,-1.327311,53747,3.0,2011,2011-10-04,3,-89.152145,-9.385794
5912,2RDJPeDw61AHy7cYhrBGCr,"Guitar Trio No. 3 in F Major, Op. 18: III. Min...","Fossa: Guitar Trios, Op. 18",30cGm6kzhoCfyAgKikCFRS,"['Francois de Fossa', 'Martin Beaver', 'Bryan ...","['6dwvOAIz6aIgPyDEZpv2oh', '2In5AyYy7sjlNQNxVx...",11,1,False,0.510596,...,-0.615023,1.634754,2.226286,165133,3.0,1994,1994-06-27,4,37.585011,-29.947083
5155,0RvMcWdwbyyYRjXvSqlmw6,The Light of the Dark,Singing in the Dead of Night,4dHOP7U6ree7BjlbO3oRox,"['Michael Gordon', 'eighth blackbird']","['69Qu66Y6G07dk23Ld2ym1F', '08JsND1KfYyI41gWdA...",2,1,False,-0.077045,...,-0.546264,-0.856156,0.998328,701993,3.0,2020,2020-06-12,4,26.2871,-25.401487
1981,1kM9NXDsaak8gYjbOSGKJl,Заблудилась я,Избранное,71gSldViV0gigUnp2LFcWF,['Маша Распутина'],['4uyptB9Hp8N6pjqSZJ9dSW'],13,1,False,1.06151,...,-0.616687,1.645841,0.744851,233273,4.0,2019,2019-07-05,1,-54.760563,27.505043
