In [None]:
"""
Read dataset file using pandas.
"""

from pandas import read_csv

# Location of the Spotify tracks CSV, relative to the notebook's working directory.
dataset_path = "./dataset/spotify.csv"

# Load the complete file; later cells derive their working frame from `raw_df`.
raw_df = read_csv(filepath_or_buffer=dataset_path)


In [None]:
"""
Reduce data size
"""

# Down-sample to keep the later clustering and t-SNE cells fast.
# - The row count is capped at the size of `raw_df`: sampling is done without
#   replacement, so asking for more rows than exist raises a ValueError.
# - The seed is fixed so that "Restart Kernel -> Run All" reproduces the same
#   subset, in line with the random_state=42 used by the later cells.
df = raw_df.sample(n=min(10_000, len(raw_df)), random_state=42)


In [None]:
"""
Standardization
"""

from sklearn.preprocessing import StandardScaler

# Audio-feature columns used as clustering input.
features = [
    "danceability",
    "energy",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
]

X = df[features]

# Fit the scaler on the sampled tracks and transform them in the same step.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Overwrite every feature column of `df` with its scaled values; the i-th
# column of `X_scaled` belongs to `features[i]`.
for feature, scaled_values in zip(features, X_scaled.T):
    df[feature] = scaled_values

df.head()


In [None]:
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt

# One estimator is reused for the whole sweep; its parameters are reset per run.
kmeans = KMeans()
# Candidate cluster counts: 2 up to (and including) the number of features.
n_clusters_range = range(2, len(features) + 1)

# Running best; replaced by the first silhouette score that is greater than -1.
best_score = -1
best_n_clusters = 1
silhouette_scores = []

for n_clusters in n_clusters_range:
    kmeans.set_params(
        n_clusters=n_clusters,
        init="k-means++",
        n_init="auto",
        random_state=42,
    )
    kmeans.fit(X_scaled)

    ss = silhouette_score(X_scaled, kmeans.labels_)
    silhouette_scores.append(ss)

    if ss > best_score:
        best_score, best_n_clusters = ss, n_clusters

# One bar per candidate cluster count, labelled with that count on the x axis.
bar_positions = range(len(silhouette_scores))
plt.bar(bar_positions, silhouette_scores)
plt.xticks(bar_positions, list(n_clusters_range))
plt.title("Silhouette Score")
plt.xlabel("Number of Clusters")
plt.show()


In [None]:
# Refit K-Means using the cluster count selected by the silhouette sweep.
kmeans = KMeans(
    random_state=42,
    n_init="auto",
    init="k-means++",
    n_clusters=best_n_clusters,
).fit(X_scaled)

# Tag every sampled track with the cluster the fitted model assigns to it.
df["cluster"] = kmeans.predict(X_scaled)


In [None]:
from sklearn.manifold import TSNE
import pandas as pd
import plotly.express as px

perplexity = 30
tsne = TSNE(n_components=2, perplexity=perplexity, random_state=42)

# Project the scaled features down to two coordinates per track.
positions = tsne.fit_transform(X_scaled)

# Store the 2-D coordinates on `df`; the recommendation cell reads them later.
position_columns = ["position_x", "position_y"]
df.loc[:, position_columns] = positions

# Narrow frame holding only what the scatter plot needs.
tsne_df = pd.DataFrame(df, columns=position_columns + ["cluster", "name"])

fig = px.scatter(
    tsne_df,
    x=position_columns[0],
    y=position_columns[1],
    color="cluster",
    hover_data=position_columns + ["name"],
)
fig.show()


In [None]:
"""
Connect to Spotify via spotipy library.
"""

from spotipy import Spotify
from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import dotenv_values

# Credentials are read from a local .env file rather than being hardcoded.
config = dotenv_values(".env")
CLIENT_ID = config["CLIENT_ID"]
CLIENT_SECRET = config["CLIENT_SECRET"]

# Build the auth manager from the two credentials, then the API client (`sp`)
# that the later cells use for track lookups.
credentials = {"client_id": CLIENT_ID, "client_secret": CLIENT_SECRET}
auth_manager = SpotifyClientCredentials(**credentials)
sp = Spotify(auth_manager=auth_manager)


In [None]:
def get_music_info(music):
    """
    Build a summary dict for one track from its dataset row and Spotify metadata.

    The track is looked up through the notebook-level `sp` client using the
    row's "id" value.

    Parameters
    ----------
    music : single-row DataFrame (or any object supporting ``music[col].values``)
        Must hold exactly one value for "id" and for each audio-feature
        column listed in ``feature_columns`` below.

    Returns
    -------
    dict
        Keys, in order: "id", "name", "artist_names", "preview_url", followed
        by one key per audio-feature column. "artist_names" is a list of
        strings. "preview_url" is passed through exactly as returned by
        ``sp.track`` (callers check it against ``None``).

    Raises
    ------
    ValueError
        If any of the columns does not hold exactly one value.
    """
    feature_columns = (
        "danceability",
        "energy",
        "speechiness",
        "acousticness",
        "instrumentalness",
        "liveness",
        "valence",
        "tempo",
    )

    # The single-element unpacking fails loudly unless exactly one row is given.
    [music_id] = music["id"].values

    feature_values = {}
    for column in feature_columns:
        [value] = music[column].values
        feature_values[column] = value

    track = sp.track(track_id=music_id)

    # A real list (not a lazy `map`) so the names can be iterated repeatedly:
    # printing them must not leave an exhausted iterator in the returned dict.
    artist_names = [artist["name"] for artist in track["artists"]]

    return {
        "id": music_id,
        "name": track["name"],
        "artist_names": artist_names,
        "preview_url": track["preview_url"],
        **feature_values,
    }


In [None]:
from textwrap import dedent
from vlc import MediaPlayer


def print_song_info(name, artist_names):
    """
    Print a "Now Playing" banner for a track.

    Parameters
    ----------
    name : str
        Track title.
    artist_names : iterable of str
        Artist names; they are joined with " & " on the "Artist:" line.

    The banner is assembled from explicit lines rather than by dedenting an
    interpolated template, so a title or artist containing a newline cannot
    change the indentation of the rest of the banner.
    """
    artist_name = " & ".join(artist_names)

    banner_lines = [
        "",
        "---",
        "Now Playing",
        "",
        f"Name: {name}",
        f"Artist: {artist_name}",
        "---",
        "",
    ]

    print("\n".join(banner_lines))


def get_user_input():
    """
    Prompt until the user enters one of the supported single-letter commands.

    Returns
    -------
    str
        One of "s", "p", "y", "n", "e" (always lowercase).

    The answer is stripped of surrounding whitespace and lowercased before it
    is checked. Invalid answers re-prompt in a loop, so any number of bad
    answers is tolerated without growing the call stack.
    """
    prompt = "\n".join(
        [
            "",
            "Do you enjoy this music ?",
            "",
            "If yes, press (y).",
            "If no, press (n).",
            "",
            "Press (s) to stop/pause the music.",
            "Press (p) to play/resume the music.",
            "Press (e) to exit.",
            "",
        ]
    )
    valid_choices = ("s", "p", "y", "n", "e")

    while True:
        choice = input(prompt).strip().lower()
        if choice in valid_choices:
            return choice


In [None]:
from numpy.random import randint

# Number of rows to draw from. `df.shape` is a (rows, columns) tuple, which is
# not a usable upper bound for drawing one row position, so take the row count.
musics_num = len(df)
liked_music_infos = []

is_exit = False

while not is_exit:
    # Keep drawing random tracks until one comes back with a preview URL.
    # NOTE(review): this inner loop never ends if no track in `df` has a
    # preview URL; consider capping the number of attempts.
    music_info = None

    while music_info is None:
        # size=1 yields a one-element array, so `iloc` returns a single-row
        # DataFrame, which is the shape `get_music_info` unpacks.
        play_music_index = randint(low=0, high=musics_num, size=1)

        music = df.iloc[play_music_index, :]
        music_info_ = get_music_info(music)

        if music_info_["preview_url"] is not None:
            music_info = music_info_

    name = music_info["name"]
    artist_names = music_info["artist_names"]
    preview_url = music_info["preview_url"]

    print_song_info(name, artist_names)

    media_player = MediaPlayer(preview_url)
    media_player.play()

    # Handle commands for the current track until the user rates it or exits.
    is_finish = False

    while not is_finish:
        user_input = get_user_input()

        if user_input == "e":
            # Exit: stop playback and leave both loops.
            media_player.stop()

            is_finish = True
            is_exit = True
        elif user_input == "s":
            media_player.set_pause(True)
        elif user_input == "p":
            # Un-pause when the player reports it is playing, otherwise (re)start.
            if media_player.is_playing():
                media_player.set_pause(False)
            else:
                media_player.play()
        elif user_input == "y":
            # Liked: remember the track, then move on to the next one.
            media_player.stop()
            is_finish = True

            liked_music_infos.append(music_info)
        elif user_input == "n":
            # Disliked: just move on to the next track.
            media_player.stop()
            is_finish = True


In [None]:
from math import sqrt

user_music_infos = liked_music_infos
user_music_ids = [info["id"] for info in user_music_infos]

is_liked = df["id"].isin(user_music_ids)
user_music_df = df[is_liked].copy().reset_index(drop=True)

# Every track the user has not liked, renumbered 0..n-1. This frame is never
# modified, so the labels collected in the loop can be resolved against it at
# the end. (They are NOT positions in `df`, whose rows are numbered differently
# and still include the liked tracks.)
candidate_df = df[~is_liked].copy().reset_index(drop=True)

# Shrinking pool: a track is removed once recommended so it is not suggested
# again for another liked track.
recommended_df = candidate_df.copy()

recommended_music_indexs = []

for _, user_music in user_music_df.iterrows():
    # Only tracks from the same cluster as the liked track are considered.
    recommended_in_clusters = recommended_df[
        recommended_df["cluster"] == user_music["cluster"]
    ]

    # Nothing left in this cluster: skip instead of trying to drop a missing label.
    if recommended_in_clusters.empty:
        continue

    # Squared Euclidean distance in the 2-D t-SNE plane. The square root is
    # omitted because it does not change which candidate is the closest.
    squared_distances = (
        (recommended_in_clusters["position_x"] - user_music["position_x"]) ** 2
    ) + ((recommended_in_clusters["position_y"] - user_music["position_y"]) ** 2)

    recommended_music_index = squared_distances.idxmin()

    recommended_df = recommended_df.drop(index=recommended_music_index)
    recommended_music_indexs.append(recommended_music_index)

recommended_musics = candidate_df.loc[recommended_music_indexs]
recommended_musics
