In [None]:
import pandas as pd
import numpy as np
import spotipy as spy
import plotly.express as px
import os
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import euclidean_distances
from spotipy.oauth2 import SpotifyOAuth
from spotipy.oauth2 import SpotifyClientCredentials
from skimage import io


In [None]:
# Load KEY=VALUE pairs from the project's .env file into os.environ so the
# Spotify settings read later through os.environ.get(...) are available.
with open('../.env', encoding='utf-8') as file:
    for line in file:
        entry = line.strip()
        # Skip blank lines and comments (including indented comments).
        if not entry or entry.startswith('#'):
            continue

        # partition() never raises: a line without '=' (or without a key) is
        # malformed and is skipped instead of aborting the cell with ValueError.
        key, separator, value = entry.partition('=')
        key = key.strip()
        if not separator or not key:
            continue

        # Strip spaces around the key and the value so "KEY = value" works too.
        os.environ[key] = value.strip()


In [None]:
# Parser options shared by the three comma-separated, UTF-8 encoded inputs.
_CSV_OPTIONS = dict(sep=",", encoding="utf-8")

# Main table, plus the tables read from the "by genres" and "by year" files.
dados = pd.read_csv("../data/files/dados_reduzidos.csv", **_CSV_OPTIONS)
dados_generos = pd.read_csv("../data/files/data_by_genres.csv", **_CSV_OPTIONS)
dados_anos = pd.read_csv("../data/files/data_by_year.csv", **_CSV_OPTIONS)

In [None]:
# Preview the first two rows of the genre table.
dados_generos.head(2)


In [None]:
# Preview the first two rows of the yearly table.
dados_anos.head(2)


In [None]:
# Preview the first two rows of the main table.
dados.head(2)


In [None]:
# Keep only the rows whose year is 2000 or later, then list the years left.
dados = dados.loc[dados["year"] >= 2000]
dados.year.unique()


In [None]:
# (rows, columns) of the main table after the year filter.
dados.shape


In [None]:
# Discard the columns that are not used in the rest of the analysis.
colunas_descartadas = ["explicit", "key", "mode"]
dados = dados.drop(columns=colunas_descartadas)


In [None]:
# (rows, columns) again, now that "explicit", "key" and "mode" were dropped.
dados.shape


In [None]:
# Number of missing values per column of the main table.
dados.isnull().sum()


In [None]:
# Same missing-value count through isna(), which pandas documents as an alias
# of isnull().
dados.isna().sum()


In [None]:
# Look at the genre table again before dropping columns from it.
dados_generos.head(2)


In [None]:
# Remove "key" and "mode" from the genre table and preview the result.
dados_generos = dados_generos.drop(columns=["key", "mode"])
dados_generos.head(2)


In [None]:
# Number of missing values per column of the genre table.
dados_generos.isna().sum()


In [None]:
# Same missing-value count through isnull(), which pandas documents as an
# alias of isna().
dados_generos.isnull().sum()


In [None]:
# Preview the first rows of the yearly table.
dados_anos.head()


In [None]:
# Distinct years present in the yearly table before it is filtered.
dados_anos.year.unique()


In [None]:
# Restrict the yearly table to 2000 onwards and discard "key" and "mode";
# the last line shows which years remain.
dados_anos = (
    dados_anos
    .loc[dados_anos["year"] >= 2000]
    .drop(columns=["key", "mode"])
)
dados_anos["year"].unique()


In [None]:
# The year filter kept the original row labels; renumber them from 0.
# reset_index() returns a new frame, so the result must be assigned back or
# the call has no effect. drop=True discards the old labels instead of
# inserting them as an extra "index" column.
dados_anos = dados_anos.reset_index(drop=True)
dados_anos


In [None]:
# Line chart of the "loudness" column of the yearly table against "year".
fig = px.line(
    data_frame=dados_anos,
    x="year",
    y="loudness",
    title="Variação do loudness conforme os anos",
)
fig.show()


In [None]:
import plotly.graph_objects as go


In [None]:
# Start from an empty Plotly figure.
fig = go.Figure()


In [None]:
# Plot each audio feature of the yearly table against "year", one line per
# feature. The figure is created in this cell so that running the cell again
# starts from a clean figure instead of stacking duplicate traces.
FEATURES_POR_ANO = [
    "acousticness",
    "valence",
    "danceability",
    "energy",
    "instrumentalness",
    "liveness",
    "speechiness",
]

fig = go.Figure()
for feature in FEATURES_POR_ANO:
    # The legend entry is the column name with its first letter capitalised.
    fig.add_trace(go.Scatter(
        x=dados_anos["year"], y=dados_anos[feature], name=feature.capitalize()))
fig.show()


In [None]:
# Preview the genre table before building the clustering features.
dados_generos.head(2)


In [None]:
# Sum of the per-value counts of the "genres" column.
dados_generos["genres"].value_counts().sum()


In [None]:
# Feature table for the genre clustering: everything except the "genres"
# label column, which is later fed to the scaler + PCA pipeline.
dados_generos1 = dados_generos.drop(columns=['genres'])
dados_generos1.head(2)


In [None]:
# Single seed shared by NumPy's global RNG and by every estimator below.
SEED = 1234
# Seed NumPy from the constant; the previous literal (1224) did not match SEED.
np.random.seed(SEED)

# Standardise the columns, then project them onto two principal components.
pca_pipeline = Pipeline([('scaler', StandardScaler()),
                        ('PCA', PCA(n_components=2, random_state=SEED))])

genre_embedding_pca = pca_pipeline.fit_transform(dados_generos1)

# Two-column frame ("x", "y") holding the projected coordinates.
projection = pd.DataFrame(columns=['x', 'y'], data=genre_embedding_pca)


In [None]:
# Show the (rows, columns) of the projected genre coordinates.
print(f"Projection: {projection.shape}")

In [None]:
# Cluster the genres in the two-dimensional PCA space.
# Only the coordinate columns are used as features: this cell and later ones
# add extra columns to `projection`, so fitting on the whole frame would feed
# those columns to k-means if the cell were executed a second time.
coordenadas = projection[['x', 'y']]

k_means_pca = KMeans(n_clusters=5, verbose=False, random_state=SEED)
k_means_pca.fit(coordenadas)

# Predict once and store the same labels on both frames (the rows of
# `projection` were produced from `dados_generos` in the same order).
rotulos_generos = k_means_pca.predict(coordenadas)
dados_generos["cluster_pca"] = rotulos_generos
projection["cluster_pca"] = rotulos_generos


In [None]:
# Attach the genre names to the projected points (shown as hover text in the
# scatter plot).
# NOTE(review): assigning a Series aligns on the index, so this assumes
# `projection` and `dados_generos` carry the same row labels — confirm.
projection["generos"] = dados_generos['genres']
projection.head()


In [None]:
# Scatter plot of the genre projection, coloured by k-means cluster.
fig = px.scatter(
    projection,
    x="x",
    y="y",
    color="cluster_pca",
    hover_data=['x', 'y', 'generos'],
)
fig.show()


In [None]:
# Fraction of the total variance retained by the kept principal components
pca_pipeline[1].explained_variance_ratio_.sum()


In [None]:
# Sum of the variance explained by each kept component (an absolute amount of
# variance, not a number of columns or clusters)
pca_pipeline[1].explained_variance_.sum()


<h1>Fazendo Cluster com Músicas</h1>

<h1></h1>

In [None]:
# Preview the main table before building the per-track features.
dados.head()

In [None]:
# One-hot encode the "artists" column (new columns are prefixed "artists"),
# append the indicator columns to the table and drop the original column.
artist_dummies = pd.get_dummies(dados['artists'], prefix='artists')
music_dumie = pd.concat([dados, artist_dummies], axis=1).drop(columns=['artists'])
music_dumie.head()

In [None]:
# Compare the table shape before and after one-hot encoding the artists.
print(f"dados ==> {dados.shape}")
print(f'dados dummies ==> {music_dumie.shape}')


In [None]:
# Columns left out of the feature matrix handed to the scaler + PCA pipeline.
colunas_fora_das_features = ['id', 'name', 'artists_song', 'release_date']
music_features = music_dumie.drop(columns=colunas_fora_das_features)

# Rebinds `pca_pipeline` (previously the genre pipeline). PCA is configured
# with a fractional n_components of 0.7 instead of a fixed component count.
pca_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('PCA', PCA(n_components=0.7, random_state=SEED)),
])

music_embedding_pca = pca_pipeline.fit_transform(music_features)

# Frame of projected tracks; columns are the integers 0, 1, 2, ...
projection_music = pd.DataFrame(data=music_embedding_pca)

In [None]:
# Preview the projected track coordinates.
projection_music.head()

In [None]:
# Number of components reported by the fitted PCA step, next to the number of
# columns in the projected frame.
print(f'components embdding ==> {pca_pipeline[1].n_components_}')
print(f'projection music ==> {projection_music.shape[1]}')


In [None]:
# Cluster the tracks with k-means in the PCA space.
# The model is fitted on the raw embedding array rather than on
# `projection_music`: that frame receives label and text columns in this cell
# and the following ones, so fitting on it would fail or change the features
# if the cell were executed a second time. The values are the same ones the
# frame was built from.
k_means_pca_pipeline = KMeans(n_clusters=50, verbose=False, random_state=SEED)
k_means_pca_pipeline.fit(music_embedding_pca)

# Predict once and store the same labels on both frames (positional
# assignment; both have one row per track in the same order).
rotulos_musicas = k_means_pca_pipeline.predict(music_embedding_pca)
dados["cluster_pca"] = rotulos_musicas
projection_music["cluster_pca"] = rotulos_musicas


In [None]:
# Copy identifying columns from `dados` onto the projection. tolist() turns
# each column into a plain list, so values are assigned by position rather
# than matched on row labels.
for destino, origem in (('artist', 'artists'),
                        ('song', 'artists_song'),
                        ('id', 'id')):
    projection_music[destino] = dados[origem].tolist()


In [None]:
# Preview the projection with the cluster label and track metadata attached.
projection_music.head()


In [None]:
# Scatter plot of the tracks on components 0 and 1, coloured by cluster.
fig = px.scatter(
    projection_music,
    x=0,
    y=1,
    color="cluster_pca",
    hover_data=[0, 1, 'song'],
)
fig.show()


<h1>Sistema de Recomendação</h1>

In [None]:
# Song the recommendations are based on; it is matched against the "song"
# column of the projection and the "artists_song" column of the main table.
nome_musica = "Ed Sheeran - Shape of You"

In [None]:
# Cluster label of the chosen song (first matching row).
mascara_musica = projection_music['song'] == nome_musica
cluster_found = projection_music.loc[mascara_musica, 'cluster_pca'].tolist()[0]

In [None]:
# All tracks assigned to the same cluster as the chosen song, keeping the
# first two components and the identifying columns.
colunas_recomendacao = [0, 1, 'id', 'song', 'artist']
mesmo_cluster = projection_music['cluster_pca'] == cluster_found
music_recommended = projection_music.loc[mesmo_cluster, colunas_recomendacao]
music_recommended.head()

In [None]:
# Coordinates of the chosen song on components 0 and 1 (first matching row).
linha_musica = projection_music.loc[projection_music['song'] == nome_musica, [0, 1]]
x_musica, y_musica = linha_musica.iloc[0].tolist()

In [None]:
distances = euclidean_distances(music_recommended[[0, 1]], [[x_musica, y_musica]])
music_recommended['distances'] = distances
recommended = music_recommended.sort_values('distances').head(10)
recommended


<h1>Spotify</h1>

In [None]:
# OAuth manager built from the settings stored in the environment.
# NOTE(review): `Oauth` is not referenced by the other cells visible here.
Oauth = SpotifyOAuth(
    client_id=os.environ.get('spotify_client_id'),
    client_secret=os.environ.get('spotify_client_secret'),
    redirect_uri=os.environ.get('spotify_redirect_url'),
    scope=os.environ.get('spotify_scope'),
)


In [None]:
# Spotify client authenticated with the client id/secret taken from the
# environment; `sp` is the object used for the track lookups.
client_credential_manager = SpotifyClientCredentials(
    client_secret=os.environ.get('spotify_client_secret'),
    client_id=os.environ.get('spotify_client_id'),
)
sp = spy.Spotify(client_credentials_manager=client_credential_manager)


In [None]:
# Track id of the chosen song (first matching row of the main table).
# NOTE(review): the name `id` shadows Python's builtin id() for the rest of
# the notebook; kept because later cells may rely on it.
id = dados.loc[dados['artists_song'] == nome_musica, 'id'].iloc[0]

In [60]:
def recommend_id(playlist):
    """Fetch the title and an album-cover URL for each Spotify track id.

    Each id is looked up with the notebook-level client ``sp`` (one
    ``sp.track`` call per id).

    Args:
        playlist: Iterable of track ids, each passed to ``sp.track``.

    Returns:
        Tuple ``(names, urls)`` of two lists in the same order as
        ``playlist``. ``urls[i]`` is the URL of the second image listed for
        the track's album; when the album lists a single image that one is
        used, and when it lists none the entry is ``None`` so both lists
        stay aligned.
    """
    names = []
    urls = []

    for track_id in playlist:
        track = sp.track(track_id)
        names.append(track['name'])

        # Indexing images[1] directly raised IndexError for albums that list
        # fewer than two images; fall back instead of aborting the whole loop.
        images = track['album']['images']
        if len(images) > 1:
            urls.append(images[1]['url'])
        elif images:
            urls.append(images[0]['url'])
        else:
            urls.append(None)
    return names, urls


In [63]:
names, urls = recommend_id(recommended['id'])
urls

['https://i.scdn.co/image/ab67616d00001e02ba5db46f4b838ef6027e6f96',
 'https://i.scdn.co/image/ab67616d00001e025ef878a782c987d38d82b605',
 'https://i.scdn.co/image/ab67616d00001e0288e3cda6d29b2552d4d6bc43',
 'https://i.scdn.co/image/ab67616d00001e0273304ce0653c7758dd94b259',
 'https://i.scdn.co/image/ab67616d00001e02b46b5fb6fdc6f5c720fcb183',
 'https://i.scdn.co/image/ab67616d00001e02891f0552bed344dc2bfe322f',
 'https://i.scdn.co/image/ab67616d00001e022e3aeb1879f589ff44304411',
 'https://i.scdn.co/image/ab67616d00001e0207aa1426cb2b3cfd8ad67c64',
 'https://i.scdn.co/image/ab67616d00001e022cbb5f97a0853d1c7cbbe231',
 'https://i.scdn.co/image/ab67616d00001e02f9f2d43ff44bdfbe8c556f8d']