In [48]:
pip install python-dotenv spotipy

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.1.1 -> 24.2
[notice] To update, run: C:\Users\rodyv\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


# Extraccion de canciones "Me gusta" y creacion de playlist

## Obtencion de canciones con me gusta

In [49]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

### Autenticacion

In [50]:
from dotenv import load_dotenv
import os
import spotipy
from spotipy.oauth2 import SpotifyOAuth

# Cargar las variables desde el archivo .env
load_dotenv(".env")

scope = 'user-library-read,playlist-modify-public,playlist-modify-private'

sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=os.getenv('SPOTIPY_CLIENT_ID'),
    client_secret=os.getenv('SPOTIPY_CLIENT_SECRET'),
    redirect_uri=os.getenv('SPOTIPY_REDIRECT_URI'),
    scope=scope
))


### Obtener canciones

In [51]:

results = sp.current_user_saved_tracks()
tracks = results['items']

while results['next']:
    results = sp.next(results)
    tracks.extend(results['items'])

print(f'Total de canciones: {len(tracks)}')


Total de canciones: 573


### DF con las canciones y sus generos asociados

In [67]:
# Extraer detalles de las canciones
track_data = []
for item in tracks:
    track = item['track']
    artist_info = sp.artist(track['artists'][0]['id'])
    track_data.append({
        'id': track['id'],
        'name': track['name'],
        'artist': track['artists'][0]['name'],
        'album': track['album']['name'],
        'genre': artist_info['genres']
    })


df = pd.DataFrame(track_data)
df.head()


[{'danceability': 0.761, 'energy': 0.535, 'key': 8, 'loudness': -7.665, 'mode': 1, 'speechiness': 0.06, 'acousticness': 0.882, 'instrumentalness': 0, 'liveness': 0.183, 'valence': 0.899, 'tempo': 90.965, 'type': 'audio_features', 'id': '1qgtSWA2GbpAXfyC51M1ph', 'uri': 'spotify:track:1qgtSWA2GbpAXfyC51M1ph', 'track_href': 'https://api.spotify.com/v1/tracks/1qgtSWA2GbpAXfyC51M1ph', 'analysis_url': 'https://api.spotify.com/v1/audio-analysis/1qgtSWA2GbpAXfyC51M1ph', 'duration_ms': 153209, 'time_signature': 4}]


Unnamed: 0,id,name,artist,album,genre
0,1qgtSWA2GbpAXfyC51M1ph,Por Ti,4x3,Epílogo,[latin viral rap]


In [53]:
# todos los géneros
all_genres = set()
for genres in df['genre']:
    all_genres.update(genres)

all_genres = list(all_genres)
print(all_genres)
print(f'Total de géneros: {len(all_genres)}')

['dance rock', 'bases de freestyle', 'rap marseille', 'french soundtrack', 'viking metal', 'slavic folk metal', 'urbano mexicano', 'folk', 'nueva trova chilena', 'canadian metal', 'folk-pop', 'pop venezolano', 'rap metal espanol', 'drill espanol', 'urban contemporary', 'power metal', 'lo-fi vgm', 'mexican rock', 'ska mexicano', 'colombian indie', 'mexican hip hop', 'alternative rock', 'argentine alternative rock', 'icelandic indie', 'indie valenciana', 'gym phonk', 'pop rap', 'latin metal', 'canadian electropop', 'metalcore', 'glam punk', 'blues rock', 'reggae catala', 'russian metal', 'dream pop', 'neo-psychedelic', 'mexican pop', 'lounge', 'hopebeat', 'french hip hop', 'argentine telepop', 'spanish folk metal', 'hi-nrg', 'bedroom pop', 'argentine heavy metal', 'sad lo-fi', 'canto popular uruguayo', 'funk mtg', 'canadian punk', 'tatar pop', 'argentine reggae', 'hardcore punk espanol', 'disco', 'classic schlager', 'viral rap', "women's music", 'spanish rock', 'rap metal', 'rap', 'elect

sospechoso 

In [54]:
# ver si genero contine lgbtq o alguna variacion
lgbtq_genres = [genre for genre in all_genres if 'lgbtq' in genre.lower()]
print(lgbtq_genres)
# ver los registros que contienen lgbtq
lgbtq_tracks = df[df['genre'].apply(lambda x: any([genre in x for genre in lgbtq_genres]))]
lgbtq_tracks.head()


['lgbtq+ hip hop']


Unnamed: 0,name,artist,album,genre
403,INDUSTRY BABY (feat. Jack Harlow),Lil Nas X,INDUSTRY BABY (feat. Jack Harlow),[lgbtq+ hip hop]
480,Old Town Road - Remix,Lil Nas X,7 EP,[lgbtq+ hip hop]
559,MONTERO (Call Me By Your Name),Lil Nas X,MONTERO (Call Me By Your Name),[lgbtq+ hip hop]


## Clustering

La idea es usar TF-IDF (Term Frecuency - Inverse Document Frecuency) para representar los generos como vectores numericos

In [55]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans

In [56]:
genres_list = [' '.join(genres) for genres in df['genre']]
track_genre_mapping = []

for i, genres in enumerate(df['genre']):
    for genre in genres:
        track_genre_mapping.append({
            'track': df['name'][i],
            'artist': df['artist'][i],
            'genre': genre
        })

track_genre_df = pd.DataFrame(track_genre_mapping)

track_genre_df.head()

Unnamed: 0,track,artist,genre
0,Por Ti,4x3,latin viral rap
1,Locked out of Heaven,Bruno Mars,dance pop
2,Locked out of Heaven,Bruno Mars,pop
3,Umbrella,Rihanna,barbadian pop
4,Umbrella,Rihanna,pop


In [57]:
vectorizer = TfidfVectorizer(stop_words='english')
X = vectorizer.fit_transform(genres_list) 

### K-Means para agrupar las canciones
Se pueden seleccionar mejores parametros con el metodo del codo

In [58]:
n_clusters = 4  # establecido por preferencias personales

kmeans = KMeans(n_clusters=n_clusters, random_state=42)
df['cluster'] = kmeans.fit_predict(X)

# Mostrar las canciones agrupadas por cluster
for i in range(n_clusters):
    print(f"\nCluster {i+1} ({sum(df['cluster'] == i)} canciones):")
    cluster_tracks = df[df['cluster'] == i]
    print(cluster_tracks[['name', 'artist', 'genre']].head(5))


Cluster 1 (15 canciones):
                                                  name          artist  \
37   La leçon particulière - Bande originale du fil...     Francis Lai   
67                                             Idea 22  Gibran Alcocer   
105                                            Idea 10  Gibran Alcocer   
166                                          Haggstrom           Curly   
205                                        Kass' Theme           Mikel   

                                                 genre  
37   [classic soundtrack, french soundtrack, vintag...  
67                                     [ambient lo-fi]  
105                                    [ambient lo-fi]  
166                                        [lo-fi vgm]  
205                                        [lo-fi vgm]  

Cluster 2 (61 canciones):
                          name              artist  \
8                     El Viejo      La Vela Puerca   
12           Un problema menos  El Cuarteto De Nos 

In [61]:
for i in range(n_clusters):
    
    cluster_tracks = df[df['cluster'] == i] #
    if not cluster_tracks.empty:
        playlist_name = f"Cluster {i+1} Playlist"
        # podemos llamar a gpt para
        playlist = sp.user_playlist_create(user=sp.current_user()['id'], name=playlist_name) # Crear la playlist
        
        track_ids = []
        for _, row in cluster_tracks.iterrows():
            search_result = sp.search(q=f"{row['name']} artist:{row['artist']}", type='track')
            items = search_result['tracks']['items']
            if items:  # Verificar si hay resultados antes de acceder
                track_ids.append(items[0]['id']) # Añadir el ID de la primera pista
            else:
                print(f"No se encontró el track: {row['name']} de {row['artist']}") # Imprimir si no se encontró la pista

        # Añadir las pistas a la playlist
        if track_ids:
            sp.user_playlist_add_tracks(user=sp.current_user()['id'], playlist_id=playlist['id'], tracks=track_ids)
            print(f"Lista de reproducción creada: {playlist_name}")
        else:
            print(f"No se añadieron pistas a la playlist: {playlist_name}")


Lista de reproducción creada: Cluster 1 Playlist
Lista de reproducción creada: Cluster 2 Playlist
Lista de reproducción creada: Cluster 3 Playlist
No se encontró el track: You're so Cold de Two Feet
No se encontró el track: The Less I Know the Sexy Back de Gold Slides
No se encontró el track: Història d'Espanya (Explicada pels Espanyols) de Brams
No se encontró el track: The Most Mysterious Song on the Internet de Scooter
No se encontró el track: They Took the Like de BloodyD
No se encontró el track: L'acqua der tevere de Rancore
No se encontró el track: Danza dell'autumno rosa de Talco
No se encontró el track: thought it was (feat. Machine Gun Kelly & Travis Barker) de iann dior


HTTP Error for POST to https://api.spotify.com/v1/playlists/3sT2fz2bRZ82NRNn4MnQnl/tracks with Params: {'position': None} returned 400 due to Too many ids requested


SpotifyException: http status: 400, code:-1 - https://api.spotify.com/v1/playlists/3sT2fz2bRZ82NRNn4MnQnl/tracks:
 Too many ids requested, reason: None

In [10]:
df_tracks = pd.read_csv('PlaylistClasificador.csv')
df_tracks.describe()

Unnamed: 0,Duration (ms),Popularity,Danceability,Energy,Key,Loudness,Mode,Speechiness,Acousticness,Instrumentalness,Liveness,Valence,Tempo,Time Signature
count,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0,585.0
mean,217778.347009,45.62735,0.61417,0.722972,5.333333,-6.637672,0.509402,0.103893,0.22263,0.072597,0.193325,0.605734,121.460453,3.916239
std,68669.094553,23.051267,0.150513,0.197879,3.580771,3.605142,0.500339,0.103385,0.266919,0.223944,0.150234,0.229037,31.481807,0.390152
min,57818.0,0.0,0.0,0.0146,0.0,-31.047,0.0,0.0,1.8e-05,0.0,0.0324,0.0,0.0,0.0
25%,177789.0,32.0,0.516,0.619,2.0,-7.708,0.0,0.0396,0.0173,0.0,0.0963,0.442,95.047,4.0
50%,211653.0,48.0,0.621,0.764,6.0,-5.762,1.0,0.0612,0.103,1e-06,0.136,0.622,116.047,4.0
75%,249360.0,64.0,0.721,0.87,9.0,-4.397,1.0,0.116,0.362,0.000384,0.25,0.797,144.968,4.0
max,891152.0,88.0,0.94,0.991,11.0,1.099,1.0,0.841,0.994,0.973,0.984,0.978,207.771,5.0
