## Recomendador básico

Instalar el Frameword Surprise

In [1]:
!pip install numpy
!pip install scikit-surprise



Importamos las bibliotecas necesarias para nuestro recomendador

In [51]:
from surprise import Dataset
from surprise import Reader
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import accuracy

Cargamos el dataset en pandas para luego pasarlo a surprise

In [52]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import KNNBasic

# Load the CSV data into a pandas DataFrame.
df = pd.read_csv('songs_dataset.csv')

# Define a reader to specify the rating scale
reader = Reader(rating_scale=(0, 100))  # Assuming the "pop" column represents ratings on a 0-100 scale

# Load the Pandas DataFrame into a Surprise Dataset
data = Dataset.load_from_df(df[['title', 'artist', 'pop']], reader)

Entrenar los datos

In [53]:
trainset, testset = train_test_split(data, test_size=0.2)

In [54]:
trainsetfull = data.build_full_trainset()

In [55]:
# Crear un modelo modelo k-NN
sim_options = {
    'name': 'pearson',  
    'user_based': False  # Recomendador basado en contenidos
}
clf = KNNBasic(sim_options=sim_options, k=20, verbose=True)

In [56]:
#Measures probar con RMSE o MAE, cuál sea el mejor
results = cross_validate(clf, data, measures=['RMSE'], cv=5, return_train_measures=True)

Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.
Computing the pearson similarity matrix...
Done computing similarity matrix.


In [57]:
results['test_rmse'].mean()

14.254662241012241

In [58]:
clf.fit(trainsetfull)

Computing the pearson similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x21f89ef2c10>

Función para determinar canciones similares

In [59]:
#Para hacer la búsqueda si la canción está en los datos entrenados
user_ids = [trainsetfull.to_raw_uid(uid) for uid in trainsetfull.all_users()]

In [60]:
def get_similar_songs(song_title, k=10):
    try: 
        # Comprueba si la canción está en los datos entrenados
        if song_title not in user_ids:
            return []  # Devuelve una lista vacía si no está entre los datos entrenados

        # Obtiene el id de la canción para luego pasarlo al algoritmo
        song_id = trainsetfull.to_inner_uid(song_title)

        # Usa el modelo k-NN para encontrar canciones similares
        similar_items = clf.get_neighbors(song_id, k)

        # Una vez tiene los ids de las canciones similares se pasa a los nombres reales de las canciones
        similar_songs = [trainsetfull.to_raw_uid(item_id) for item_id in similar_items]

        return similar_songs

    except IndexError as e:
        return []

Generar una "playlist" random con 10 canciones

In [61]:
import random

random_songs = df.sample(n=10)

In [62]:
random_songs

Unnamed: 0.1,Unnamed: 0,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop
484,485,OK - Spotify Version,Robin Schulz,dance pop,2017,122,81,65,-6,13,58,189,12,6,69
589,590,Giant (with Rag'n'Bone Man),Calvin Harris,dance pop,2019,122,89,81,-4,8,61,229,2,4,84
4,5,Just the Way You Are,Bruno Mars,pop,2010,109,84,64,-5,9,43,221,2,4,78
228,229,Burn,Ellie Goulding,dance pop,2014,87,78,56,-5,11,33,231,31,4,71
392,393,Close,Nick Jonas,dance pop,2016,124,62,65,-5,14,40,234,25,8,72
564,565,Filthy,Justin Timberlake,dance pop,2018,97,58,75,-6,25,65,294,4,14,62
322,323,Sugar,Maroon 5,pop,2015,120,79,75,-7,9,88,235,6,3,66
499,500,Lust for Life (with The Weeknd),Lana Del Rey,art pop,2017,100,67,51,-9,36,27,264,58,6,54
488,489,Kissing Strangers,DNCE,dance pop,2017,120,74,77,-6,9,86,202,5,4,66
299,300,"Hey Mama (feat. Nicki Minaj, Bebe Rexha & Afro...",David Guetta,dance pop,2015,86,73,60,-4,33,53,193,24,15,72


Conseguir las recomendaciones 

In [63]:
similar_songs = []

for song_title in random_songs['title']:
    sim_songs = get_similar_songs(song_title, k=10)    
    
    #Para evitar canciones ya recomendadas antes
    for s in sim_songs:
        if s not in similar_songs:
            similar_songs.append(s)

In [64]:
for song in similar_songs:
    print(song)

Hey, Soul Sister
Love The Way You Lie
TiK ToK
Bad Romance
Baby
Dynamite
Secrets
Empire State of Mind (Part II) Broken Down
Only Girl (In The World)
Club Can't Handle Me (feat. David Guetta)
