## Recomendador básico

Instalar el Frameword Surprise

In [1]:
!pip install numpy
!pip install scikit-surprise



Importamos las bibliotecas necesarias para nuestro recomendador

In [2]:
# Cargamos las librerías necesarias de Pandas
import pandas as pd
import numpy as np

In [3]:
# Cargamos las librerías necesarias de Surprise
from surprise import Dataset
from surprise import Reader
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import accuracy

Cargamos el dataset en pandas para luego pasarlo a surprise

In [4]:
# Cargamos el CSV con el dataframe de Pandas
df = pd.read_csv('songs_dataset.csv')

reader = Reader(rating_scale=(1, 30))

# Vamos a transformar los distintos géneros a números para que el recomendador funcione por recomendación por género
# Con esto evitamos los repetidos
distinct_genres = df['top genre'].unique()

# Asignamos números a los géneros
value_to_number = {genre: i+1 for i, genre in enumerate(distinct_genres)}

# Creamos una nueva columna que es la misma que géneros pero en números
df['genre number'] = df['top genre'].map(value_to_number)

# Load the Pandas DataFrame into a Surprise Dataset
data = Dataset.load_from_df(df[['artist', 'title', 'genre number']], reader)

In [5]:
df

Unnamed: 0.1,Unnamed: 0,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,genre number
0,1,"Hey, Soul Sister",Train,neo mellow,2010,97,89,67,-4,8,80,217,19,4,83,1
1,2,Love The Way You Lie,Eminem,detroit hip hop,2010,87,93,75,-5,52,64,263,24,23,82,2
2,3,TiK ToK,Kesha,dance pop,2010,120,84,76,-3,29,71,200,10,14,80,3
3,4,Bad Romance,Lady Gaga,dance pop,2010,119,92,70,-4,8,71,295,0,4,79,3
4,5,Just the Way You Are,Bruno Mars,pop,2010,109,84,64,-5,9,43,221,2,4,78,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598,599,Find U Again (feat. Camila Cabello),Mark Ronson,dance pop,2019,104,66,61,-7,20,16,176,1,3,75,3
599,600,Cross Me (feat. Chance the Rapper & PnB Rock),Ed Sheeran,pop,2019,95,79,75,-6,7,61,206,21,12,75,4
600,601,"No Brainer (feat. Justin Bieber, Chance the Ra...",DJ Khaled,dance pop,2019,136,76,53,-5,9,65,260,7,34,70,3
601,602,Nothing Breaks Like a Heart (feat. Miley Cyrus),Mark Ronson,dance pop,2019,114,79,60,-6,42,24,217,1,7,69,3


Entrenar los datos

In [6]:
# Crear un modelo modelo k-NN
sim_options = {
    'name': 'cosine',  
    'user_based': False  # Recomendador basado en contenidos
}
clf = KNNBasic(sim_options=sim_options, k=50, verbose=True)

In [7]:
#Measures probar con RMSE o MAE, cuál sea el mejor
cv = cross_validate(clf, data, measures=['RMSE', 'MAE'], cv=7, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 7 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Mean    Std     
RMSE (testset)    12.3178 10.0481 10.2951 10.4023 11.0697 9.7654  8.7870  10.3836 1.0187  
MAE (testset)     8.0521  7.2663  7.2772  7.5710  7.8975  7.6030  6.8020  7.4956  0.3909  
Fit time          0.03    0.00    0.00    0.00    0.02    0.02    0.02    0.01    0.01    
Test time         0.00    0.00    0.00    0.0

Función para determinar canciones similares

In [8]:
#Para hacer la búsqueda si la canción está en los datos entrenados
items_ids = [clf.trainset.to_raw_iid(iid) for iid in clf.trainset.all_items()]

In [9]:
def get_similar_songs(song_title, k=10):
    try: 
        # Comprueba si la canción está en los datos entrenados
        if song_title not in items_ids:
            return []  # Devuelve una lista vacía si no está entre los datos entrenados

        # Obtiene el id de la canción para luego pasarlo al algoritmo
        song_id = clf.trainset.to_inner_iid(song_title)

        # Usa el modelo k-NN para encontrar canciones similares
        similar_items = clf.get_neighbors(song_id, k)

        # Una vez tiene los ids de las canciones similares se pasa a los nombres reales de las canciones
        similar_songs = [clf.trainset.to_raw_iid(item_id) for item_id in similar_items]

        return similar_songs

    except IndexError as e:
        return []

Generar una "playlist" random con 10 canciones

In [10]:
import random

random_songs = df.sample(n=10)

In [11]:
random_songs

Unnamed: 0.1,Unnamed: 0,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,genre number
114,115,Stronger (What Doesn't Kill You),Kelly Clarkson,dance pop,2012,116,94,56,-4,11,68,222,5,5,74,3
11,12,Marry You,Bruno Mars,pop,2010,145,83,62,-5,10,48,230,33,4,73,4
350,351,"Celebrate (From the Original Motion Picture ""P...",Pitbull,dance pop,2015,127,85,68,-3,6,93,193,1,6,50,3
95,96,Castle Walls (feat. Christina Aguilera),T.I.,atl hip hop,2011,80,86,45,-5,26,58,329,7,39,49,8
73,74,Yeah 3x,Chris Brown,dance pop,2011,130,88,71,-3,9,70,242,0,4,69,3
443,444,Shape of You,Ed Sheeran,pop,2017,96,65,83,-3,9,93,234,58,8,87,4
460,461,Praying,Kesha,dance pop,2017,73,39,58,-7,11,32,230,49,3,77,3
109,110,I Knew You Were Trouble.,Taylor Swift,pop,2012,77,47,62,-7,3,68,220,0,4,77,4
3,4,Bad Romance,Lady Gaga,dance pop,2010,119,92,70,-4,8,71,295,0,4,79,3
445,446,Starboy,The Weeknd,canadian contemporary r&b,2017,186,59,68,-7,14,49,230,14,28,85,33


Conseguir las recomendaciones 

In [12]:
similar_songs = []

for song_title in random_songs['title']:
    sim_songs = get_similar_songs(song_title, k=10)    
    
    #Para evitar canciones ya recomendadas antes
    for s in sim_songs:
        if s not in similar_songs:
            similar_songs.append(s)

In [13]:
similar_songs

['Finesse - Remix; feat. Cardi B',
 "That's What I Like",
 'Just the Way You Are',
 'Locked Out of Heaven',
 '24K Magic',
 'It Will Rain',
 'Treasure',
 'Grenade',
 'Young Girls',
 'Imma Be',
 'Give Me Everything',
 'Fireball (feat. John Ryan)',
 "Don't Stop the Party (feat. TJR)",
 'Outta Nowhere (feat. Danny Mercer)',
 'Time of Our Lives',
 "Messin' Around",
 'Feel This Moment (feat. Christina Aguilera)',
 'International Love',
 'Greenlight (feat. Flo Rida & LunchMoney Lewis)',
 'Genie In a Bottle',
 'Written in the Stars (feat. Eric Turner)',
 'All Around The World (La La La)',
 'Chained To The Rhythm',
 'Yesterday (feat. Bebe Rexha)',
 'Tequila',
 'Break Your Heart',
 'Your Love Is My Drug',
 'I Took A Pill In Ibiza - Seeb Remix',
 'No Guidance (feat. Drake)',
 'Turn Up the Music',
 "Don't",
 "I Don't Care (with Justin Bieber)",
 'South of the Border (feat. Camila Cabello & Cardi B)',
 'Thinking out Loud',
 'Beautiful People (feat. Khalid)',
 'Antisocial (with Travis Scott)',
 'Sin

Vamos a probar recomendación con una canción

In [14]:
similar_songs = get_similar_songs('Find U Again (feat. Camila Cabello)', k=10)
similar_songs

[]