## Recomendador básico

Instalar el Frameword Surprise

In [1]:
!pip install numpy
!pip install scikit-surprise



Importamos las bibliotecas necesarias para nuestro recomendador

In [2]:
# Cargamos las librerías necesarias de Pandas
import pandas as pd
import numpy as np

In [3]:
# Cargamos las librerías necesarias de Surprise
from surprise import Dataset
from surprise import Reader
from surprise import KNNBasic
from surprise.model_selection import train_test_split
from surprise.model_selection import cross_validate
from surprise import accuracy

Cargamos el dataset en pandas para luego pasarlo a surprise

In [4]:
# Cargamos el CSV con el dataframe de Pandas
df = pd.read_csv('songs_dataset.csv')

reader = Reader(rating_scale=(1, 30))

# Vamos a transformar los distintos géneros a números para que el recomendador funcione por recomendación por género
# Con esto evitamos los repetidos
distinct_genres = df['top genre'].unique()

# Asignamos números a los géneros
value_to_number = {genre: i+1 for i, genre in enumerate(distinct_genres)}

# Creamos una nueva columna que es la misma que géneros pero en números
df['genre number'] = df['top genre'].map(value_to_number)

# Load the Pandas DataFrame into a Surprise Dataset
data = Dataset.load_from_df(df[['artist', 'title', 'genre number']], reader)

In [5]:
df

Unnamed: 0.1,Unnamed: 0,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,genre number
0,1,"Hey, Soul Sister",Train,neo mellow,2010,97,89,67,-4,8,80,217,19,4,83,1
1,2,Love The Way You Lie,Eminem,detroit hip hop,2010,87,93,75,-5,52,64,263,24,23,82,2
2,3,TiK ToK,Kesha,dance pop,2010,120,84,76,-3,29,71,200,10,14,80,3
3,4,Bad Romance,Lady Gaga,dance pop,2010,119,92,70,-4,8,71,295,0,4,79,3
4,5,Just the Way You Are,Bruno Mars,pop,2010,109,84,64,-5,9,43,221,2,4,78,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598,599,Find U Again (feat. Camila Cabello),Mark Ronson,dance pop,2019,104,66,61,-7,20,16,176,1,3,75,3
599,600,Cross Me (feat. Chance the Rapper & PnB Rock),Ed Sheeran,pop,2019,95,79,75,-6,7,61,206,21,12,75,4
600,601,"No Brainer (feat. Justin Bieber, Chance the Ra...",DJ Khaled,dance pop,2019,136,76,53,-5,9,65,260,7,34,70,3
601,602,Nothing Breaks Like a Heart (feat. Miley Cyrus),Mark Ronson,dance pop,2019,114,79,60,-6,42,24,217,1,7,69,3


Entrenar los datos

In [7]:
# Crear un modelo modelo k-NN
sim_options = {
    'name': 'cosine',  
    'user_based': False  # Recomendador basado en contenidos
}
clf = KNNBasic(sim_options=sim_options, k=50, verbose=True)

In [8]:
#Measures probar con RMSE o MAE, cuál sea el mejor
cv = cross_validate(clf, data, measures=['RMSE', 'MAE'], cv=7, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 7 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Mean    Std     
RMSE (testset)    8.2087  11.9439 10.6893 11.2006 10.9430 10.3284 9.4015  10.3879 1.1468  
MAE (testset)     6.3624  8.2033  7.1137  8.1497  7.8423  7.7904  7.0704  7.5046  0.6263  
Fit time          0.00    0.01    0.00    0.00    0.00    0.00    0.00    0.00    0.00    
Test time         0.00    0.00    0.00    0.0

Función para determinar canciones similares

In [10]:
#Para hacer la búsqueda si la canción está en los datos entrenados
items_ids = [clf.trainset.to_raw_iid(iid) for iid in clf.trainset.all_items()]

In [11]:
def get_similar_songs(song_title, k=10):
    try: 
        # Comprueba si la canción está en los datos entrenados
        if song_title not in items_ids:
            return []  # Devuelve una lista vacía si no está entre los datos entrenados

        # Obtiene el id de la canción para luego pasarlo al algoritmo
        song_id = clf.trainset.to_inner_iid(song_title)

        # Usa el modelo k-NN para encontrar canciones similares
        similar_items = clf.get_neighbors(song_id, k)

        # Una vez tiene los ids de las canciones similares se pasa a los nombres reales de las canciones
        similar_songs = [clf.trainset.to_raw_iid(item_id) for item_id in similar_items]

        return similar_songs

    except IndexError as e:
        return []

Generar una "playlist" random con 10 canciones

In [12]:
import random

random_songs = df.sample(n=10)

In [13]:
random_songs

Unnamed: 0.1,Unnamed: 0,title,artist,top genre,year,bpm,nrgy,dnce,dB,live,val,dur,acous,spch,pop,genre number
437,438,Little Lies,Hilary Duff,dance pop,2016,124,76,57,-6,4,40,204,0,6,36,3
426,427,Keeping Your Head Up,Birdy,neo mellow,2016,122,78,60,-5,19,53,208,16,5,52,1
101,102,Last Friday Night (T.G.I.F.),Katy Perry,dance pop,2011,126,81,65,-4,67,72,231,0,4,27,3
100,101,Moment 4 Life - Album Version (Edited),Nicki Minaj,dance pop,2011,130,88,50,-4,22,37,279,39,38,28,3
565,566,Never Be the Same - Radio Edit,Camila Cabello,dance pop,2018,130,69,63,-4,12,25,227,16,9,57,3
143,144,Hall of Fame,The Script,celtic rock,2013,85,87,42,-4,12,63,203,7,6,80,20
277,278,"Love Me Like You Do - From ""Fifty Shades Of Grey""",Ellie Goulding,dance pop,2015,190,61,26,-7,13,28,253,25,5,79,3
79,80,I Wanna Go,Britney Spears,dance pop,2011,130,55,70,-7,33,79,210,0,4,64,3
517,518,Dusk Till Dawn - Radio Edit,ZAYN,dance pop,2018,180,44,26,-7,11,10,239,10,4,83,3
125,126,Domino,Jessie J,australian pop,2012,127,55,76,-5,4,78,232,1,3,69,9


Conseguir las recomendaciones 

In [14]:
similar_songs = []

for song_title in random_songs['title']:
    sim_songs = get_similar_songs(song_title, k=10)    
    
    #Para evitar canciones ya recomendadas antes
    for s in sim_songs:
        if s not in similar_songs:
            similar_songs.append(s)

In [16]:
similar_songs

['Sparks',
 'Talk Dirty (feat. 2 Chainz)',
 'No Guidance (feat. Drake)',
 'Invading My Mind',
 'Sweet Nothing (feat. Florence Welch)',
 'Boom Clap - From the Motion Picture Das Schicksal ist ein mieser Verräter',
 'Bon appétit',
 'Break Your Heart',
 'Genie In a Bottle',
 'Till the World Ends',
 'Not About Angels',
 'Words as Weapons',
 'Tee Shirt - Soundtrack Version',
 'Wings',
 'Firework',
 'Rise',
 'E.T.',
 'Roar',
 'Chained To The Rhythm',
 'California Gurls',
 'Legendary Lovers',
 'Part Of Me',
 'The One That Got Away',
 'Havana (feat. Young Thug)',
 'How Do You Sleep?',
 'I Don’t Wanna Live Forever (Fifty Shades Darker)',
 'Let Me']

Vamos a probar recomendación con una canción

In [19]:
similar_songs = get_similar_songs('Find U Again (feat. Camila Cabello)', k=10)
similar_songs

['Nothing Breaks Like a Heart (feat. Miley Cyrus)',
 'Uptown Funk',
 'Talk Dirty (feat. 2 Chainz)',
 'No Guidance (feat. Drake)',
 'Invading My Mind',
 'Sweet Nothing (feat. Florence Welch)',
 'Boom Clap - From the Motion Picture Das Schicksal ist ein mieser Verräter',
 'Bon appétit',
 'Break Your Heart',
 'Genie In a Bottle']