<a href="https://colab.research.google.com/github/Elimeleth/aprende_ml/blob/main/Songs_Recomendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install -qqq implicit

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[?25h

In [93]:
"""This module features functions and classes to manipulate data for the
collaborative filtering algorithm.
"""

from pathlib import Path

import scipy
import pandas as pd


def load_user_artists(user_artists_file: Path) -> scipy.sparse.csr_matrix:
    """Load the user artists file and return a user-artists matrix in csr
    fromat.
    """
    user_artists = pd.read_csv(user_artists_file, sep=" ")
    user_artists.set_index(["musicId", "artistId"], inplace=True)
    print(user_artists.head())
    coo = scipy.sparse.coo_matrix(
        (
            user_artists.weight.astype(float),
            (
                user_artists.index.get_level_values(0),
                user_artists.index.get_level_values(1),
            ),
        )
    )
    return coo.tocsr()


class ArtistRetriever:
    """The ArtistRetriever class gets the artist name from the artist ID."""

    def __init__(self):
        self._artists_df = None

    def get_artist_name_from_id(self, artist_id: int) -> str:
        """Return the artist name from the artist ID."""
        return self._artists_df.loc[artist_id].to_dict()

    def df(self):
      return self._artists_df

    def load_artists(self, artists_file: Path) -> None:
        """Load the artists file and stores it as a Pandas dataframe in a
        private attribute.
        """
        artists_df = pd.read_csv(artists_file, sep=" ")
        artists_df = artists_df.set_index("id")
        self._artists_df = artists_df

In [95]:

# El JSON que proporcionaste
data = [
    {"id": 1, "name": "Air Supply", "music_name": "All Out of Love"},
    {"id": 2, "name": "Lionel Richie", "music_name": "Hello"},
    {"id": 3, "name": "Richard Marx", "music_name": "Right Here Waiting"},
    {"id": 4, "name": "Chicago", "music_name": "Hard to Say I'm Sorry"},
    {"id": 5, "name": "Foreigner", "music_name": "I Want to Know What Love Is"},
    {"id": 6, "name": "REO Speedwagon", "music_name": "Can't Fight This Feeling"},
    {"id": 7, "name": "Journey", "music_name": "Don't Stop Believin'"},
    {"id": 8, "name": "Kenny Rogers", "music_name": "The Gambler"},
    {"id": 9, "name": "Phil Collins", "music_name": "In the Air Tonight"},
    {"id": 10, "name": "Bryan Adams", "music_name": "(Everything I Do) I Do It for You"},
    {"id": 11, "name": "Toto", "music_name": "Africa"},
    {"id": 12, "name": "Michael Bolton", "music_name": "How Am I Supposed to Live Without You"},
    {"id": 13, "name": "Spandau Ballet", "music_name": "True"},
    {"id": 14, "name": "Christopher Cross", "music_name": "Sailing"},
    {"id": 15, "name": "Bonnie Tyler", "music_name": "Total Eclipse of the Heart"},
    {"id": 16, "name": "Bee Gees", "music_name": "Stayin' Alive"},
    {"id": 17, "name": "Rod Stewart", "music_name": "Maggie May"},
    {"id": 18, "name": "Bread", "music_name": "Make It with You"},
    {"id": 19, "name": "Barry Manilow", "music_name": "Mandy"},
    {"id": 20, "name": "Elton John", "music_name": "Rocket Man"},
    {"id": 21, "name": "Lobo", "music_name": "I'd Love You to Want Me"},
    {"id": 22, "name": "Dan Hill", "music_name": "Sometimes When We Touch"},
    {"id": 23, "name": "Billy Joel", "music_name": "Piano Man"},
    {"id": 24, "name": "Bette Midler", "music_name": "The Rose"},
    {"id": 25, "name": "Carole King", "music_name": "It's Too Late"},
    {"id": 26, "name": "Kenny Loggins", "music_name": "Footloose"},
    {"id": 27, "name": "Celine Dion", "music_name": "My Heart Will Go On"},
    {"id": 28, "name": "Peabo Bryson", "music_name": "A Whole New World"},
    {"id": 29, "name": "Dolly Parton", "music_name": "Jolene"},
    {"id": 30, "name": "Neil Diamond", "music_name": "Sweet Caroline"},
    {"id": 31, "name": "Barbra Streisand", "music_name": "Woman in Love"},
    {"id": 32, "name": "Peter Cetera", "music_name": "Glory of Love"},
    {"id": 33, "name": "Roxette", "music_name": "It Must Have Been Love"},
    {"id": 34, "name": "Hall & Oates", "music_name": "Maneater"},
    {"id": 35, "name": "Anne Murray", "music_name": "Snowbird"},
    {"id": 36, "name": "Dionne Warwick", "music_name": "Walk On By"},
    {"id": 37, "name": "Leo Sayer", "music_name": "You Make Me Feel Like Dancing"},
    {"id": 38, "name": "Sheena Easton", "music_name": "Morning Train (Nine to Five)"},
    {"id": 39, "name": "Olivia Newton-John", "music_name": "Physical"},
    {"id": 40, "name": "Stevie Wonder", "music_name": "Superstition"},
    {"id": 41, "name": "Debbie Gibson", "music_name": "Lost in Your Eyes"},
    {"id": 42, "name": "Paul Anka", "music_name": "Put Your Head on My Shoulder"},
    {"id": 43, "name": "James Ingram", "music_name": "Just Once"},
    {"id": 44, "name": "Tina Turner", "music_name": "What's Love Got to Do with It"},
    {"id": 45, "name": "Cliff Richard", "music_name": "We Don't Talk Anymore"},
    {"id": 46, "name": "Cyndi Lauper", "music_name": "Girls Just Want to Have Fun"},
    {"id": 47, "name": "John Denver", "music_name": "Take Me Home, Country Roads"},
    {"id": 48, "name": "Boz Scaggs", "music_name": "Lowdown"},
    {"id": 49, "name": "Patti LaBelle", "music_name": "Lady Marmalade"},
    {"id": 50, "name": "Pointer Sisters", "music_name": "I'm So Excited"}
]

# Convertir a DataFrame
df = pd.DataFrame(data)

dat_file_path = 'artists.dat'
df.to_csv(dat_file_path, sep=' ', index=False)
# Mostrar el DataFrame
df.head()

Unnamed: 0,id,name,music_name
0,1,Air Supply,All Out of Love
1,2,Lionel Richie,Hello
2,3,Richard Marx,Right Here Waiting
3,4,Chicago,Hard to Say I'm Sorry
4,5,Foreigner,I Want to Know What Love Is


In [96]:
artist_retriever = ArtistRetriever()
artist_retriever.load_artists(Path("artists.dat"))
artist = artist_retriever.get_artist_name_from_id(1)
print(artist)
artist_retriever.df().head()

{'name': 'Air Supply', 'music_name': 'All Out of Love'}


Unnamed: 0_level_0,name,music_name
id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Air Supply,All Out of Love
2,Lionel Richie,Hello
3,Richard Marx,Right Here Waiting
4,Chicago,Hard to Say I'm Sorry
5,Foreigner,I Want to Know What Love Is


In [97]:
"""This module features the ImplicitRecommender class that performs
recommendation using the implicit library.
"""


from pathlib import Path
from typing import Tuple, List

import implicit
import scipy

class ImplicitRecommender:
    """The ImplicitRecommender class computes recommendations for a given user
    using the implicit library.

    Attributes:
        - artist_retriever: an ArtistRetriever instance
        - implicit_model: an implicit model
    """

    def __init__(
        self,
        artist_retriever: ArtistRetriever,
        implicit_model: implicit.recommender_base.RecommenderBase,
    ):
        self.artist_retriever = artist_retriever
        self.implicit_model = implicit_model

    def fit(self, user_artists_matrix: scipy.sparse.csr_matrix) -> None:
        """Fit the model to the user artists matrix."""
        self.implicit_model.fit(user_artists_matrix)

    def recommend(
        self,
        user_id: int,
        user_artists_matrix: scipy.sparse.csr_matrix,
        n: int = 10,
    ) -> Tuple[List[str], List[float]]:
          """Return the top n recommendations for the given user."""
          artist_ids, scores = self.implicit_model.recommend(user_id, user_artists_matrix[n], N=n)
          artists = [
              self.artist_retriever.get_artist_name_from_id(artist_id)
              for artist_id in artist_ids
          ]
          return artists, scores

In [98]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Parámetros del dataset
n_users = 100  # Número de usuarios
n_artists = 50  # Número de artistas

# Generar datos aleatorios
np.random.seed(42)
music_ids = np.random.randint(1, n_users + 1, size=500)  # IDs de usuarios
artist_ids = np.random.randint(1, n_artists + 1, size=500)  # IDs de artistas

# Generar interacciones
reproductions = np.random.randint(0, 100, size=500)  # Número de reproducciones
favorites = np.random.randint(0, 2, size=500)  # 1 = Me gusta, 0 = No
skips = np.random.randint(0, 10, size=500)  # Número de saltos

# Generar datos adicionales para tiempo de reproducción
total_song_duration = np.random.randint(180, 360, size=500)  # Duración total de la canción (en segundos)
playback_time = np.random.randint(0, total_song_duration + 1, size=500)  # Tiempo de reproducción (en segundos)

# Crear DataFrame
df_root = pd.DataFrame({
    'music_id': music_ids,
    'artist_id': artist_ids,
    'reproductions': reproductions,
    'favorites': favorites,
    'skips': skips,
    'total_song_duration': total_song_duration,
    'playback_time': playback_time
})

df = df_root.copy()
# # Calcular el porcentaje de reproducción
df['playback_percentage'] = df['playback_time'] / df['total_song_duration']

# # Ajustar interacciones en función de las reproducciones, favoritos, saltos y porcentaje de reproducción
df['adjusted_interaction'] = (
    (df['reproductions'] + (5 * df['favorites'])) - (0.5 * df['skips'])
) * df['playback_percentage']

# Asegurarse de que los valores negativos sean ajustados a 0
df['adjusted_interaction'] = df['adjusted_interaction'].clip(lower=0)
# # Aplicar MinMaxScaler a las columnas que deben ser normalizadas
scaler = MinMaxScaler()

df[["adjusted_interaction"]] = scaler.fit_transform(
    df[["adjusted_interaction"]]
)

# Seleccionar columnas finales y renombrarlas
df = df[['music_id', 'artist_id', 'adjusted_interaction']].rename(columns={
    'music_id': 'musicId',
    'artist_id': 'artistId',
    'adjusted_interaction': 'weight'
}).fillna(0)

# rows_with_nan = df[df.isna().any(axis=1)].index
# print(rows_with_nan)

# # Guardar en formato .dat
dat_file_path = 'user_artists.dat'
df.to_csv(dat_file_path, sep=' ', index=False)

df.head()

Unnamed: 0,musicId,artistId,weight
0,52,12,0.371531
1,93,26,0.314161
2,15,16,0.027332
3,72,37,0.161505
4,61,22,0.153845


In [99]:

import time
# print results
start = time.time()

 # load user artists matrix
user_artists = load_user_artists(Path('user_artists.dat'))

# # instantiate artist retriever
artist_retriever = ArtistRetriever()
artist_retriever.load_artists(Path("artists.dat"))

# instantiate ALS using implicit
implict_model = implicit.als.AlternatingLeastSquares(
    factors=50, iterations=10, regularization=0.01
)

# instantiate recommender, fit, and recommend
recommender = ImplicitRecommender(artist_retriever, implict_model)
recommender.fit(user_artists)
artists, scores = recommender.recommend(72, user_artists, n=7)

for artist, score in zip(artists, scores):
    print(f"{artist}: {score}")

print(f"Tiempo de ejecución: {(time.time() - start)*60} segundos")

                    weight
musicId artistId          
52      12        0.371531
93      26        0.314161
15      16        0.027332
72      37        0.161505
61      22        0.153845


  0%|          | 0/10 [00:00<?, ?it/s]

{'name': 'Lobo', 'music_name': "I'd Love You to Want Me"}: 0.9742953777313232
{'name': 'Kenny Loggins', 'music_name': 'Footloose'}: 0.9694312214851379
{'name': 'Leo Sayer', 'music_name': 'You Make Me Feel Like Dancing'}: 0.967601478099823
{'name': 'Debbie Gibson', 'music_name': 'Lost in Your Eyes'}: 0.20487016439437866
{'name': 'REO Speedwagon', 'music_name': "Can't Fight This Feeling"}: 0.007460238412022591
{'name': 'Spandau Ballet', 'music_name': 'True'}: 0.004135109484195709
{'name': 'Kenny Rogers', 'music_name': 'The Gambler'}: 0.003856886178255081
Tiempo de ejecución: 7.066612243652344 segundos
