In [34]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset

In [35]:
import numpy as np
import scipy.sparse as sparse
from lightfm import LightFM
from sklearn.model_selection import train_test_split

class SongRecommender:
    """Song recommender that wraps a LightFM model and the ID <-> index mappings.

    Attributes:
        model: The underlying ``LightFM`` instance.
        epochs: Number of training epochs used by :meth:`fit`.
        user_mapping: Dict mapping each raw user ID to its row index in the
            interaction matrix (empty until ``create_interaction_matrix`` runs).
        song_mapping: Dict mapping each raw song ID to its column index in the
            interaction matrix (empty until ``create_interaction_matrix`` runs).
    """

    def __init__(self, learning_rate=0.05, epochs=50, num_components=30, loss='warp',
                 random_state=None):
        """Create the underlying LightFM model.

        Args:
            learning_rate: Passed to ``LightFM(learning_rate=...)``.
            epochs: Number of epochs :meth:`fit` trains for.
            num_components: Passed to ``LightFM(no_components=...)``.
            loss: Passed to ``LightFM(loss=...)``.
            random_state: Optional seed forwarded to ``LightFM(random_state=...)``
                so that model initialisation can be made repeatable.
        """
        self.model = LightFM(learning_rate=learning_rate,
                             no_components=num_components,
                             loss=loss,
                             random_state=random_state)
        self.epochs = epochs
        # Start with empty mappings so that recommend_songs() raises its own
        # ValueError (not AttributeError) when called before data is loaded.
        self.user_mapping = {}
        self.song_mapping = {}
        self._index_to_song = []

    def create_interaction_matrix(self, user_ids, song_ids, play_counts):
        """Build the user x song interaction matrix and remember the ID mappings.

        Args:
            user_ids: 1-D sequence of raw user IDs, one per interaction.
            song_ids: 1-D sequence of raw song IDs, aligned with ``user_ids``.
            play_counts: 1-D sequence of play counts aligned with ``user_ids``;
                values must be convertible to float32. Missing values (NaN)
                are treated as 0 plays.

        Returns:
            A ``scipy.sparse`` CSR matrix of shape (n_users, n_songs) holding
            ``log1p(play_count)``. Repeated (user, song) pairs are summed by
            the COO -> CSR conversion.

        Raises:
            ValueError: If a play count cannot be converted to a float.
        """
        # return_inverse yields, for every input element, its position in the
        # sorted unique array -- the same index the mapping dicts assign.
        unique_users, mapped_users = np.unique(np.asarray(user_ids), return_inverse=True)
        unique_songs, mapped_songs = np.unique(np.asarray(song_ids), return_inverse=True)

        self.user_mapping = {user: idx for idx, user in enumerate(unique_users)}
        self.song_mapping = {song: idx for idx, song in enumerate(unique_songs)}
        # Column index -> song ID, cached for recommend_songs().
        self._index_to_song = list(unique_songs)

        # astype() returns a fresh array, so the in-place NaN fix below never
        # touches the caller's data.
        numeric_play_counts = np.array(play_counts).astype(np.float32)
        numeric_play_counts[np.isnan(numeric_play_counts)] = 0.0
        # log1p dampens the effect of very large play counts.
        normalized_counts = np.log1p(numeric_play_counts)

        interaction_matrix = sparse.coo_matrix(
            (normalized_counts, (mapped_users, mapped_songs)),
            shape=(len(unique_users), len(unique_songs))
        )

        return interaction_matrix.tocsr()

    def fit(self, interaction_matrix, num_threads=4, verbose=True):
        """Train the model on ``interaction_matrix`` for ``self.epochs`` epochs.

        Args:
            interaction_matrix: Matrix produced by ``create_interaction_matrix``.
            num_threads: Forwarded to ``LightFM.fit`` (default 4, as before).
            verbose: Forwarded to ``LightFM.fit`` (default True, as before).
        """
        self.model.fit(interaction_matrix,
                       epochs=self.epochs,
                       num_threads=num_threads,
                       verbose=verbose)

    def recommend_songs(self, user_id, n_recommendations=10):
        """Return the highest-scoring songs for a known user.

        Every song in ``song_mapping`` is scored, including songs the user has
        already played.

        Args:
            user_id: Raw user ID; must be a key of ``user_mapping``.
            n_recommendations: Maximum number of songs to return.

        Returns:
            Tuple ``(recommendations, scores)``: a list of raw song IDs ordered
            by descending predicted score, and the matching array of scores.

        Raises:
            ValueError: If ``user_id`` is not in ``user_mapping``.
        """
        if user_id not in self.user_mapping:
            raise ValueError("User ID not found in training data")

        user_idx = self.user_mapping[user_id]
        n_songs = len(self.song_mapping)

        scores = self.model.predict(user_idx, np.arange(n_songs))

        # Negate so that argsort's ascending order yields descending scores.
        top_song_indices = np.argsort(-scores)[:n_recommendations]

        index_to_song = getattr(self, '_index_to_song', None)
        if index_to_song is None or len(index_to_song) != n_songs:
            # song_mapping was assigned outside create_interaction_matrix();
            # rebuild the reverse lookup from it and cache the result.
            index_to_song = {idx: song for song, idx in self.song_mapping.items()}
            self._index_to_song = index_to_song

        recommendations = [index_to_song[idx] for idx in top_song_indices]

        return recommendations, scores[top_song_indices]

In [36]:
# The first line of the file is a header row (it used to be read as data and
# then sliced off, which forced every column to string/object dtype).
# header=0 consumes that line and `names` replaces its labels, so pandas can
# infer a proper dtype for each column.
df = pd.read_csv('song_dataset.csv', header=0,
                 names=['user_id', 'song_id', 'play_count', 'title', 'album', 'artist', 'year'])

# Keep the 1-based row labels (first data row == label 1) that the previous
# `df[1:]` slice produced, so label-based lookups on df are unaffected.
df.index = pd.RangeIndex(start=1, stop=len(df) + 1)


In [37]:
# Build the user x song interaction matrix and train the recommender.
# LightFM data docs: https://making.lyst.com/lightfm/docs/lightfm.data.html
#
# play_count can arrive as strings and can contain missing values, so convert
# it to numeric and fill the gaps with 0 before building the matrix.
# TODO: decide what to do with non-numeric string values; for now they are
# coerced to NaN and therefore counted as 0 plays.
play_counts_numeric = pd.to_numeric(df['play_count'], errors='coerce').fillna(0)

recomender = SongRecommender()

interaction_matrix = recomender.create_interaction_matrix(df['user_id'], df['song_id'], play_counts_numeric)
recomender.fit(interaction_matrix)



Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
Epoch 10
Epoch 11
Epoch 12
Epoch 13
Epoch 14
Epoch 15
Epoch 16
Epoch 17
Epoch 18
Epoch 19
Epoch 20
Epoch 21
Epoch 22
Epoch 23
Epoch 24
Epoch 25
Epoch 26
Epoch 27
Epoch 28
Epoch 29
Epoch 30
Epoch 31
Epoch 32
Epoch 33
Epoch 34
Epoch 35
Epoch 36
Epoch 37
Epoch 38
Epoch 39
Epoch 40
Epoch 41
Epoch 42
Epoch 43
Epoch 44
Epoch 45
Epoch 46
Epoch 47
Epoch 48
Epoch 49


In [47]:
USER_INDEX = 1
# Look the listener up once (by row label) and reuse the value below.
target_user = df['user_id'][USER_INDEX]
song_id, score = recomender.recommend_songs(target_user)

print(f"Recommendations for user {target_user}")
for rank, (song, song_score) in enumerate(zip(song_id, score), start=1):
    # Title taken from the first row of df that carries this song ID.
    title = df.loc[df['song_id'] == song, 'title'].iloc[0]
    print(f"{rank}. {title} with score {song_score}")

Recommendations for user b80344d063b5ccb3212f76538f3d9e43d87dca9e
1. Givin' It Up (Album Version) with score 0.4817662537097931
2. Wa Winjigo Ero with score 0.2768052816390991
3. Speed Of Sound with score 0.1840609461069107
4. Nice To Fit In with score 0.17327721416950226
5. It's a Trip with score 0.13761737942695618
6. Food Chain (Album Version) with score 0.1324848234653473
7. My Love Has Gone with score 0.056497592478990555
8. Chiro with score 0.045754846185445786
9. Leap Year with score -0.029971202835440636
10. Neural Rust with score -0.03608044609427452


In [42]:
# Reference: predicting for a new user with LightFM
# https://stackoverflow.com/questions/68857138/predict-new-user-using-lightfm

In [43]:
# Placeholder ID for a user to be scored as a newcomer.
new_user = "new_user"
# Sample from the distinct song IDs: sampling rows of df could return the same
# song twice. The fixed seed makes a re-run pick the same five songs.
five_random_songs = df['song_id'].drop_duplicates().sample(5, random_state=42).values
# One play count per sampled song, in the same order.
playcounts = [1, 2, 4, 5, 6]


