In [1]:
import pandas as pd
import numpy as np
from lightfm import LightFM
from lightfm.data import Dataset



In [None]:
def create_model(df):
    dataset = Dataset()
    dataset.fit(df['user_id'].unique(), df['song_id'].unique())
    
    interactions = dataset.build_interactions(
        ((row['user_id'], row['song_id'], float(row['play_count'])) 
         for idx, row in df.iterrows())
    )[0]
    
    model = LightFM(loss='warp')
    model.fit(interactions, epochs=30, num_threads=2)
    
    return model, dataset

def get_recommendations(model, dataset, df, user_index, num_recommendations=5):
    target_user_id = df['user_id'].unique()[user_index]
    n_items = len(dataset.mapping()[2])
    user_id_map = dataset.mapping()[0]
    user_internal_id = user_id_map[target_user_id]
    
    scores = model.predict(user_internal_id, np.arange(n_items))
    top_items = np.argsort(-scores)[:num_recommendations]
    
    item_map = {v: k for k, v in dataset.mapping()[2].items()}
    return [item_map[i] for i in top_items], target_user_id

In [2]:
df = pd.read_csv('song_dataset.csv', header=None, 
                 names=['user_id', 'song_id', 'play_count', 'title', 'album', 'artist', 'year'])


In [None]:
# Building the interactions matrix https://making.lyst.com/lightfm/docs/lightfm.data.html
# Some play_count values are missing, so we'll fill them with 0
# and convert the column to numeric because it's currently a string
'''TODO: Decide what we do with string values'''

df['play_count'] = pd.to_numeric(df['play_count'], errors='coerce').fillna(0)

dataset = Dataset()
dataset.fit(df['user_id'].unique(), df['song_id'].unique())

interactions = dataset.build_interactions(
    ((row['user_id'], row['song_id'], row['play_count']) 
     for idx, row in df.iterrows())
)[0]

In [None]:
model = LightFM(loss='warp')
model.fit(interactions, epochs=100, num_threads=2)

<lightfm.lightfm.LightFM at 0x147872eb0>

In [None]:
TARGET_USER = "1"

In [None]:
first_user = df['user_id'].iloc[1]

most_listened_songs = df[df['user_id'] == first_user].sort_values(by='play_count', ascending=False).head(5)
print("5 Most Listened Songs by the First User:")
print(most_listened_songs[['title', 'artist', 'play_count']])

first_user_internal_id = user_id_map[first_user]

scores = model.predict(first_user_internal_id, np.arange(n_items))
top_items = np.argsort(-scores)[:5]

recommended_song_ids = [item_map[item] for item in top_items]
recommended_songs = df[df['song_id'].isin(recommended_song_ids)].drop_duplicates('song_id')
print("\n5 Recommended Songs for the First User:")
print(recommended_songs[['title', 'artist']])

5 Most Listened Songs by the First User:
                                  title               artist  play_count
101                           Moonshine         Jack Johnson         8.0
47     Behind The Sea [Live In Chicago]   Panic At The Disco         6.0
50                   Do You Wanna Dance        Bobby Freeman         6.0
15         Apuesta Por El Rock 'N' Roll  Héroes del Silencio         5.0
56   No So Silent Night (album version)           Bob Rivers         5.0

5 Recommended Songs for the First User:
                                                 title                  artist
1509                                        Joe's Head           Kings Of Leon
2435                          Dog Days Are Over (Demo)  Florence + The Machine
2787                                          Boadicea                    Enya
4261  The Remedy (I Won't Worry) (New EQ'd LP Version)              Jason Mraz
7350                                     Invisible Sun              The Police


In [None]:
random_songs = df.sample(5)

#create a new user
new_user_id = df["user_id"][5] + "42"
for idx, row in random_songs.iterrows():
    copy = row.copy()
    copy['user_id'] = new_user_id
    df = df.add(copy)

dataset.fit(df['user_id'].unique(), df['song_id'].unique())
interactions = dataset.build_interactions(
    ((row['user_id'], row['song_id'], row['play_count']) 
     for idx, row in df.iterrows())
)[0]

model.fit(interactions, epochs=100, num_threads=2)

In [28]:

most_listened_songs_new_user = df[df['user_id'] == new_user_id].sort_values(by='play_count', ascending=False).head(5)
print("\n5 Most Listened Songs by the New User:")
print(most_listened_songs_new_user[['title', 'artist', 'play_count']])

print(new_user_id)
new_user_internal_id = user_id_map[new_user_id]



5 Most Listened Songs by the New User:
Empty DataFrame
Columns: [title, artist, play_count]
Index: []
useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user1

KeyError: 'useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user111useruser1user1user1user1user1useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user11useruser1user1user1user1user1111'