In [44]:
from implicit.evaluation import leave_k_out_split, precision_at_k, train_test_split
# from implicitMusic import ImplicitRecommender, ArtistRetriever
import implicitMusic
import implicit
from pathlib import Path
import pandas as pd
import numpy as np
import heapq

# Input files, relative to the notebook's working directory.
music_path="../data/music_info.csv"
df_path="../data/normalized_filtered_user_listening.csv"

# User/item interaction matrix consumed by the collaborative-filtering recommender.
user_artists = implicitMusic.load_user_artists(Path(df_path))

# Retriever the recommender is constructed with; it is fed both input files.
artist_retriever = implicitMusic.ArtistRetriever()
artist_retriever.load_artists(Path(music_path),Path(df_path))

# `load` is a classmethod that constructs and returns the trained model, so it
# is called on the class instead of on a freshly built instance that would be
# discarded immediately.
# NOTE: the variable keeps its original spelling ("implict") because other
# cells may reference it by that name.
implict_model = implicit.cpu.bpr.BayesianPersonalizedRanking.load('800-BPS.npz')
recommender = implicitMusic.ImplicitRecommender(artist_retriever, implict_model)

# Precomputed content-based kNN results: row i of `indices` lists the
# neighbours of item i and row i of `distances` holds the matching distances.
# np.genfromtxt defaults to float, so the neighbour indices are floats and have
# to be cast with int() before being used for positional lookups.
indices = np.genfromtxt('../Content-Based/kNN-indices.csv', delimiter=',')
distances= np.genfromtxt('../Content-Based/kNN-distances.csv', delimiter=',')

# Example of how to use the results for recommendations
def get_recommendations(item_index, indices, distances, n_recommendations=5):
    """Return the precomputed nearest neighbours of a single item.

    Args:
        item_index: Row of the query item in ``indices`` and ``distances``.
        indices: 2-D array whose row ``i`` lists the neighbours of item ``i``.
        distances: 2-D array aligned with ``indices`` holding the distance to
            each listed neighbour.
        n_recommendations: How many neighbours to return.

    Returns:
        A ``(neighbour_indices, neighbour_distances)`` tuple, each being the
        slice of columns ``1 .. n_recommendations`` of the query row.
    """
    # Column 0 is skipped because it is the query item itself.
    neighbour_window = slice(1, n_recommendations + 1)
    return indices[item_index, neighbour_window], distances[item_index, neighbour_window]

# Listening history and track catalogue as DataFrames.
df = pd.read_csv(df_path)
df_music = pd.read_csv(music_path)

# Distinct user ids and distinct listened track ids, in order of first
# appearance; kNN_recommend addresses users by position in `users`.
users = df['user_id'].unique()
listened_musics = df['track_id'].unique()

print('user count:', len(users))
print('unique listened music count:', len(listened_musics))

def kNN_recommend(N,df,df_music,indices,distances,n_recommendations=5,user_ids=None):
    """Content-based recommendations for one user from precomputed kNN tables.

    Every track the user listened to contributes its nearest neighbours.  Each
    neighbour distance is divided by the track's ``normalized_playcount`` so
    that neighbours of heavily played tracks rank closer.  The candidates with
    the smallest weighted distance are returned.

    Args:
        N: Position of the user inside ``user_ids``.
        df: Listening history with ``user_id``, ``track_id`` and
            ``normalized_playcount`` columns.
        df_music: Track catalogue with a ``track_id`` column; its index labels
            are used as row numbers into ``indices``/``distances``.
        indices: 2-D array of neighbour rows per catalogue row.
        distances: 2-D array of neighbour distances aligned with ``indices``.
        n_recommendations: Neighbours fetched per listened track and maximum
            number of recommendations returned.
        user_ids: Optional sequence of user ids addressed by ``N``.  Defaults
            to the notebook-level ``users`` array.

    Returns:
        List of at most ``n_recommendations`` catalogue rows (values taken
        from ``indices``, so they keep that array's dtype), best match first.

    Raises:
        IndexError: If ``N`` is out of range or a listened track is missing
            from ``df_music``.
    """
    if user_ids is None:
        # Backward-compatible default: the notebook-level array of user ids.
        user_ids = users

    # Filter the history once instead of once per column.
    user_rows = df[df['user_id'] == user_ids[N]]

    # Catalogue row of every listened track.
    seed_rows = [
        df_music[df_music['track_id'] == track_id].index[0]
        for track_id in user_rows['track_id']
    ]
    # Tracks the user already knows must not be recommended back to them.
    already_listened = set(seed_rows)

    best_distance = {}
    # NOTE(review): a zero normalized_playcount turns the division below into
    # inf/nan distances - confirm the column is strictly positive.
    for seed_row, weight in zip(seed_rows, user_rows['normalized_playcount']):
        neighbour_rows, neighbour_dists = get_recommendations(
            seed_row, indices, distances, n_recommendations
        )
        weighted_dists = neighbour_dists / weight
        for candidate, dist in zip(neighbour_rows, weighted_dists):
            if candidate in already_listened:
                continue
            # A candidate reachable from several listened tracks keeps its
            # best (smallest) distance rather than whichever was seen last.
            if candidate not in best_distance or dist < best_distance[candidate]:
                best_distance[candidate] = dist

    return heapq.nsmallest(n_recommendations, best_distance, key=best_distance.get)

user count: 692376
unique listened music count: 28597


In [59]:
User_N=45
# User_N is a position in `users`, not a raw user id.  Handy reference users
# (position -> number of rows in df):
#   3801   -> 29
#   692373 -> 35
#   693    -> 2
#   45     -> 5

# Number of rows the user has in the listening history.
# NOTE(review): this equals the number of *different* tracks only if df holds
# one row per (user, track) pair - confirm.
user_rows = df[df['user_id']==users[User_N]]
user_unique_music = user_rows.shape[0]
print(user_unique_music)

# Split of the recommendation list between the two models, by history size:
#   <= 3 rows : content-based only (10 items)
#   4-5 rows  : 5 collaborative + 5 content-based
#   > 5 rows  : 7 collaborative + 3 content-based
if user_unique_music <= 3:
    collaborative_count, content_count = 0, 10
elif user_unique_music <= 5:
    collaborative_count, content_count = 5, 5
else:
    collaborative_count, content_count = 7, 3

content_based_recommendations = kNN_recommend(
    User_N, df, df_music, indices, distances, n_recommendations=content_count
)
if collaborative_count:
    # NOTE(review): User_N indexes `users`; confirm the recommender addresses
    # users in the same order.
    music_id, artists, tracks, scores = recommender.recommend(
        User_N, user_artists, n=collaborative_count
    )

# Same header for every branch (one branch used to print "Recommend Musics:"
# and the content-only branch printed no header at all).
print("\n Recommended Musics:")
if collaborative_count:
    # NOTE(review): the recorded output reads "<song> by <artist>", so the
    # second returned sequence appears to hold titles and the third the
    # performers, despite the names `artists`/`tracks` - confirm against
    # implicitMusic.ImplicitRecommender.recommend.
    for title, performer in zip(artists, tracks):
        print(f"{title} by {performer} (Collaborative Filtering Based)")
for music_index in content_based_recommendations:
    # kNN indices may be floats, so cast before the positional lookup.
    music_row = df_music.iloc[int(music_index)]
    print(f"{music_row['name']} by {music_row['artist']} (Content Based)")

5

 Recommended Musics:
Some Kinda Love by The Velvet Underground (Collaborative Filtering Based)
Age of Consent by New Order (Collaborative Filtering Based)
I'm Sleeping in a Submarine by Arcade Fire (Collaborative Filtering Based)
Childhood Remembered by Kevin Kern (Collaborative Filtering Based)
Thieves Like Us by New Order (Collaborative Filtering Based)
Kein Mitleid by Eisbrecher (Content Based)
Tostaky (Le Continent) by Noir Désir (Content Based)
Easy Love by MSTRKRFT (Content Based)
Avantasia by Avantasia (Content Based)
Mysterious Skies by ATB (Content Based)


In [19]:
import pandas as pd

# Rows per user in the listening history `df`.
# NOTE(review): this is the number of *different* tracks only if df holds one
# row per (user, track) pair - confirm.
user_music_counts = df.groupby('user_id').size()

# One variable drives both the filter and the printed text.  The original
# filtered on "> 3" while its comment and messages claimed "more than 5"; the
# filter value is kept so the computed numbers do not change.
track_count_threshold = 3
users_above_threshold = (user_music_counts > track_count_threshold).sum()

total_users = len(user_music_counts)

print(f"Number of users who listened to more than {track_count_threshold} different music tracks: {users_above_threshold}")
print(f"Percentage of users who listened to more than {track_count_threshold} different music tracks: {(users_above_threshold / total_users) * 100:.2f}%")

Number of users who listened to more than 5 different music tracks: 300053
Percentage of users who listened to more than 5 different music tracks: 43.34%
