In [18]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import json
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import ndcg_score
from scipy.stats import rankdata
from scipy.sparse import csr_matrix
from sklearn.preprocessing import StandardScaler

In [27]:
file_list = ["playlist.json"] #, "playlist2.json", "playlist3.json", "playlist4.json", "playlist5.json"]

In [63]:
def find_similar_playlists(playlist_id, similarity_matrix, number_of_playlists = 10):
    playlist_similarity_scores = similarity_matrix[playlist_id]
    similar_playlist_indices = playlist_similarity_scores.argsort()[:-number_of_playlists-2:-1]
    similar_playlist_indices = [index for index in similar_playlist_indices if index != playlist_id]
    similar_playlist_scores = playlist_similarity_scores[similar_playlist_indices]
    return similar_playlist_indices, similar_playlist_scores

In [85]:
for filename in file_list:
    file_data = open(f'./{filename}', 'r')
    playlists = json.load(file_data)
    all_songs = list(set(song for user in playlists for song in user))
    song_to_index = {song: i for i, song in enumerate(all_songs)}
    playlist_songs = np.zeros((len(playlists), len(all_songs))).astype('int')
    for i, playlist in enumerate(playlists):
        for song in playlist:
            playlist_songs[i, song_to_index[song]] = 1

    playlist_songs_copy = np.copy(playlist_songs)
    # Calculate cosine similarity matrix using training data
    similarity_matrix = cosine_similarity(playlist_songs_copy)
    similar_playlist_indices, similar_playlist_scores = find_similar_playlists(20, similarity_matrix)

    recommendation_points = np.sum(playlist_songs_copy[similar_playlist_indices] * similar_playlist_scores[:, np.newaxis], axis = 0)
    total_recommend_list_point = np.sum(similar_playlist_scores)
    sorted_indices = np.argsort(recommendation_points)[::-1]
    
    # Create an index_to_song dictionary
    index_to_song = {index: song for song, index in song_to_index.items()}

    # Print the indices of recommended songs along with their corresponding songs
    print(index_to_song[20])
    for sorted_index in sorted_indices[:20]:
        print(f"Song Index: {sorted_index}, Song: {index_to_song[sorted_index]}")

30RRzWqys8uIH4PM5cfPU2
Song Index: 29490, Song: 2cZrrQMjB63c0iIugYH9zS
Song Index: 6315, Song: 0YJPnuDiRc85oU1FdWa4ij
Song Index: 26643, Song: 0M1JWs0Bqk5MatyRq2CfKY
Song Index: 4121, Song: 5dTrfP8uUjHyjO23PxjHi2
Song Index: 12676, Song: 1NhjYYcYTRywc0di98xHxf
Song Index: 26979, Song: 219JFLfxvaJohp4D5S3Unf
Song Index: 23932, Song: 7dfSLUvvnO63sp6o6uvMA5
Song Index: 33070, Song: 7oRpS6xml8gG9l4XWFk41a
Song Index: 10327, Song: 34brgtDHRFbjKR4yHIGI31
Song Index: 27329, Song: 0LzidBf7cUsnZnG34OUPSF
Song Index: 23923, Song: 5F7AoCwgftgUdGUgd57RVE
Song Index: 24342, Song: 3lEgJXucXfkLyfONuUwC9K
Song Index: 25785, Song: 1qd1ipkgtdhbiJ2UiAH1HM
Song Index: 27340, Song: 22DRA2em8kN01PTtasTPXv
Song Index: 29299, Song: 0GX9k4ni4IDqU1vRHVPmiw
Song Index: 1592, Song: 1Eu00VxwsLMjHI6nQZ9RtW
Song Index: 19708, Song: 6xtikyQjxhR55y46lzp9W9
Song Index: 24531, Song: 0OOc4CWq6Cg8fTFE1vCvwS
Song Index: 21283, Song: 5au3jBfE3ySuVd3rg0UbYO
Song Index: 28815, Song: 1VziLmorXUGxwBPplIy09f


In [None]:
def reciprocal_rank(ranking):
    return 1 / ranking[0]

def ndcg_at_k(ranking, k):
    ideal_ranking = sorted(ranking, reverse=True)[:k]
    dcg = np.sum([(2**rel - 1) / np.log2(i + 2) for i, rel in enumerate(ranking)])
    idcg = np.sum([(2**rel - 1) / np.log2(i + 2) for i, rel in enumerate(ideal_ranking)])
    return dcg / idcg if idcg > 0 else 0