In [3]:
import pandas as pd
import numpy as np
import random

from scipy.sparse import csr_matrix

In [5]:

# Read the two CSV inputs used by the rest of the notebook.
playlists_df = pd.read_csv("data/spotify_df_cleaned.csv")
recommendationInfo_df = pd.read_csv("data/recommendation_info.csv")

# Report shapes first, then column names, for both frames (in load order).
_loaded_frames = (playlists_df, recommendationInfo_df)
for _frame in _loaded_frames:
    print("Number of rows and columns:", _frame.shape)

for _frame in _loaded_frames:
    print("Column names:", _frame.columns.tolist())

Number of rows and columns: (32828, 21)
Number of rows and columns: (32828, 3)
Column names: ['track_popularity', 'playlist_name', 'playlist_subgenre', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'playlist_genre_edm', 'playlist_genre_latin', 'playlist_genre_pop', 'playlist_genre_r&b', 'playlist_genre_rap', 'playlist_genre_rock']
Column names: ['track_artist', 'track_name', 'track_album_name']


In [15]:
def norm(x):
    """Scale ``x`` to unit Euclidean length.

    Args:
        x: Array-like accepted by ``np.linalg.norm``.

    Returns:
        ``x`` divided by its norm, or ``x`` itself (unchanged, same object)
        when the norm is exactly zero, which avoids a division by zero.
    """
    magnitude = np.linalg.norm(x)
    return x if magnitude == 0 else x / magnitude

# Map each track name to its row position in recommendationInfo_df.
# When a name occurs more than once, the last occurrence wins.
track_index_map = {
    track_name: position
    for position, track_name in enumerate(recommendationInfo_df['track_name'])
}

In [None]:
num_tracks = len(recommendationInfo_df)
num_playlists = len(playlists_df)

# Collect (value, row, column) triplets for every playlist/track match.
# NOTE(review): each 'playlist_name' cell is split on commas and every piece is
# looked up as a track name. The column listing suggests this column holds a
# playlist title rather than a comma-separated list of tracks -- confirm that
# this is the intended source of playlist membership.
data = []
row_indices = []
col_indices = []
# enumerate() yields positional row numbers, so the matrix row is correct even
# if the DataFrame index is not a default 0..n-1 range.
for playlist_idx, playlist_entry in enumerate(playlists_df['playlist_name']):
    # Missing / non-string cells (e.g. NaN) have no .split(); skip them.
    if not isinstance(playlist_entry, str):
        continue
    for track_id in playlist_entry.split(','):
        track_idx = track_index_map.get(track_id)
        if track_idx is not None:
            data.append(1)
            row_indices.append(playlist_idx)
            col_indices.append(track_idx)

# Duplicate (row, column) pairs are summed by the sparse constructor.
interaction_matrix = csr_matrix(
    (data, (row_indices, col_indices)), shape=(num_playlists, num_tracks)
)

# L2-normalise every row without leaving sparse storage: scale the stored
# values of each row by that row's norm. This replaces a per-row dense
# assignment, which is very slow on CSR matrices.
normalized_matrix = interaction_matrix.astype(float)
row_norms = np.sqrt(
    np.asarray(normalized_matrix.multiply(normalized_matrix).sum(axis=1)).ravel()
)
row_norms[row_norms == 0] = 1.0  # all-zero rows stay all-zero
# np.diff(indptr) is the number of stored entries per row, so the repeated
# norms line up one-to-one with normalized_matrix.data.
normalized_matrix.data /= np.repeat(row_norms, np.diff(normalized_matrix.indptr))
interaction_matrix = normalized_matrix
def recommend_tracks(playlist_idx, interaction_matrix, top_k=10):
    """Recommend tracks for one playlist from playlist-to-playlist overlap.

    Every playlist (row) is scored against the target playlist by dot product,
    and those similarities are then projected back onto the tracks (columns),
    so each track's score is the similarity-weighted sum of the playlists that
    contain it. Tracks already in the target playlist are pushed to the bottom
    of the ranking.

    Args:
        playlist_idx: Row position of the target playlist.
        interaction_matrix: Sparse (playlists x tracks) matrix supporting
            row indexing, ``.dot``, ``.T`` and ``.nonzero()``.
        top_k: Maximum number of track indices to return.

    Returns:
        1-D array of at most ``top_k`` column (track) indices, best first.
        Empty when ``top_k <= 0``. If ``top_k`` exceeds the number of tracks
        not already in the playlist, already-present tracks fill the tail.
    """
    # A slice of [-0:] would return everything, so handle this explicitly.
    if top_k <= 0:
        return np.array([], dtype=np.intp)

    playlist_vector = interaction_matrix[playlist_idx]
    # (playlists x 1): similarity of every playlist to the target playlist.
    playlist_similarity = interaction_matrix.dot(playlist_vector.T)
    # (tracks,): project similarities onto tracks so that scores are indexed
    # by track, matching the track indices used for masking below.
    scores = (
        interaction_matrix.T.dot(playlist_similarity).toarray().ravel().astype(float)
    )
    # Never rank a track the playlist already contains above a new one.
    scores[playlist_vector.nonzero()[1]] = -np.inf
    return np.argsort(scores)[::-1][:top_k]


KeyError: 'track_name'

In [None]:
# Column-wise (per-item) L2 norms of the interaction matrix, computed once
# up front for the cosine-similarity scoring.
_squared_entries = interaction_matrix.power(2)
_column_sums = _squared_entries.sum(axis=0)
item_norms = np.sqrt(_column_sums).A1
def cosine_similarity(vec_a, vec_b):
    """Cosine of the angle between two sparse row vectors.

    Args:
        vec_a, vec_b: Sparse matrices supporting ``.dot``, ``.T`` and
            ``.power``; element ``[0, 0]`` of ``vec_a . vec_b^T`` is used as
            the inner product, so both are expected to be single-row.

    Returns:
        The inner product divided by the product of the two L2 norms, or
        ``0.0`` when either vector has zero norm.
    """
    inner = vec_a.dot(vec_b.T).toarray()[0, 0]
    magnitudes = [np.sqrt(vec.power(2).sum()) for vec in (vec_a, vec_b)]
    if any(magnitude == 0 for magnitude in magnitudes):
        return 0.0
    return inner / (magnitudes[0] * magnitudes[1])
def recommend_tracks_cosine(playlist_idx, interaction_matrix, top_k=10):
    """Recommend tracks for one playlist using item-norm-scaled scores.

    The target playlist is first compared with every playlist (row) by dot
    product; those overlaps are projected back onto the tracks (columns) and
    each track's score is divided by ``track_norm * playlist_norm`` so that
    tracks appearing in many playlists do not dominate purely by popularity.
    Tracks already in the target playlist are pushed to the bottom of the
    ranking.

    Args:
        playlist_idx: Row position of the target playlist.
        interaction_matrix: Sparse (playlists x tracks) matrix supporting
            row indexing, ``.dot``, ``.T``, ``.power`` and ``.nonzero()``.
        top_k: Maximum number of track indices to return.

    Returns:
        1-D array of at most ``top_k`` column (track) indices, best first.
        Empty when ``top_k <= 0``. If ``top_k`` exceeds the number of tracks
        not already in the playlist, already-present tracks fill the tail.
    """
    # A slice of [-0:] would return everything, so handle this explicitly.
    if top_k <= 0:
        return np.array([], dtype=np.intp)

    playlist_vector = interaction_matrix[playlist_idx]
    playlist_norm = np.sqrt(playlist_vector.power(2).sum())
    # Per-track column norms, derived from the matrix actually passed in so
    # they can never be out of date with respect to it.
    track_norms = np.sqrt(
        np.asarray(interaction_matrix.power(2).sum(axis=0)).ravel()
    )

    # (playlists x 1) overlap with the target, then projected to (tracks,)
    # so that the scores, the norms and the mask all share the track axis.
    playlist_overlap = interaction_matrix.dot(playlist_vector.T)
    scores = (
        interaction_matrix.T.dot(playlist_overlap).toarray().ravel().astype(float)
    )
    # The small constant guards against division by zero for empty
    # tracks or an empty target playlist.
    scores = scores / (track_norms * playlist_norm + 1e-10)
    # Never rank a track the playlist already contains above a new one.
    scores[playlist_vector.nonzero()[1]] = -np.inf
    return np.argsort(scores)[::-1][:top_k]
# Demo: request ten cosine-based recommendations for row 0 of the matrix.
playlist_idx = 0
recommended_indices = recommend_tracks_cosine(
    playlist_idx=playlist_idx,
    interaction_matrix=interaction_matrix,
    top_k=10,
)
_message_template = "Recommended track indices for playlist {}: {}"
print(_message_template.format(playlist_idx, recommended_indices))