In [5]:
import pandas as pd


dtype_triplets = {'user_id': str, 'song_id': str, 'listen_count': int}


triplets_df = pd.read_csv(triplets_file_path, header=0, names=['user_id', 'song_id', 'listen_count'], dtype=dtype_triplets)
song_data_df = pd.read_csv(song_data_file_path)

print("Triplets DataFrame:")
print(triplets_df.head())

print("\nSong Data DataFrame:")
print(song_data_df.head())


merged_df = pd.merge(triplets_df, song_data_df, on='song_id', how='left')


print("\nMerged DataFrame:")
print(merged_df.head())


Triplets DataFrame:
                                    user_id             song_id  listen_count
0  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOAKIMP12A8C130995             1
1  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBBMDR12A8C13253B             2
2  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBXHDL12A81C204C0             1
3  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SOBYHAJ12A6701BF1D             1
4  b80344d063b5ccb3212f76538f3d9e43d87dca9e  SODACBL12A8C13C273             1

Song Data DataFrame:
              song_id              title  \
0  SOQMMHC12AB0180CB8       Silent Night   
1  SOVFVAK12A8C1350D9        Tanssi vaan   
2  SOGTUKN12AB017F4F1  No One Could Ever   
3  SOBNYVR12A8C13558C      Si Vos Querés   
4  SOHSBXH12A8C13B0DF   Tangle Of Aspens   

                                release       artist_name  year  
0                 Monster Ballads X-Mas  Faster Pussy cat  2003  
1                           Karkuteillä  Karkkiautomaatti  1995  
2                          

In [None]:
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split


aggregated_df = merged_df.groupby(['user_id', 'song_id']).agg({'listen_count': 'sum'}).reset_index()


user_song_matrix = aggregated_df.pivot(index='user_id', columns='song_id', values='listen_count').fillna(0)

user_song_sparse_matrix = csr_matrix(user_song_matrix.values)


train_data, test_data = train_test_split(user_song_sparse_matrix, test_size=0.2, random_state=42)

print("Data preparation complete. Ready for model training.")

In [9]:
from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import train_test_split as surprise_train_test_split
import pandas as pd


reader = Reader(rating_scale=(1, aggregated_df['listen_count'].max()))
data = Dataset.load_from_df(aggregated_df[['user_id', 'song_id', 'listen_count']], reader)


trainset, testset = surprise_train_test_split(data, test_size=0.2, random_state=42)


svd = SVD()
svd.fit(trainset)
predictions = svd.test(testset)

accuracy.rmse(predictions)


RMSE: 2209.8328


2209.8328178818188

In [11]:
def get_top_n_recommendations(predictions, user_id, n=10):
  
    user_predictions = [pred for pred in predictions if pred.uid == user_id]
    user_predictions.sort(key=lambda x: x.est, reverse=True)
    
    
    top_n_songs = user_predictions[:n]
    top_n_song_ids = [pred.iid for pred in top_n_songs]
    
   
    return song_data_df[song_data_df['song_id'].isin(top_n_song_ids)][['song_id', 'title', 'artist_name']]


user_id = 'b80344d063b5ccb3212f76538f3d9e43d87dca9e'  
top_recommendations = get_top_n_recommendations(predictions, user_id, n=10)
print("Top 10 Recommendations for User:", user_id)
print(top_recommendations)


Top 10 Recommendations for User: b80344d063b5ccb3212f76538f3d9e43d87dca9e
                   song_id                          title  \
133174  SOBXHDL12A81C204C0                       Stronger   
147425  SOHQWYZ12A6D4FA701  Heaven's gonna burn your eyes   
176820  SOUKXIN12A8C133C7F                          Drive   
248353  SOXZQDE12A8C135833                     Right Back   
523183  SOVYIYI12A8C138D88            He Doesn't Know Why   
531659  SOQLCKR12A81C22440                Jewels And Gold   
594489  SOBXHDL12A81C204C0                       Stronger   
922538  SOWSPUS12AC468BEE3            Bigger Isn't Better   

                                        artist_name  
133174                                   Kanye West  
147425  Thievery Corporation feat. Emiliana Torrini  
176820                                      Incubus  
248353                                      Sublime  
523183                                  Fleet Foxes  
531659                          Angus & Julia Stone 