# Customizing the recomendation pipeline for a Spotify user

In the last notebook, a collaborative filtering SVD model is trained to predict the preference of Spotify songs for a EchoNet user in the training set. To deploy the model for our MuseX Spotify Dashboard, there are still some gaps to close:

1) map the Spotify user information to a known Echonet user for inference, using cosine similarity

2) create checkpoints and curated data files for model deployment

In [None]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [10]:
MSD_listen = 'msd_songs.csv'
msd_songs = pd.read_csv(MSD_listen).iloc[:,:3].rename(columns={'song_id':'song_id_MSD'})
msd_songs

Unnamed: 0,user_id,song_id_MSD,count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1
1,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAPDEY12A81C210A9,1
2,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBBMDR12A8C13253B,2
3,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBFNSP12AF72A0E22,1
4,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOBFOVM12A58A7D494,1
...,...,...,...
36192630,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOUHHHH12AF729E4AF,2
36192631,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOUJVIT12A8C1451C1,1
36192632,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOUSMXX12AB0185C24,1
36192633,b7815dbb206eb2831ce0fe040d0aa537e2e800f7,SOWYSKH12AF72A303A,3


In [4]:
msd2spf = pd.read_csv("MSD2spotify.csv",names=['score','id_left', 'id_right',
                                                       'song_id_MSD', 'song_MSD', 'artist_MSD',
                                                        'song_id_SPF', 'song_SPF', 'artist_SPF', 'popularity',
                                                ])
msd2spf = msd2spf.loc[:, ['song_id_MSD', 'song_id_SPF']]
msd2spf

Unnamed: 0,song_id_MSD,song_id_SPF
0,SOAKIMP12A8C130995,27uOuxKlQE9c1i4bh89Wg5
1,SOAPDEY12A81C210A9,72PoJMDfdaw9gGECgA9kTZ
2,SOBBMDR12A8C13253B,0bV0jAPUTN9xdVcW0nFos2
3,SOBSUJE12A6D4F8CF5,2ECKXkpPAxky87ohawpaeD
4,SOBXHDL12A81C204C0,4fzsfWzRhPawzqhX8Qt9F3
...,...,...
16591,SOCGXNI12A8C136131,2ZqprVgnllancjXke8wLc5
16592,SODYPUW12AF729F2FC,5bNpYsNWgvF3kLC54vplp8
16593,SOHKFEZ12AB01871E6,5A8GIIn5vD5Jrm1etHXS3d
16594,SOKSYTG12A8C136080,2qzQZ23b0SXFwz812P83hn


In [5]:
msd2spf_dict = msd2spf.to_dict('list')
msd2spf_dict

{'song_id_MSD': ['SOAKIMP12A8C130995',
  'SOAPDEY12A81C210A9',
  'SOBBMDR12A8C13253B',
  'SOBSUJE12A6D4F8CF5',
  'SOBXHDL12A81C204C0',
  'SOBYHAJ12A6701BF1D',
  'SOCNMUH12A6D4F6E6D',
  'SODACBL12A8C13C273',
  'SODDNQT12A6D4F5F7E',
  'SODZWFT12A8C13C0E4',
  'SOEWFWM12A8C1308BA',
  'SOFFJPX12A6D4F7456',
  'SOFGUAY12AB017B0A8',
  'SOHQWYZ12A6D4FA701',
  'SOIYTOA12A6D4F9A23',
  'SOIZAZL12A6701C53B',
  'SOJNNUA12A8AE48C7A',
  'SOJPFQG12A58A7833A',
  'SOJTEDZ12A58A7CB3D',
  'SOKRIMP12A6D4F5DA3',
  'SOLGNOE12A8C139CA9',
  'SOLUHPJ12A8C13AB0F',
  'SOMGIYR12AB0187973',
  'SOMLMKI12A81C204BC',
  'SOMSQJY12A8C138539',
  'SOMZWUW12A8C1400BC',
  'SONRXOY12AB0181E84',
  'SONSAEZ12A8C138D7A',
  'SONVJAW12A8C139EFB',
  'SOOGGEX12A58A7DACF',
  'SOOKGRB12A8C13CD66',
  'SOPCVQE12AC468AF36',
  'SOPZAUC12A58A7DB24',
  'SOQIVUD12AB01821D2',
  'SOQJLDY12AAF3B456D',
  'SOQLCKR12A81C22440',
  'SOQMUDW12A58A7AF03',
  'SORPMYJ12AF729EB90',
  'SORUFVF12AB018230B',
  'SORWLTW12A670208FA',
  'SORZASF12A6D4F8CFA',
 

In [7]:
new_dict = dict(zip(msd2spf_dict['song_id_MSD'],msd2spf_dict['song_id_SPF']))
len(new_dict)

16596

In [None]:
msd_songs.to_csv('SPF_user_listen_count_new.csv', index=False)

In [11]:
SPF_listen = pd.merge(msd_songs, msd2spf, how='inner', on=['song_id_MSD'])
SPF_listen

Unnamed: 0,user_id,song_id_MSD,count,song_id_SPF
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
1,7c86176941718984fed11b7c0674ff04c029b480,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,SOAKIMP12A8C130995,3,27uOuxKlQE9c1i4bh89Wg5
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
4,3f73f44560e822344b0fb7c6b463869743eb9860,SOAKIMP12A8C130995,6,27uOuxKlQE9c1i4bh89Wg5
...,...,...,...,...
19199538,5c2d0d9194262ea44a55d32fc8defcf650f41996,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0
19199539,85bb0545b3f3aff3d149baeec60911a577cfce74,SOHZIPR12A8C1350D3,6,0rT9f5BSZrZ6DvzfBEckG0
19199540,078134c9a39fc34657d7bade7a6b6dd753a0159d,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0
19199541,8b4c719e712d45cc8273023e40134b4e69d9394c,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0


In [12]:
SPF_listen.to_csv('SPF_MSD_listen_count.csv', index=False)

In [None]:
playcount_df = pd.read_csv('SPF_MSD_listen_count.csv')

## Filter Listen Count DF again to reduce sparsity

In [14]:
# Get how many user have listened to each song
playcount_df = SPF_listen
user_counts = playcount_df.groupby('user_id')['song_id_SPF'].count()

user_counts

user_id
00001cf0dce3fb22b0df0f3a1d9cd21e38385372     9
0000267bde1b3a70ea75cf2b2d216cb828e3202b    10
00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e    39
000060ca4e6bea0a5c9037fc1bbd7bbabb98c754    10
00007ed2509128dcdd74ea3aac2363e24e9dc06b     7
                                            ..
ffffd330940a2a40754ec0383391f55c6129f48b    31
ffffdc274ca76d154b4e56b2dbc82ff538c93c0b    12
ffffdc6c89988cd6119067769162948eacf8b670    27
fffff67d54a40927c93d03bd6c816b034b59f087    38
fffff9534445f481b6ab91c345500083d2ce4df1    13
Name: song_id_SPF, Length: 740942, dtype: int64

In [15]:
# Filter users which have listen to at least 16 songs
user_ten_id = user_counts[user_counts > 16].index.to_list()
len(user_ten_id)

391523

In [16]:
# Get how many user have listened to each song
song_counts = playcount_df.groupby('song_id_SPF')['user_id'].count()
print(len(song_counts))

# Get songs which have been listened at least 4 times
song_ten_id = song_counts[song_counts > 200].index.to_list()
print(len(song_ten_id))

# Filtered the dataset to keep only those users with more than 16 listened
df_song_reduced = playcount_df[(playcount_df['user_id'].isin(user_ten_id)) & 
                           (playcount_df['song_id_SPF'].isin(song_ten_id))].reset_index(drop=True)
df_song_reduced

12967
12443


Unnamed: 0,user_id,song_id_MSD,count,song_id_SPF
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
1,7c86176941718984fed11b7c0674ff04c029b480,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,SOAKIMP12A8C130995,3,27uOuxKlQE9c1i4bh89Wg5
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,SOAKIMP12A8C130995,1,27uOuxKlQE9c1i4bh89Wg5
4,3f73f44560e822344b0fb7c6b463869743eb9860,SOAKIMP12A8C130995,6,27uOuxKlQE9c1i4bh89Wg5
...,...,...,...,...
15540305,5c2d0d9194262ea44a55d32fc8defcf650f41996,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0
15540306,85bb0545b3f3aff3d149baeec60911a577cfce74,SOHZIPR12A8C1350D3,6,0rT9f5BSZrZ6DvzfBEckG0
15540307,078134c9a39fc34657d7bade7a6b6dd753a0159d,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0
15540308,8b4c719e712d45cc8273023e40134b4e69d9394c,SOHZIPR12A8C1350D3,1,0rT9f5BSZrZ6DvzfBEckG0


In [3]:
df_song_reduced = pd.read_csv('SPF_listen_count_filtered.csv')
df_song_reduced

Unnamed: 0,user_id,song_id_SPF,count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,27uOuxKlQE9c1i4bh89Wg5,1
1,7c86176941718984fed11b7c0674ff04c029b480,27uOuxKlQE9c1i4bh89Wg5,1
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,27uOuxKlQE9c1i4bh89Wg5,3
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,27uOuxKlQE9c1i4bh89Wg5,1
4,3f73f44560e822344b0fb7c6b463869743eb9860,27uOuxKlQE9c1i4bh89Wg5,6
...,...,...,...
15540305,5c2d0d9194262ea44a55d32fc8defcf650f41996,0rT9f5BSZrZ6DvzfBEckG0,1
15540306,85bb0545b3f3aff3d149baeec60911a577cfce74,0rT9f5BSZrZ6DvzfBEckG0,6
15540307,078134c9a39fc34657d7bade7a6b6dd753a0159d,0rT9f5BSZrZ6DvzfBEckG0,1
15540308,8b4c719e712d45cc8273023e40134b4e69d9394c,0rT9f5BSZrZ6DvzfBEckG0,1


In [17]:
df_song_reduced = df_song_reduced.iloc[:, [0,3,2]]
df_song_reduced.to_csv('SPF_listen_count_filtered.csv', index=False)

In [18]:
# Bining using 1-10 scale
bins = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, np.inf]
classes = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

df_song_reduced['count'] = pd.cut(df_song_reduced['count'], bins=bins, labels=classes)
listen_counts = pd.DataFrame(df_song_reduced.groupby('count').size(), columns=['rating_count']).reset_index(drop=False)
display(df_song_reduced)

Unnamed: 0,user_id,song_id_SPF,count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,27uOuxKlQE9c1i4bh89Wg5,1
1,7c86176941718984fed11b7c0674ff04c029b480,27uOuxKlQE9c1i4bh89Wg5,1
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,27uOuxKlQE9c1i4bh89Wg5,3
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,27uOuxKlQE9c1i4bh89Wg5,1
4,3f73f44560e822344b0fb7c6b463869743eb9860,27uOuxKlQE9c1i4bh89Wg5,6
...,...,...,...
15540305,5c2d0d9194262ea44a55d32fc8defcf650f41996,0rT9f5BSZrZ6DvzfBEckG0,1
15540306,85bb0545b3f3aff3d149baeec60911a577cfce74,0rT9f5BSZrZ6DvzfBEckG0,6
15540307,078134c9a39fc34657d7bade7a6b6dd753a0159d,0rT9f5BSZrZ6DvzfBEckG0,1
15540308,8b4c719e712d45cc8273023e40134b4e69d9394c,0rT9f5BSZrZ6DvzfBEckG0,1


In [37]:
grouper = df_song_reduced.groupby(['user_id', 'song_id_SPF']).sum()
new_df = grouper.reset_index()
new_df
new_df = new_df.rename(columns={'count':'score'})
new_df.to_csv('SPF_user_song_score.csv', index=False)

Unnamed: 0,user_id,song_id_SPF,count
0,00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,0bTlXQJzw4ddaLQPJ4rGS2,1
1,00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,0iwo02NoGqyooxs81iEkz3,1
2,00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,0nCAVzknkzRXRgB6LhRv04,1
3,00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,18DDI7sV7bF4vZXhqnDhWH,1
4,00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,1A8SxnqRK3qabmuN2qxIpa,1
...,...,...,...
15009493,fffff67d54a40927c93d03bd6c816b034b59f087,721gOIOYPYJIGw2LLvLjsd,1
15009494,fffff67d54a40927c93d03bd6c816b034b59f087,7lRFR5GJCxK87ZbVMtQSeS,1
15009495,fffff67d54a40927c93d03bd6c816b034b59f087,7nCdeDrv9Tu4nsT3K638em,2
15009496,fffff67d54a40927c93d03bd6c816b034b59f087,7uHZgfmWRqjFrC85Ou6A1I,1


Generate list of unique songs to recommend from, and save as csv for later use.

In [64]:
songs_pool = df_song_reduced.loc[:,'song_id_SPF'].drop_duplicates(inplace=False)
songs_pool.to_csv('songs_pool.csv', index=False)

## Recommender Model

In [42]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import time

from surprise import SVD
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import train_test_split
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate

In [43]:
df_song_reduced = pd.read_csv('SPF_user_song_score.csv')

reader = Reader(rating_scale=(1, 10))
data = Dataset.load_from_df(df_song_reduced.loc[:, ['user_id', 'song_id_SPF', 'score']], reader)

trainset, testset = train_test_split(data, test_size=0.01)

In [44]:
# Perform the cross validation - 1-10 scale
#cross_validate(find_algo, data, measures=['RMSE'], cv=5, verbose=True)

start = time.perf_counter()
final_algorithm = SVD() #SVD(n_factors=160, n_epochs=20, lr_all=0.005, reg_all=0.08)
final_algorithm.fit(trainset)
test_predictions = final_algorithm.test(testset)
print(f"The RMSE is {accuracy.rmse(test_predictions, verbose=True)}")
end = time.perf_counter()
print(end-start)

RMSE: 2.3208
The RMSE is 2.3208416398385783
1174.9153354912996


In [45]:
test_predictions[:10]

[Prediction(uid='7ec7b3e9beb1925a43e0a70316812e79be9d55db', iid='4ERK3MXPXLWxy0UazrDKup', r_ui=2.0, est=1.2175859182049509, details={'was_impossible': False}),
 Prediction(uid='d7cb5fcfcb39ec522163ead2a3e12faa2e24a9c2', iid='16sbugRbf9xEKlekuQ7Whm', r_ui=6.0, est=2.251062475052392, details={'was_impossible': False}),
 Prediction(uid='749c1212db00749d3cf9cb81ea1b28c8de907b9e', iid='22wWxHGHRH8gZXNDXRgSqY', r_ui=9.0, est=6.658994939682305, details={'was_impossible': False}),
 Prediction(uid='371761c5873cce3ee6dd5342149d17c4a150f705', iid='2dyfo7lqKI7NtSAhUZwnoJ', r_ui=1.0, est=1.7567832119523845, details={'was_impossible': False}),
 Prediction(uid='c9a35927ecf7ebe52968d40f47dc0f1e505ba116', iid='1yJ5YFWOaYWf6d71ZyzD5r', r_ui=1.0, est=1.04856973749626, details={'was_impossible': False}),
 Prediction(uid='6b343ccfb23a3e8c1dd2abd8a23df88329b053da', iid='4VQu1ooCteGDynSZYUgvT4', r_ui=1.0, est=1.5703498421683129, details={'was_impossible': False}),
 Prediction(uid='ab577169cfe53d178d3f927522e

In [46]:
from surprise import dump
model_name = 'recommender_model_final'
#final_algorithm = SVD(n_factors=160, n_epochs=20, lr_all=0.005, reg_all=0.08)
dump.dump(model_name, predictions=None, algo=final_algorithm, verbose=0)

In [47]:
final_algorithm.predict('2df98aad3fdbc770c45774866d9508ccad472430', '2KaPQvJCkqdOF41vlTRTku')

Prediction(uid='2df98aad3fdbc770c45774866d9508ccad472430', iid='2KaPQvJCkqdOF41vlTRTku', r_ui=None, est=1, details={'was_impossible': False})

## Predict Song Preferences for new Spotify user

To make recommendations for a new Spotify user given saved/liked songs, we found the most similar EchoNet user to this Spotify user based on cosine similarity, and then recommend songs based on the predicted scores for the Echonet user. 

Simply put, the idea is "user with a similar taste as you may also like these songs...". This can be potentially interesting because users on EchoNet may have a different listening preference distribution than Spotify users.

In [2]:
df_song_reduced = pd.read_csv('rec_dataset.csv')
df_song_reduced

Unnamed: 0,user_id,song_id_SPF,count
0,b80344d063b5ccb3212f76538f3d9e43d87dca9e,27uOuxKlQE9c1i4bh89Wg5,1
1,7c86176941718984fed11b7c0674ff04c029b480,27uOuxKlQE9c1i4bh89Wg5,1
2,76235885b32c4e8c82760c340dc54f9b608d7d7e,27uOuxKlQE9c1i4bh89Wg5,3
3,250c0fa2a77bc6695046e7c47882ecd85c42d748,27uOuxKlQE9c1i4bh89Wg5,1
4,3f73f44560e822344b0fb7c6b463869743eb9860,27uOuxKlQE9c1i4bh89Wg5,6
...,...,...,...
15540305,5c2d0d9194262ea44a55d32fc8defcf650f41996,0rT9f5BSZrZ6DvzfBEckG0,1
15540306,85bb0545b3f3aff3d149baeec60911a577cfce74,0rT9f5BSZrZ6DvzfBEckG0,6
15540307,078134c9a39fc34657d7bade7a6b6dd753a0159d,0rT9f5BSZrZ6DvzfBEckG0,1
15540308,8b4c719e712d45cc8273023e40134b4e69d9394c,0rT9f5BSZrZ6DvzfBEckG0,1


In [23]:
# create dummy user data
saved_songs = ['4puHMOaNsxXs6s45g7NkJI', '1EGIeHEvMZh3MNkIn2itUy', '1oZYaztSjkVQ7PeKwUbdab', '5YuXkLn4gXk9JYD1del0qR',
               '5pWBLvVsEhKiWba3wvYIXB', '2xrNC0YeIdMjXItS1V0QnS', '47XIBIhxwEUX6o0gPMZ2mN', '5yZzLyY9SMI3ZjdqpXWYDp',
               '4ZpXHlV2vQVfPXUvbDSZ92', '2LIUQiR8QfZbWT0qOTzBVD', '5h3M1h279xrxl1WtMS71Jb', '5vn6pVxzLeWJweGa9zqxGD',
               '2UjGyCvayHjGihV3oLhD8a', '0R8Danl4L4Tq00OiYFygSM', '0AFZnXDUT5qbJboJMZ6zlp', '34tIBWjEV2F27FRz34HhHy',
               '3rheINeddqah49dElHlqbZ', '0gOdikUSCMDS90TypUuNbT', '4IZxal0c4bUnY5vn2yR580', '0rT9f5BSZrZ6DvzfBEckG0']
user_data = get_user_song_df(saved_songs)

# Find most similar user in train datset
start = time.perf_counter()

sim_user_id = get_sim_user(user_data) #'2df98aad3fdbc770c45774866d9508ccad472430'

end = time.perf_counter()
print('Most similar user in trainset: ', sim_user_id, 'Time_lapsed: ', end-start)

Most similar user in trainset:  ('015e581dd73cd110dc2ea9738b31a447bb7ea01e', 0.48132031526436686) Time_lapsed:  55.39336182177067


In [26]:
# Get top songs recommendations
start = time.perf_counter()

new_songs = get_new_songs(saved_songs)
top_songs, top_scores = generate_rec_songs(user_id=sim_user_id, top=20, pool=new_songs)
print(top_songs)
print(top_scores)

end = time.perf_counter()
print('Songs recommendation finished. Time_lapsed: ', end-start)

['2uGDqQsfkKgBH4IsTu23I3', '5LbLX9SlqtEOnvaIZsiUqd', '6BxiFSFVzviEqe2eIaruLV', '2mCoPoJKtIoUuqkvTsJs3Z', '7xAoWHiFihACP9v2kJyMxm', '3MTMXwWwacAmAcTlzIgC7l', '6INNs26zUPhIkxzJu7rlMF', '7vAw4LLIms6X8ZHcbtoazz', '1BhVIZ0fuduQfpB7AD3AeY', '3xpSbo5aiJUhcD1dpWYQ3A', '4WcspuNc2WUuq7UAvDMsTI', '3MvbFAlNARGUpMPEkn88Ho', '409C0oCpMq7DNRojawi1cH', '6OMEo84SNpwhT6GsZhwCms', '1EqpyzFn8Da906isLV9YXY', '3M5qFRK8iq7Tqect6YruY0', '69LpTXgwTVeIDfAcozZ8n5', '1Ee1SaTja5ZCLUWp0qEjhI', '61lAJ5gEEyWvYxMh9rP8kI', '74inHQGjQj6w630tZ9wb2J']
[4.679729998965469, 4.653479126517839, 4.630567956002808, 4.597680405778302, 4.535870763410921, 4.444121710587765, 4.411481925990442, 4.40577000458007, 4.387589816343546, 4.377255182580903, 4.325190842684965, 4.318194582452373, 4.292572554914666, 4.274248781150255, 4.268564660569292, 4.238645657641478, 4.237395581882888, 4.237072996197123, 4.235672977055275, 4.232334270949134]
Songs recommendation finished. Time_lapsed:  18.44023985415697


In [4]:
df_song_id_user = pd.read_csv('df_song_id_user.csv').sort_values(by=['user_id', 'song_id_SPF']).iloc[:100000,:10]
df_song_id_user.append(user_data)

song_user = df_song_id_user.pivot(index='user_id', columns='song_id_SPF', values='count').fillna(0)

# obtain a sparse matrix
song_user_mat = csr_matrix(song_user.values)

# take a look at the pivot table
song_user.head()

song_id_SPF,000u1dTg7y1XCDXi80hbBX,003FTlCpBTM4eSqYSWPv4H,0046quUYhSAFccrKIC3Iht,009G1RDIr3UgPrFzOPJPfb,009jzOyt9tXCqaUzeFIT03,00BHe2yBtdOzhKPmI7rpTE,00CCi9yHJh7NjXcNT565Ja,00CmjeeHvAVKvx3tcIiZTy,00FDHurakzVEiPutdUxXXq,00HgVIkZrAL8WjAN9Et6WW,...,7ziHnshbknkpFLDW5yGBjO,7ziicmUOzWXAkdPriOmFk9,7zkLlx4bTeeyUyzs1L7gV4,7zlkYAQdYPIwagUwO7Dyah,7zmDFfSrtSd2JeqBqRqUmV,7zmqbdEqmQWgU2wOoBPG83,7zrHnJ87htwgS20PzFmgTr,7zridhiD9txjjYAcDoC0qY,7zvMaTcCspbRMahT4DcjQG,7zydM7FeDlFUPCXdY2GCgZ
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
00004fb90a86beb8bed1e9e328f5d9b6ee7dc03e,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0000bb531aaa657c932988bc2f7fd7fc1b2050ec,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0000d3c803e068cf1da17724f1674897b2dd7130,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0000f88f8d76a238c251450913b0d070e4a77d19,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0
000138e252eea35fd73aaf66a9b34102b695a9c8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
