In [1]:
import pandas as pd
import boto3
from implicit.als import AlternatingLeastSquares
from scipy.sparse import coo_matrix
from IPython.display import display
import numpy as np

In [2]:
# AWS session pinned to the Paris region (eu-west-3), the same region the S3 URLs below use.
# NOTE(review): no cell visible in this notebook reads boto_session - confirm it is still needed.
boto_session = boto3.Session(region_name='eu-west-3')

In [106]:
# Remote (S3) locations of the sample taste data and the sample song metadata.
sample_taste_csv_path, sample_meta_csv_path = (
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/sample_training_cf.csv',
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/sample_training_meta.csv',
)

In [3]:
# Local copies of the sample files. These reuse the variable names of the S3 cell above,
# so whichever of the two cells runs last decides where the sample data is read from.
sample_taste_csv_path, sample_meta_csv_path = (
    '../../data/sample_training_cf.csv',
    '../../data/sample_training_meta.csv',
)

In [108]:
# Remote (S3) locations of the full training set: four taste shards plus the song metadata.
taste_csv_paths = [
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/training_cf.0.csv',
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/training_cf.1.csv',
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/training_cf.2.csv',
    'https://s3.eu-west-3.amazonaws.com/sukikana-msd/training_cf.3.csv'
]
# Full metadata file, mirroring the local '../../data/training_meta.csv'; the sample
# metadata belongs with the sample taste file, not with the full shards.
# NOTE(review): confirm training_meta.csv has been uploaded to the bucket.
meta_csv_path = 'https://s3.eu-west-3.amazonaws.com/sukikana-msd/training_meta.csv'

In [13]:
# Local copies of the full training set (song metadata plus the four taste shards).
meta_csv_path = '../../data/training_meta.csv'
taste_csv_paths = [
    '../../data/training_cf.0.csv',
    '../../data/training_cf.1.csv',
    '../../data/training_cf.2.csv',
    '../../data/training_cf.3.csv',
]

In [4]:
# Number of latent factors; passed as `factors` to AlternatingLeastSquares below.
n_factors = 50

In [5]:
# Load the sample taste data (user and song ids parsed as categoricals) and the
# sample song metadata, indexed by song_id.
id_dtypes = {'user_id': 'category', 'song_id': 'category'}
df_cf_training = pd.read_csv(sample_taste_csv_path, sep=';', dtype=id_dtypes)
df_meta_training = pd.read_csv(sample_meta_csv_path, index_col='song_id', sep=';')

In [14]:
# Load every shard of the full taste data and stack them into a single frame.
taste_frames = [
    pd.read_csv(taste_csv_path, sep=';', dtype={'user_id': 'category', 'song_id': 'category'})
    for taste_csv_path in taste_csv_paths
]
# Each shard carries its own category set, so the concatenated id columns are not
# guaranteed to stay categorical. Re-cast them with astype, which always replaces the
# column: assigning through `.loc[:, col] = ...` may instead write the values into the
# existing column and keep its old dtype, which breaks the `.cat` accessors used later.
df_cf_training = pd.concat(taste_frames).astype({'user_id': 'category', 'song_id': 'category'})
df_meta_training = pd.read_csv(meta_csv_path, sep=';', index_col='song_id')

In [16]:
# ALS model with n_factors latent dimensions; the training loss is computed so that it
# shows up in the fit progress bar.
als = AlternatingLeastSquares(
    factors=n_factors,
    calculate_training_loss=True,
)

In [17]:
# Build the user x song play-count matrix: rows are user category codes, columns are
# song category codes.
user_codes = df_cf_training.user_id.cat.codes
song_codes = df_cf_training.song_id.cat.codes
# Size the matrix by the number of categories, not by nunique(): the codes index into
# `cat.categories`, and the factor matrices are later labelled with `cat.categories`, so
# the dimensions must match even if some category never occurs in the data.
n_users = len(df_cf_training.user_id.cat.categories)
n_songs = len(df_cf_training.song_id.cat.categories)
matrix = coo_matrix((df_cf_training.play_count, (user_codes, song_codes)), shape=(n_users, n_songs))


In [18]:
# Convert the matrix to Compressed Sparse Row format.
matrix = matrix.asformat('csr')

In [19]:
# Display the sparse matrix summary (shape, dtype and number of stored elements).
matrix

<1019100x296274 sparse matrix of type '<class 'numpy.int64'>'
	with 38334412 stored elements in Compressed Sparse Row format>

In [20]:
# `matrix` is user x song, so the transpose handed to fit() is song x user.
# NOTE(review): the orientation fit() expects depends on the installed `implicit`
# release (item x user in older releases, user x item in newer ones) - confirm.
als.fit(matrix.transpose())

100%|██████████| 15.0/15 [01:46<00:00,  8.56s/it, loss=0.000288]


In [12]:
# Persist the learned factors for the sample run, one row per user / song, labelled with
# the original ids.
user_ids = df_cf_training.user_id.cat.categories.values
user_factors_frame = pd.DataFrame(als.user_factors, index=user_ids)
user_factors_frame.to_csv('../../data/sample_user_factors.csv', sep=';', index_label='user_id')

song_ids = df_cf_training.song_id.cat.categories.values
song_factors_frame = pd.DataFrame(als.item_factors, index=song_ids)
song_factors_frame.to_csv('../../data/sample_song_factors.csv', sep=';', index_label='song_id')

In [21]:
# Persist the learned factors for the full run, one row per user / song, labelled with
# the original ids.
user_ids = df_cf_training.user_id.cat.categories.values
user_factors_frame = pd.DataFrame(als.user_factors, index=user_ids)
user_factors_frame.to_csv('../../data/user_factors.csv', sep=';', index_label='user_id')

song_ids = df_cf_training.song_id.cat.categories.values
song_factors_frame = pd.DataFrame(als.item_factors, index=song_ids)
song_factors_frame.to_csv('../../data/song_factors.csv', sep=';', index_label='song_id')

In [48]:
# Recommend 30 songs for one user. The matrix row is looked up from the raw user id
# rather than hard-coded, because the positional code depends on which dataset was
# loaded and on the resulting category ordering.
target_user_id = 'd7d53f9aa55c1c824bcdeb16731830a39e3f63e6'
target_user_index = df_cf_training.user_id.cat.categories.get_loc(target_user_id)
recommendations = als.recommend(target_user_index, matrix, N=30)
# First element of each (song code, score) pair, as integers usable for positional lookup.
songs_id = np.array(recommendations)[:, 0].astype(int)
recommendations

[(188560, 1.0331578),
 (34979, 0.95413053),
 (152565, 0.8016545),
 (220608, 0.77571255),
 (234231, 0.73492),
 (12441, 0.70731384),
 (137573, 0.6613155),
 (177254, 0.64270246),
 (98936, 0.6348306),
 (60252, 0.6253587),
 (53083, 0.6051022),
 (266130, 0.5872102),
 (19340, 0.5751554),
 (202394, 0.571092),
 (148744, 0.56020176),
 (62536, 0.55696696),
 (286784, 0.5481842),
 (218511, 0.5332135),
 (95901, 0.527173),
 (229790, 0.5114633),
 (69228, 0.49798298),
 (295568, 0.4961185),
 (1585, 0.4338003),
 (29533, 0.43218198),
 (164667, 0.4273398),
 (6708, 0.42503554),
 (176664, 0.41355523),
 (30321, 0.40935394),
 (121549, 0.40570605),
 (262364, 0.39367065)]

In [49]:
# Show the metadata of the recommended songs. None lifts the column-width limit so long
# values such as the preview URLs are not truncated (negative values like -1 are deprecated).
with pd.option_context('display.max_colwidth', None):
    # Map the recommended song codes back to their song_id labels.
    recommended_song_ids = df_cf_training.song_id.cat.categories[songs_id]
    display(df_meta_training.loc[recommended_song_ids])

Unnamed: 0_level_0,artist_id,artist_name,audio_md5,song_hotttnesss,title,track_id,preview_id,remote_preview_url,preview_artist,preview_title,preview_service
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
SOPXKYD12A6D4FA876,ARJ7KF01187B98D717,Coldplay,53fcc748b1d6fcebcb783eb7c64baefe,0.920359,Yellow,TRIKGRK128E0780DB0,tra.2047583,http://listen.vo.llnwd.net/g3/7/0/5/9/0/1328109507.mp3,Coldplay,Yellow,napster
SOPXKYD12A6D4FA876,ARJ7KF01187B98D717,Coldplay,7d890551199b67e4d286cf1ec8be100e,0.920207,Yellow,TRTZNQZ12903CD044C,tra.2047583,http://listen.vo.llnwd.net/g3/7/0/5/9/0/1328109507.mp3,Coldplay,Yellow,napster
SOCVTLJ12A6310F0FD,ARJ7KF01187B98D717,Coldplay,231b33d55da1ea6f24b278196c277a77,1.0,Clocks,TRENTGL128E0780C8E,tra.2732141,http://listen.vo.llnwd.net/g3/8/7/4/2/2/1068922478.mp3,Coldplay,Clocks,napster
SOMRYYN12A6310F0F3,ARJ7KF01187B98D717,Coldplay,4ca824bc4abc287faceb8e629c4e0dc5,0.933407,In My Place,TRFWGOJ128E0780C8B,tra.2732138,http://listen.vo.llnwd.net/g3/3/0/0/3/2/1068923003.mp3,Coldplay,In My Place,napster
SOSVPIE12A6D4FA873,ARJ7KF01187B98D717,Coldplay,76aed35afedce7716c5786ca155eab0f,0.864931,Shiver,TRTHGPK128F147DF90,tra.2047580,http://listen.vo.llnwd.net/g3/6/5/5/0/1/1328110556.mp3,Coldplay,Shiver,napster
SOUCKDH12A8C138FF5,ARJ7KF01187B98D717,Coldplay,c998145a9cc1016324e5dde82beb9221,0.781932,Don't Panic,TRANBUW128F933C645,tra.2047579,http://listen.vo.llnwd.net/g3/0/1/0/0/1/1328110010.mp3,Coldplay,Don't Panic,napster
SOBABRB12A6701DF4B,ARUJZFJ1187B9B135F,Shania Twain,0a9f7f967a377fdcf9e6b8e8438d40cf,0.569505,Nah!,TRMNYZQ128F1459E10,tra.3901192,http://listen.vo.llnwd.net/g1/7/5/2/7/9/104797257.mp3,Shania Twain,Nah!,napster
SOLJWIQ12A6D4FA875,ARJ7KF01187B98D717,Coldplay,337727ef6776b16d771a6203b5d68fa2,0.846077,Sparks,TRNGAAK128F147DF92,tra.2047582,http://listen.vo.llnwd.net/g3/9/9/4/9/0/1328109499.mp3,Coldplay,Sparks,napster
SOOWPBJ12A8C137FB4,ARJ7KF01187B98D717,Coldplay,855b393e9e84c7432b27f7c1a52d3576,0.778495,Lost!,TRZCIWG128F4248B25,tra.24130223,http://listen.vo.llnwd.net/g3/7/1/3/5/2/1073625317.mp3,Coldplay,Lost!,napster
SOICNON12A8C140437,ARJ7KF01187B98D717,Coldplay,a28504538036136ca35811f463861496,0.826457,Life In Technicolor ii,TRTZPKI128F92FDBB7,tra.24462372,http://listen.vo.llnwd.net/g3/3/6/2/3/1/1325413263.mp3,Coldplay,Life In Technicolor ii,napster


In [44]:
# Position of this user id within the user categories, i.e. its row in the play-count matrix.
target_user_id = 'd7d53f9aa55c1c824bcdeb16731830a39e3f63e6'
df_cf_training['user_id'].cat.categories.get_loc(target_user_id)

859023

In [43]:
# Listening history of one user joined with the song metadata, most-played first.
# None lifts the column-width limit (negative values like -1 are deprecated).
with pd.option_context('display.max_colwidth', None):
    user_plays = df_cf_training[df_cf_training.user_id == 'd7d53f9aa55c1c824bcdeb16731830a39e3f63e6']
    user_plays_with_meta = user_plays.merge(df_meta_training, left_on='song_id', right_index=True)
    display(user_plays_with_meta.sort_values('play_count', ascending=False))

Unnamed: 0,user_id,song_id,play_count,artist_id,artist_name,audio_md5,song_hotttnesss,title,track_id,preview_id,remote_preview_url,preview_artist,preview_title,preview_service
2056396,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOKLRPJ12A8C13C3FE,536,ARJ7KF01187B98D717,Coldplay,532daf5f475ee8f32c52059cc70f410d,1.0,The Scientist,TRQFXKD128E0780CAE,tra.2732140,http://listen.vo.llnwd.net/g3/8/5/2/2/2/1068922258.mp3,Coldplay,The Scientist,napster
2056422,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOXJIDG12A8C13B006,321,ARMQ4HQ1187B9939CA,Hess Is More,d02267e7207bd0c976bd21d7f24f1788,0.838627,Yes Boss,TRJCBGM128F427F0FD,tra.28325777,http://listen.vo.llnwd.net/g2/6/4/7/5/7/951675746.mp3,Hess Is More,Yes Boss,napster
2056411,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOSMZTU12A67020AB6,40,AR0JKDD1187B9BA274,Snooks Eaglin,47bfc5b5f059d62b31f51843692550d7,0.0,Pine Top's Boogie Woogie,TRXOMGD128E078A422,tra.9560412,http://listen.vo.llnwd.net/g2/9/6/9/0/2/915620969.mp3,Snooks Eaglin,Pine Top's Boogie Woogie,napster
2056428,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOYFYHE12A8C142082,29,ARPUZ9M1187B9936FA,Yasmin Levy,3916c750d1feee997c62b2ef250e4a93,0.655953,Una Noche Mas,TRKEZWY128F42972C2,tra.29847568,http://listen.vo.llnwd.net/g3/8/2/5/1/5/1067651528.mp3,Yasmin Levy,Una Noche Mas,napster
2056408,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOQOTZT12A58A7E382,26,ARPUZ9M1187B9936FA,Yasmin Levy,ed82329e6c15be310b993e1e859e8ff3,0.611215,Komo la Roza,TRPSWEZ128F42972C6,tra.29847576,http://listen.vo.llnwd.net/g3/9/2/3/1/5/1067651329.mp3,Yasmin Levy,Komo la Roza,napster
2056424,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOXPPMN12A6D4F980B,8,AR5YRCN1187B98E3C8,Westlife,27d5383d228f6160ffaaeea05b01b7cf,0.593221,Amazing,TRBFBJF128F425A989,tra.55865518,http://listen.vo.llnwd.net/g2/5/4/7/4/1/902114745.mp3,Westlife,Amazing,napster
2056414,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOUKCIO12A67020E75,7,AR84OOM1187B994939,Jackie Ross,0eb71b2aeca9139388a5725838b6fe7f,0.591291,Selfish One,TRSVVQH128E0791CF9,tra.45131806,http://listen.vo.llnwd.net/g1/4/0/1/3/7/504573104.mp3,Jackie Ross,Selfish One,napster
2056426,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOYBUBG12AB01860F9,6,ARGC7XR1187B995AB7,Faramarz Aslani,0f4bf6ca0671d82e7b8eea8864586efa,0.225278,Age Ye Rooz,TRYFRWO128F93296A6,tra.21848435,http://listen.vo.llnwd.net/g1/4/2/0/3/9/118493024.mp3,Faramarz Aslani,Age Ye Rooz,napster
2056415,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOUKJBT12A6701C4D6,6,ARJ7KF01187B98D717,Coldplay,4659916ebcbe7ffe1587813193723fd4,0.96908,Speed Of Sound,TRYNYSX128E07897B3,tra.7372538,http://listen.vo.llnwd.net/g3/6/9/5/6/5/1267856596.mp3,Coldplay,Speed Of Sound,napster
2056417,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOVVRDZ12A6701C577,5,ARJ7KF01187B98D717,Coldplay,6568601a3837b847ad2453fe3acc5df3,0.746731,A Message,TRFWXUO128E0789D3D,tra.7372539,http://listen.vo.llnwd.net/g3/7/2/6/6/5/1267856627.mp3,Coldplay,A Message,napster


In [79]:
# Listeners of song SOAFTRR12AF72A8D4D ordered by play count, read from the taste frame
# this notebook actually defines (df_cf_training).
is_target_song = df_cf_training.song_id == 'SOAFTRR12AF72A8D4D'
df_cf_training[is_target_song].sort_values('play_count', ascending=False)

Unnamed: 0,user_id,song_id,play_count
475293,0f9ff24431527748aa98d5bc979e22b48a658ec3,SOAFTRR12AF72A8D4D,422
18782,6ccd111af9b4baa497aacd6d1863cbf5a141acc6,SOAFTRR12AF72A8D4D,215
925485,2f52595aa076766f67d32b12ff01f92fa3d8273a,SOAFTRR12AF72A8D4D,160
1076314,c58eba224a3cfcf17f415f29a59144a80740dbb3,SOAFTRR12AF72A8D4D,158
1322502,e6d120a0b32cbb94760ca418b8f220dedcea9d41,SOAFTRR12AF72A8D4D,139
17054,299b7b415da45bb7f1983aa9a37ac51643903eb2,SOAFTRR12AF72A8D4D,124
538751,15cafac6a57afcb6d3bd54ef41d9040153143488,SOAFTRR12AF72A8D4D,95
335938,6181935befd608279e6ef0105c94d33537b7b8ea,SOAFTRR12AF72A8D4D,93
900682,b7071cd8864b88cd9f0d5879efb288294c15fe4f,SOAFTRR12AF72A8D4D,90
134005,2bc13d288f4a8f0a0b4889b605f8012121231e45,SOAFTRR12AF72A8D4D,80


In [32]:
# Total plays per song, most-played first. Only play_count is aggregated: summing the
# whole frame would also try to sum the non-numeric user_id column.
(
    df_cf_training
    .groupby('song_id')[['play_count']]
    .sum()
    .sort_values('play_count', ascending=False)
)

Unnamed: 0_level_0,play_count
song_id,Unnamed: 1_level_1
SOBONKR12A58A7A7E0,726885
SOAUWYT12A81C206F1,648239
SOSXLTC12AF72A7F54,527893
SOFRQTD12A81C233C0,425463
SONYKOW12AB01849C9,292642
SOPUCYA12A8C13A694,274627
SOUFTBI12AB0183F65,268353
SOVDSJC12A58A7A271,244730
SOOFYTN12A6D4F9B35,241669
SOBOUPA12A6D4F81F1,225652


In [31]:
# Metadata of the songs ordered by total play count, most-played first. Only play_count
# is aggregated: summing the whole frame would also try to sum the non-numeric user_id column.
play_count_by_song = df_cf_training.groupby('song_id')[['play_count']].sum()
ranked_song_ids = play_count_by_song.sort_values('play_count', ascending=False).index.values
df_meta_training.loc[ranked_song_ids]

Unnamed: 0_level_0,artist_id,artist_name,audio_md5,song_hotttnesss,title,track_id,preview_id,remote_preview_url,preview_artist,preview_title,preview_service
song_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
SOBONKR12A58A7A7E0,AR5E44Z1187B9A1D74,Dwight Yoakam,6045ec9fd2eb75898e4fd5a753390bf1,0.476435,You're The One,TRAEHHJ12903CF492F,tra.6375561,http://listen.vo.llnwd.net/g1/4/8/1/7/6/998367...,Dwight Yoakam,You're The One,napster
SOAUWYT12A81C206F1,ARAOQ5T1187FB435AB,Björk,6e186293f90b2c296112a60d9e2d7b2b,0.937443,Undo,TRGXQES128F42BA5EB,tra.9060524,http://listen.vo.llnwd.net/g1/1/9/5/4/9/151194...,Björk,Undo (LP Version),napster
SOSXLTC12AF72A7F54,ARF2EHS1187B994F4E,Kings Of Leon,cf116e78e53144970c75ea105526a63c,0.918427,Revelry,TRONYHY128F92C9D11,tra.22956604,http://listen.vo.llnwd.net/g3/0/3/3/1/5/128395...,Kings of Leon,Revelry,napster
SOFRQTD12A81C233C0,AR0IVTL1187B9AD520,Harmonia,62e738439ca0ffb213d68d074c504368,1.000000,Sehr kosmisch,TRDMBIJ128F4290431,tra.296623732,http://listen.vo.llnwd.net/g3/6/9/1/0/7/138617...,Harmonia,Sehr kosmisch,napster
SONYKOW12AB01849C9,AR73S4G1187B9A03C2,OneRepublic,c1228ac7d736f1263aa4a8aa14b3043b,1.000000,Secrets,TROAQBZ128F9326213,tra.30863531,http://listen.vo.llnwd.net/g1/9/1/0/2/6/245562...,OneRepublic,Secrets,napster
SOPUCYA12A8C13A694,AR2W94J1187B9B5C2C,Five Iron Frenzy,c4464638212f768afbebb3f35de17734,0.345802,Canada,TRFQOSP128F427C2B2,tra.5207641,http://listen.vo.llnwd.net/g1/7/5/8/2/6/250762...,Five Iron Frenzy,Canada [Live][*],napster
SOUFTBI12AB0183F65,ARJFYA51187B9A9EDB,Tub Ring,b1ded8f45d30cc4f69ea61ee75f749ef,0.413740,Invalid,TRIXAZF128F421EE64,tra.2868511,http://listen.vo.llnwd.net/g2/0/8/9/5/1/877415...,Tub Ring,Invalid,napster
SOVDSJC12A58A7A271,AR4K2P91187B9B2B35,Sam Cooke,d25a46d0e5ad86f4849b123ce6afa29c,0.541995,Ain't Misbehavin,TRRBUQL12903CCE501,tra.223976968,http://listen.vo.llnwd.net/g3/7/6/9/9/8/125498...,Sam Cooke,Ain't Misbehavin',napster
SOOFYTN12A6D4F9B35,ARZO9UQ1187FB4D261,Alliance Ethnik,ac68210a9c2c2e72c22409326f5b714a,0.441947,Représente,TRRGQKP128F1469E20,tra.56891875,http://listen.vo.llnwd.net/g2/7/0/8/1/4/905241...,Alliance Ethnik,Représente,napster
SOBOUPA12A6D4F81F1,ARZO9UQ1187FB4D261,Alliance Ethnik,44e789c478de9a926588800cbfd28213,0.000000,Sincerité Et Jalousie,TRMGUWH128F146903A,tra.56740456,http://listen.vo.llnwd.net/g2/1/4/3/2/5/904852...,Alliance Ethnik,Sincerité et jalousie,napster


In [42]:
# Listeners of song SOKLRPJ12A8C13C3FE, heaviest listeners first.
is_target_song = df_cf_training.song_id == 'SOKLRPJ12A8C13C3FE'
df_cf_training.loc[is_target_song].sort_values(by='play_count', ascending=False)

Unnamed: 0,user_id,song_id,play_count
2056396,d7d53f9aa55c1c824bcdeb16731830a39e3f63e6,SOKLRPJ12A8C13C3FE,536
3143604,89233b32eac4d3a961d0a5fbea4b9fd64e3935d5,SOKLRPJ12A8C13C3FE,310
4779746,fd0b27f40629fc8daafdea91751e8462f827c177,SOKLRPJ12A8C13C3FE,298
7463038,a316b88e5fa30c7c65e346a91f27e9af22c25308,SOKLRPJ12A8C13C3FE,199
8594460,9e9d74e7647c5633712284e0ac3f76fecefb938b,SOKLRPJ12A8C13C3FE,174
6849043,73d89bbbb77b16b7e2cc8a1d088d801059761cc1,SOKLRPJ12A8C13C3FE,129
7431955,bb63ee124324b6bbade1b5b8ba66df1b892f4d4a,SOKLRPJ12A8C13C3FE,118
9819,515e720b502372afeb2cb84b866d7cae4972022c,SOKLRPJ12A8C13C3FE,97
26904,f99f43e5d9ccc534304985097b4bc35649520e5e,SOKLRPJ12A8C13C3FE,91
1611760,c214abfadc2fb430b36c965bbf45be9e6044602b,SOKLRPJ12A8C13C3FE,85
