In [1]:
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt
from scipy import sparse
import sys
from time import time
from IPython.core.debugger import set_trace

from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import LabelEncoder

In [2]:
from NNS import Recommender, user_based_model, content_based_model

In [3]:
# Build the recommender from the project-local NNS module.
# NOTE(review): verbose=True presumably enables the printed result tables that
# appear under the recommend() cells — confirm against NNS.Recommender.
clf = Recommender("content_based_NN", verbose=True)

In [4]:
# Load the saved song label-encoder classes; no user encoder file is supplied
# (path_users=None).
clf.load_encoders(
    path_songs="data/content_based_label_encoder_songs_classes_.npy",
    path_users=None,
)

In [5]:
# Load the content-based data matrix from its SciPy sparse .npz file.
X = sparse.load_npz("data/content_based_data_matrix.npz")

In [6]:
# Fit the recommender on the sparse matrix X loaded from disk.
clf.fit(X=X)
# Optional: uncomment to save the fitted model under the given name
# (presumably persists it to disk — confirm in NNS.Recommender.save_model).
#clf.save_model(model_name="Content_based_KNN_model")

In [7]:
# Draw 10 distinct song IDs reproducibly: a seeded Generator yields the same
# sample on every Restart & Run All, and replace=False rules out duplicate
# songs (np.random.choice samples with replacement by default).
rng = np.random.default_rng(42)
_10_random_songs = rng.choice(clf.song_label_encoder.classes_, size=10, replace=False)
print(_10_random_songs)
# Encode the raw song-ID strings with the song label encoder; the sample is
# already an ndarray, so no extra np.array() wrap is needed.
song_list = clf.song_label_encoder.transform(_10_random_songs).squeeze()
song_list

['SOWGCMD12AB01865EE' 'SOFONIE12A6D4F7BEC' 'SOAEMOK12A8C13DC66'
 'SOCUUOK12A8AE475FD' 'SOOMVUQ12AB018494D' 'SOLDCOY12AB017CA88'
 'SOJYFRN12A58A77D1E' 'SOFSCUQ12AB0181218' 'SOGMREH12A8C13992C'
 'SOCFBNF12AB018849A']


array([331122,  88202,   2706,  44435, 222950, 173402, 155570,  90368,
       102750,  35020], dtype=int64)

In [8]:
# Query with the raw song-ID strings and request song IDs in the result.
# NOTE(review): full_name=True appears to mark the inputs as raw IDs rather
# than encoded integers — confirm against NNS.Recommender.recommend.
clf.recommend(
    song_list=_10_random_songs,
    full_name=True,
    return_song_id=True,
)


  recommended_songs_id  cosine distance
0   SOKVFRU12A58291508         0.273240
1   SOOCYNV12AB017B4B5         0.285545
2   SOFHIOQ12AB017D0D3         0.282800
3   SOEIZTI12AB0187D71         0.286567
4   SOKYXFA12A8C13C52F         0.283063
5   SONZIHI12AB018D777         0.285087
6   SOMQKSF12A8C13FA85         0.277438
7   SOQTAFZ12AB01857AC         0.255571
8   SODBCYD12AB0188CFF         0.281317
9   SOMECOI12A6D4FA233         0.279543


(array(['SOKVFRU12A58291508', 'SOOCYNV12AB017B4B5', 'SOFHIOQ12AB017D0D3',
        'SOEIZTI12AB0187D71', 'SOKYXFA12A8C13C52F', 'SONZIHI12AB018D777',
        'SOMQKSF12A8C13FA85', 'SOQTAFZ12AB01857AC', 'SODBCYD12AB0188CFF',
        'SOMECOI12A6D4FA233'], dtype='<U18'),
 array([0.27323973, 0.28554535, 0.2827996 , 0.28656662, 0.28306293,
        0.28508717, 0.2774377 , 0.25557053, 0.28131688, 0.27954292],
       dtype=float32))

In [9]:
# Same query with the raw song-ID strings, but return_song_id=False: the result
# below lists integer song codes instead of song-ID strings.
clf.recommend(
    song_list=_10_random_songs,
    full_name=True,
    return_song_id=False,
)

   recommended_songs_code  cosine distance
0                  168824         0.273240
1                  217402         0.285545
2                   83938         0.282800
3                   69017         0.286567
4                  170955         0.283063
5                  215351         0.285087
6                  195670         0.277438
7                  254668         0.255571
8                   48332         0.281317
9                  188816         0.279543


(array([168824, 217402,  83938,  69017, 170955, 215351, 195670, 254668,
         48332, 188816], dtype=int64),
 array([0.27323973, 0.28554535, 0.2827996 , 0.28656662, 0.28306293,
        0.28508717, 0.2774377 , 0.25557053, 0.28131688, 0.27954292],
       dtype=float32))

In [10]:
# Query with the label-encoded integer codes (song_list) instead of raw IDs,
# hence full_name=False; song IDs are still requested in the result.
clf.recommend(
    song_list=song_list,
    full_name=False,
    return_song_id=True,
)

  recommended_songs_id  cosine distance
0   SOKVFRU12A58291508         0.273240
1   SOOCYNV12AB017B4B5         0.285545
2   SOFHIOQ12AB017D0D3         0.282800
3   SOEIZTI12AB0187D71         0.286567
4   SOKYXFA12A8C13C52F         0.283063
5   SONZIHI12AB018D777         0.285087
6   SOMQKSF12A8C13FA85         0.277438
7   SOQTAFZ12AB01857AC         0.255571
8   SODBCYD12AB0188CFF         0.281317
9   SOMECOI12A6D4FA233         0.279543


(array(['SOKVFRU12A58291508', 'SOOCYNV12AB017B4B5', 'SOFHIOQ12AB017D0D3',
        'SOEIZTI12AB0187D71', 'SOKYXFA12A8C13C52F', 'SONZIHI12AB018D777',
        'SOMQKSF12A8C13FA85', 'SOQTAFZ12AB01857AC', 'SODBCYD12AB0188CFF',
        'SOMECOI12A6D4FA233'], dtype='<U18'),
 array([0.27323973, 0.28554535, 0.2827996 , 0.28656662, 0.28306293,
        0.28508717, 0.2774377 , 0.25557053, 0.28131688, 0.27954292],
       dtype=float32))

In [14]:
# Pick one song ID reproducibly (seeded Generator instead of the unseeded
# global np.random state) so the stored outputs can be regenerated.
_1_song = np.random.default_rng(42).choice(clf.song_label_encoder.classes_, 1)
# Build a 10-element query consisting of that same song repeated; np.repeat
# gives the same flat array as stacking ten copies and ravel()-ing them.
_1_same_song = np.repeat(_1_song, 10)
_1_same_song

array(['SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06',
       'SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06',
       'SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06', 'SOGKVMW12A8C137F06',
       'SOGKVMW12A8C137F06'], dtype='<U18')

In [15]:
# Query with ten copies of a single song ID (raw ID strings in, song IDs out).
clf.recommend(
    song_list=_1_same_song,
    full_name=True,
    return_song_id=True,
)

  recommended_songs_id  cosine distance
0   SOBXKTH12A58A788E2         0.287749
1   SOEJCDX12AB017C0C0         0.287724
2   SOBIMMV12AB01813EC         0.287687
3   SOZTCMM12A58A7A66D         0.287657
4   SOOUTYI12A6D4F8C3B         0.287612
5   SOMDBTA12AB018A788         0.287601
6   SOAXGPZ12AB0189288         0.287595
7   SOGCLDV12AB01816E0         0.287448
8   SOMXFTU12A6D4F9897         0.287392
9   SONOWNO12AC468E70B         0.287345


(array(['SOBXKTH12A58A788E2', 'SOEJCDX12AB017C0C0', 'SOBIMMV12AB01813EC',
        'SOZTCMM12A58A7A66D', 'SOOUTYI12A6D4F8C3B', 'SOMDBTA12AB018A788',
        'SOAXGPZ12AB0189288', 'SOGCLDV12AB01816E0', 'SOMXFTU12A6D4F9897',
        'SONOWNO12AC468E70B'], dtype='<U18'),
 array([0.28774863, 0.2877239 , 0.28768718, 0.28765678, 0.28761208,
        0.28760064, 0.2875949 , 0.28744793, 0.28739196, 0.2873453 ],
       dtype=float32))

# Simplified function: `content_based_model`

In [11]:
# Obtain a recommender via the content_based_model helper from the project-local
# NNS module, in a single call.
# NOTE(review): presumably this wraps the construct / load_encoders / fit steps
# performed manually on `clf` — confirm in NNS.
model1 = content_based_model(verbose=True)

In [12]:
# Query the helper-built model with the raw song-ID strings and request
# song IDs in the result.
model1.recommend(
    song_list=_10_random_songs,
    full_name=True,
    return_song_id=True,
)

  recommended_songs_id  cosine distance
0   SOKVFRU12A58291508         0.273240
1   SOOCYNV12AB017B4B5         0.285545
2   SOFHIOQ12AB017D0D3         0.282800
3   SOEIZTI12AB0187D71         0.286567
4   SOKYXFA12A8C13C52F         0.283063
5   SONZIHI12AB018D777         0.285087
6   SOMQKSF12A8C13FA85         0.277438
7   SOQTAFZ12AB01857AC         0.255571
8   SODBCYD12AB0188CFF         0.281317
9   SOMECOI12A6D4FA233         0.279543


(array(['SOKVFRU12A58291508', 'SOOCYNV12AB017B4B5', 'SOFHIOQ12AB017D0D3',
        'SOEIZTI12AB0187D71', 'SOKYXFA12A8C13C52F', 'SONZIHI12AB018D777',
        'SOMQKSF12A8C13FA85', 'SOQTAFZ12AB01857AC', 'SODBCYD12AB0188CFF',
        'SOMECOI12A6D4FA233'], dtype='<U18'),
 array([0.27323973, 0.28554535, 0.2827996 , 0.28656662, 0.28306293,
        0.28508717, 0.2774377 , 0.25557053, 0.28131688, 0.27954292],
       dtype=float32))

In [13]:
# Query the helper-built model with the label-encoded integer codes
# (full_name=False) and request song IDs in the result.
model1.recommend(
    song_list=song_list,
    full_name=False,
    return_song_id=True,
)

  recommended_songs_id  cosine distance
0   SOKVFRU12A58291508         0.273240
1   SOOCYNV12AB017B4B5         0.285545
2   SOFHIOQ12AB017D0D3         0.282800
3   SOEIZTI12AB0187D71         0.286567
4   SOKYXFA12A8C13C52F         0.283063
5   SONZIHI12AB018D777         0.285087
6   SOMQKSF12A8C13FA85         0.277438
7   SOQTAFZ12AB01857AC         0.255571
8   SODBCYD12AB0188CFF         0.281317
9   SOMECOI12A6D4FA233         0.279543


(array(['SOKVFRU12A58291508', 'SOOCYNV12AB017B4B5', 'SOFHIOQ12AB017D0D3',
        'SOEIZTI12AB0187D71', 'SOKYXFA12A8C13C52F', 'SONZIHI12AB018D777',
        'SOMQKSF12A8C13FA85', 'SOQTAFZ12AB01857AC', 'SODBCYD12AB0188CFF',
        'SOMECOI12A6D4FA233'], dtype='<U18'),
 array([0.27323973, 0.28554535, 0.2827996 , 0.28656662, 0.28306293,
        0.28508717, 0.2774377 , 0.25557053, 0.28131688, 0.27954292],
       dtype=float32))

In [16]:
# Query the helper-built model with ten copies of a single song ID
# (raw ID strings in, song IDs out).
model1.recommend(
    song_list=_1_same_song,
    full_name=True,
    return_song_id=True,
)

  recommended_songs_id  cosine distance
0   SOBXKTH12A58A788E2         0.287749
1   SOEJCDX12AB017C0C0         0.287724
2   SOBIMMV12AB01813EC         0.287687
3   SOZTCMM12A58A7A66D         0.287657
4   SOOUTYI12A6D4F8C3B         0.287612
5   SOMDBTA12AB018A788         0.287601
6   SOAXGPZ12AB0189288         0.287595
7   SOGCLDV12AB01816E0         0.287448
8   SOMXFTU12A6D4F9897         0.287392
9   SONOWNO12AC468E70B         0.287345


(array(['SOBXKTH12A58A788E2', 'SOEJCDX12AB017C0C0', 'SOBIMMV12AB01813EC',
        'SOZTCMM12A58A7A66D', 'SOOUTYI12A6D4F8C3B', 'SOMDBTA12AB018A788',
        'SOAXGPZ12AB0189288', 'SOGCLDV12AB01816E0', 'SOMXFTU12A6D4F9897',
        'SONOWNO12AC468E70B'], dtype='<U18'),
 array([0.28774863, 0.2877239 , 0.28768718, 0.28765678, 0.28761208,
        0.28760064, 0.2875949 , 0.28744793, 0.28739196, 0.2873453 ],
       dtype=float32))