In [1]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.preprocessing import normalize
import os

In [2]:
# Toy interaction matrix used to illustrate the idea on a small example.
# Presumably rows stand for users and columns for items, mirroring the
# user x artist matrix built later in the notebook; 0 means "no interaction".
toy_counts = [
    [0,   0, 100,  0,  0,   5],
    [0,   1,   0, 10,  0,  20],
    [0,   0,  20,  0,  0, 120],
    [0, 100, 100,  0,  0,  10],
    [0,  30,  12,  6, 13,  17],
]
A = np.array(toy_counts)

# Scale every row to unit Euclidean length, so that the dot product of two
# rows equals their cosine similarity.
A_norm = normalize(A, axis=1, norm='l2')

In [3]:
# Row-to-row cosine similarities (rows of A_norm have unit length).
row_similarity = A_norm.dot(A_norm.T)
# For each row: indices of all rows ordered from most to least similar.
[np.argsort(scores)[::-1] for scores in row_similarity]

[array([0, 3, 4, 2, 1]),
 array([1, 2, 4, 3, 0]),
 array([2, 1, 4, 0, 3]),
 array([3, 4, 0, 2, 1]),
 array([4, 3, 1, 2, 0])]

In [4]:
# Weight every row of A_norm by its similarity to each other row, which
# yields one score per (row, column) pair.
neighbour_weights = A_norm.dot(A_norm.T)
predicted_scores = neighbour_weights.dot(A_norm)
# For each row: column indices ordered from highest to lowest score.
[np.argsort(column_scores)[::-1] for column_scores in predicted_scores]

[array([2, 1, 5, 4, 3, 0]),
 array([5, 3, 1, 2, 4, 0]),
 array([5, 2, 1, 3, 4, 0]),
 array([2, 1, 5, 4, 3, 0]),
 array([5, 1, 2, 3, 4, 0])]

# Music recommender system example

In [5]:
# Both input CSV files are expected in the local "archive" folder
# (paths are relative to the working directory).
interactions_path, titles_path = (
    os.path.join("archive", csv_name)
    for csv_name in ("lastfm_user_scrobbles.csv", "lastfm_artist_list.csv")
)

In [6]:
# Read the two CSV files with pandas defaults: the listening records first,
# then the artist lookup table.
interactions_df, titles_df = map(pd.read_csv, (interactions_path, titles_path))

In [7]:
# Preview the listening records: each row holds a user_id, an artist_id and a scrobbles count.
interactions_df

Unnamed: 0,user_id,artist_id,scrobbles
0,1,4562,13883
1,1,10191,11690
2,1,494,11351
3,1,6673,10300
4,1,8402,8983
...,...,...,...
92787,1892,10098,278
92788,1892,8660,263
92789,1892,3274,258
92790,1892,4240,232


In [8]:
# Preview the artist lookup table: artist_id alongside artist_name.
titles_df

Unnamed: 0,artist_id,artist_name
0,1,__Max__
1,2,_Algol_
2,3,-123 Min.
3,4,-Oz-
4,5,-T De Sangre
...,...,...
17488,17489,鷺巣詩郎
17489,17490,黃立行
17490,17491,黄义达
17491,17492,黒木メイサ


In [9]:
# Index the artist table by artist_id while keeping artist_id as a regular
# column, so that a row's index label is its artist_id.
titles_df = titles_df.set_index('artist_id', drop=False)

# Lookup table: artist_id -> artist_name.
artist_names = titles_df['artist_name']
title_dict = artist_names.to_dict()

In [10]:
# Pull each column out by name rather than by position in `.values`: this does
# not depend on the column order of the CSV and keeps each column's own dtype.
user_ids = interactions_df['user_id'].to_numpy()
artist_ids = interactions_df['artist_id'].to_numpy()
scrobble_counts = interactions_df['scrobbles'].to_numpy()

# `rows` / `cols` are the sorted unique user / artist ids; `r_pos` / `c_pos`
# give, for every record, the position of its id inside `rows` / `cols`.
# Matrix row i therefore belongs to user rows[i] and column j to artist cols[j].
rows, r_pos = np.unique(user_ids, return_inverse=True)
cols, c_pos = np.unique(artist_ids, return_inverse=True)

# users x artists matrix of scrobble counts. The shape is stated explicitly
# instead of being inferred from the largest index.
# NOTE: repeated (user, artist) records are summed by this constructor.
interactions_sparse = sparse.csr_matrix(
    (scrobble_counts, (r_pos, c_pos)),
    shape=(len(rows), len(cols)),
)

In [11]:
# Matrix dimensions: (number of unique user ids, number of unique artist ids).
interactions_sparse.shape

(1892, 17493)

In [12]:
# Alternative construction of the users x artists matrix via a pandas pivot
# table (missing pairs filled with 0). The result is only displayed, not stored.
pivot_spec = dict(index='user_id', columns='artist_id', values='scrobbles')
sparse.csr_matrix(interactions_df.pivot_table(**pivot_spec).fillna(0))

<1892x17493 sparse matrix of type '<class 'numpy.float64'>'
	with 92723 stored elements in Compressed Sparse Row format>

In [13]:
# Scale each row of the users x artists matrix to unit Euclidean length.
Pui = normalize(interactions_sparse, axis=1, norm='l2')

# artists x artists matrix: entry (i, j) sums, over all users, the product of
# the user's normalised values for artist i and artist j.
sim = Pui.T @ Pui

In [14]:
# Show the artist-by-artist matrix in dense form.
# NOTE(review): `sim` is square in the number of artist columns (17493 per
# interactions_sparse.shape), so densifying allocates roughly 17493 x 17493
# float64 values (about 2.4 GB) just for display - consider viewing a small
# slice instead.
sim.todense()

matrix([[0.00120405, 0.        , 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.00258841, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.        , 0.        , 0.00383689, ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.        , 0.        , 0.        , ..., 0.0228355 , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.00827076,
         0.        ],
        [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
         0.0010429 ]])

In [24]:
# Case-insensitive substring search over all artist names.
artist_query = 'ColdPlay'
query_upper = artist_query.upper()  # upper-case the query once, not per name
list_artist = [
    name
    for name in title_dict.values()
    # Skip non-string names (e.g. a missing value read as NaN), which have
    # no .upper() method.
    if isinstance(name, str) and query_upper in name.upper()
]

# Always (re)bind `artist`: when nothing matches it becomes None instead of
# staying undefined or keeping a value from an earlier run of this cell.
artist = list_artist[0] if list_artist else None
if artist is not None:
    print(artist)
else:
    print("Name is not avalible!")

Coldplay


In [16]:
# a = str(input('Enter an artist name to show recommendations: '))
a = artist
# titles_df is indexed by artist_id, so the index label of the matching row
# is the artist's id.
numb = titles_df[titles_df['artist_name']==a].index[0]

# Translate the artist id into its column position in the interaction matrix.
# `cols` holds the sorted unique artist ids the matrix was built from, so the
# position is looked up there instead of assuming ids are exactly 1..N.
col_pos = int(np.searchsorted(cols, numb))
if col_pos == len(cols) or cols[col_pos] != numb:
    raise KeyError(f"artist_id {numb} has no column in the interaction matrix")

# The 20 highest-scoring columns for this artist, in ascending score order
# (the best matches come last).
top_cols = sim[col_pos].toarray().argsort()[0][-20:]
[title_dict[cols[i]] for i in top_cols]

['Snow Patrol',
 'Green Day',
 'Sigur Ros',
 'Madonna',
 'The Strokes',
 'Lady Gaga',
 'Placebo',
 'Britney Spears',
 'Kings Of Leon',
 'Keane',
 'U2',
 'Arctic Monkeys',
 'Depeche Mode',
 'Oasis',
 'Paramore',
 'The Beatles',
 'The Killers',
 'Muse',
 'Radiohead',
 'Coldplay']

In [17]:
%%time
fit = Pui * Pui.T * Pui

CPU times: user 633 ms, sys: 72 ms, total: 705 ms
Wall time: 754 ms


In [18]:
# Show the summary repr of the score matrix (shape, dtype, number of stored entries).
fit

<1892x17493 sparse matrix of type '<class 'numpy.float64'>'
	with 18946119 stored elements in Compressed Sparse Row format>

In [19]:
# Row position of the user inside interactions_sparse. It indexes the matrix
# directly; the corresponding user id is rows[client_numb].
client_numb = 1520
# How many of the highest-scoring artists to keep as predictions.
top_n = 70

# Column positions are mapped back to artist ids through `cols` (the sorted
# unique artist ids the matrix was built from) rather than assuming that
# column i always corresponds to artist id i + 1.

# Artists the user has a non-zero interaction with.
seen_cols = interactions_sparse[client_numb].nonzero()[1]
init_set = {title_dict[cols[i]] for i in seen_cols}

# Artists with the top_n highest scores for this user.
best_cols = fit[client_numb].toarray().argsort()[0][-top_n:]
predictied_set = {title_dict[cols[i]] for i in best_cols}

In [20]:
# Names of the artists the selected user already has interactions with.
init_set

{'311',
 'Aj Mclean',
 'Alanis Morissette',
 'Alice In Chains',
 'Arctic Monkeys',
 'Backstreet Boys',
 'Brandon Boyd',
 'Britney Spears',
 'Cansei De Ser Sexy',
 'Daft Punk',
 'David Bowie',
 'Death Cab For Cutie',
 'Fischerspooner',
 'Franz Ferdinand',
 'Gossip',
 'Hot Hot Heat',
 'Imogen Heap',
 'Incubus',
 'Indochine',
 'Interpol',
 'Jason Castro',
 'John Frusciante',
 'John Mayer',
 'Justin Timberlake',
 'Kings Of Leon',
 'Klaxons',
 'Lady Gaga',
 'Ladytron',
 'Madonna',
 'Mcfly',
 'Metric',
 'Morrissey',
 'Muse',
 'Nick Carter',
 'Nirvana',
 'Pendulum',
 'Placebo',
 'Queens Of The Stone Age',
 'R.Sigma',
 'Radiohead',
 'Rammstein',
 'Red Hot Chili Peppers',
 'Silverchair',
 'Slipknot',
 'The Beatles',
 'The Cure',
 'The Smashing Pumpkins',
 'The Smiths',
 'The Strokes',
 'Yeah Yeah Yeahs'}

In [21]:
# Predicted artists that are not already among the user's own artists.
predictied_set - init_set

{'30 Seconds To Mars',
 'Amy Winehouse',
 'Arcade Fire',
 'Avril Lavigne',
 'Beyonce',
 'Björk',
 'Blink-182',
 'Blur',
 'Christina Aguilera',
 'Coldplay',
 'Crystal Castles',
 'Depeche Mode',
 'Elliott Smith',
 'Evanescence',
 'Florence + The Machine',
 'Foo Fighters',
 'Glee Cast',
 'Green Day',
 'Joy Division',
 'Katy Perry',
 'Ke$Ha',
 'Kylie Minogue',
 'Lily Allen',
 'Linkin Park',
 'Los Hermanos',
 'Mariah Carey',
 'Metallica',
 'Mgmt',
 'Michael Jackson',
 'Miley Cyrus',
 'My Chemical Romance',
 'Nine Inch Nails',
 'Oasis',
 'Paramore',
 'Pearl Jam',
 'Pink Floyd',
 'Rihanna',
 'Shakira',
 'Sigur Ros',
 'System Of A Down',
 'The Killers',
 'The Kooks',
 'U2'}

# Quality Metrics

**Normalized Discounted Cumulative Gain (NDCG)**  
This metric cares about position.  
Relevant items at the top = better score.  
Relevant items far down = lower score.  
Helps check if the system puts important things first.


**Mean Average Precision (MAP)**  
Shows how well the system recommends relevant items on average.  
If relevant items appear at the top of the list — MAP is high.  
If they’re scattered or appear too late — MAP drops.

**Hit Rate**  
The simplest metric:  
Did the system manage to recommend at least one good item?  
If yes — it's a hit.  
If no — it's a miss.  
Doesn't care about position — just “did we hit or not?”

# Conclusion

The only reliable way to evaluate a recommender system is through A/B testing, as offline metrics often fail to capture real user preferences and context.