# Recommender System: Python, HetRec 2011 Last.FM Dataset
## Recommender based on user activity

Reference: 2nd Workshop on Information Heterogeneity and Fusion in Recommender Systems (HetRec 2011). I. Cantod, P Brusilovsky, T. Kuflik. Proceedings of the 5th ACM conference on Recommender systems.<br>
https://grouplens.org/datasets/hetrec-2011/<br>

In [172]:
import numpy as np
import pandas as pd

import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics import mean_squared_error
from math import sqrt

import matrix_factorization_utilities

In [34]:
artists = pd.read_csv('artists.dat',
                      delimiter='\t', low_memory=False)

In [35]:
artists.head(3)

Unnamed: 0,id,name,url,pictureURL
0,1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
1,2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
2,3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...


In [36]:
artists = artists.rename(columns = {'id':'artistID'})
artists = artists.set_index('artistID')
artists.head(3)

Unnamed: 0_level_0,name,url,pictureURL
artistID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg
2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg
3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...


In [154]:
len(artists)

17632

In [37]:
user_artists = pd.read_csv('user_artists.dat',
                      delimiter='\t', low_memory=False)

In [38]:
user_artists.head()
#weight corresponds to listening count

Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983


In [39]:
user_artists_artperuser = user_artists.groupby(['userID', 'artistID']).size().groupby(['userID']).size()

In [40]:
user_artists_artperuser.head(10)

userID
2     50
3     50
4     50
5     50
6     50
7     50
8     50
9     50
10    50
11    50
dtype: int64

In [41]:
print('number userID = {}'.format(len(user_artists_artperuser)))

number userID = 1892


In [42]:
users_withenough_artists = user_artists_artperuser[user_artists_artperuser >= 5].reset_index()[['userID']]

In [43]:
print('number userID listening to at least 5 artists = {}'.format(len(users_withenough_artists)))

number userID listening to at least 5 artists = 1877


In [44]:
users_withenough_artists.head(5)

Unnamed: 0,userID
0,2
1,3
2,4
3,5
4,6


In [45]:
# combine user_artists with users_withenough_artists
sel_user_art = pd.merge(users_withenough_artists, user_artists,
                       how='inner', left_on='userID', right_on='userID')

In [46]:
sel_user_art.head(5)

Unnamed: 0,userID,artistID,weight
0,2,51,13883
1,2,52,11690
2,2,53,11351
3,2,54,10300
4,2,55,8983


In [117]:
w = sel_user_art[['weight']].values.astype(float)
min_max_scaler = preprocessing.MinMaxScaler()
w_scaled = min_max_scaler.fit_transform(w)
w_norm = pd.DataFrame(w_scaled)
w_norm.columns = ['norm_weight']
w_norm.head(5)

Unnamed: 0,norm_weight
0,0.03936
1,0.033142
2,0.032181
3,0.029201
4,0.025467


In [120]:
# combine sel_user_art and w_norm
sel_user_art_norm = pd.merge(sel_user_art, w_norm,
                       how='inner', left_index=True, right_index=True)
sel_user_art_norm = sel_user_art_norm.drop(['weight'], axis=1)
sel_user_art_norm.head(5)

Unnamed: 0,userID,artistID,norm_weight
0,2,51,0.03936
1,2,52,0.033142
2,2,53,0.032181
3,2,54,0.029201
4,2,55,0.025467


In [121]:
sel_userart_norm_train, sel_userart_norm_test = train_test_split(sel_user_art_norm,
                                                        stratify=sel_user_art_norm['userID'],
                                                        test_size=0.3,
                                                        random_state=42)

In [122]:
n_users = sel_user_art_norm.userID.unique().shape[0]
n_artists = sel_user_art_norm.artistID.unique().shape[0]
print('Number of users = {} and number of artists = {}'.format(n_users, n_artists))

Number of users = 1877 and number of artists = 17617


In [189]:
p_sel_user_art = pd.pivot_table(sel_user_art_norm, index='userID',
                                columns='artistID', aggfunc=np.max)
p_sel_user_art.head(5)

Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,3,4,5,6,7,8,9,10,...,18736,18737,18738,18739,18740,18741,18742,18743,18744,18745
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,


In [186]:
p_sel_userart_train = pd.pivot_table(sel_userart_norm_train, index='userID',
                                columns='artistID', aggfunc=np.max)
p_sel_userart_test = pd.pivot_table(sel_userart_norm_test, index='userID',
                                columns='artistID', aggfunc=np.max)

In [195]:
p_sel_userart_train.head(5)

Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,3,4,5,6,7,8,9,10,...,18727,18730,18734,18735,18737,18740,18741,18743,18744,18745
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,


In [196]:
p_sel_userart_test.head(5)

Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,5,6,7,8,9,10,11,12,...,18714,18717,18724,18725,18728,18729,18736,18738,18739,18742
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,,,,,,...,,,,,,,,,,
6,,,,,,,,,,,...,,,,,,,,,,


In [188]:
p_userart_train = p_sel_userart_train.fillna(0)
p_userart_test = p_sel_userart_test.fillna(0)

In [193]:
p_userart_train.head(15)

Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,3,4,5,6,7,8,9,10,...,18727,18730,18734,18735,18737,18740,18741,18743,18744,18745
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.001661,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [130]:
mean_p_train = p_sel_userart_train.mean(axis=1)
mean_p_train.head(15)

userID
2     2.572552e-05
3     3.546764e-06
4     3.974884e-06
5     1.772583e-06
6     1.234039e-07
7     2.172408e-05
8     4.666984e-06
9     3.617652e-06
10    3.834707e-06
11    6.591127e-06
12    3.078249e-05
13    2.795558e-08
14    4.944743e-06
15    2.794560e-06
16    1.963480e-06
dtype: float64

In [131]:
user_sim = pairwise_distances(p_sel_userart_train, metric='cosine')
art_sim = pairwise_distances(p_sel_userart_train.T, metric='cosine')

In [59]:
#def pred(weight, similarity, type='user'):
#    if type == 'user':
#        mean_user_weight = weight.mean(axis=1)
#        weight_diff = (weight - mean_user_weight[:, np.newaxis])
#        pred = mean_user_weight[:, np.newaxis] + similarity.dot(weight_diff) / np.array([np.abs(similarity).sum(axis=1)]).T
#    elif type == 'artist':
#        pred = weight.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
#    return pred

In [132]:
def pred(weight, similarity, type='user'):
    if type == 'user':
        mean_user_weight = weight.mean(axis=1)
        weight_diff = weight.sub(mean_user_weight, axis=0)
        pred = pd.DataFrame(similarity.dot(weight_diff) / np.array([np.abs(similarity).sum(axis=1)]).T).add(mean_user_weight, axis=0)
        #pred = pred.as_matrix()
    elif type == 'artist':
        pred = weight.dot(similarity) / np.array([np.abs(similarity).sum(axis=1)])
    return pred.as_matrix()

In [133]:
user_prediction = pred(p_sel_userart_train, user_sim, type='user')

In [76]:
artist_prediction = pred(p_sel_userart_train, art_sim, type='artist')

ValueError: Unable to coerce to DataFrame, shape must be (1877, 14199): given (1, 14199)

In [134]:
def rmse(prediction, ground_truth):
    prediction = prediction[ground_truth.nonzero()].flatten()
    ground_truth = ground_truth[ground_truth.nonzero()].flatten()
    mse = mean_squared_error(np.nan_to_num(prediction),
                             np.nan_to_num(ground_truth))
    return sqrt(mse)

In [135]:
m_sel_user_art_test = p_sel_userart_test.as_matrix()

In [136]:
print('User-based CF RMSE: ' + str(rmse(user_prediction, m_sel_user_art_test)))

User-based CF RMSE: 0.014135628133673339


In [146]:
# due to time it takes to run, output was saved to csv, and loaded for subsequent analysis

'''
U, M = matrix_factorization_utilities.low_rank_matrix_factorization(p_sel_user_art.as_matrix(),
                                                                   num_features=5,
                                                                   regularization_amount=0.1)
pred_weight = np.matmul(U, M)
'''

Optimization terminated successfully.
         Current function value: 2.212841
         Iterations: 914
         Function evaluations: 1276
         Gradient evaluations: 1276


In [175]:
'''
pred_weight_results = pd.DataFrame(index=p_sel_user_art.index,
                             columns=p_sel_user_art.columns,
                             data=pred_weight)
pred_weight_results.to_csv('pred_weight_results.csv')
pred_weight_results.head(5)
'''

In [185]:
pred_weight_df = pd.read_csv('pred_weight_results.csv',
                             delimiter=',', header=[0,1],
                             index_col=0, low_memory=False)

pred_weight_df.head(5)

Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,3,4,5,6,7,8,9,10,...,18736,18737,18738,18739,18740,18741,18742,18743,18744,18745
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,1.304602e-05,6.39131e-05,1.243568e-06,3.679395e-06,1.425435e-05,0.0001019868,0.001303,0.000334,0.0001564882,9.339306e-05,...,6.875998e-06,4.377771e-06,3.187393e-06,2.960281e-06,2.498227e-06,2.34943e-06,2.29461e-06,2.23979e-06,2.231958e-06,1.610477e-05
3,1.482237e-07,9.258858e-07,1.919372e-08,5.001578e-08,1.79908e-07,1.234436e-06,3.3e-05,4e-06,2.236097e-06,9.321376e-07,...,9.295518e-08,5.918217e-08,4.30897e-08,4.001943e-08,3.377301e-08,3.176145e-08,3.102035e-08,3.027925e-08,3.017338e-08,1.864148e-07
4,4.846747e-06,1.193999e-05,4.260899e-07,5.279614e-07,2.080239e-06,1.551336e-05,0.000122,5.2e-05,6.013529e-05,1.484893e-05,...,1.710689e-06,1.089152e-06,7.929958e-07,7.364924e-07,6.215373e-07,5.845178e-07,5.708791e-07,5.572403e-07,5.552919e-07,2.016107e-06
5,1.638916e-06,8.456203e-06,1.955225e-07,4.92914e-07,1.616318e-06,1.22016e-05,0.000267,3.6e-05,2.740684e-05,1.015651e-05,...,8.813397e-07,5.611263e-07,4.085481e-07,3.794378e-07,3.202134e-07,3.011411e-07,2.941145e-07,2.870879e-07,2.860841e-07,1.757022e-06
6,4.261186e-08,2.746639e-07,5.54462e-09,1.486421e-08,5.581256e-08,4.05324e-07,1e-05,1e-06,5.969183e-07,3.233597e-07,...,2.694114e-08,1.715273e-08,1.248866e-08,1.159881e-08,9.78841e-09,9.205401e-09,8.990608e-09,8.775816e-09,8.745131e-09,6.104573e-08


In [148]:
print("Enter a user_id to get recommendations:")
user_id_to_search = int(input())

Enter a user_id to get recommendations:
11


In [149]:
print("Sample artists listened to by user_id {}:".format(user_id_to_search))

art_userID = sel_user_art_norm[sel_user_art_norm['userID'] == user_id_to_search]
art_userID = art_userID.join(artists, on='artistID')

art_userID.head(5)

Sample artists listened to by user_id 11:


Unnamed: 0,userID,artistID,norm_weight,name,url,pictureURL
450,11,67,0.000646,Madonna,http://www.last.fm/music/Madonna,http://userserve-ak.last.fm/serve/252/340387.jpg
451,11,89,0.009592,Lady Gaga,http://www.last.fm/music/Lady+Gaga,http://userserve-ak.last.fm/serve/252/47390093...
452,11,157,0.002852,Michael Jackson,http://www.last.fm/music/Michael+Jackson,http://userserve-ak.last.fm/serve/252/30501527...
453,11,230,0.00093,Green Day,http://www.last.fm/music/Green+Day,http://userserve-ak.last.fm/serve/252/15291249...
454,11,234,0.001046,Nirvana,http://www.last.fm/music/Nirvana,http://userserve-ak.last.fm/serve/252/380269.jpg


In [153]:
user_pref = pred_weight[user_id_to_search - 1]
user_pref.shape

(17617,)

In [163]:
user_pref_df = pd.DataFrame(user_pref)
user_pref_df.columns = ['pred_norm_weight']
user_pref_df.index = user_pref_df.index + 1
user_pref_df.head(5)

Unnamed: 0,pred_norm_weight
1,4.1e-05
2,0.000352
3,5e-06
4,2e-05
5,7.3e-05


In [165]:
user_pref_art = pd.merge(artists, user_pref_df, how='inner', left_index=True, right_index=True)
user_pref_art.head(5)

Unnamed: 0,name,url,pictureURL,pred_norm_weight
1,MALICE MIZER,http://www.last.fm/music/MALICE+MIZER,http://userserve-ak.last.fm/serve/252/10808.jpg,4.1e-05
2,Diary of Dreams,http://www.last.fm/music/Diary+of+Dreams,http://userserve-ak.last.fm/serve/252/3052066.jpg,0.000352
3,Carpathian Forest,http://www.last.fm/music/Carpathian+Forest,http://userserve-ak.last.fm/serve/252/40222717...,5e-06
4,Moi dix Mois,http://www.last.fm/music/Moi+dix+Mois,http://userserve-ak.last.fm/serve/252/54697835...,2e-05
5,Bella Morte,http://www.last.fm/music/Bella+Morte,http://userserve-ak.last.fm/serve/252/14789013...,7.3e-05


In [171]:
print("Artists to recommend:")
already_listening = sel_user_art_norm.loc[sel_user_art_norm['userID'] == user_id_to_search,
                                          ['artistID']]
recommend_art = user_pref_art[user_pref_art.index.isin(already_listening) == False]
recommend_art = recommend_art.sort_values(by=['pred_norm_weight'], ascending=False)

recommend_art.head(5)

Artists to recommend:


Unnamed: 0,name,url,pictureURL,pred_norm_weight
84,Cut Copy,http://www.last.fm/music/Cut+Copy,http://userserve-ak.last.fm/serve/252/9539517.jpg,0.129583
182,Keane,http://www.last.fm/music/Keane,http://userserve-ak.last.fm/serve/252/8891209.jpg,0.091227
287,Monica,http://www.last.fm/music/Monica,http://userserve-ak.last.fm/serve/252/43749117...,0.088627
373,Jay Park,http://www.last.fm/music/Jay+Park,http://userserve-ak.last.fm/serve/252/54124413...,0.081956
702,Ester Dean,http://www.last.fm/music/Ester+Dean,http://userserve-ak.last.fm/serve/252/61470597...,0.081144


In [190]:
UT, AT = matrix_factorization_utilities.low_rank_matrix_factorization(p_sel_userart_train.as_matrix(),
                                                                   num_features=5,
                                                                   regularization_amount=0.1)
pred_weight_train = np.matmul(UT, AT)

pred_weight_train_results = pd.DataFrame(index=p_sel_userart_train.index,
                             columns=p_sel_userart_train.columns,
                             data=pred_weight_train)
pred_weight_train_results.to_csv('pred_weight_train_results.csv')
pred_weight_train_results.head(5)

Optimization terminated successfully.
         Current function value: 1.372669
         Iterations: 505
         Function evaluations: 989
         Gradient evaluations: 989


Unnamed: 0_level_0,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight,norm_weight
artistID,1,2,3,4,5,6,7,8,9,10,...,18727,18730,18734,18735,18737,18740,18741,18743,18744,18745
userID,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2,1.263913e-05,6.427055e-05,9.099519e-07,4.420276e-06,1.494101e-05,0.000108823,0.003123,0.0003156652,0.0002854012,0.0001312676,...,3.486624e-07,3.08413e-07,6.25637e-06,5.425242e-06,3.393699e-06,1.936146e-06,1.81774e-06,1.735206e-06,1.726923e-06,1.628512e-05
3,1.182131e-07,3.73973e-07,6.320767e-09,2.532622e-08,1.002817e-07,6.950003e-07,2.3e-05,1.558938e-06,1.027907e-06,4.335312e-07,...,2.481867e-09,2.205567e-09,4.112904e-08,3.566935e-08,2.23089e-08,1.273287e-08,1.196028e-08,1.141651e-08,1.136978e-08,8.300818e-08
4,9.784468e-06,1.223711e-05,2.274089e-07,5.119218e-07,4.251732e-06,2.169366e-05,0.000958,3.609741e-05,3.411083e-05,1.122506e-05,...,9.689708e-08,8.662184e-08,1.687577e-06,1.463633e-06,9.153488e-07,5.224968e-07,4.909416e-07,4.687299e-07,4.671291e-07,2.78111e-06
5,1.967803e-06,4.301147e-06,7.249136e-08,2.594597e-07,1.216079e-06,7.807103e-06,0.000268,1.820773e-05,1.337631e-05,5.676878e-06,...,2.939153e-08,2.617189e-08,4.756643e-07,4.125475e-07,2.580104e-07,1.47251e-07,1.383185e-07,1.320522e-07,1.31499e-07,1.063216e-06
6,3.851239e-08,1.265874e-07,2.147753e-09,8.548562e-09,3.346405e-08,2.312995e-07,7e-06,5.506446e-07,3.352075e-07,1.402174e-07,...,8.420353e-10,7.484536e-10,1.379874e-08,1.196685e-08,7.484626e-09,4.271588e-09,4.012456e-09,3.830189e-09,3.814315e-09,2.749638e-08


In [192]:
mf_rmse = matrix_factorization_utilities.RMSE(p_sel_user_art.as_matrix(),
                                              pred_weight_train_results.as_matrix())
print('User-based MF RMSE: {}'.format(mf_rmse))

ValueError: operands could not be broadcast together with shapes (1877,17617) (1877,14199) 