In [2]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics.pairwise import cosine_similarity

from sklearn.metrics import mean_squared_error

In [3]:
df = pd.read_csv('preprocessed_original.csv', header=None) #There are no headers in the data file
df_user = pd.read_csv('preprocessed_user.csv', header=None)
df_copy = df.copy(deep=True)

In [15]:
rows, columns = df.shape
print("No of rows = ", rows)
print("No of columns = ", columns)

No of rows =  11504
No of columns =  16


In [16]:
df.columns=['product_id','product_name','category','discounted_price','actual_price','discount_percentage','rating','rating_count','about_product','user_id','user_name','review_id','review_title','review_content','img_link','product_link']
df_user.columns=['user_id','user_name','product_id','rating']

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11504 entries, 0 to 11503
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   product_id           11504 non-null  object
 1   product_name         11504 non-null  object
 2   category             11504 non-null  object
 3   discounted_price     11504 non-null  object
 4   actual_price         11504 non-null  object
 5   discount_percentage  11504 non-null  object
 6   rating               11504 non-null  object
 7   rating_count         11504 non-null  object
 8   about_product        11504 non-null  object
 9   user_id              11504 non-null  object
 10  user_name            11504 non-null  object
 11  review_id            11504 non-null  object
 12  review_title         11504 non-null  object
 13  review_content       11504 non-null  object
 14  img_link             11504 non-null  object
 15  product_link         11504 non-null  object
dtypes: o

In [18]:
df.isna().sum()

product_id             0
product_name           0
category               0
discounted_price       0
actual_price           0
discount_percentage    0
rating                 0
rating_count           0
about_product          0
user_id                0
user_name              0
review_id              0
review_title           0
review_content         0
img_link               0
product_link           0
dtype: int64

In [19]:
df['rating'].describe()

count     11504
unique       26
top         4.1
freq       1913
Name: rating, dtype: object

In [20]:
df = df.drop(df.index[0])
df_user = df_user.drop(df_user.index[0])
print(df_user)

                            user_id         user_name  product_id rating
1      AG3D6O4STAQKAY2UVGEUV46KN35Q             Manav  B07JW9H4J1    4.2
2      AHMY5CWJMMK5BJRBBSNLYT3ONILA      Adarsh gupta  B07JW9H4J1    4.2
3      AHCTC6ULH4XB6YHDY6PCH2R772LQ           Sundeep  B07JW9H4J1    4.2
4      AGYHHIERNXKA6P5T7CZLXKVPT7IQ    S.Sayeed Ahmed  B07JW9H4J1    4.2
5      AG4OGOFWXJZTQ2HKYIOCOY3KXF2Q    jaspreet singh  B07JW9H4J1    4.2
...                             ...               ...         ...    ...
11499  AHXCDNSXAESERITAFELQABFVNLCA           PARDEEP  B01486F4G6    4.3
11500  AGRZD6CHLCUNOLMMIMIHUCG7PIFA  Anindya Pramanik  B01486F4G6    4.3
11501  AFQZVGSOSOJHKFQQMCEI4725QEKQ       Vikas Singh  B01486F4G6    4.3
11502  AEALVGXXIP46OZVXKRUXSDWZJMEA   Harshada Pimple  B01486F4G6    4.3
11503  AGEFL3AY7YXEFZA4ZJU3LP7K7OJQ            Saw a.  B01486F4G6    4.3

[11503 rows x 4 columns]


In [21]:
print(df.columns)

Index(['product_id', 'product_name', 'category', 'discounted_price',
       'actual_price', 'discount_percentage', 'rating', 'rating_count',
       'about_product', 'user_id', 'user_name', 'review_id', 'review_title',
       'review_content', 'img_link', 'product_link'],
      dtype='object')


In [22]:
# df = df.drop(df.index[0])
df['rating'] = df['rating'].str.replace(',', '').astype(float)


In [23]:
print(df['rating'])

1        4.2
2        4.2
3        4.2
4        4.2
5        4.2
        ... 
11499    4.3
11500    4.3
11501    4.3
11502    4.3
11503    4.3
Name: rating, Length: 11503, dtype: float64


In [24]:
df['rating_count'] = df['rating_count'].str.replace(',', '').astype(int)
# Calculate the average rating for each product 
average_rating = df.groupby('product_id')['rating'].mean()

#Calculate the count of ratings for each product
count_rating = df.groupby('product_id')['rating_count'].mean()

#Create a dataframe with calculated average and count of ratings
final_rating = pd.DataFrame({'avg_rating':average_rating, 'rating_count':count_rating})

#Sort the dataframe by average of ratings
final_rating = final_rating.sort_values(by='avg_rating',ascending=False)

final_rating.head()

Unnamed: 0_level_0,avg_rating,rating_count
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1
B0BP7XLX48,5.0,5.0
B0BQRJ3C47,5.0,0.0
B09ZHCJDP1,5.0,23.0
B0BR4F878Q,4.8,53803.0
B0B53DS4TF,4.8,3964.0


In [25]:
#defining a function to get the top n products based on highest average rating and minimum interactions
def top_n_products(final_rating, n, min_interaction):
    
    #Finding products with minimum number of interactions
    recommendations = final_rating[final_rating['rating_count']>min_interaction]
    
    #Sorting values w.r.t average rating 
    recommendations = recommendations.sort_values('avg_rating',ascending=False)
    
    return recommendations.index[:n]

In [26]:
list(top_n_products(final_rating, 5, 50))

['B0BR4F878Q', 'B0B53DS4TF', 'B0BP89YBC1', 'B0B23LW7NV', 'B09WN3SRC7']

In [27]:
df_user['rating'] = df_user['rating'].str.replace(',', '').astype(float)

In [28]:
df_grouped = df_user.groupby(['user_id', 'product_id'])['rating'].mean().reset_index()

#Creating the interaction matrix of products and users based on ratings and replacing NaN value with 0
final_ratings_matrix = df_grouped.pivot(index = 'user_id', columns ='product_id', values = 'rating').fillna(0)
print('Shape of final_ratings_matrix: ', final_ratings_matrix.shape)

#Finding the number of non-zero entries in the interaction matrix 
given_num_of_ratings = np.count_nonzero(final_ratings_matrix)
print('given_num_of_ratings = ', given_num_of_ratings)

#Finding the possible number of ratings as per the number of users and products
possible_num_of_ratings = final_ratings_matrix.shape[0] * final_ratings_matrix.shape[1]
print('possible_num_of_ratings = ', possible_num_of_ratings)

#Density of ratings
density = (given_num_of_ratings/possible_num_of_ratings)
density *= 100
print ('density: {:4.2f}%'.format(density))

final_ratings_matrix.head()

Shape of final_ratings_matrix:  (9050, 1351)
given_num_of_ratings =  10604
possible_num_of_ratings =  12226550
density: 0.09%


product_id,B002PD61Y4,B002SZEOLG,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B005LJQMZC,B006LW0WDQ,B0073QGKAS,...,B0BP18W8TM,B0BP7XLX48,B0BP89YBC1,B0BPBG712X,B0BPBXNQQT,B0BPCJM7TB,B0BPJBTB3F,B0BQ3K23Y1,B0BQRJ3C47,B0BR4F878Q
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AE22E2AXODSPNK3EBIHNGYS5LOSA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE22MK2NXQD3ZARLIOL3SLD4GU6A,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE22Y3KIS7SE6LI3HE2VS6WWPU4Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE23RS3W7GZO7LHYKJU6KSKVM4MQ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
AE23WGYTUMB5R6JJMBU4V43JIW7Q,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [29]:
final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()

product_id,B002PD61Y4,B002SZEOLG,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B005LJQMZC,B006LW0WDQ,B0073QGKAS,...,B0BP18W8TM,B0BP7XLX48,B0BP89YBC1,B0BPBG712X,B0BPBXNQQT,B0BPCJM7TB,B0BPJBTB3F,B0BQ3K23Y1,B0BQRJ3C47,B0BR4F878Q
user_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
# defining a function to get similar users
def similar_users(user_index, interactions_matrix):
    similarity = []
    for user in range(0, interactions_matrix.shape[0]): #  .shape[0] gives number of rows
        
        #finding cosine similarity between the user_id and each user
        sim = cosine_similarity([interactions_matrix.loc[user_index]], [interactions_matrix.loc[user]])
        
        #Appending the user and the corresponding similarity score with user_id as a tuple
        similarity.append((user,sim))
        
    similarity.sort(key=lambda x: x[1], reverse=True)
    most_similar_users = [tup[0] for tup in similarity] #Extract the user from each tuple in the sorted list
    similarity_score = [tup[1] for tup in similarity] ##Extracting the similarity score from each tuple in the sorted list
   
    #Remove the original user and its similarity score and keep only other similar users 
    most_similar_users.remove(user_index)
    similarity_score.remove(similarity_score[0])
       
    return most_similar_users, similarity_score

In [31]:
similar = similar_users(3919,final_ratings_matrix)[0][0:10]
similar

[3, 1292, 1598, 2834, 5101, 6731, 8439, 0, 1, 2]

In [32]:
similar = similar_users(121, final_ratings_matrix)[0][0:10]
similar

[166, 1454, 1463, 3654, 5597, 7180, 7201, 0, 1, 2]

In [33]:
#Print the similarity score
similar_users(1521,final_ratings_matrix)[1][0:10]

[array([[1.]]),
 array([[1.]]),
 array([[1.]]),
 array([[1.]]),
 array([[1.]]),
 array([[1.]]),
 array([[1.]]),
 array([[0.]]),
 array([[0.]]),
 array([[0.]])]

In [34]:
# defining the recommendations function to get recommendations by using the similar users' preferences
def recommendations(user_index, num_of_products, interactions_matrix):
    
    #Saving similar users using the function similar_users defined above
    most_similar_users = similar_users(user_index, interactions_matrix)[0]
    
    #Finding product IDs with which the user_id has interacted
    prod_ids = set(list(interactions_matrix.columns[np.where(interactions_matrix.loc[user_index] > 0)]))
    recommendations = []
    
    observed_interactions = prod_ids.copy()
    for similar_user in most_similar_users:
        if len(recommendations) < num_of_products:
            
            #Finding 'n' products which have been rated by similar users but not by the user_id
            similar_user_prod_ids = set(list(interactions_matrix.columns[np.where(interactions_matrix.loc[similar_user] > 0)]))
            recommendations.extend(list(similar_user_prod_ids.difference(observed_interactions)))
            observed_interactions = observed_interactions.union(similar_user_prod_ids)
        else:
            break
    
    return recommendations[:num_of_products]

In [35]:
recommendations(3,5,final_ratings_matrix)

['B0B31FR4Y2', 'B0B8XNPQPN', 'B07ZR4S1G4', 'B09163Q5CD', 'B09XJ1LM7R']

In [36]:
from scipy.sparse import csr_matrix, find
import numpy as np
from scipy.sparse.linalg import svds

# Assuming final_ratings_matrix.columns are the actual product IDs

# Create a sparse matrix
final_ratings_sparse = csr_matrix(final_ratings_matrix.values)

# Specify the target column index
target_col_index = 335

# Find the indices and values of non-zero elements in the sparse matrix for the given column index
_, col_indices, values = find(final_ratings_sparse[:, target_col_index])

# Create a set to store unique product IDs
unique_product_ids = set()

# Iterate through the column indices and retrieve product IDs
for col_index in col_indices:
    product_id = final_ratings_matrix.columns[col_index]
    unique_product_ids.add(product_id)

# Print the unique product IDs and their associated values
for product_id in unique_product_ids:
    value = values[np.where(col_indices == col_index)[0][0]]  # Get the corresponding value
    print("Product ID:", product_id, "Value:", value)


Product ID: B002PD61Y4 Value: 3.8


In [37]:
# Singular Value Decomposition
U, s, Vt = svds(final_ratings_sparse, k = 1000) # here k is the number of latent features

# Construct diagonal array in SVD
sigma = np.diag(s)

In [38]:
U.shape

(9050, 1000)

In [39]:
sigma.shape

(1000, 1000)

In [40]:
Vt.shape

(1000, 1351)

In [41]:
all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) 

# Predicted ratings
preds_df = pd.DataFrame(abs(all_user_predicted_ratings), columns = final_ratings_matrix.columns)
print(preds_df)
preds_matrix = csr_matrix(preds_df.values)

product_id    B002PD61Y4    B002SZEOLG    B003B00484    B003L62T7W  \
0           7.647080e-16  3.073377e-18  4.728362e-16  2.490724e-16   
1           7.803771e-16  3.047028e-16  3.748689e-16  9.711719e-17   
2           9.729586e-17  5.080223e-16  9.025823e-16  1.321304e-16   
3           3.766553e-16  7.053617e-17  2.138019e-17  1.017895e-15   
4           5.551115e-17  1.225628e-16  1.416940e-16  2.066051e-16   
...                  ...           ...           ...           ...   
9045        7.005725e-17  4.877418e-17  5.322249e-18  1.095866e-16   
9046        3.949844e-16  2.658803e-17  3.487079e-17  3.330669e-16   
9047        2.988147e-16  2.257160e-16  3.841079e-17  6.553405e-16   
9048        2.744861e-16  8.838765e-17  2.172505e-16  7.771561e-16   
9049        5.307415e-16  9.115886e-17  7.286609e-18  7.857530e-17   

product_id    B004IO5BMQ    B005FYNT3G    B005LJQMCK    B005LJQMZC  \
0           8.152153e-17  5.960711e-17  8.966316e-17  8.966316e-17   
1           4.27075

In [42]:
import numpy as np

def recommend_items(user_index, interactions_matrix, preds_matrix, num_recommendations):
    
    # Get the user's ratings from the actual and predicted interaction matrices
    user_ratings = interactions_matrix[user_index,:].toarray().reshape(-1)
    user_predictions = preds_matrix[user_index,:].toarray().reshape(-1)

    #Creating a dataframe with actual and predicted ratings columns
    temp = pd.DataFrame({'user_ratings': user_ratings, 'user_predictions': user_predictions})
    temp['Recommended Products'] = np.arange(len(user_ratings))
    temp = temp.set_index('Recommended Products')
    
    #Filtering the dataframe where actual ratings are 0 which implies that the user has not interacted with that product
    temp = temp.loc[temp.user_ratings == 0]   
    
    
    #Recommending products with top predicted ratings
    temp = temp.sort_values('user_predictions',ascending=False)#Sort the dataframe by user_predictions in descending order
    print('\nBelow are the recommended products for user(user_id = {}):\n'.format(user_index))
    print(temp['user_predictions'].head(num_recommendations))


In [43]:
#Enter 'user index' and 'num_recommendations' for the user
recommend_items(50,final_ratings_sparse,preds_matrix,5)



Below are the recommended products for user(user_id = 50):

Recommended Products
1113    5.986510e-15
1112    3.443426e-15
532     3.132309e-15
1322    2.509412e-15
1134    2.432008e-15
Name: user_predictions, dtype: float64


In [44]:
recommend_items(100,final_ratings_sparse,preds_matrix,10)


Below are the recommended products for user(user_id = 100):

Recommended Products
873     5.575829e-15
1202    4.890267e-15
384     4.352145e-15
627     3.118529e-15
612     3.019759e-15
799     2.910677e-15
954     2.816830e-15
782     2.766974e-15
957     2.725042e-15
606     2.640723e-15
Name: user_predictions, dtype: float64


In [45]:
final_ratings_matrix['user_index'] = np.arange(0, final_ratings_matrix.shape[0])
final_ratings_matrix.set_index(['user_index'], inplace=True)

# Actual ratings given by users
final_ratings_matrix.head()

product_id,B002PD61Y4,B002SZEOLG,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B005LJQMZC,B006LW0WDQ,B0073QGKAS,...,B0BP18W8TM,B0BP7XLX48,B0BP89YBC1,B0BPBG712X,B0BPBXNQQT,B0BPCJM7TB,B0BPJBTB3F,B0BQ3K23Y1,B0BQRJ3C47,B0BR4F878Q
user_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [46]:
average_rating = final_ratings_matrix.mean()
average_rating.head()

product_id
B002PD61Y4    0.003624
B002SZEOLG    0.003713
B003B00484    0.003801
B003L62T7W    0.003801
B004IO5BMQ    0.003978
dtype: float64

In [47]:
preds_df.head()

product_id,B002PD61Y4,B002SZEOLG,B003B00484,B003L62T7W,B004IO5BMQ,B005FYNT3G,B005LJQMCK,B005LJQMZC,B006LW0WDQ,B0073QGKAS,...,B0BP18W8TM,B0BP7XLX48,B0BP89YBC1,B0BPBG712X,B0BPBXNQQT,B0BPCJM7TB,B0BPJBTB3F,B0BQ3K23Y1,B0BQRJ3C47,B0BR4F878Q
0,7.64708e-16,3.0733770000000002e-18,4.728362e-16,2.490724e-16,8.152153000000001e-17,5.960711e-17,8.966316000000001e-17,8.966316000000001e-17,1.92349e-16,2.440592e-16,...,6.765143e-16,2.142311e-15,3.13321e-16,2.693522e-16,4.363269e-16,5.961601e-16,2.9001899999999998e-30,2.400626e-16,4.311003e-30,7.296132e-30
1,7.803771e-16,3.047028e-16,3.748689e-16,9.711719000000001e-17,4.27075e-17,2.149668e-17,3.050311e-16,3.69682e-16,1.159107e-16,1.34256e-17,...,3.9262350000000004e-17,7.595783e-17,3.8716950000000004e-17,2.7709460000000003e-17,1.016213e-16,2.1021340000000003e-17,1.310291e-16,3.999253e-16,8.225247e-16,6.500958e-16
2,9.729586e-17,5.080223e-16,9.025823e-16,1.321304e-16,1.695482e-16,1.083257e-16,5.07071e-16,4.748775e-16,1.022147e-16,9.728101e-17,...,1.015942e-16,4.379372e-17,2.298306e-16,1.040224e-16,2.472856e-18,1.237739e-16,4.884981e-16,1.988932e-16,3.663736e-16,3.053113e-16
3,3.766553e-16,7.053617000000001e-17,2.138019e-17,1.017895e-15,2.31843e-16,5.231437e-16,3.095299e-17,7.791213e-17,5.385746e-16,3.150218e-16,...,5.179229e-16,2.853069e-18,2.072383e-17,2.856074e-16,5.823995e-16,1.040437e-17,3.9246910000000005e-17,1.578594e-17,8.472485e-17,3.392995e-17
4,5.5511150000000004e-17,1.225628e-16,1.41694e-16,2.066051e-16,5.572323e-16,7.121379e-16,4.7067230000000006e-17,1.6648960000000002e-17,6.635315e-16,2.267597e-16,...,3.957323e-16,1.871494e-16,2.2252850000000003e-17,2.104412e-16,6.684183e-16,3.332302e-16,1.943532e-16,3.2275690000000003e-17,1.018432e-16,2.463411e-16


In [48]:
avg_preds=preds_df.mean()
avg_preds.head()

product_id
B002PD61Y4    0.003624
B002SZEOLG    0.003713
B003B00484    0.005303
B003L62T7W    0.003801
B004IO5BMQ    0.003978
dtype: float64

In [49]:
rmse_df = pd.concat([average_rating, avg_preds], axis=1)

rmse_df.columns = ['Avg_actual_ratings', 'Avg_predicted_ratings']

rmse_df.head()

Unnamed: 0_level_0,Avg_actual_ratings,Avg_predicted_ratings
product_id,Unnamed: 1_level_1,Unnamed: 2_level_1
B002PD61Y4,0.003624,0.003624
B002SZEOLG,0.003713,0.003713
B003B00484,0.003801,0.005303
B003L62T7W,0.003801,0.003801
B004IO5BMQ,0.003978,0.003978


In [50]:
RMSE=mean_squared_error(rmse_df['Avg_actual_ratings'], rmse_df['Avg_predicted_ratings'], squared=False)
print(f'RMSE SVD Model = {RMSE} \n')

RMSE SVD Model = 0.0009413941585254984 

