In [2]:
#Data processing and computations
import numpy as np
import pandas as pd

import sklearn  # calculating similarity scores
import matplotlib.pyplot as plt 
import seaborn as sns # visualisation

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# --- Load the raw data ------------------------------------------------------
ratings = pd.read_csv("/content/ratings.csv")
ratings.head()

products = pd.read_csv("/content/products.csv")
products.head()

# --- Dataset overview -------------------------------------------------------
n_ratings = ratings.shape[0]
n_products = len(pd.unique(ratings['productId']))
n_users = len(pd.unique(ratings['userId']))

print(f"Number of ratings: {n_ratings}")
print(f"Number of unique productId's: {n_products}")
print(f"Number of unique users: {n_users}")
print(f"Average ratings per user: {round(n_ratings/n_users, 2)}")
print(f"Average ratings per product: {round(n_ratings/n_products, 2)}")

# Number of ratings submitted by each user (one row per userId).
user_freq = (
    ratings.groupby('userId')['productId']
    .count()
    .reset_index(name='n_ratings')
)
user_freq.head()

# --- Lowest and highest rated products --------------------------------------
# Mean rating per product, indexed by productId.
mean_rating = ratings.groupby('productId')[['rating']].mean()

# Product with the lowest mean rating.
lowest_rated = mean_rating['rating'].idxmin()
products[products['productId'] == lowest_rated]

# Product with the highest mean rating.
highest_rated = mean_rating['rating'].idxmax()
products[products['productId'] == highest_rated]

# Individual ratings behind the highest-rated product.
ratings[ratings['productId'] == highest_rated]

# Individual ratings behind the lowest-rated product.
ratings[ratings['productId'] == lowest_rated]

# The original author notes that the products above have very few ratings and
# that a Bayesian average will be used instead. Only its inputs (per-product
# rating count and mean) are collected here.
product_stats = ratings.groupby('productId')['rating'].agg(['count', 'mean'])

"""
Now, we create user-item matrix using scipy csr matrix
"""
from scipy.sparse import csr_matrix

def create_matrix(df):
    """Build a sparse product-by-user rating matrix plus id/index lookups.

    Args:
        df: DataFrame with 'userId', 'productId' and 'rating' columns.

    Returns:
        A 5-tuple ``(X, user_mapper, product_mapper, user_inv_mapper,
        product_inv_mapper)`` where

        * ``X`` is a ``csr_matrix`` of shape (n_products, n_users); entry
          ``[p, u]`` holds the rating of product index ``p`` by user index
          ``u`` (duplicate (product, user) pairs are summed by csr_matrix).
        * ``user_mapper`` / ``product_mapper`` map an id to its index.
        * ``user_inv_mapper`` / ``product_inv_mapper`` map an index back to
          its id.
    """
    # One pass per column: np.unique yields the sorted distinct ids and, via
    # return_inverse, the position of every row's id within that sorted array.
    user_ids, user_index = np.unique(df["userId"], return_inverse=True)
    product_ids, product_index = np.unique(df["productId"], return_inverse=True)

    N = len(user_ids)
    M = len(product_ids)

    # Map ids to indices.
    user_mapper = dict(zip(user_ids, range(N)))
    product_mapper = dict(zip(product_ids, range(M)))

    # Map indices back to ids.
    user_inv_mapper = dict(zip(range(N), user_ids))
    product_inv_mapper = dict(zip(range(M), product_ids))

    # Rows are products and columns are users, so that each row can be used
    # directly as a product's rating vector.
    X = csr_matrix(
        (df["rating"].to_numpy(), (product_index, user_index)),
        shape=(M, N),
    )

    return X, user_mapper, product_mapper, user_inv_mapper, product_inv_mapper

# Build the product-by-user matrix and the id <-> index lookups from the ratings.
(
    X,
    user_mapper,
    product_mapper,
    user_inv_mapper,
    product_inv_mapper,
) = create_matrix(ratings)

from sklearn.neighbors import NearestNeighbors
"""
Find similar products using KNN
"""
def find_similar_products(product_id, X, k, metric='cosine', show_distance=False):
    """Return the k products whose rows in X are nearest to product_id's row.

    Uses the module-level ``product_mapper`` and ``product_inv_mapper`` to
    translate between product ids and row indices of ``X``.

    Args:
        product_id: Id of the query product; must be a key of
            ``product_mapper``.
        X: Matrix with one row per product.
        k: Number of neighbours wanted. Fewer are returned when X has fewer
            than ``k + 1`` rows.
        metric: Distance metric handed to ``NearestNeighbors``.
        show_distance: When False (default) return a list of product ids.
            When True return a list of ``(product_id, distance)`` pairs.

    Returns:
        The nearest products, closest first, excluding the query product.

    Raises:
        KeyError: If ``product_id`` is not in ``product_mapper``.
    """
    product_ind = product_mapper[product_id]
    product_vec = X[product_ind].reshape(1, -1)

    # The query row is part of X, so ask for one extra neighbour -- but never
    # for more rows than X contains, which kneighbors rejects.
    n_query = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_query, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch distances so the result is a (distances, indices) pair
    # regardless of show_distance.
    distances, indices = kNN.kneighbors(product_vec, return_distance=True)

    # Drop the query product by index rather than by position: with tied
    # distances it is not guaranteed to be the first hit.
    neighbours = [
        (product_inv_mapper[int(idx)], float(dist))
        for idx, dist in zip(indices[0], distances[0])
        if idx != product_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]


# Lookup table from productId to its display title.
product_titles = {
    pid: title
    for pid, title in zip(products['productId'], products['title'])
}

# Product to generate recommendations for.
product_id = 3

similar_ids = find_similar_products(product_id, X, k=10)
product_title = product_titles[product_id]

print(f"Since you bought {product_title} here are some recommendations")
for similar_id in similar_ids:
	print(product_titles[similar_id])

Number of ratings: 100836
Number of unique productId's: 9724
Number of unique users: 610
Average ratings per user: 165.3
Average ratings per product: 10.37
Since you bought Organic Celery Hearts here are some recommendations
Orange Bell Pepper
Organic Original Hommus
Plain Bagels
Organic Garden Vegetable Pasta Sauce
Lightly Smoked Sardines in Olive Oil
Marinara Pasta Sauce
Natural Chicken & Maple Breakfast Sausage Patty
Organic Lemon Lowfat Yogurt
Organic Hass Avocado
Kids! Chewables Probiotic Tablets Natural Bursting Berry Flavor
