In [11]:
#Mobile Product Recommendation using KNN by Manasi Gawade

In [1]:
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)


In [2]:
# Load the customer ratings table and preview its first rows.
ratings = pd.read_csv(filepath_or_buffer="Ratings.csv")
ratings.head(5)

Unnamed: 0,Product ID,Customer ID,Rating
0,1,1001,4.5
1,2,1001,4.2
2,3,1002,4.0
3,4,1003,4.8
4,5,1004,3.9


In [4]:
# Load the mobile product catalogue and preview its first rows.
product = pd.read_csv(filepath_or_buffer="mobile_Product.csv")
product.head(5)

Unnamed: 0,Product ID,Product Name,Brand,Price,Operating System,RAM
0,1,iPhone 12,Apple,999,iOS,4GB
1,2,Galaxy S21,Samsung,899,Android,8GB
2,3,Redmi Note 10,Xiaomi,249,Android,6GB
3,4,iPhone SE,Apple,399,iOS,3GB
4,5,OnePlus 9,OnePlus,699,Android,8GB


In [6]:
# Basic dataset sizes: total rating rows, distinct products, distinct customers.
n_ratings = ratings.shape[0]
n_product = ratings['Product ID'].unique().size
n_customer = ratings['Customer ID'].unique().size

In [8]:
# Summary of the ratings table. The labels describe what each value actually
# counts: n_ratings is the number of rating rows (not the number of mobiles),
# and the entities are products/customers rather than movies/users.
print(f"Number of ratings: {n_ratings}")
print(f"Number of unique products: {n_product}")
print(f"Number of unique customers: {n_customer}")
print(f"Average ratings per customer: {round(n_ratings/n_customer, 2)}")
print(f"Average ratings per product: {round(n_ratings/n_product, 2)}")


Number of Mobiles: 80
Number of unique movieId's: 80
Number of unique users: 60
Average ratings per user: 1.33
Average ratings per movie: 1.0


In [10]:
# How many ratings each customer has submitted (one row per customer).
customer_freq = (
    ratings
    .groupby('Customer ID')['Product ID']
    .count()
    .rename('n_ratings')
    .reset_index()
)
customer_freq.head()

Unnamed: 0,Customer ID,n_ratings
0,1001,2
1,1002,1
2,1003,1
3,1004,2
4,1005,2


In [12]:
# Mean rating per product; used below to find the lowest- and highest-rated
# mobile products.
mean_rating = ratings[['Product ID', 'Rating']].groupby('Product ID').mean()
mean_rating

Unnamed: 0_level_0,Rating
Product ID,Unnamed: 1_level_1
1,4.5
2,4.2
3,4.0
4,4.8
5,3.9
...,...
76,4.6
77,4.0
78,4.7
79,4.2


In [13]:
# Product ID with the smallest mean rating, then its catalogue entry.
lowest_rated = mean_rating['Rating'].idxmin()
product[product['Product ID'].eq(lowest_rated)]

Unnamed: 0,Product ID,Product Name,Brand,Price,Operating System,RAM
17,18,Redmi 9,Xiaomi,199,Android,4GB


In [14]:
# Product ID with the largest mean rating, then its catalogue entry.
highest_rated = mean_rating['Rating'].idxmax()
product[product['Product ID'].eq(highest_rated)]

Unnamed: 0,Product ID,Product Name,Brand,Price,Operating System,RAM
3,4,iPhone SE,Apple,399,iOS,3GB


In [15]:
# Show every rating row for the highest-rated mobile (i.e. who rated it).
ratings.loc[ratings['Product ID'].eq(highest_rated)]

Unnamed: 0,Product ID,Customer ID,Rating
3,4,1003,4.8


In [16]:
# Show every rating row for the lowest-rated mobile (i.e. who rated it).
ratings.loc[ratings['Product ID'].eq(lowest_rated)]

Unnamed: 0,Product ID,Customer ID,Rating
17,18,1013,3.8


In [18]:
# The products above were each rated by very few customers, so a plain mean
# is unreliable. Collect the per-product rating count and mean, which are the
# inputs a Bayesian average would need (the average itself is not computed
# in this cell).
mobile_stats = ratings.groupby('Product ID')['Rating'].agg(['count', 'mean'])
mobile_stats


Unnamed: 0_level_0,count,mean
Product ID,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,4.5
2,1,4.2
3,1,4.0
4,1,4.8
5,1,3.9
...,...,...
76,1,4.6
77,1,4.0
78,1,4.7
79,1,4.2


In [20]:
# Now we create the product-by-customer rating matrix (rows = products, columns = customers) as a SciPy CSR matrix
from scipy.sparse import csr_matrix

def create_matrix(df):
    """Build a sparse product-by-customer rating matrix from a ratings frame.

    ``df`` must provide the columns "Customer ID", "Product ID" and "Rating".
    Rows of the returned matrix correspond to products and columns to
    customers; both are numbered in the sorted order of their unique IDs.

    Returns a 5-tuple:
        X                    -- csr_matrix of shape (n_products, n_customers)
        customer_mapper      -- customer ID -> column position
        mobile_mapper        -- product ID  -> row position
        customer_inv_mapper  -- column position -> customer ID
        mobile_inv_mapper    -- row position    -> product ID
    """
    # Sorted unique IDs define the position of every customer / product.
    customer_ids = np.unique(df["Customer ID"])
    mobile_ids = np.unique(df["Product ID"])
    n_customers = len(customer_ids)
    n_mobiles = len(mobile_ids)

    # Forward (ID -> position) and inverse (position -> ID) lookups.
    customer_mapper = {cid: pos for pos, cid in enumerate(customer_ids)}
    mobile_mapper = {pid: pos for pos, pid in enumerate(mobile_ids)}
    customer_inv_mapper = dict(enumerate(customer_ids))
    mobile_inv_mapper = dict(enumerate(mobile_ids))

    # Translate every rating row into its (row, column) coordinates.
    col_positions = list(map(customer_mapper.__getitem__, df['Customer ID']))
    row_positions = list(map(mobile_mapper.__getitem__, df['Product ID']))

    X = csr_matrix(
        (df["Rating"], (row_positions, col_positions)),
        shape=(n_mobiles, n_customers),
    )

    return X, customer_mapper, mobile_mapper, customer_inv_mapper, mobile_inv_mapper
    
# Build the rating matrix and the ID <-> position lookups used by the
# similarity search further down.
(
    X,
    customer_mapper,
    mobile_mapper,
    customer_inv_mapper,
    mobile_inv_mapper,
) = create_matrix(ratings)
    

In [21]:
from sklearn.neighbors import NearestNeighbors


In [22]:
#find similar mobiles using KNN

In [25]:
def find_similar_mobile(mobile_id, X, k, metric='cosine', show_distance=False):
    """Return up to ``k`` products whose rating vectors are nearest to ``mobile_id``.

    Uses the notebook-level ``mobile_mapper`` / ``mobile_inv_mapper``
    dictionaries to translate between product IDs and row positions of ``X``.

    Parameters
    ----------
    mobile_id
        Product ID to find neighbours for; must be a key of ``mobile_mapper``.
    X
        Matrix with one row per product (rows positioned as in ``mobile_mapper``).
    k : int
        Number of similar products requested. Fewer are returned when ``X``
        does not have ``k`` other rows.
    metric : str
        Distance metric passed to ``NearestNeighbors``.
    show_distance : bool
        If False (default) return a list of product IDs. If True return a
        list of ``(product_id, distance)`` tuples.

    Raises
    ------
    KeyError
        If ``mobile_id`` is not present in ``mobile_mapper``.
    """
    mobile_ind = mobile_mapper[mobile_id]
    mobile_vec = X[mobile_ind].reshape(1, -1)

    # The query row is part of the fitted data, so request one extra
    # neighbour -- but never more than the number of rows available,
    # which NearestNeighbors would reject.
    n_query = min(k + 1, X.shape[0])
    kNN = NearestNeighbors(n_neighbors=n_query, algorithm="brute", metric=metric)
    kNN.fit(X)

    # Always fetch distances: kneighbors returns a (distances, indices)
    # tuple in that case, and both are needed when show_distance is True.
    distances, indices = kNN.kneighbors(mobile_vec, return_distance=True)

    # Drop the query product by position rather than assuming it is the
    # first hit: with tied distances another row can be ranked before it.
    neighbours = [
        (mobile_inv_mapper[int(row)], float(dist))
        for dist, row in zip(distances[0], indices[0])
        if row != mobile_ind
    ][:k]

    if show_distance:
        return neighbours
    return [neighbour_id for neighbour_id, _ in neighbours]



# Lookup from product ID to its display name.
Product_Name = {
    pid: name
    for pid, name in zip(product['Product ID'], product['Product Name'])
}

# Product the customer is assumed to have purchased.
Product_id = 3

product_name = Product_Name[Product_id]
similar_ids = find_similar_mobile(Product_id, X, k=10)

print(f"Since you purchased : {product_name}")
print("We recommend you few more products")
for recommended_id in similar_ids:
    print(Product_Name[recommended_id])
        
    

Since you purchased : Redmi Note 10
We recommend you few more products
iPhone 5S
Mi A3
iPhone 4S
Galaxy A10
Mi 8 Lite
iPhone 5C
Galaxy J7
Redmi 7A
OnePlus 6T
Redmi 6A
