In [5]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
import os

In [6]:
# Make sure the local data directory exists before anything is downloaded into it.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create race
# of testing os.path.exists() first.
os.makedirs('../data', exist_ok=True)

In [7]:
# Fetch the products catalogue only when there is no local copy yet, so re-running
# the notebook does not download it again.
if not os.path.exists('../data/products.csv'):
  # The leading `!` is IPython syntax that runs the rest of the line as a shell command.
  # NOTE(review): `wget -O` appears to create the target file even when the request fails,
  # which would make this guard treat an empty file as a valid download — confirm.
  !wget https://raw.githubusercontent.com/MBA-IA-GROUP-A/recomendation/master/data/products.csv -O ../data/products.csv

In [8]:
# Load the product catalogue and discard the three bookkeeping timestamp columns.
products_df = (
    pd.read_csv('../data/products.csv')
    .drop(columns=['created_at', 'updated_at', 'deleted_at'])
)
products_df.head()

Unnamed: 0,id,name,description,category,price,stock
0,1,T-shirt,A simple and comfortable T-shirt,1,20,100
1,2,Jeans,Classic blue jeans,1,50,50
2,3,Sneakers,Sporty and stylish sneakers,1,80,30
3,4,Hoodie,A warm and cozy hoodie,1,40,80
4,5,Running Shoes,Lightweight and comfortable shoes for running,1,70,20


In [9]:
# Fetch the ratings file only when there is no local copy yet, so re-running
# the notebook does not download it again.
if not os.path.exists('../data/rating.csv'):
  # The leading `!` is IPython syntax that runs the rest of the line as a shell command.
  # NOTE(review): `wget -O` appears to create the target file even when the request fails,
  # which would make this guard treat an empty file as a valid download — confirm.
  !wget https://raw.githubusercontent.com/MBA-IA-GROUP-A/recomendation/master/data/rating.csv -O ../data/rating.csv

In [10]:
# Load the ratings file as-is and preview its first five rows.
ratings_df = pd.read_csv('../data/rating.csv')
ratings_df.head(n=5)

Unnamed: 0,user,product,rating,created_at,updated_at,deleted_at
0,1,16,0,2023-03-17 17:58:05,2023-03-17 17:58:05,
1,1,17,3,2023-03-17 17:58:05,2023-03-17 17:58:05,
2,1,24,5,2023-03-17 17:58:05,2023-03-17 17:58:05,
3,1,38,1,2023-03-17 17:58:05,2023-03-17 17:58:05,
4,1,47,3,2023-03-17 17:58:05,2023-03-17 17:58:05,


In [11]:
# Attach the product attributes to every rating row (ratings.product == products.id).
data = ratings_df.merge(
    products_df,
    left_on='product',
    right_on='id',
)
data.head()

Unnamed: 0,user,product,rating,created_at,updated_at,deleted_at,id,name,description,category,price,stock
0,1,16,0,2023-03-17 17:58:05,2023-03-17 17:58:05,,16,Headphones,Premium noise-cancelling headphones,3,350,20
1,5,16,4,2023-03-17 17:58:05,2023-03-17 17:58:05,,16,Headphones,Premium noise-cancelling headphones,3,350,20
2,8,16,1,2023-03-17 17:58:05,2023-03-17 17:58:05,,16,Headphones,Premium noise-cancelling headphones,3,350,20
3,10,16,1,2023-03-17 17:58:05,2023-03-17 17:58:05,,16,Headphones,Premium noise-cancelling headphones,3,350,20
4,12,16,4,2023-03-17 17:58:05,2023-03-17 17:58:05,,16,Headphones,Premium noise-cancelling headphones,3,350,20


In [12]:
# Mean rating per product, with id / name / category kept as ordinary columns.
product_avg_ratings = (
    data
    .groupby(['id', 'name', 'category'])['rating']
    .mean()
    .reset_index()
)

In [13]:
# User x product matrix of ratings: one row per user, one column per product id.
# Cells with no rating are NaN; repeated (user, product) pairs are averaged.
user_product_ratings = data.pivot_table(
    values='rating',
    index='user',
    columns='id',
    aggfunc='mean',
)

In [14]:
def similarity(product1, product2):
    """Return the cosine similarity between two products' rating vectors.

    Each product is represented by its column in ``user_product_ratings``;
    a user who has not rated a product contributes 0 for that product.
    """
    # Two rows (one per product), one column per user, missing ratings as 0.
    rating_vectors = user_product_ratings[[product1, product2]].fillna(0).T.to_numpy()
    pairwise = cosine_similarity(rating_vectors)
    # The off-diagonal entry compares the first row with the second.
    return pairwise[0, 1]

In [15]:
def recommend(product_id, size = 5, random_state=None):
    """Recommend up to ``size`` products related to ``product_id``.

    Candidates are picked in three tiers until ``size`` rows are collected:

    1. rated products from the same category, most similar first;
    2. any other rated product, most similar first;
    3. products drawn at random from ``products_df`` (for example products
       with no ratings), used only when tiers 1 and 2 run out.

    The target product itself is never recommended.

    Parameters
    ----------
    product_id : id of the product to find recommendations for.
    size : maximum number of recommendations to return.
    random_state : optional seed forwarded to ``DataFrame.sample`` so the
        random filler (tier 3) is reproducible.

    Returns
    -------
    pandas.DataFrame
        One row per recommended product: ``id``, ``similarity`` and the
        columns of ``products_df``. Rows from the random tier have no
        similarity value (NaN). Fewer than ``size`` rows are returned when
        the catalogue does not contain enough other products.

    Raises
    ------
    IndexError
        If ``product_id`` is not present in ``products_df``.
    KeyError
        May be raised by ``similarity`` when ``product_id`` has no column
        in the user/product rating matrix (i.e. it has never been rated).
    """
    category = products_df.loc[products_df['id'] == product_id, 'category'].values[0]

    # Score every other rated product against the target exactly once and
    # reuse that ranking for both the in-category and out-of-category tiers.
    candidates = product_avg_ratings.loc[
        product_avg_ratings['id'] != product_id, ['id', 'category']
    ]
    scored = candidates.assign(
        similarity=candidates['id'].apply(lambda other_id: similarity(product_id, other_id))
    ).sort_values(by='similarity', ascending=False)

    # Tier 1: top products in the same category (bounded by `size`, not a fixed 5).
    top = scored.loc[scored['category'] == category, ['id', 'similarity']].head(size)

    # Tier 2: top products out of the category fill whatever is still missing.
    if len(top) < size:
        others = scored.loc[~scored['id'].isin(top['id']), ['id', 'similarity']]
        top = pd.concat([top, others.head(size - len(top))])

    recommended = pd.merge(top, products_df, on='id')

    # Tier 3: random filler. Every rated product is already used at this point,
    # so draw from the full catalogue, excluding the target and earlier picks,
    # and never ask for more rows than the pool holds.
    if len(recommended) < size:
        already_used = recommended['id'].tolist() + [product_id]
        pool = products_df[~products_df['id'].isin(already_used)]
        n_fill = min(size - len(recommended), len(pool))
        if n_fill > 0:
            filler = pool.sample(n=n_fill, random_state=random_state)
            recommended = pd.concat([recommended, filler], ignore_index=True)
    return recommended

In [16]:
# Example: show ten recommendations for one product, with its name as a heading.
product_id = 60
product_name = products_df.loc[products_df['id'] == product_id, 'name'].values[0]
print('Recommendations for product {}:'.format(product_name))
recommend(product_id, 10)

Recommendations for product Google Nest Learning Thermostat:


Unnamed: 0,id,similarity,name,description,category,price,stock
0,59,0.493215,Amazon Echo Dot (4th gen),A smart speaker with Alexa voice assistant,11,50,30
1,82,0.428571,DJI Mini 2,Ultra-compact drone with 4K video and 31-minut...,11,449,20
2,58,0.169031,Sony WH-1000XM4,Noise-cancelling wireless headphones from Sony,11,350,20
3,57,0.099258,Samsung Galaxy S21,The flagship smartphone from Samsung,11,900,15
4,77,0.078811,Samsung T7 Touch,Portable SSD with fingerprint security and up ...,11,189,50
5,69,0.901498,Sony WH-1000XM4,Wireless noise-cancelling headphones with up t...,16,349,40
6,14,0.766652,Laptop,A high-performance and reliable laptop,3,1200,5
7,70,0.740744,Nintendo Switch OLED,Handheld gaming console with OLED display and ...,17,349,25
8,65,0.520266,Mizuno Pro Limited Edition Baseball Glove,A high glove,20,300,10
9,19,0.512952,Tripod,A sturdy and reliable tripod for cameras,4,150,30
