In [110]:
import pandas as pd
from surprise import Dataset, Reader
from surprise import KNNWithMeans
import random

In [90]:
products_df = pd.read_csv('../data/products.csv')
products_df.drop(['created_at', 'updated_at', 'deleted_at', 'price', 'stock', 'description'], axis=1, inplace=True)
products_df.head()

Unnamed: 0,id,name,category
0,1,T-shirt,1
1,2,Jeans,1
2,3,Sneakers,1
3,4,Hoodie,1
4,5,Running Shoes,1


In [91]:
orders_df = pd.read_csv('../data/orders.csv')
orders_df.drop(['created_at', 'updated_at', 'deleted_at', 'address', 'status', 'total'], axis=1, inplace=True)
orders_df.head()

Unnamed: 0,id,user
0,1,20
1,2,19
2,3,2
3,4,3
4,5,5


In [92]:
order_items_df = pd.read_csv('../data/order_items.csv')
order_items_df.drop(['price'], axis=1, inplace=True)
order_items_df.head()

Unnamed: 0,order,product,quantity
0,1,1,2
1,1,2,1
2,1,3,1
3,1,4,1
4,1,5,1


In [93]:
df = order_items_df.merge(orders_df[['id', 'user']], left_on='order', right_on='id')
df.drop('id', axis=1, inplace=True)



In [94]:
df = df.merge(products_df[['id', 'name', 'category']], left_on='product', right_on='id')
df.drop(['id'], axis=1, inplace=True)

In [98]:
reader = Reader(rating_scale=(0, 1))
data = Dataset.load_from_df(df[['user', 'name', 'quantity']], reader)

In [99]:
sim_options = {'name': 'cosine', 'user_based': False}
model = KNNWithMeans(sim_options=sim_options)

In [100]:
trainset = data.build_full_trainset()
model.fit(trainset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x7f79e35cf970>

In [159]:
def get_recomendations(product_id, n = 10):
    product_category = products_df[products_df['id'] == product_id]['category'].iloc[0]
    product_neighbors = model.get_neighbors(product_id, k=n)
    similar_products = []
    for pid in product_neighbors:
        if pid != product_id:
            sim = model.sim[product_id, pid]
            similar_products.append((pid, sim))
        if len(similar_products) >= n:
            break
    similar_products = sorted(similar_products, key=lambda x: x[1], reverse=True)[:n]
    if len(similar_products) < n:
        category_products = products_df[products_df['category'] == product_category]
        for pid in category_products['id'].tolist():
            if pid not in [p[0] for p in similar_products]:
                similar_products.append((pid, 0))
            if len(similar_products) >= n:
                break
    if len(similar_products) < n:
        all_products = products_df['id'].tolist()
        for pid in all_products:
            if pid not in [p[0] for p in similar_products]:
                similar_products.append((pid, 0))
            if len(similar_products) >= n:
                break
    return products_df.loc[[p[0] for p in similar_products]]

In [160]:
product_id = 1

print(f"Users who bought '{products_df[products_df['id'] == product_id]['name'].iloc[0]}' also bought:")
get_recomendations(product_id, 30)

Users who bought 'T-shirt' also bought:


Unnamed: 0,id,name,category
2,3,Sneakers,1
3,4,Hoodie,1
4,5,Running Shoes,1
6,7,Backpack,2
7,8,Suitcase,2
8,9,Messenger Bag,2
15,16,Headphones,3
19,20,Flash,4
20,21,Drone,4
5,6,Shorts,1
