In [23]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
import random

In [2]:
df = pd.read_csv("../datasets/recommendation_data.csv")
df.columns

Index(['uniq_id', 'crawl_timestamp', 'product_url', 'product_name',
       'product_category_tree', 'pid', 'retail_price', 'discounted_price',
       'image', 'is_FK_Advantage_product', 'description', 'product_rating',
       'overall_rating', 'brand', 'product_specifications'],
      dtype='object')

In [4]:
df = df[["product_name", "description", "product_specifications", "brand"]]
df.shape

(20000, 4)

In [6]:
vectorizer = TfidfVectorizer(
    max_features=None,
    strip_accents="unicode",
    analyzer="word",
    token_pattern=r'\w{1,}',
    ngram_range=(1,3)
)
df["description"] = df["description"].fillna("")

In [15]:
df_vect = vectorizer.fit_transform(df.apply(lambda x: x["product_name"] + "\n" + x["description"], axis=1))

In [16]:
df_vect.shape

(20000, 524685)

In [17]:
sig = sigmoid_kernel(df_vect, df_vect)

In [18]:
sig.shape

(20000, 20000)

In [19]:
indices = pd.Series(df.index,index=df['product_name']).drop_duplicates()

In [20]:
indices.head(5)

product_name
Alisha Solid Women's Cycling Shorts      0
FabHomeDecor Fabric Double Sofa Bed      1
AW Bellies                               2
Alisha Solid Women's Cycling Shorts      3
Sicons All Purpose Arnica Dog Shampoo    4
dtype: int64

In [88]:
def product_recommendation(title,num_recommendations=3, sig=sig):
    indx = indices[title]
    if not isinstance(indx, np.int64):
        indx = indx[0]   
    # print("index", indx)
    sig_scores = list(enumerate(sig[indx]))
    # print(sig_scores)
    sig_scores = sorted(sig_scores, key=lambda x: x[1], reverse=True)
    sig_scores = sig_scores[1:min(num_recommendations+1, len(sig_scores))]
    product_indices = [i[0] for i in sig_scores]
    return df['product_name'].iloc[product_indices]

In [93]:
rand_title = df.iloc[random.randint(0, len(df)-1), 0]
print(rand_title)
product_recommendation(rand_title)

ORKA Leatherette XXXL Teardrop Kid Bean Bag


18961    ORKA Printed Leatherette XXL Chair Kid Bean Bag
16646             ABCD Small Bean Bag  With Foam Filling
16649                ABCD XL Bean Bag  With Foam Filling
Name: product_name, dtype: object