In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.feature_extraction.text import TfidfVectorizer
from operator import itemgetter
from nltk.corpus import stopwords
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import sigmoid_kernel

from joblib import Parallel, delayed
import joblib

In [2]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,description,title,asin,ratings,n_ratings
0,After a long day of handling thorny situations...,Crabtree &amp; Evelyn - Gardener's Ultra-Moist...,B00004U9V2,4.6,582
1,If you haven't experienced the pleasures of ba...,AHAVA Bath Salts,B0000531EN,4.5,6
2,"Rich, black mineral mud, harvested from the ba...","AHAVA Dead Sea Mineral Mud, 8.5 oz, Pack of 4",B0000532JH,3.1,8
3,This liquid soap with convenient pump dispense...,"Crabtree &amp; Evelyn Hand Soap, Gardeners, 10...",B00005A77F,4.7,18
4,Remember why you love your favorite blanket? T...,Soy Milk Hand Crme,B00005NDTD,4.9,80


In [3]:
documents = data['description'].values.astype("U")

vectorizer = TfidfVectorizer(stop_words='english')
features = vectorizer.fit_transform(documents)

In [4]:
model = joblib.load('kmeans.pkl')
label = model.fit_predict(features)
unique_labels = np.unique(label)

In [5]:
data['label']=np.array(label)

In [6]:
tfidf = TfidfVectorizer(min_df=3, max_features=None,
                     strip_accents='unicode', analyzer='word',token_pattern=r'\w{1,}',
                     ngram_range=(1,3), stop_words='english')

tfidf_matrix=tfidf.fit_transform(data['description'])

In [7]:
sig = sigmoid_kernel(tfidf_matrix,tfidf_matrix)

In [8]:
indices = pd.Series(data.index,index=data['title'])

In [9]:
stop = stopwords.words('english')

extras=['benefits<br />','','a','this','that','these','those','to','of','at','with','for','also','is']

stop.append(extras)

In [10]:
def similarity(result, title):
    desc_vector = tfidf.fit_transform(result['description'].apply(lambda x:x.lower()).apply(lambda x: ' '.join([word for word in x.split() if word not in (stop)])))
    similarity_matrix = linear_kernel(desc_vector, desc_vector)
    
    mapping = pd.Series(result.index, index=result['title'])
    product_index = mapping[title]
    
    similarity_score = list(enumerate(similarity_matrix[product_index]))
    
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    
    list_similarity = []
    for i in range(len(similarity_score)):
        list_similarity.append(similarity_score[i][1])
    
    result['similarity'] = list_similarity
    
    return result

In [11]:
def get_rec(title,sig=sig):
    i=indices[title]
    x=data.iloc[i]['label']
    t=[x]
    idx=list(data[data['label'].isin(t)].index)
    sig_temp=list(enumerate(sig[i]))

    sig_scores=itemgetter(*idx)(sig_temp)
    
    scores=sorted(sig_scores, key=lambda x:x[0], reverse=True)
    
    product_indices=[i[0] for i in scores]
    
    result = data.iloc[product_indices]
    result=result.reset_index(drop=True)

    output = similarity(result, title)
    output.drop(['label'], axis=1, inplace=True)
    
    return output[['title','description','asin','ratings','n_ratings']][:20]

In [12]:
result=get_rec('OPI Nail Lacquer Top Coat')
result

Unnamed: 0,title,description,asin,ratings,n_ratings
0,"CND Shellac, Leather Satchel",CND Shellac was designed to be used as a syste...,B01HIQEOLO,5.0,11
1,CND Shellac power polish denim patch,CND Shellac was designed to be used as a syste...,B01HIQHQU0,4.5,14
2,"CND Shellac Power Polish, Patina Buckle",CND Craft Culture Collection: Patina Buckle D...,B01HIQIEYC,4.4,25
3,"CND Shellac, Brick Knit",CND Shellac was designed to be used as a syste...,B01HIQCSBC,4.0,6
4,ELEMIS Frangipani Monoi Hand Cream and Nail Cr...,"This luxurious hand and nail cream, enriched w...",B01HGSJPWM,5.0,3
5,The Beachwaver Co. S.75 Curling Iron,Rotate your way to perfect waves with the Beac...,B01HG7OIT8,4.0,23
6,"JINsoon Nail Lacquer, Crush",All jinsoon formulas are 5-free: formulated wi...,B01H353AB0,4.2,4
7,butter LONDON Nail Polish,Get the perfect nude pout with our plush rush ...,B01H353HOK,4.3,25
8,butter LONDON Glazen Eye Gloss,"A new trick in your beauty arsenal, glazen eye...",B01H353CWW,4.0,54
9,"JINsoon Floral Nail Lacquer, Blush, 0.37 fl. oz.",All jinsoon formulas are 5-free: formulated wi...,B01H353ABU,4.0,2
