In [28]:
# importing necessary packages
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.neighbors import NearestNeighbors
from sklearn.cluster import KMeans
from sklearn.metrics import adjusted_rand_score

In [29]:
# Load the raw product descriptions from the CSV file (relative path).
product_descriptions = pd.read_csv(filepath_or_buffer="product_descriptions.csv")
# Preview the first five rows.
product_descriptions.head(n=5)

Unnamed: 0,product_uid,product_description
0,100001,"Not only do angles make joints stronger, they ..."
1,100002,BEHR Premium Textured DECKOVER is an innovativ...
2,100003,Classic architecture meets contemporary design...
3,100004,The Grape Solar 265-Watt Polycrystalline PV So...
4,100005,Update your bathroom with the Delta Vero Singl...


In [30]:
# Missing values: drop every row that contains at least one missing entry.
product_descriptions = product_descriptions.dropna()
# Only the last expression of a cell is rendered automatically, so the shape
# has to be printed explicitly to show up next to the preview below.
print(product_descriptions.shape)
product_descriptions.head()

Unnamed: 0,product_uid,product_description
0,100001,"Not only do angles make joints stronger, they ..."
1,100002,BEHR Premium Textured DECKOVER is an innovativ...
2,100003,Classic architecture meets contemporary design...
3,100004,The Grape Solar 265-Watt Polycrystalline PV So...
4,100005,Update your bathroom with the Delta Vero Singl...


In [31]:
# Restrict the analysis to the first 500 rows.
product_descriptions1 = product_descriptions.head(n=500)

# Preview the first ten description texts of that subset.
product_descriptions1.loc[:, "product_description"].head(n=10)

0    Not only do angles make joints stronger, they ...
1    BEHR Premium Textured DECKOVER is an innovativ...
2    Classic architecture meets contemporary design...
3    The Grape Solar 265-Watt Polycrystalline PV So...
4    Update your bathroom with the Delta Vero Singl...
5    Achieving delicious results is almost effortle...
6    The Quantum Adjustable 2-Light LED Black Emerg...
7    The Teks #10 x 1-1/2 in. Zinc-Plated Steel Was...
8    Get the House of Fara 3/4 in. x 3 in. x 8 ft. ...
9    Valley View Industries Metal Stakes (4-Pack) a...
Name: product_description, dtype: object

In [32]:
# Feature extraction from product descriptions
# Encode each description as a TF-IDF weighted term vector so the text can be
# analysed numerically; English stop words are excluded by the vectorizer.
vectorizer = TfidfVectorizer(stop_words="english")
X1 = vectorizer.fit_transform(
    product_descriptions1.loc[:, "product_description"]
)
X1

<500x8932 sparse matrix of type '<class 'numpy.float64'>'
	with 34817 stored elements in Compressed Sparse Row format>

In [34]:
# Cluster the TF-IDF vectors with k-means and list the top terms of each
# cluster based on the product descriptions.

k = 10  # number of clusters to fit

# random_state pins the k-means++ initialisation. Without a seed (and with
# n_init=1) the initial centroids come from an unseeded RNG, so the clusters
# and their numbering can differ from one run to the next.
model = KMeans(n_clusters=k, init='k-means++', max_iter=100, n_init=1,
               random_state=42)
model.fit(X1)

# For every centroid: feature indices sorted from highest to lowest weight.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]

# get_feature_names() was removed in scikit-learn 1.2. Prefer its replacement
# and fall back to the old accessor on releases that predate it.
if hasattr(vectorizer, "get_feature_names_out"):
    terms = vectorizer.get_feature_names_out()
else:
    terms = vectorizer.get_feature_names()


def print_cluster(i):
    """Print the header line and the ten highest-weighted terms of cluster ``i``.

    NOTE(review): this helper was not defined in any visible cell (execution
    counts skip from 32 to 34); it is reconstructed here from the printed
    output format so the notebook runs on a fresh kernel.
    """
    print("Cluster %d:" % i)
    for ind in order_centroids[i, :10]:
        print(' %s' % terms[ind])


print("Top terms per cluster:")
# Iterate over `k` (the original loop referenced an undefined `true_k`).
for i in range(k):
    print_cluster(i)

Top terms per cluster:
Cluster 0:
 panels
 finished
 roofing
 applied
 wall
 apply
 wood
 plastic
 sheet
 waxes
Cluster 1:
 light
 energy
 ft
 oven
 door
 wash
 cooking
 cycle
 fan
 large
Cluster 2:
 wall
 piece
 finish
 tile
 design
 floor
 installation
 use
 recommended
 residential
Cluster 3:
 insulation
 radiant
 attic
 48
 ecotouch
 film
 cover
 corning
 owens
 shrink
Cluster 4:
 ft
 20
 cutting
 ladder
 easy
 use
 roller
 paint
 steel
 10
Cluster 5:
 storage
 shelves
 cabinet
 room
 adjustable
 nickel
 wall
 shelf
 32
 add
Cluster 6:
 post
 outdoor
 vary
 bamboo
 patio
 rug
 wood
 product
 steel
 frame
Cluster 7:
 snow
 watt
 power
 engine
 start
 light
 protection
 joe
 leaf
 blower
Cluster 8:
 metal
 drill
 use
 hole
 screw
 steel
 gauge
 plated
 screws
 bit
Cluster 9:
 water
 air
 easy
 heater
 tank
 installation
 free
 cooling
 temperature
 use


In [35]:
# Predicting clusters based on key search words
def show_recommendations(product):
    """Print the top terms of the cluster that best matches a search phrase.

    Args:
        product: Free-text search words, e.g. ``"cutting tool"``.

    Returns:
        None. The matching cluster is printed through ``print_cluster``; if
        the phrase contains no term known to the vectorizer, a short notice
        is printed instead.
    """
    # Project the query into the feature space of the fitted vectorizer.
    Y = vectorizer.transform([product])
    # A query without any in-vocabulary term yields an all-zero vector;
    # predicting on it would report a cluster unrelated to the search words.
    if Y.nnz == 0:
        print("No known terms in search: %r" % product)
        return
    prediction = model.predict(Y)
    print_cluster(prediction[0])

In [36]:
# Recommendations for the search phrase "cutting tool"
show_recommendations(product="cutting tool")

Cluster 4:
 ft
 20
 cutting
 ladder
 easy
 use
 roller
 paint
 steel
 10


In [37]:
# Recommendations for the search phrase "roofing"
show_recommendations(product="roofing")

Cluster 0:
 panels
 finished
 roofing
 applied
 wall
 apply
 wood
 plastic
 sheet
 waxes


In [None]:
# Once the user's search words have been mapped to a cluster, the recommendation system can display products from that cluster, i.e. items whose descriptions are similar to the search.