In [3]:
import numpy as np

In [4]:
import difflib
import pandas as pd

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [7]:
fashion_data = pd.read_csv('fashion_products.csv')
fashion_data.head()

Unnamed: 0,User ID,Product ID,Product Name,Brand,Category,Price,Rating,Color,Size
0,19,1,Dress,Adidas,Men's Fashion,40,1.043159,Black,XL
1,97,2,Shoes,H&M,Women's Fashion,82,4.026416,Black,L
2,25,3,Dress,Adidas,Women's Fashion,44,3.337938,Yellow,XL
3,57,4,Shoes,Zara,Men's Fashion,23,1.049523,White,S
4,79,5,T-shirt,Adidas,Men's Fashion,79,4.302773,Black,M


In [8]:
fashion_data.shape

(1000, 9)

In [9]:
fashion_data.columns

Index(['User ID', 'Product ID', 'Product Name', 'Brand', 'Category', 'Price',
       'Rating', 'Color', 'Size'],
      dtype='object')

In [10]:
selected_features = ['Brand','Category','Size','Color']
print(selected_features)

['Brand', 'Category', 'Size', 'Color']


In [12]:
for feature in selected_features:
    fashion_data[feature]=fashion_data[feature].fillna('')

In [14]:
combined_features = fashion_data['Brand']+' '+fashion_data['Category']+' '+fashion_data['Size']+' '+fashion_data['Color']
print(combined_features)

0         Adidas Men's Fashion XL Black
1           H&M Women's Fashion L Black
2      Adidas Women's Fashion XL Yellow
3            Zara Men's Fashion S White
4          Adidas Men's Fashion M Black
                     ...               
995        Zara Women's Fashion M Black
996          Nike Kids' Fashion L Green
997          Zara Men's Fashion L White
998         Zara Women's Fashion S Blue
999      Adidas Women's Fashion L White
Length: 1000, dtype: object


In [15]:
vectorizer=TfidfVectorizer()

In [16]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [17]:
print(feature_vectors)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 4060 stored elements and shape (1000, 15)>
  Coords	Values
  (0, 0)	0.5109967130316281
  (0, 7)	0.41636673195963975
  (0, 3)	0.19537621514596712
  (0, 12)	0.46255147672080915
  (0, 1)	0.5598172645843971
  (1, 3)	0.2703050747673901
  (1, 1)	0.7745131486271642
  (1, 11)	0.5718955753969703
  (2, 0)	0.5187498105295739
  (2, 3)	0.19834056072036338
  (2, 12)	0.46956953888320374
  (2, 11)	0.419637290181628
  (2, 13)	0.5431101627577031
  (3, 7)	0.4860097015353472
  (3, 3)	0.22805553066953882
  (3, 14)	0.5908085907715567
  (3, 10)	0.6022710801749368
  (4, 0)	0.5763603404955568
  (4, 7)	0.4696258611519227
  (4, 3)	0.22036756600290106
  (4, 1)	0.631425723498115
  (5, 0)	0.5865903504212805
  (5, 7)	0.47796140557336897
  (5, 3)	0.22427894266976003
  (5, 13)	0.6141364762411556
  :	:
  (993, 4)	0.7712750598860559
  (994, 0)	0.5793013193460831
  (994, 3)	0.22149202982426539
  (994, 6)	0.45297857822882226
  (994, 9)	0.6404386542003917
  (995

In [18]:
similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.48639701 0.52103056 ... 0.2469149  0.04392937 0.34877018]
 [0.48639701 1.         0.29360117 ... 0.06164457 0.3328354  0.33702785]
 [0.52103056 0.29360117 1.         ... 0.04523266 0.24422316 0.5562037 ]
 ...
 [0.2469149  0.06164457 0.04523266 ... 1.         0.39541803 0.41405175]
 [0.04392937 0.3328354  0.24422316 ... 0.39541803 1.         0.28034632]
 [0.34877018 0.33702785 0.5562037  ... 0.41405175 0.28034632 1.        ]]


In [19]:
print(similarity.shape)

(1000, 1000)


In [31]:
product_name = input("Enter the product name: ")
list_of_all_products=fashion_data['Product Name'].tolist()
find_close_match=difflib.get_close_matches(product_name,list_of_all_products)
close_match=find_close_match[0]
index_of_the_product = fashion_data[fashion_data['Product Name'] == close_match]['Product ID'].values[0]
similarity_score = list(enumerate(similarity[index_of_the_product]))
sorted_similar_products = sorted(similarity_score,key=lambda x:x[1],reverse=1)
print("products suggested for you :\n")
i=1
for products in sorted_similar_products:
    index = products[0]
    product_names = fashion_data[fashion_data['Product ID'] == index]['Product Name'].values
    if product_names.size > 0:
        title_from_index = fashion_data[fashion_data['Product ID']==index]['Product Name'].values[0]
        if(i<30):
            print(i, '.',title_from_index)
            i+=1
    else: 
            print(f"No product found with Product ID: {index}")

Enter the product name:  shirt


products suggested for you :

1 . T-shirt
2 . T-shirt
3 . Dress
4 . Dress
5 . Dress
6 . T-shirt
7 . T-shirt
8 . T-shirt
9 . Shoes
10 . Shoes
11 . Jeans
12 . Dress
13 . Jeans
14 . Dress
15 . Sweater
16 . Dress
17 . Shoes
18 . Jeans
19 . T-shirt
20 . Jeans
21 . Jeans
22 . Shoes
23 . Dress
24 . Shoes
25 . Dress
26 . Shoes
27 . T-shirt
28 . T-shirt
29 . Dress
No product found with Product ID: 0
