In [None]:
import pandas as pd
import regex as re
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.stem.porter import PorterStemmer

In [None]:
# Category/sub-category paths of the product catalogue. The loading cell opens
# '/content/drive/MyDrive/recomendation' + path + '.json' for each entry.
# Every path must be listed exactly once: a repeated entry makes the loader
# read the same file twice, which duplicates all of its products.
paths = [
        "/bangles/bracelet",
        "/bangles/mantasa",
        "/bangles/golden-bangle",
        "/bangles/oxydized-bangle",
        "/cosmetics/makeup",
        "/earrings/adstone-earring",
        "/earrings/fancy-earring",
        "/earrings/funky-earring",
        "/earrings/golden-earring",
        "/earrings/oxydized-earring",
        "/earrings/terracotta-earring",
        "/necklaces/chemicalbead-necklace",
        "/necklaces/choker",
        "/necklaces/fancy-necklace",
        "/necklaces/golden-necklace",
        "/necklaces/kundan-necklace",
        "/necklaces/mangalsutra",
        "/necklaces/oxydized-necklace",
        "/necklaces/terracotta-necklace",
        "/otherproducts/chain",
        "/otherproducts/kamarband",
        "/otherproducts/payal",
        "/otherproducts/ring"
        ]

In [None]:
# Read every category file listed in `paths` and collect its products in
# `data`, tagging each product with the path of the file it came from.
data = []
for path in paths:
  # Build the file name once so open() and the message can never disagree.
  json_file = '/content/drive/MyDrive/recomendation' + path + '.json'
  try:
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(json_file, 'r', encoding='utf-8') as f:
      jsdata = json.load(f)
  except FileNotFoundError:
    # Best effort: report the missing category and carry on with the others.
    print(f"File {json_file} not found.")
    continue
  for item in jsdata:
    item['path'] = path
  data.extend(jsdata)

In [None]:
# Build a single DataFrame from the product records collected in `data`.
df = pd.DataFrame(data)

In [None]:
# NOTE: DataFrame.size is the total number of cells (rows x columns),
# not the number of rows.
print(df.size)
# Preview the first rows of the raw catalogue.
df.head()

4158


Unnamed: 0,id,url,description,detail,price,type,path
0,12,https://res.cloudinary.com/di3zlyh9o/image/upl...,Bracelet,Beautiful Bracelet \n Size: Free size. \n Colo...,120,[{'url': 'https://res.cloudinary.com/di3zlyh9o...,/bangles/bracelet
1,1,https://res.cloudinary.com/di3zlyh9o/image/upl...,Bracelet,Beautiful Bracelet \n Type: AD stone. \n Size:...,40,,/bangles/bracelet
2,2,https://res.cloudinary.com/di3zlyh9o/image/upl...,Bracelet,Beautiful Bracelet \n Type: AD stone. \n Size:...,40,,/bangles/bracelet
3,3,https://res.cloudinary.com/di3zlyh9o/image/upl...,Bracelet,Beautiful Bracelet \n Type: AD stone. \n Size:...,180,,/bangles/bracelet
4,4,https://res.cloudinary.com/di3zlyh9o/image/upl...,Bracelet,Beautiful Bracelet \n Type: stone. \n Size: Fr...,80,,/bangles/bracelet


In [None]:
def process_df(df):
  """Normalise the free-text columns of the product frame.

  detail:      the characters " . , : ( ) are replaced by spaces, the text is
               lower-cased, and every run of whitespace (newlines included)
               is collapsed to a single space.
  description: lower-cased and stripped of surrounding whitespace.

  Missing values in either column become empty strings instead of raising.

  Args:
    df: DataFrame with 'detail' and 'description' columns.

  Returns:
    The same DataFrame object; the two columns are overwritten in place.
  """
  # An earlier str.replace('[\n]', '') step was removed: str.replace matches
  # its argument literally (not as a regex), so it did nothing useful.
  # Newlines are handled by the whitespace split/join below.
  detail = df['detail'].fillna('').astype(str)
  detail = detail.str.replace(r'[".,:()]', ' ', regex=True).str.lower()
  df['detail'] = detail.str.split().str.join(' ')

  description = df['description'].fillna('').astype(str)
  df['description'] = description.str.lower().str.strip()
  return df

# Normalise the 'detail' and 'description' text before the tags are built.
df = process_df(df)

In [None]:
def join_columns(row):
    """Return one string made of a row's price, detail and description.

    Each of the three values is converted with ``str`` and the pieces are
    joined with a single space, in the order price, detail, description.
    """
    pieces = []
    for field in ('price', 'detail', 'description'):
        pieces.append(str(row[field]))
    return ' '.join(pieces)
# Row-wise (axis=1): store the joined price/detail/description text as 'tags'.
df['tags'] = df.apply(join_columns, axis=1)

In [None]:
# Lookup key for each product: its category path followed by its id,
# e.g. '/bangles/bracelet/12'.
df['path_id'] = df['path'] + '/' + df['id'].astype(str)
# Keep only the columns still needed from here on. This cell consumes 'path'
# and 'id', so it can only be re-run after re-running the cells above it.
# (The former df["tags"].apply(lambda x: x) was an identity no-op and was removed.)
df = df.drop(columns=['price', 'detail', 'description', 'type', 'path', 'id'])

In [None]:
# Single Porter stemmer instance shared by every stem() call.
ps = PorterStemmer()
def stem(text):
  """Stem every whitespace-separated word of `text` with the module-level `ps`.

  Returns the stemmed words joined by single spaces.
  """
  return " ".join(ps.stem(word) for word in text.split())

In [None]:
# Replace every tag string by its stemmed version.
df['tags'] = df['tags'].apply(stem)

In [None]:
# NOTE: DataFrame.size is the total number of cells (rows x columns),
# not the number of rows.
print(df.size)
# Preview the reduced frame (url, tags, path_id).
df.head()

1782


Unnamed: 0,url,tags,path_id
0,https://res.cloudinary.com/di3zlyh9o/image/upl...,120 beauti bracelet size free size color blue ...,/bangles/bracelet/12
1,https://res.cloudinary.com/di3zlyh9o/image/upl...,40 beauti bracelet type ad stone size free siz...,/bangles/bracelet/1
2,https://res.cloudinary.com/di3zlyh9o/image/upl...,40 beauti bracelet type ad stone size free siz...,/bangles/bracelet/2
3,https://res.cloudinary.com/di3zlyh9o/image/upl...,180 beauti bracelet type ad stone size free si...,/bangles/bracelet/3
4,https://res.cloudinary.com/di3zlyh9o/image/upl...,80 beauti bracelet type stone size free size b...,/bangles/bracelet/4


In [None]:
# Bag-of-words model over the stemmed tags: vocabulary capped at 1000 terms,
# with scikit-learn's built-in English stop-word list removed.
cv = CountVectorizer(max_features=1000, stop_words='english')
# Learn the vocabulary and count terms per product; .toarray() turns the
# result into a dense array with one row per product and one column per term.
vectors = cv.fit_transform(df['tags']).toarray()
vectors.shape

(594, 218)

In [None]:
cv.get_feature_names_out()

array(['00000', '100', '1000', '104', '105', '108', '110', '115', '118',
       '120', '125', '128', '130', '135', '140', '144', '149', '150',
       '155', '159', '160', '165', '168', '169', '170', '175', '180',
       '184', '185', '190', '195', '199', '20', '200', '209', '210',
       '218', '220', '225', '230', '235', '240', '249', '250', '259',
       '260', '267', '270', '278', '279', '280', '289', '290', '299',
       '30', '300', '310', '320', '325', '330', '340', '350', '36', '360',
       '370', '380', '399', '40', '400', '405', '410', '430', '440', '45',
       '50', '500', '510', '54', '540', '55', '560', '580', '60', '600',
       '630', '64', '65', '650', '66', '68', '70', '72', '75', '80', '81',
       '85', '850', '90', '95', '96', '99', 'ad', 'age', 'alphabet',
       'avail', 'available', 'availavl', 'bangl', 'bead', 'beauti',
       'black', 'blue', 'bracelet', 'brand', 'brown', 'butterfli',
       'camera', 'centr', 'chain', 'chemicalbead', 'choker', 'circular',
   

In [None]:
# Pairwise cosine similarity between all rows of `vectors`:
# entry [i, j] compares product i with product j.
cos_sim = cosine_similarity(vectors)

array([[1.        , 0.58321184, 0.50062617, ..., 0.08908708, 0.08908708,
        0.08908708],
       [0.58321184, 1.        , 0.85839508, ..., 0.10910895, 0.10910895,
        0.10910895],
       [0.50062617, 0.85839508, 1.        , ..., 0.09365858, 0.09365858,
        0.09365858],
       ...,
       [0.08908708, 0.10910895, 0.09365858, ..., 1.        , 1.        ,
        0.83333333],
       [0.08908708, 0.10910895, 0.09365858, ..., 1.        , 1.        ,
        0.83333333],
       [0.08908708, 0.10910895, 0.09365858, ..., 0.83333333, 0.83333333,
        1.        ]])

In [None]:
def recomendation(path_id, top_n=10):
  """Return the products most similar to the one identified by `path_id`.

  Reads the module-level `df` (columns 'path_id' and 'url') and `cos_sim`
  (similarity matrix whose row/column order matches the row order of `df`).

  Args:
    path_id: key of the query product, e.g. '/bangles/bracelet/12'.
    top_n: maximum number of recommendations to return (default 10).

  Returns:
    A list of {'path': ..., 'url': ...} dicts, most similar first. The query
    product itself is never part of the result.

  Raises:
    IndexError: if no row of `df` has the given `path_id`.
  """
  # Use the row *position* (not the index label) because cos_sim and
  # df.iloc are both addressed by position.
  matches = (df['path_id'] == path_id).to_numpy().nonzero()[0]
  if len(matches) == 0:
    raise IndexError(f"No product with path_id {path_id!r}")
  product_pos = int(matches[0])

  distances = cos_sim[product_pos]
  # Drop the query product explicitly instead of assuming it sorts first:
  # products with identical tags also score 1.0, and a tie could otherwise
  # push the query product itself into the recommendations.
  candidates = [pair for pair in enumerate(distances) if pair[0] != product_pos]
  ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

  recomends = []
  for pos, _score in ranked:
    row = df.iloc[pos]
    recomends.append({'path': row['path_id'], 'url': row['url']})
  return recomends

In [None]:
# Example query: recommendations for the product keyed '/bangles/bracelet/12'.
recomendation('/bangles/bracelet/12')

[{'path': '/bangles/bracelet/5',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659213/bangles/bracelets/bracelet7.1787f2a6297deb615797_cidpvc.jpg'},
 {'path': '/bangles/bracelet/6',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659211/bangles/bracelets/bracelet6.7b1ec626d6df7932b074_o42bf9.jpg'},
 {'path': '/bangles/bracelet/7',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659210/bangles/bracelets/bracelet4.1800c72354675fd75338_ddmt4e.jpg'},
 {'path': '/bangles/bracelet/8',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659207/bangles/bracelets/bracelet5.1958642142174021329c_bz1rzl.jpg'},
 {'path': '/bangles/bracelet/9',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659204/bangles/bracelets/bracelet2.a5f373be83ea35c22a55_jaejq0.jpg'},
 {'path': '/bangles/bracelet/10',
  'url': 'https://res.cloudinary.com/di3zlyh9o/image/upload/v1697659202/bangles/bracelets/bracelet1.b8b53fa502e327469734_y2p1t