In [83]:
from collections import Counter
from string import punctuation
import spacy
import numpy as np
import requests
from tqdm import tqdm
from collections import defaultdict
from pprint import pprint
import pandas as pd
from SimilarityFinder import SimilarityFinder
from heapq import heappop, heappush, heapify
import random


In [52]:
data = {
    "occasions": ["wedding", "party", "interview", "birthday", "promotion", "anniversary", "trek", "trip"],
    "tops": ["shirt", "top", "t-shirt", "crop top", "tank top", "blouse"],
    "bottoms": ["jeans", "skirt", "shorts", "pants", "trousers", "leggings", "joggers"],
    "onepieces": ["suit", "dress", "jumpsuit", "romper", "saree", "playsuit", "gown", "maxi dress", "midi dress", "mini dress"],
    "coveralls": ["blazer", "jacket", "coat", "sweater", "sweatshirt", "hoodie", "cardigan"],
    "footwear": ["shoes", "boots", "sandals", "sneakers", "heels"],
    "accessories": ["bag", "wallet", "watch", "sunglasses", "earrings", "necklaces"],
    "seasons": ["summer", "monsoons", "winter", "spring", "rainy", "cold", "hot"],
    "adjectives": ["pair", "red", "long", "short", "black", "sleeveless", "two"]
}
imp_words = ["party", "interview", "birthday", "dress", "shirt", "jacket", "co-ord", "skirt", "jeans", "top",
                "shoes", "boots", "sandals", "sneakers", "bag", "wallet", "watch", "sunglasses", "summer",
                "monsoons", "winter", "spring"]

categories = ["accessories", "tops","bottoms","onepieces","coveralls","adjectives", "footwear", "occasions", "seasons"]



In [41]:
activated = spacy.prefer_gpu()
nlp = spacy.load("en_core_web_lg")
def get_hotwords(text):
    result = []
    pos_tag = ['PROPN', 'ADJ', 'NOUN'] 
    doc = nlp(text.lower()) 
    for token in doc:
        # print(token, token.pos_)
        if(token.text in nlp.Defaults.stop_words or token.text in punctuation) and token.text not in imp_words:
            continue
        if(token.pos_ in pos_tag):
            result.append(token.text)
    return result
new_text = """
A simple tshirt or blouse with jeans or leggings can be dressed up with heels or boots. Add a cardigan or blazer if you want to look more professional. 
"""
output = set(get_hotwords(new_text))
most_common_list = Counter(output).most_common(10)
for item in output:
  print(item)

simple
blouse
professional
jeans
heels
blazer
tshirt
cardigan
leggings
boots


In [53]:
nlp = spacy.blank("en")
nlp.add_pipe(
    "text_categorizer",
    config={
        "data": data,
        "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        "multi_label": True,
        "device": "gpu"
    }
)
results = {}

for item in tqdm(output):
    doc = nlp(item)
    if max(zip(doc._.cats.values(), doc._.cats.keys()))[1] not in results:
        results[max(zip(doc._.cats.values(), doc._.cats.keys()))[1]] = [item]
    else:
        results[max(zip(doc._.cats.values(), doc._.cats.keys()))[1]].append(item)
    print(item, doc._.cats)

100%|██████████| 10/10 [00:00<00:00, 48.95it/s]

simple {'accessories': 0.06462316364760907, 'adjectives': 0.3615231512131528, 'bottoms': 0.012092571512705408, 'coveralls': 0.025483706668709628, 'footwear': 0.008897059929942221, 'occasions': 0.1235050067699653, 'onepieces': 0.06486170605962945, 'seasons': 0.04217084153853702, 'tops': 0.006859145917687679}
blouse {'accessories': 0.06134332927652995, 'adjectives': 0.05512569385955568, 'bottoms': 0.10952261082924049, 'coveralls': 0.03911378348969648, 'footwear': 0.0010580575245019233, 'occasions': 0.010464042518696709, 'onepieces': 0.1720464758382707, 'seasons': 0.003959340102104, 'tops': 0.9178156750013554}
professional {'accessories': 0.04933143106226782, 'adjectives': 0.1381305124411007, 'bottoms': 0.013458096546879805, 'coveralls': 0.059304638870764065, 'footwear': 0.003926774778228762, 'occasions': 0.527116919972589, 'onepieces': 0.28840490400825514, 'seasons': 0.0022484628354264512, 'tops': 0.006888982236346029}
jeans {'accessories': 0.017456357081122895, 'adjectives': 0.030708928




In [54]:
pprint(results)

{'adjectives': ['simple'],
 'bottoms': ['jeans', 'leggings'],
 'coveralls': ['blazer', 'cardigan'],
 'footwear': ['heels', 'boots'],
 'occasions': ['professional'],
 'onepieces': ['tshirt'],
 'tops': ['blouse']}


In [56]:
queries = []
cats = ["tops","bottoms","coveralls","onepieces", "accessories", "footwear"]

for cat in cats:
    if cat in results:
        for item in results[cat]:
            queries.append(item)

print(queries)


['blouse', 'jeans', 'leggings', 'blazer', 'cardigan', 'tshirt', 'heels', 'boots']


In [57]:

occasions = ""
if "occasions" in results:
    for item in results["occasions"]:
        occasions += "+" + item

seasons = ""
if "seasons" in results:
    for item in results["seasons"]:
        seasons += "+" + item

for i in range(len(queries)):
    queries[i] += occasions + "+female"

print(queries)

['blouse+professional+female', 'jeans+professional+female', 'leggings+professional+female', 'blazer+professional+female', 'cardigan+professional+female', 'tshirt+professional+female', 'heels+professional+female', 'boots+professional+female']


In [100]:
response = requests.get(f"https://flipkart-scraper-api.dvishal485.workers.dev/search/{queries[0]}").json()

In [106]:
total_search_results = response["total_result"]
search_products = response["result"]
response

{'total_result': 33,
 'query': 'blouse+professional+female',
 'fetch_from': 'https://www.flipkart.com/search?marketplace=FLIPKART&q=blouse+professional+female',
 'result': [{'name': 'U-Neck Women Blouse',
   'link': 'https://www.flipkart.com/scube-designs-u-neck-women-blouse/p/itm281eaa5a73c28',
   'current_price': 296,
   'original_price': 1299,
   'discounted': True,
   'thumbnail': 'https://rukminim2.flixcart.com/image/612/612/l1mh7rk0/blouse/n/y/f/34-bw-sc-bl-5004-dobby-elbow-black-scube-designs-original-imagd5h7c6djaftb.jpeg?q=70',
   'query_url': 'https://flipkart-scraper-api.dvishal485.workers.dev/product/scube-designs-u-neck-women-blouse/p/itm281eaa5a73c28'},
  {'name': 'Boat Neck Women Blouse',
   'link': 'https://www.flipkart.com/s-grant-boat-neck-women-blouse/p/itm64fea2a5355f4',
   'current_price': 499,
   'original_price': 1299,
   'discounted': True,
   'thumbnail': 'https://rukminim2.flixcart.com/image/612/612/xif0q/blouse/l/m/a/free-begampuri-white-s-grant-original-imag

In [102]:
users = pd.read_pickle('users.pkl')
users.head()

Unnamed: 0,UserID,FirstName,LastName,Age,Gender,Country,ProductsBought,ProductsViewedInLast30Days,ProductsInWishlist
0,613707,Mark,Drake,69,F,Uzbekistan,"[[ETHFZ7GZ5YMFZW6Z, MYKUKI Women Lycra Blend E...","[[KDEGM6XFEYD3HTQ9, Flip The Style Disney Prin...","[[SHTGM92WNJWTTC5H, BLUE MARTIN Men Regular Fi..."
1,762381,Emily,Moore,72,F,Romania,"[[None, VTEXX Men Regular Fit Solid Spread Col...","[[ETHGK7WN2R7ANPFZ, RAJ FASHIONS Women Viscose...","[[ETHGKD52TDAZHKZF, Bisso Fashion Women Viscos..."
2,507939,Randy,Lawson,67,M,Croatia,"[[ETHFZGYAFRWZAEGV, MYKUKI Women Lycra Blend E...","[[ETHFZZ75QGF67ZCP, MYKUKI Women Cotton Blend ...","[[SHTG89X4H9Z432KD, EVIQE Men Regular, Slim Fi..."
3,177633,Kelly,Jones,34,M,Burundi,"[[SRTGDF9HYPTQ9JFZ, Adrenex Pack of 2 Solid Me...","[[SHTGB79SGHYVBMXG, FUBAR Men Slim Fit Solid S...","[[KTAGGF8UCCJFR6EZ, DSK STUDIO Pack of 2 Women..."
4,932181,Leslie,Tate,64,F,Georgia,"[[SHTGMNGQFMU25UEV, VTEXX Men Regular Fit Soli...","[[DREGHH44GBFGQWYX, SClassicFashion Women Empi...","[[KDEGZBRQS9NGXJY8, Flip The Style Girls Midi/..."


In [103]:
name = "Mark"
user = users[users['FirstName'] == name]
user = user.reset_index(drop=True)
totalproducts_user = len(user.iloc[0]["ProductsBought"]) + len(user.iloc[0]["ProductsViewedInLast30Days"]) + len(user.iloc[0]["ProductsInWishlist"])
products_user = { "productsBoughtUser" : [] 
                , "productsViewedUser" : []
                , "productsWishlistUser" : []}

if totalproducts_user > 20  :
    if len(user.iloc[0]["ProductsViewedInLast30Days"]) < 9:
        products_user["productsViewedUser"] = user.iloc[0]["ProductsViewedInLast30Days"]
    else:
        products_user["productsViewedUser"] = random.sample(user.iloc[0]["ProductsViewedInLast30Days"],8)
    if len(user.iloc[0]["ProductsBought"]) < 8:
        products_user["productsBoughtUser"] = user.iloc[0]["ProductsBought"]
    else:
        products_user["productsBoughtUser"] = random.sample(user.iloc[0]["ProductsBought"],7)
    if len(user.iloc[0]["ProductsInWishlist"]) < 6:
        products_user["productsWishlistUser"] = user.iloc[0]["ProductsInWishlist"]
    else:
        products_user["productsWishlistUser"] = random.sample(user.iloc[0]["ProductsInWishlist"],5)
else:
    products_user["productsViewedUser"] = user.iloc[0]["ProductsViewedInLast30Days"]
    products_user["productsBoughtUser"] = user.iloc[0]["ProductsBought"]
    products_user["productsWishlistUser"] = user.iloc[0]["ProductsInWishlist"]

pprint(products_user)

{'productsBoughtUser': [['DREGGSSRNWGKHFFG',
                         'Nidhi Collection\xa0Women A-line Multicolor Dress',
                         'https://www.flipkart.com/nidhi-collection-women-a-line-orange-dress/p/itmc6b361e2ef2e6'],
                        ['ETHFZ7GZ5YMFZW6Z',
                         'MYKUKI\xa0Women Lycra Blend Ethnic Jacket, Top and '
                         'Palazzo Set',
                         'https://www.flipkart.com/mykuki-women-ethnic-jacket-top-palazzo-set/p/itm25b2dcba3bf8f'],
                        ['DREG9Z7GHHTQ9HNK',
                         'Nidhi Collection\xa0Women A-line Multicolor Dress',
                         'https://www.flipkart.com/nidhi-collection-women-a-line-multicolor-dress/p/itm0420b5b72563f'],
                        [nan, nan, nan],
                        ['SHTGG3Z5AMHEVHKN',
                         'ARROW\xa0Men Slim Fit Printed Spread Collar Formal '
                         'Shirt',
                         'https://www.f

In [104]:
check = SimilarityFinder('sentence-transformers/all-mpnet-base-v2')
heap = []
heapify(heap)
weights = { "productsViewedUser" : 0.5, "productsBoughtUser" : 0.3, "productsWishlistUser": 0.2}
if len(search_products) > 17:
    search_products = search_products[2:17]
for i in search_products:
    # print("Search Product: ", search_products[i])
    # print("Search Result: ", search_results[i])
    val = 0
    for cat in products_user:
        for j in products_user[cat]:
            val += weights[cat]*check.calculate_similarity(
            check.calculate_embeddings(str(j[1])),
            check.calculate_embeddings(i["name"])).item()
    sim = val/min(20, totalproducts_user)
    heappush(heap, (-sim , i["name"], i["link"]))

In [105]:
sorted_tuples = []
max_heap = heap
while max_heap:
    max_value_neg, value, link = heappop(max_heap)
    sorted_tuples.append((abs(max_value_neg), value, link))

for item in sorted_tuples:
    print(item)

(0.14519683212041856, 'Women Solid Pure Cotton Straight Kurta', 'https://www.flipkart.com/rimeline-fashion-women-solid-straight-kurta/p/itmb297cba9d2dc5')
(0.1367976926639676, 'Sweetheart Neck Women Blouse', 'https://www.flipkart.com/s-grant-sweetheart-neck-women-blouse/p/itm26dfab43f188c')
(0.12636108122766015, 'Round Neck Women Blouse', 'https://www.flipkart.com/guptatrendz-round-neck-women-blouse/p/itm2f58d86639ab9')
(0.12514409922063352, 'U-Neck Women Blouse', 'https://www.flipkart.com/s-grant-u-neck-women-blouse/p/itm06fb6a0af5c0f')
(0.12074350390583277, 'Printed Bollywood Art Silk Saree', 'https://www.flipkart.com/ksh-trendz-printed-bollywood-art-silk-saree/p/itm312b1c62dc88c')
(0.11687463480979203, 'V-Neck Women Blouse', 'https://www.flipkart.com/s-grant-v-neck-women-blouse/p/itm098dd26dc0be7')
(0.11687463480979203, 'V-Neck Women Blouse', 'https://www.flipkart.com/s-grant-v-neck-women-blouse/p/itm4454cf2935bf8')
(0.11687463480979203, 'V-Neck Women Blouse', 'https://www.flipkart.