In [1]:
import os
import pandas as pd
import plotly.express as px
import progressbar
import json
import numpy as np
import nltk
from nltk.corpus import stopwords
import string
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from nltk.tokenize import word_tokenize
pd.set_option('display.max_colwidth', None)

In [2]:
def remove_stopwords(token_vector, stopwords_list):
    """Remove stop words from every token list of a pandas Series.

    Args:
        token_vector: pandas Series whose elements are lists of string tokens.
        stopwords_list: iterable of stop words; tokens are compared for exact
            (case-sensitive) equality with its items.

    Returns:
        A new Series with the same index in which every token list has its
        stop words removed; the order of the remaining tokens is preserved.
    """
    # Build the lookup once. Testing membership in a list costs a full scan per
    # token, and a one-shot iterable would be used up by the very first test.
    stopword_lookup = frozenset(stopwords_list)
    return token_vector.apply(
        lambda token_list: [word for word in token_list if word not in stopword_lookup]
    )

In [3]:
def remove_punctuation(token_vector):
    """Remove punctuation-only tokens from every token list of a pandas Series.

    A token is dropped when every one of its characters is in
    ``string.punctuation`` (this also drops empty tokens). Tokens that merely
    contain punctuation, such as ``'usb-c'``, are kept.

    Args:
        token_vector: pandas Series whose elements are lists of string tokens.

    Returns:
        A new Series with the same index and the filtered token lists.
    """
    # `word in string.punctuation` would be a *substring* test against the
    # punctuation string, so tokens like "''", "..." or "--" would slip through.
    punctuation_chars = frozenset(string.punctuation)
    return token_vector.apply(
        lambda token_list: [
            word for word in token_list
            if not all(char in punctuation_chars for char in word)
        ]
    )

In [4]:
def jaccard_similarity_score(original, translation):
    """Return the Jaccard similarity |A & B| / |A | B| of two token collections.

    Args:
        original: iterable of hashable tokens (duplicates are ignored).
        translation: iterable of hashable tokens (duplicates are ignored).

    Returns:
        A float between 0.0 and 1.0; 0.0 when both inputs are empty.
    """
    # Materialise each input exactly once so that one-shot iterators are not
    # already exhausted when the union is computed.
    original_set = set(original)
    translation_set = set(translation)
    union_size = len(original_set | translation_set)
    if union_size == 0:
        return 0.0
    return len(original_set & translation_set) / union_size

## Get the paths and build table names for iteration

In [5]:
# Root data directory; every location below is derived from it.
data_path = '../src/data'
# 'lspc2020_to_tablecorpus' directory and its 'Cleaned' sub-directory.
mapping_corpus_path = f'{data_path}/product/lspc2020_to_tablecorpus'
mapping_corpus_path_2 = f'{mapping_corpus_path}/Cleaned'
# 'product_top100/cleaned' directory and its 'with_id' sub-directory.
table_corpus_path = f'{data_path}/product/product_top100/cleaned'
table_corpus_path_with_id = f'{table_corpus_path}/with_id'
# 'with_id' directory of the 'product_minimum3' corpus.
table_corpus_path2 = f'{data_path}/product/product_minimum3/cleaned/with_id'

In [4]:
# Names (not full paths) of the '.json.gz' files in the cleaned mapping directory.
zip_files_mapping = [
    entry
    for entry in os.listdir(mapping_corpus_path_2)
    if entry.endswith('.json.gz')
]
# Same selection for the directory behind table_corpus_path.
zip_files_tables = [
    entry
    for entry in os.listdir(table_corpus_path)
    if entry.endswith('.json.gz')
]

## Match the number dictionaries with the information about the brand


In [None]:
# Both inputs are read as gzip-compressed JSON Lines (one record per line);
# the files themselves carry no extension.
df_electronics_cleaned = pd.read_json(
    f'{mapping_corpus_path_2}/cleaned_electronics_all_brands',
    compression='gzip',
    orient='records',
    lines=True,
)
df_clothes_cleaned = pd.read_json(
    f'{mapping_corpus_path_2}/cleaned_clothes_all_brands',
    compression='gzip',
    orient='records',
    lines=True,
)

In [None]:
# Load 'df_large_matched.json' from the cleaned mapping directory; it is read as
# gzip-compressed JSON Lines despite the plain '.json' file name.
df_large = pd.read_json(os.path.join(mapping_corpus_path_2, 'df_large_matched.json'), compression='gzip', orient='records', lines=True)

In [None]:
# Left join: every row of df_large is kept and receives the columns of the
# cleaned electronics table that share its (table_id, row_id) pair.
df_joined_electronics = df_large.merge(
    df_electronics_cleaned,
    on=['table_id', 'row_id'],
    how='left',
)

In [None]:
# Left join: every row of df_large is kept and receives the columns of the
# cleaned clothes table that share its (table_id, row_id) pair.
df_joined_clothes = df_large.merge(
    df_clothes_cleaned,
    on=['table_id', 'row_id'],
    how='left',
)

In [None]:
# Persist the electronics join as gzip-compressed JSON Lines.
# NOTE(review): the file name ends in '.json' although the content is gzip-compressed.
df_joined_electronics.to_json(
    f'{mapping_corpus_path_2}/joined_electronics_v2.json',
    compression='gzip',
    orient='records',
    lines=True,
)

In [6]:
# Persist the clothes join as gzip-compressed JSON Lines.
# NOTE(review): the file name ends in '.json' although the content is gzip-compressed.
df_joined_clothes.to_json(
    f'{mapping_corpus_path_2}/joined_clothes_v2.json',
    compression='gzip',
    orient='records',
    lines=True,
)

In [6]:
# Reload the persisted electronics join (gzip-compressed JSON Lines).
# NOTE(review): a later cell assigns df_joined_electronics again from a CSV file,
# so this load only takes effect if that cell is skipped.
df_joined_electronics = pd.read_json(os.path.join(mapping_corpus_path_2, 'joined_electronics_v2.json'), compression='gzip', orient='records', lines=True)

In [7]:
# Reload the persisted clothes join (gzip-compressed JSON Lines).
# NOTE(review): the next cell assigns df_joined_clothes again from a CSV file,
# so this load only takes effect if that cell is skipped.
df_joined_clothes = pd.read_json(os.path.join(mapping_corpus_path_2, 'joined_clothes_v2.json'), compression='gzip', orient='records', lines=True)

In [7]:
# Load the post-processed clothes clusters from CSV. This replaces any
# df_joined_clothes that was read from 'joined_clothes_v2.json' before.
df_joined_clothes = pd.read_csv(os.path.join(mapping_corpus_path_2, 'clothes_clusters_all_8_tables_post_processed.csv'))

In [8]:
# Load the post-processed electronics clusters from CSV. This replaces any
# df_joined_electronics that was read from 'joined_electronics_v2.json' before.
df_joined_electronics = pd.read_csv(os.path.join(mapping_corpus_path_2, 'electronics_clusters_all_10_tables_post_processed.csv'))

In [9]:
# Display the clothes frame that is currently bound to df_joined_clothes.
df_joined_clothes

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,cluster_id,url,row_id,table_id,Valid,name,description,brand,tokens
0,1,5871,66437123,https://www.febshoe.com/product/26859,324,Product_febshoe.com_September2020.json.gz,1,nike air max 97 worldwide pack white blue black designer christian cz5607-100,,,"['nike', 'air', 'max', '97', 'worldwide', 'pack', 'white', 'blue', 'black', 'designer', 'christian', 'cz5607-100']"
1,4,15925,78110534,https://www.cn.forzieri.com/chn/product_view.asp?l=chn&c=chn&dept_id=18&sku=dg130320-027-00,3400,Product_forzieri.com_September2020.json.gz,1,"small \""devotion\"" bag","Leather handle, detachable shoulder strap, front plate with logo jewel heart, magnetic closure, inner flat pocket, leather inner, gold finish metal accessories, made in italy.Sensorial gift wrap available",Dolce & Gabbana żΰ,"['small', ""''"", 'devotion\\', ""''"", 'bag']"
2,5,18456,79412,https://www.lookingglassphoto.com/peak-design-the-everyday-messenger-charcoal-15.html,2600,Product_lookingglassphoto.com_September2020.json.gz,1,peak design peak design the everyday messenger- charcoal 15”,,PEAK DESIGN,"['peak', 'design', 'peak', 'design', 'everyday', 'messenger-', 'charcoal', '15', '”']"
3,6,19121,587935,https://soloptical.net/es/rayban-reg-justin-color-mix-rb4165-622-55.html,558,Product_soloptical.net_September2020.json.gz,1,rayban ® justin color mix rb4165 622/55,Gafas de sol de última colección y diseños novedosos que representan uno de los principales accesorios de la moda actual,,"['rayban', '®', 'justin', 'color', 'mix', 'rb4165', '622/55']"
4,7,19932,391605,https://www.whisknyc.com/cake-stand-w-glass-dome-anchor.html,3052,Product_whisknyc.com_September2020.json.gz,1,"13\"" glass cake stand with dome",,,"['13\\', ""''"", 'glass', 'cake', 'stand', 'dome']"
...,...,...,...,...,...,...,...,...,...,...,...
6273,9730,24668368,5121322,https://urbanpalms.com/product/lg-bottle-palm-tree-hyophorbe-lagenicaulis,5,Product_urbanpalms.com_September2020.json.gz,1,lg. bottle palm tree (hyophorbe lagenicaulis),Unique and unlike any other palm Bottle Palm is a distinctive addition to any plant collection. Contained in a planter the Bottle Palm is a shapely centerpiece to any porch patio or well lit indoor area. Native to the Mascarene Islands mature trunks have sometimes attained heights of 20 feet.,Urban Palms,"['lg', 'bottle', 'palm', 'tree', 'hyophorbe', 'lagenicaulis']"
6274,9731,24671086,5121322,https://spectrumtelescope.com/product/glass-solar-filter-st1150g,30,Product_spectrumtelescope.com_September2020.json.gz,1,glass solar filter st1150g,"Your O.D. Measurement:11 1/8” (283mm)to11 3/8” (289mm) Fits:Celestron 10\"" Newt;Orion SkyWatcher 10\""",Spectrum Telescope,"['glass', 'solar', 'filter', 'st1150g']"
6275,9732,24671279,1512681,https://www.abiza.co.uk/product/raw-garnet-necklace-january-birthday-gifts-raw-stone-necklace-garnet-jewelry,32,Product_abiza.co.uk_September2020.json.gz,1,raw garnet necklace,Gorgeous deep red Raw Garnet stones in their most natural state have been hand crafted into pendants and suspending from beautiful shimmering chains. Available in 14K Gold Fill or 925 Sterling Silver. Great for layering but just as good as a single and a very versatile piece. Comes with a crystal information card in a beautiful Abiza gift box. Perfect for January Birthdays or healing purposes! Over 700 Sold with lots of 5⭐️ Reviews!,Abiza Jewellery,"['raw', 'garnet', 'necklace']"
6276,9733,24684217,329538,https://www.thebikerack.com/product/sram-pc-1071-10-speed-hollow-pin-chain-158351-1.htm,113,Product_thebikerack.com_September2020.json.gz,1,pc-1071 10-speed hollow pin chain,"SRAM's 1071 chain is lightweight, smooth, and precise-shifting. It features the PowerLock connecting link and HollowPin construction for weight savings with no sacrifice of strength.- SRAM Road...",,"['pc-1071', '10-speed', 'hollow', 'pin', 'chain']"


In [10]:
# Display the electronics frame that is currently bound to df_joined_electronics.
df_joined_electronics

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,cluster_id,url,row_id,table_id,Valid,name,description,brand,tokens
0,9,11530,1001446,https://www.bts.com.au/store/p102/Apple_Lightning_Digital_AV_Adapter.html,90,Product_bts.com.au_September2020.json.gz,1,apple lightning digital av adapter,"Use the Lightning Digital AV Adapter with your iPhone, iPad or iPod with Lightning connector. The Lightning Digital AV Adapter supports mirroring of what is displayed on your device screen — including apps, presentations, websites, slideshows and more — to your HDMI-equipped TV, display, projector or other compatible display in up to 1080p HD.It also outputs video content — movies, TV shows, captured video — to your big screen in up to 1080p HD. Simply attach the Lightning Digital AV Adapter to the Lightning connector on your device and then to your TV or projector via an HDMI cable (sold separately).",,"['apple', 'lightning', 'digital', 'adapter']"
1,10,11739,2859551,https://www.icentre.com.mt/apple-lightning-to-usb-cable-1m-2,511,Product_icentre.com.mt_September2020.json.gz,1,apple lightning to usb cable,"This USB 2.0 cable connects your iPhone, iPad, or iPod with Lightning connector to your computer’s USB port for syncing and charging. Or you can connect to the Apple USB Power Adapter for convenient charging from a wall outlet.",,"['apple', 'lightning', 'usb', 'cable']"
2,14,18006,323989,https://store.macs-4-u.com.au/usb-c-charge-cable-1-m.html,192,Product_macs-4-u.com.au_September2020.json.gz,1,usb-c charge cable (1 m),USB-C Charge Cable (1 m),,"['usb-c', 'charge', 'cable', '1']"
3,15,20473,210849,https://www.cingolanibikeshop.com/specialized-sella-power-comp-nero-143.html,447,Product_cingolanibikeshop.com_September2020.json.gz,1,specialized sella power comp nero 143,SELLA POWER COMP NERO 143,Specialized,"['specialized', 'sella', 'power', 'comp', 'nero', '143']"
4,16,20908,345616,https://www.scan.co.uk/products/6tb-wd-red-pro-wd6003ffbx-35-nas-hdd-sata-iii-6gb-s-7200rpm-256mb-cache-shock-sensor-ncq-oem,4945,Product_scan.co.uk_September2020.json.gz,1,"wd red pro 6tb 3.5\"" sata nas hdd/hard drive","6TB WD Red Pro WD6003FFBX, 3.5\"" NAS HDD, SATA III 6Gb/s, 7200rpm, 256MB Cache, Shock Sensor, NCQ, OEM",WD,"['wd', 'red', 'pro', '6tb', '3.5\\', ""''"", 'sata', 'hdd/hard', 'drive']"
...,...,...,...,...,...,...,...,...,...,...,...
9322,25297,24677078,28526191,https://www.applianceliquidators.com/products/samsung/nk36k7000wg.html,107,Product_applianceliquidators.com_September2020.json.gz,1,"36"" wall mount hood in black stainless steel",,,"['36', ""''"", 'wall', 'mount', 'hood', 'black', 'stainless', 'steel']"
9323,25299,24678488,929690,https://www.teknolojipazar.com/canon-2311b003-pp-201-parlak-10x15cm-265gr-50-yaprakli-fotograf-kagidi,376,Product_teknolojipazar.com_September2020.json.gz,1,"canon 2311b003 pp-201 parlak, 10x15cm, 265gr, 50 yaprakli fotoğraf kağidi","Kağıt Tipi:Dar Format KağıtKağıt Türü:Fotoğraf KağıdıKağıt Ebadı-Genişlik (mm):100 MMKağıt Ebadı-Yükseklik:150 MMGramajı:260 g/m2ÖzelliklerPaket başı sayfa sayısı50 KağıtlarOrtam yüzeyiYüksek-parlaklıkOrtam ağırlığı260 g/m²Baskı malzemesi kalınlığı270Ürün rengiKırmızıDesteklenen ürünlerCanon Lucia, ChromaLife100+",,"['canon', '2311b003', 'pp-201', 'parlak', '10x15cm', '265gr', '50', 'yaprakli', 'fotoğraf', 'kağidi']"
9324,25303,24681750,941003,https://biondivino.com/products/bisson-bianchetta-genovese-u-pastine-2014,271,Product_biondivino.com_September2020.json.gz,1,bisson bianchetta genovese u pastine 2016,"Bianchetta Genovese is grown in the area south of Genoa, and often only found as part of a field blend. This wine is an electric storm of citrus, minerality, and salinity. The mouthfeel is surprisingly full for such a zippy wine. The finish is super clean. Perfect for seafood of all kinds!",,"['bisson', 'bianchetta', 'genovese', 'pastine', '2016']"
9325,25304,24681818,99153,https://citys-bg.com/asus-ph-gt1030-o2g-64bit-2gb-gddr5-dvi-d-hdmi-90yv0au0-m0na00,113,Product_citys-bg.com_September2020.json.gz,1,"asus ph-gt1030-o2g 64bit, 2gb gddr5 dvi-d, hdmi","Вземете ASUS PH-GT1030-O2G 64bit, 2GB GDDR5 DVI-D, HDMI - на топцена от CityS",ASUS,"['asus', 'ph-gt1030-o2g', '64bit', '2gb', 'gddr5', 'dvi-d', 'hdmi']"


## Get information about electronics clusters and train the model

In [11]:
# Per cluster_id: number of non-null values in each remaining column.
df_grouped_electronics = df_joined_electronics.groupby('cluster_id').count()
# Cluster size = count of non-null table_id values, exposed as column 'Amount'.
df_set_electronics = (
    df_grouped_electronics['table_id']
    .rename('Amount')
    .reset_index()
)

In [12]:
# Clusters with more than 10 entries.
df_10_electronics = df_set_electronics.loc[df_set_electronics['Amount'].gt(10)]

In [13]:
# Display the electronics clusters with Amount > 10.
df_10_electronics

Unnamed: 0,cluster_id,Amount
2,3668,11
3,6076,14
4,6443,13
5,6505,13
6,6690,17
...,...,...
615,52362093,20
616,53457772,61
617,64651308,11
618,66614988,12


In [10]:
#df_10_electronics.to_csv('electronics10.csv')

In [15]:
# Keep only clusters with at least two entries; a single-entry cluster has
# nothing it could be matched with.
df_set_electronics = df_set_electronics.loc[df_set_electronics['Amount'].gt(1)]
# Clusters with more than 12 entries (the threshold is 12 although the name says 15).
df_15_electronics = df_set_electronics.loc[df_set_electronics['Amount'].gt(12)]
df_15_electronics

Unnamed: 0,cluster_id,Amount
3,6076,14
4,6443,13
5,6505,13
6,6690,17
8,7366,13
...,...,...
611,48051979,13
613,48051984,13
615,52362093,20
616,53457772,61


In [None]:
#merge brand name to cluster amount
# The brand column is called 'brand_y' in the JSON-joined frame (merge suffix) but
# 'brand' in the post-processed CSV, so use whichever one the loaded frame has.
# NOTE(review): confirm that 'brand' in the CSV carries the same information as 'brand_y'.
brand_column = 'brand_y' if 'brand_y' in df_joined_electronics.columns else 'brand'
df_cluster_brand = (
    df_15_electronics[df_15_electronics['Amount'] < 200]
    .merge(
        # Select the two needed columns *before* dropna so that rows are only
        # discarded for a missing brand, not for e.g. a missing description.
        df_joined_electronics[['cluster_id', brand_column]]
        .dropna()
        .drop_duplicates('cluster_id', keep='last'),
        on='cluster_id',
        how='left',
    )
)
df_cluster_brand

In [16]:
# Discard rows without a product name, then lowercase the names that remain.
df_joined_electronics = (
    df_joined_electronics
    .dropna(subset=['name'])
    .assign(name=lambda frame: frame['name'].apply(lambda product_name: product_name.lower()))
)
# Keep the rows whose cluster_id occurs in df_set_electronics and attach that
# cluster's 'Amount' to each of them. Merging on a column gives the result a
# fresh 0..n-1 index.
df_compare_electronics = (
    df_joined_electronics
    .loc[df_joined_electronics['cluster_id'].isin(df_set_electronics['cluster_id'].tolist())]
    .merge(df_set_electronics, on='cluster_id', how='left')
)

In [17]:
# Tokenise every product name, then drop stop words and punctuation tokens.
# stopwords.words() is called without a language argument.
df_compare_electronics['product_tokes'] = remove_punctuation(
    remove_stopwords(
        df_compare_electronics['name'].apply(word_tokenize),
        stopwords.words(),
    )
)
# One TaggedDocument per row; the tag is the row position rendered as a string.
tagged_data = [
    TaggedDocument(words=tokens, tags=[str(position)])
    for position, tokens in enumerate(df_compare_electronics['product_tokes'])
]
# 50-dimensional document vectors, words seen fewer than 5 times are ignored,
# dm=0 selects the distributed bag-of-words training mode.
model = Doc2Vec(vector_size=50, min_count=5, epochs=25, dm=0)
model.build_vocab(tagged_data)
# Train for the number of epochs the model was configured with (25).
model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)

In [26]:
#get cluster ids for baseline products and, for each of them, the index label of its first row (used to query the model)
#1524820,47566,6076,14418,28307,33570,39040,51314,99153,215254,685416, 984421 , 1808651,2887810,34506065,47841827,620473,56116,94055, 150211,182246, 516888, 562955 
top_clusters_list = df_15_electronics['cluster_id'].tolist()
# Single pass over the frame: first row of every cluster -> {cluster_id: index label}.
first_rows = df_compare_electronics.drop_duplicates('cluster_id', keep='first')
first_index_by_cluster = dict(zip(first_rows['cluster_id'], first_rows.index))
# A cluster whose rows were all dropped (e.g. for a missing name) has no entry
# and is skipped instead of raising IndexError. The loop variable is no longer
# called `id`, which used to shadow the builtin for the rest of the notebook.
index_top_clusters_list = [
    first_index_by_cluster[cluster_id]
    for cluster_id in top_clusters_list
    if cluster_id in first_index_by_cluster
]

In [28]:
# For every seed row collect: the seed itself, its 20 nearest Doc2Vec neighbours
# and the 20 rows with the highest Jaccard token overlap. Neighbours are only
# kept when their cluster has more than 10 entries (Amount > 10).
electronics_clusters_search=[]
# Positional views, looked up once instead of via one iloc call per candidate row.
cluster_sizes = df_compare_electronics['Amount'].to_numpy()
token_lists = df_compare_electronics['product_tokes']
for i in index_top_clusters_list:
    similar_doc = model.docvecs.most_similar(f'{i}', topn = 20)
    electronics_clusters_search.append(int(i))
    for tag, _similarity in similar_doc:
        position = int(tag)
        if cluster_sizes[position] > 10:
            electronics_clusters_search.append(position)
    # The seed's tokens do not change while scoring, so fetch them once.
    seed_tokens = token_lists.iloc[int(i)]
    jaccard_score = token_lists.apply(lambda tokens: jaccard_similarity_score(tokens, seed_tokens))
    # Stable ascending argsort: same order (including ties) as sorting the row
    # positions by score; the last 20 are the best matches.
    indizes = np.argsort(jaccard_score.to_numpy(), kind='stable')[-20:]
    for position in indizes:
        if cluster_sizes[position] > 10:
            electronics_clusters_search.append(int(position))
df_electroncis_final = df_compare_electronics.iloc[electronics_clusters_search]

In [29]:
# Keep the first collected row of every cluster and export the result to Excel.
electronics_cluster_representatives = df_electroncis_final.drop_duplicates('cluster_id', keep='first')
electronics_cluster_representatives.to_excel("Final_Electronics_v4.xlsx")

In [84]:
# Inspect cluster 728150: the contributing tables and the product name each one uses.
df_compare_electronics.loc[
    df_compare_electronics['cluster_id'].eq(728150),
    ['cluster_id', 'table_id', 'name'],
]

Unnamed: 0,cluster_id,table_id,name
344,728150,Product_wcslmall.com_September2020.json.gz,asus dual geforce rtx 2070 super oc 8gb gddr6 (dual-rtx2070s-o8g-evo)
1480,728150,Product_pcdubai.com_September2020.json.gz,"asus dual geforce rtx 2070 super oc evo dual 8gb gddr6, hdmi, 3x dp | 90yv0dk0-m0na00"
1525,728150,Product_powertechstore.com_September2020.json.gz,vga pci-e 2070 asus geforce rog strix super 8g dual oc evo 90yv0dk0-m0na00
2423,728150,Product_asus.com_September2020.json.gz,asus dual-rtx2070s-o8g-evo
2470,728150,Product_novatech.co.uk_September2020.json.gz,asus dual geforce rtx 2070 super evo oc 8gb graphics card
3659,728150,Product_barrioscomputacion.com.ar_September2020.json.gz,placa video asus ddr6 8gb dual gefor gtx 2070super
3766,728150,Product_banleong.com_September2020.json.gz,asus dual-rtx2070s-o8g-evo ddr6 super graphics card
3955,728150,Product_gccgamers.com_September2020.json.gz,"asus dual geforce rtx 2070 super oc evo dual 8gb gddr6, hdmi, 3x dp | 90yv0dk0-m0na00"
4223,728150,Product_powertechbcn.com_September2020.json.gz,asus dual -rtx2070s-o8g-evo nvidia geforce rtx 2070 super 8 gb gddr6
4288,728150,Product_scorptec.com.au_September2020.json.gz,"asus dual geforce rtx 2070 super evo oc edition (base:1635mhz,boost:1845mhz), 8gb gddr6 (14000mhz), pci-e 3.0, 3x displayport 1.4, hdmi 2.0b"


## Cluster statistics for product category clothes

In [30]:
# Per cluster_id: number of non-null values in each remaining column.
df_grouped_clothes = df_joined_clothes.groupby('cluster_id').count()

In [31]:
# Cluster size = count of non-null table_id values, exposed as column 'Amount'.
df_set_clothes = (
    df_grouped_clothes['table_id']
    .rename('Amount')
    .reset_index()
)

In [32]:
# Keep only clusters with at least two entries; a single-entry cluster has
# nothing it could be matched with.
df_set_clothes = df_set_clothes.loc[df_set_clothes['Amount'].gt(1)]
df_set_clothes

Unnamed: 0,cluster_id,Amount
0,5310,10
1,6559,3
2,16573,7
3,18496,8
4,33292,8
...,...,...
411,77104640,9
412,77602818,15
413,78110534,12
414,78499693,15


In [33]:
# Clusters with more than 8 entries.
df_8_clothes = df_set_clothes.loc[df_set_clothes['Amount'].gt(8)]
df_8_clothes

Unnamed: 0,cluster_id,Amount
0,5310,10
6,37366,9
7,37525,10
9,42322,9
11,58043,10
...,...,...
411,77104640,9
412,77602818,15
413,78110534,12
414,78499693,15


In [15]:
# Export the clusters with Amount > 8 to 'clothes8.csv' in the working directory;
# the frame's index is written as the first column.
df_8_clothes.to_csv('clothes8.csv')

In [34]:
# Clusters with more than 10 entries.
df_10_clothes = df_set_clothes.loc[df_set_clothes['Amount'].gt(10)]
df_10_clothes

Unnamed: 0,cluster_id,Amount
19,90549,11
27,131823,11
28,133789,11
30,138313,12
33,148199,20
...,...,...
404,68554513,12
412,77602818,15
413,78110534,12
414,78499693,15


In [19]:
#merge brand name to cluster amount
# The brand column is called 'brand_y' in the JSON-joined frame (merge suffix) but
# 'brand' in the post-processed CSV, so use whichever one the loaded frame has.
# NOTE(review): confirm that 'brand' in the CSV carries the same information as 'brand_y'.
brand_column = 'brand_y' if 'brand_y' in df_joined_clothes.columns else 'brand'
df_cluster_brand_clothes = (
    df_10_clothes[df_10_clothes['Amount'] < 400]
    .merge(
        # Select the two needed columns *before* dropna so that rows are only
        # discarded for a missing brand, not for e.g. a missing description.
        df_joined_clothes[['cluster_id', brand_column]]
        .dropna()
        .drop_duplicates('cluster_id', keep='last'),
        on='cluster_id',
        how='left',
    )
)
df_cluster_brand_clothes

Unnamed: 0,cluster_id,Amount,brand_y
0,5310,11,armani
1,37366,14,tag heuer
2,37525,12,coach
3,42322,14,tag heuer
4,58043,12,
...,...,...,...
262,68554513,13,
263,77602818,16,valentino
264,78110534,13,dolce & gabbana
265,78499693,16,dolce & gabbana


In [35]:
# Discard rows without a product name, then lowercase the names that remain.
df_joined_clothes = (
    df_joined_clothes
    .dropna(subset=['name'])
    .assign(name=lambda frame: frame['name'].apply(lambda product_name: product_name.lower()))
)
# Keep the rows whose cluster_id occurs in df_set_clothes and attach that
# cluster's 'Amount' to each of them. Merging on a column gives the result a
# fresh 0..n-1 index.
df_compare_clothes = (
    df_joined_clothes
    .loc[df_joined_clothes['cluster_id'].isin(df_set_clothes['cluster_id'].tolist())]
    .merge(df_set_clothes, on='cluster_id', how='left')
)

In [36]:
# Tokenise every product name, then drop stop words and punctuation tokens.
# stopwords.words() is called without a language argument.
df_compare_clothes['product_tokes'] = remove_punctuation(
    remove_stopwords(
        df_compare_clothes['name'].apply(word_tokenize),
        stopwords.words(),
    )
)
# One TaggedDocument per row; the tag is the row position rendered as a string.
tagged_data = [
    TaggedDocument(words=tokens, tags=[str(position)])
    for position, tokens in enumerate(df_compare_clothes['product_tokes'])
]
# 50-dimensional document vectors, words seen fewer than 5 times are ignored,
# dm=0 selects the distributed bag-of-words training mode.
# Note: this rebinds `model`, replacing the model trained on the electronics names.
model = Doc2Vec(vector_size=50, min_count=5, epochs=25, dm=0)
model.build_vocab(tagged_data)
# Train for the number of epochs the model was configured with (25).
model.train(tagged_data, total_examples=model.corpus_count, epochs=model.epochs)

In [49]:
#get cluster ids and, for each of them, the index label of its first row (used to query the model)
#5310, 58043,104343,142594,174327, 186753,421372,677207,834201, 881202,  895708,939889, 1249086,1290229, 1852022,2459966, 2732926 , 22374915, 22374918, 26097914,44159446, 58592784, 78110534,135583,148199, 200956, 950691, 1592417,2464591
top_clusters_list = df_10_clothes['cluster_id'].tolist()
# Single pass over the frame: first row of every cluster -> {cluster_id: index label}.
first_rows = df_compare_clothes.drop_duplicates('cluster_id', keep='first')
first_index_by_cluster = dict(zip(first_rows['cluster_id'], first_rows.index))
# A cluster whose rows were all dropped (e.g. for a missing name) has no entry
# and is skipped instead of raising IndexError. The loop variable is no longer
# called `id`, which used to shadow the builtin for the rest of the notebook.
index_top_clusters_list = [
    first_index_by_cluster[cluster_id]
    for cluster_id in top_clusters_list
    if cluster_id in first_index_by_cluster
]

In [50]:
# For every seed row collect: the seed itself, its 20 nearest Doc2Vec neighbours
# and the 20 rows with the highest Jaccard token overlap. Neighbours are only
# kept when their cluster has more than 8 entries (Amount > 8).
clothes_clusters_search=[]
# Positional views, looked up once instead of via one iloc call per candidate row.
cluster_sizes = df_compare_clothes['Amount'].to_numpy()
token_lists = df_compare_clothes['product_tokes']
for i in index_top_clusters_list:
    similar_doc = model.docvecs.most_similar(f'{i}', topn = 20)
    clothes_clusters_search.append(int(i))
    for tag, _similarity in similar_doc:
        position = int(tag)
        if cluster_sizes[position] > 8:
            clothes_clusters_search.append(position)
    # The seed's tokens do not change while scoring, so fetch them once.
    seed_tokens = token_lists.iloc[int(i)]
    jaccard_score = token_lists.apply(lambda tokens: jaccard_similarity_score(tokens, seed_tokens))
    # Stable ascending argsort: same order (including ties) as sorting the row
    # positions by score; the last 20 are the best matches.
    indizes = np.argsort(jaccard_score.to_numpy(), kind='stable')[-20:]
    for position in indizes:
        if cluster_sizes[position] > 8:
            clothes_clusters_search.append(int(position))
df_clothes_final = df_compare_clothes.iloc[clothes_clusters_search]

In [51]:
# Keep the first collected row of every cluster and export the result to Excel.
clothes_cluster_representatives = df_clothes_final.drop_duplicates('cluster_id', keep='first')
clothes_cluster_representatives.to_excel("Final_Clothes_v4.xlsx")