### Create target embeddings for customers who bought articles

In [1]:
import pandas as pd, os, numpy as np
import plotly.express as px
pd.options.display.max_columns = 50
import swifter, datetime, pickle as pkl
import tensorflow_hub as hub
from tqdm.notebook import tqdm

In [2]:
# Load the pre-computed embedding lookup; later cells index it by article id
# (emb_map[article_id] -> embedding vector).
# NOTE: pickle.load can execute arbitrary code - only load files from a trusted source.
with open('./data/emb_map.pkl', 'rb') as handle:
    emb_map = pkl.load(handle)

In [3]:
# Article metadata table; later cells filter it by its 'article_id' column.
adf = pd.read_parquet('./data/articles.parquet')

In [4]:
# Training data.
# NOTE(review): tdf is not referenced by any other cell visible here - confirm it is still needed.
tdf = pd.read_parquet('./data/train.parquet')

In [5]:
# One row per customer: 'cust_id' plus an 'article_id' column holding that
# customer's article ids (presumably in purchase order, going by the file name).
bdf = pd.read_parquet('./data/bought_articles_in_order.parquet')

In [6]:
# cust_id -> that customer's article ids. Selecting the column first avoids
# converting every column of the frame just to keep one of them.
bdf_dict = bdf.set_index('cust_id')['article_id'].to_dict()

In [7]:
# Peek at the first rows to check the layout of the purchase table.
bdf.head()

Unnamed: 0,cust_id,article_id
0,0,[797065001]
1,13,"[693242018, 661794006, 763037004, 640176008, 6..."
2,21,"[513512003, 535035001, 677930066]"
3,22,"[805947002, 705966002, 803290002, 797710001, 7..."
4,29,"[730683003, 787558001]"


In [8]:
# Distinct customer ids; one target embedding is built for each of them.
cids = bdf['cust_id'].unique()

In [9]:
# cust_id -> target embedding; filled by the loop in the following cell.
ctarget_map = {}

In [10]:
# Build one target embedding per customer: the mean of the embeddings of that
# customer's first MAX_ARTICLES articles, scaled to unit length so that a dot
# product against other unit vectors behaves as a cosine similarity.
MAX_ARTICLES = 12  # cap on how many articles per customer feed the average
for cid in tqdm(cids, total=len(cids)):
    keys = bdf_dict[cid][:MAX_ARTICLES]
    if len(keys) == 0:
        # No articles -> nothing to average; skip rather than store a NaN.
        continue
    # An article id missing from emb_map raises KeyError here, as before.
    cemb = np.array([emb_map[x] for x in keys]).mean(axis=0)
    norm = np.linalg.norm(cemb)
    if norm > 0:
        # Only normalise when the norm is non-zero; dividing by zero would
        # fill the vector with NaN/inf and poison every later similarity score.
        cemb = cemb / norm
    ctarget_map[cid] = cemb

  0%|          | 0/97992 [00:00<?, ?it/s]

In [11]:
# Spot-check: the article ids recorded for customer 13.
bdf_dict[13]

array([693242018, 661794006, 763037004, 640176008, 688326010], dtype=int64)

In [12]:
# Customer 13's article ids, copied by hand from the bdf_dict[13] output.
f = [693242018, 661794006, 763037004, 640176008, 688326010]

In [13]:
# Metadata rows for the articles listed in f (an independent copy of that slice of adf).
af = adf[adf['article_id'].isin(f)].copy()

In [14]:
# Split the embedding map into two parallel lists: akeys[i] is the article id
# whose embedding is aembs[i] (keys() and values() iterate in the same order).
akeys = list(emb_map.keys())
aembs = list(emb_map.values())

In [15]:
# Stack the embeddings into a single array.
# Assumes every embedding has the same length, giving one row per article - TODO confirm.
aembs = np.array(aembs)

In [16]:
# As an array so it can be indexed with an array of positions (akeys[temp] further down).
akeys = np.array(akeys)

In [17]:
def calculateSimilarity(queryVector, embeddingsT=None):
    """Score a query vector against every column of an embedding matrix.

    The result is a plain dot product, which equals the cosine similarity
    only when both the query and the embeddings are unit length; nothing is
    normalised here.

    Parameters
    ----------
    queryVector : numpy.ndarray
        Query embedding; must expose ``.dot`` and be shape-compatible with
        ``embeddingsT``.
    embeddingsT : numpy.ndarray, optional
        Transposed embedding matrix (one column per item). When omitted, the
        notebook-level global ``aembsT`` is used, so existing one-argument
        calls keep working.

    Returns
    -------
    numpy.ndarray
        The dot products, flattened to one dimension.
    """
    if embeddingsT is None:
        # Resolved at call time, so aembsT may be defined in a later cell
        # as long as that cell has run before the first call.
        embeddingsT = aembsT
    scores = queryVector.dot(embeddingsT).flatten()
    return scores

In [18]:
# Transposed embedding matrix; calculateSimilarity reads this name as a global.
aembsT = aembs.T

In [19]:
def rankPostsBySimilarity(posts_ids, scores, top_k=12):
    """Pair ids with their scores and return the best ``top_k``, highest first.

    Parameters
    ----------
    posts_ids : iterable
        Item ids, aligned position by position with ``scores``.
    scores : iterable
        Similarity score for each id.
    top_k : int or None, optional
        Maximum number of pairs to return (default 12, the previously
        hard-coded value). ``None`` returns every pair.

    Returns
    -------
    list of tuple
        ``(post_id, score)`` pairs sorted by descending score. Ties keep
        their input order because Python's sort is stable, also with
        ``reverse=True``.

    Raises
    ------
    ValueError
        If ``top_k`` is negative (a negative slice bound would silently drop
        items from the end instead of limiting the result).
    """
    if top_k is not None and top_k < 0:
        raise ValueError("top_k must be non-negative or None")
    # zip already yields (post_id, score) tuples, so no intermediate list is needed.
    ranked = sorted(zip(posts_ids, scores), key=lambda pair: pair[1], reverse=True)
    return ranked[:top_k]

In [20]:
# Target embedding of customer 13, used as the query vector below.
p = ctarget_map[13]

In [21]:
# Score every article embedding against the query vector p.
sims = calculateSimilarity(p)
# argpartition moves the indices of the 12 highest scores into the last 12
# slots (in no particular order) without fully sorting sims.
temp = np.argpartition(sims, -12)[-12:]
# Sort just those 12 candidates by score, best first.
r = rankPostsBySimilarity(akeys[temp], sims[temp])

In [22]:
# Show the top-ranked articles as (article_id, score) pairs, best first.
r

[(607884001, 0.9143403),
 (549477005, 0.91267955),
 (836142001, 0.9107467),
 (578997004, 0.9105623),
 (832817003, 0.90895796),
 (903994001, 0.9084431),
 (933662001, 0.9078496),
 (561218003, 0.9066959),
 (693242018, 0.90669584),
 (666319001, 0.9056667),
 (870531001, 0.9053226),
 (640176008, 0.90502495)]

In [None]:
# Look up the metadata row for a single article id.
# NOTE(review): this cell has no execution count and 515262001 does not appear
# in the ranking shown above - possibly a leftover from an earlier experiment.
adf[adf['article_id'] == 515262001]

In [23]:
# Metadata of the articles customer 13 bought, to compare against the ranking.
af

Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_name,colour_group_name,perceived_colour_value_name,perceived_colour_master_name,department_no,department_name,index_name,index_group_name,section_no,section_name,garment_group_name,detail_desc
33970,640176008,640176,Isa Cardigan,245,Cardigan,Garment Upper body,Solid,Black,Dark,Black,1647,Tops Knitwear,Divided,Divided,53,Divided Collection,Knitwear,Long cardigan in a soft knit with dropped shou...
39544,661794006,661794,Flamingo,263,Coat,Garment Upper body,Solid,Black,Dark,Black,1201,Outwear,Ladieswear,Ladieswear,19,Womens Jackets,Outdoor,Short coat in a woven wool blend with a high s...
47223,688326010,688326,Kurt Cord Pile,262,Jacket,Garment Upper body,Solid,Black,Dark,Black,5252,Jacket Casual,Menswear,Menswear,31,Mens Outerwear,Outdoor,Jacket in corduroy with a faux shearling-lined...
48786,693242018,693242,Bama(1),252,Sweater,Garment Upper body,Solid,Black,Dark,Black,1626,Knitwear,Ladieswear,Ladieswear,15,Womens Everyday Collection,Knitwear,"Wide jumper in a soft, fine knit containing so..."
71169,763037004,763037,River,258,Blouse,Garment Upper body,Solid,Light Pink,Dusty Light,Pink,1515,Blouse,Ladieswear,Ladieswear,11,Womens Tailoring,Blouses,Blouse in an airy crêpe weave with a small sta...


In [None]:
# Persist the cust_id -> target-embedding map, pinned to pickle protocol 3.
# NOTE(review): this cell has no execution count in the export - confirm the
# file on disk matches the map built above.
with open('./data/ctarget_map.pkl', 'wb') as handle:
    pkl.dump(ctarget_map, handle, protocol=3)