Creates user embeddings for all users who have made at least 1 purchase in the training data.

In [1]:
import pandas as pd, os, numpy as np
import plotly.express as px
pd.options.display.max_columns = 50
import swifter, datetime, pickle as pkl
import tensorflow_hub as hub
from tqdm.notebook import tqdm

In [2]:
# Training purchases; later cells read the cust_id, date and article_id columns.
df = pd.read_parquet(path='./data/train.parquet')

In [3]:
# emb_map is indexed by article_id further down to fetch each article's embedding.
# NOTE(review): pickle.load can execute arbitrary code - only load files you trust.
with open('./data/emb_map.pkl', mode='rb') as handle:
    emb_map = pkl.load(handle)

In [4]:
# Order rows per customer by purchase date (both keys ascending, the default).
# getCustEmb folds purchases in row order, so this decides which purchases
# end up weighted most heavily (the latest ones).
df = df.sort_values(by=['cust_id', 'date'])

In [5]:
# Per-customer view of the purchased articles; rows inside each group keep
# the order they have in df.
cgroups = df.loc[:, ['article_id', 'cust_id']].groupby(by='cust_id')
# Distinct customer ids, in order of first appearance in df.
cids = df['cust_id'].unique()

In [6]:
# cust_id -> customer embedding; filled by the loop below and pickled at the end.
all_cemb_map = dict()

In [7]:
def moveVector(sourceVector, destinationVector, direction, magnitude = None):
    """Shift `sourceVector` along the line joining it to `destinationVector`.

    Parameters
    ----------
    sourceVector, destinationVector
        Operands supporting ``+``, ``-`` and scalar ``*`` (e.g. numpy arrays
        or plain numbers).
    direction
        Multiplier applied to the step; a positive value moves towards the
        destination, a negative value moves away from it.
    magnitude
        Fraction of the source-to-destination offset to travel. ``None``
        (the default) means 0.5; an explicit 0 is honoured and yields no move.

    Returns
    -------
    ``sourceVector + direction * magnitude * (destinationVector - sourceVector)``
    """
    if magnitude is None:
        magnitude = 0.5
    step = direction * magnitude
    offset = destinationVector - sourceVector
    return sourceVector + step * offset

In [8]:
def getCustEmb(purchases, magnitude=0.5):
    """Fold a customer's purchase embeddings into a single unit-length vector.

    The first purchase seeds the state; every following purchase pulls the
    state towards itself by ``magnitude`` of the remaining distance (the same
    interpolation ``moveVector`` performs with ``direction=1``). The order of
    ``purchases`` therefore matters: with the default 0.5 the last purchase
    contributes half of the un-normalised state and earlier ones decay
    geometrically.

    Parameters
    ----------
    purchases
        Iterable of equal-length numeric vectors (e.g. a 2-D numpy array with
        one row per purchase), oldest first.
    magnitude
        Fraction of the distance to move towards each new purchase.
        Defaults to 0.5, the value that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        The final state scaled to L2 norm 1. If the state has zero norm it is
        returned unscaled (all zeros) instead of a NaN vector.

    Raises
    ------
    ValueError
        If ``purchases`` yields no vectors.
    """
    statevector = None
    for v in purchases:
        v = np.asarray(v)
        if statevector is None:
            # First purchase: nothing to interpolate with yet.
            statevector = v
        else:
            statevector = statevector + magnitude * (v - statevector)

    if statevector is None:
        # Previously this fell through to `None / norm` and raised an
        # unrelated-looking TypeError.
        raise ValueError("getCustEmb needs at least one purchase embedding")

    norm = np.linalg.norm(statevector)
    if norm == 0:
        # Dividing would produce NaNs; hand back a fresh zero vector instead.
        return statevector.copy()
    return statevector / norm

In [9]:
# Build one embedding per customer and record it in all_cemb_map.
# The groupby is walked once instead of calling cgroups.get_group(cid) for
# every customer, which repeats a keyed lookup + take per id. Groups come out
# in sorted cust_id order, which is the order of `cids` because df was sorted
# by cust_id above; rows inside a group keep their date order.
all_embs = []  # embeddings in the same order as the customers are visited
for cid, curr_articles in tqdm(cgroups['article_id'], total=cgroups.ngroups):
    keys = curr_articles.values
    # Stack the article embeddings of this customer, oldest purchase first.
    embs = np.array([emb_map[x] for x in keys])
    cemb = getCustEmb(embs)
    all_embs.append(cemb)
    all_cemb_map[cid] = cemb

  0%|          | 0/1362281 [00:00<?, ?it/s]

In [10]:
# Persist the cust_id -> embedding mapping. Pickle protocol 3 is requested
# explicitly rather than relying on the interpreter's default protocol.
with open('./data/all_cemb_map.pkl', mode='wb') as handle:
    pkl.dump(all_cemb_map, handle, protocol=3)