In [1]:
# Load the autoreload extension; later cells call the bare `%autoreload` magic it provides.
%load_ext autoreload

## Product Side Testing

In [2]:
from src.Products import Products, prepare_prod_batch
from src.Reviews import Reviews
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from transformers import DistilBertTokenizer, DistilBertModel
import torch
import torch.nn as nn

  from ._conv import register_converters as _register_converters


Sample Batch:

In [3]:
%time
#domain = 'AMAZON_FASHION'
domain = 'Gift_Cards'
p = Products(domain)
r = Reviews(domain)
r.train_test_split(0.2)
batch_size = 2

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 6.91 µs


In [4]:
# Rerun this cell to resample.
train_idx_batch = r.get_batch_bikey(batch_size, from_train=True)
# Element 1 of every sampled key is used as the product index. The loop
# variable is not named `p`, so it cannot be mistaken for the Products object
# `p` that is called on the next line.
train_prod_idx_batch = [bikey[1] for bikey in train_idx_batch]
train_prod_batch = list(map(p.get_product, train_prod_idx_batch))

Import ProductTower:

In [5]:
%autoreload
# Imported here, after the reload above, rather than in the top import cell.
from src.Layers import ProductTower

Example of a pretrained embedding layer (compatible with models provided by HuggingFace):

In [6]:
%autoreload
embed_tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')
embed_model = DistilBertModel.from_pretrained('distilbert-base-cased')
embed_dim = 768
seq_len = 100 #this is model independent, up to user how to truncate/pad sequences

In [7]:
%autoreload
def embed_model_wrapped(text):
    """Call the global ``embed_model`` on ``text`` and keep only its first output.

    Args:
        text: Input forwarded unchanged to ``embed_model``.

    Returns:
        Element ``0`` of whatever ``embed_model(text)`` returns.
    """
    model_outputs = embed_model(text)
    return model_outputs[0]

In [8]:
%autoreload
# Turn the sampled products into the two inputs (`text`, `bop`) that are fed
# to ProductTower and RecomModel below, using the pretrained tokenizer.
text, bop = prepare_prod_batch(train_prod_batch, embed_tokenizer)

In [9]:
# Shape of `text` as returned by prepare_prod_batch.
text.shape

torch.Size([2, 100])

RNN and FM params:

In [10]:
# Both values are passed positionally to ProductTower below; rnn_hidden_dim is
# also passed (twice) to RecomModel in the DSSM section.
rnn_hidden_dim = 20
rnn_num_layers = 1

In [11]:
# fm_n is only used to build `fm_field_dims=[2]*fm_n` (a list of fm_n twos);
# fm_embed_dim is forwarded as the keyword argument of the same name.
fm_n = 2000 #nfeatures
fm_embed_dim = 16

Initialize product tower and forward pass:

In [12]:
%autoreload
producttower = ProductTower(embed_model_wrapped, embed_dim, rnn_hidden_dim, rnn_num_layers, rnn_type='GRU', 
                            fm_type='fm', fm_field_dims=[2]*fm_n, fm_embed_dim=fm_embed_dim)

In [13]:
%autoreload
# Forward pass; the call returns two values, unpacked here as rnn_out and fm_out.
rnn_out, fm_out = producttower(text, bop)

In [14]:
# Shape of the first value returned by producttower.
rnn_out.shape

torch.Size([2, 100, 20])

In [15]:
# Shape of the second value returned by producttower.
fm_out.shape

torch.Size([2])

## DSSM Testing

In [16]:
# Hyper-parameters passed positionally to RecomModel in the next cell.
# `seq_len` is deliberately not re-assigned here: the former `seq_len = seq_len`
# was a no-op, and the value set next to the tokenizer/encoder is reused as is.
# NOTE(review): names suggest attention settings (heads, key/value widths) —
# confirm their meaning against src.model.RecomModel.
n_head = 2
n_sen = 4
d_k = d_v = 64
n_rnn = 1

In [17]:
%autoreload
from src.model import RecomModel
rm = RecomModel(rnn_hidden_dim, rnn_hidden_dim, 
                n_head, seq_len, n_sen,
                d_k, d_v,
                embed_model_wrapped,
                embed_dim,
                n_rnn,
                fm_field_dims=[2]*fm_n, 
                fm_embed_dim=fm_embed_dim,
                rnn_type='GRU',
                fm_type='fm',
                dropout=0.1,)

In [18]:
# Forward pass of RecomModel on the same (text, bop) batch fed to ProductTower above.
dssm_out = rm(text, bop)

In [19]:
# Shape of the RecomModel output.
dssm_out.shape

torch.Size([2, 20])

In [20]:
# Display the RecomModel output values.
dssm_out

tensor([[ 1.1695,  0.3691,  0.5449, -0.0016,  0.3618,  0.0930,  0.5924,  0.0135,
          0.0350,  0.2788,  0.0644,  0.5338,  0.0500,  0.5039,  0.4434, -0.2754,
          0.5863,  0.0995,  0.6417, -0.0808],
        [ 1.0850,  0.4091,  0.2870,  0.0464,  0.1272,  0.0292,  0.1008, -0.0152,
          0.0320,  0.0865,  0.4006,  0.2102,  0.2628,  1.0070,  0.5608, -0.0365,
          0.0985,  0.1187,  0.3364, -0.1904]], grad_fn=<MulBackward0>)