In [1]:
import numpy as np

from deepctr_torch.inputs import (DenseFeat, SparseFeat, VarLenSparseFeat,
                                  get_feature_names)

from deepctr_torch.models.din import DIN

In [2]:
feature_columns = [SparseFeat('user',vocabulary_size=3,embedding_dim=10),
                    SparseFeat('gender', vocabulary_size=2,embedding_dim=4), 
                    SparseFeat('item_id', vocabulary_size=3,embedding_dim=8), 
                    SparseFeat('cate_id', vocabulary_size=2,embedding_dim=4),
                    DenseFeat('pay_score', 1)]

# 指定历史行为序列对应的特征
behavior_feature_list = ["item_id", "cate_id"]

In [3]:
# 构造 ['item_id', 'cate_id'] 这两个属性历史序列数据的数据结构: hist_item_id, hist_cate_id
# 由于历史行为是不定长数据序列，需要用 VarLenSparseFeat 封装起来，并指定序列的最大长度为 4 
# 注意,对于长度不足4的部分会用0来填充,因此 vocabulary_size 应该在原来的基础上 + 1
# 详细内容参考：https://deepctr-doc.readthedocs.io/en/latest/Examples.html#multi-value-input-movielens
feature_columns += [
    VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
                     maxlen=4, length_name="seq_length"),
    VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
                     length_name="seq_length")]

In [4]:
feature_columns

[SparseFeat(name='user', vocabulary_size=3, embedding_dim=10, use_hash=False, dtype='int32', embedding_name='user', group_name='default_group'),
 SparseFeat(name='gender', vocabulary_size=2, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='gender', group_name='default_group'),
 SparseFeat(name='item_id', vocabulary_size=3, embedding_dim=8, use_hash=False, dtype='int32', embedding_name='item_id', group_name='default_group'),
 SparseFeat(name='cate_id', vocabulary_size=2, embedding_dim=4, use_hash=False, dtype='int32', embedding_name='cate_id', group_name='default_group'),
 DenseFeat(name='pay_score', dimension=1, dtype='float32'),
 VarLenSparseFeat(sparsefeat=SparseFeat(name='hist_item_id', vocabulary_size=4, embedding_dim=8, use_hash=False, dtype='int32', embedding_name='item_id', group_name='default_group'), maxlen=4, combiner='mean', length_name='seq_length'),
 VarLenSparseFeat(sparsefeat=SparseFeat(name='hist_cate_id', vocabulary_size=3, embedding_dim=4, use_hash=Fals

In [12]:
use_neg = True
# 基础特征数据
uid = np.array([0, 1, 2])
ugender = np.array([0, 1, 0])
iid = np.array([1, 2, 3])
cate_id = np.array([1, 2, 2]) 
score = np.array([0.1, 0.2, 0.3])

In [13]:
# 构造历史行为序列数据
# 构造长度为 4 的 item_id 序列,不足的部分用0填充
hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
# 构造长度为 4 的 cate_id 序列,不足的部分用0填充
hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
# 行为序列长度
behavior_length = np.array([3, 3, 2])

In [14]:
# 构造实际的输入数据
feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
                'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
                'pay_score': score}

In [15]:
# 使用负采样
if use_neg:
    feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
    feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]])
    feature_columns += [
        VarLenSparseFeat(SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
                         maxlen=4, length_name="seq_length"),
        VarLenSparseFeat(SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'),
                         maxlen=4, length_name="seq_length")]

In [16]:
get_feature_names(feature_columns)

['user',
 'gender',
 'item_id',
 'cate_id',
 'pay_score',
 'hist_item_id',
 'seq_length',
 'hist_cate_id',
 'neg_hist_item_id',
 'neg_hist_cate_id']

In [17]:
x = {name:feature_dict[name] for name in get_feature_names(feature_columns)}
y = np.array([1, 0, 1])

KeyError: 'seq_length'

In [18]:
x

NameError: name 'x' is not defined

In [21]:
def get_xy_fd(use_neg=False):
    # 对基础特征进行 embedding
    feature_columns = [SparseFeat('user',vocabulary_size=3,embedding_dim=10),
                        SparseFeat('gender', vocabulary_size=2,embedding_dim=4), 
                        SparseFeat('item_id', vocabulary_size=3,embedding_dim=8), 
                        SparseFeat('cate_id', vocabulary_size=2,embedding_dim=4),
                        DenseFeat('pay_score', 1)]
    
    # 指定历史行为序列对应的特征
    behavior_feature_list = ["item_id", "cate_id"]
    
    # 构造 ['item_id', 'cate_id'] 这两个属性历史序列数据的数据结构: hist_item_id, hist_cate_id
    # 由于历史行为是不定长数据序列，需要用 VarLenSparseFeat 封装起来，并指定序列的最大长度为 4 
    # 注意,对于长度不足4的部分会用0来填充,因此 vocabulary_size 应该在原来的基础上 + 1
    # 详细内容参考：https://deepctr-doc.readthedocs.io/en/latest/Examples.html#multi-value-input-movielens
    feature_columns += [
        VarLenSparseFeat(SparseFeat('hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
                         maxlen=4, length_name=None),
        VarLenSparseFeat(SparseFeat('hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'), maxlen=4,
                         length_name=None)]

    # 基础特征数据
    uid = np.array([0, 1, 2])
    ugender = np.array([0, 1, 0])
    iid = np.array([1, 2, 3])
    cate_id = np.array([1, 2, 2]) 
    score = np.array([0.1, 0.2, 0.3])

    # 构造历史行为序列数据
    # 构造长度为 4 的 item_id 序列,不足的部分用0填充
    hist_iid = np.array([[1, 2, 3, 0], [3, 2, 1, 0], [1, 2, 0, 0]])
    # 构造长度为 4 的 cate_id 序列,不足的部分用0填充
    hist_cate_id = np.array([[1, 2, 2, 0], [2, 2, 1, 0], [1, 2, 0, 0]])
    # 行为序列长度
#     behavior_length = np.array([3, 3, 2])
    
    
    # 构造实际的输入数据
    feature_dict = {'user': uid, 'gender': ugender, 'item_id': iid, 'cate_id': cate_id,
                    'hist_item_id': hist_iid, 'hist_cate_id': hist_cate_id,
                    'pay_score': score}
    
    # 使用负采样
    if use_neg:
        feature_dict['neg_hist_item_id'] = np.array([[1, 2, 3, 0], [1, 2, 3, 0], [1, 2, 0, 0]])
        feature_dict['neg_hist_cate_id'] = np.array([[1, 2, 2, 0], [1, 2, 2, 0], [1, 2, 0, 0]])
        feature_columns += [
            VarLenSparseFeat(SparseFeat('neg_hist_item_id', vocabulary_size=3 + 1, embedding_dim=8, embedding_name='item_id'),
                             maxlen=4, length_name=None),
            VarLenSparseFeat(SparseFeat('neg_hist_cate_id', 2 + 1, embedding_dim=4, embedding_name='cate_id'),
                             maxlen=4, length_name=None)]

    x = {name:feature_dict[name] for name in get_feature_names(feature_columns)}
    y = np.array([1, 0, 1])
    return x, y, feature_columns, behavior_feature_list  

In [22]:
x, y, feature_columns, behavior_feature_list = get_xy_fd()
device = 'cpu'
use_cuda = False

if use_cuda and torch.cuda.is_available():
    print('cuda ready...')
    device = 'cuda:0'

print("\nTraining......")
model = DIN(feature_columns, behavior_feature_list)
model.compile('adagrad', 'binary_crossentropy',
              metrics=['binary_crossentropy', "auc"])

history = model.fit(x, y, batch_size=3, epochs=10, verbose=2, validation_split=0.0)


Training......
cpu
Train on 3 samples, validate on 0 samples, 1 steps per epoch
Epoch 1/10
0s - loss:  0.6984 - binary_crossentropy:  0.6984 - auc:  0.5000
Epoch 2/10
0s - loss:  0.6661 - binary_crossentropy:  0.6661 - auc:  1.0000
Epoch 3/10
0s - loss:  0.6409 - binary_crossentropy:  0.6409 - auc:  1.0000
Epoch 4/10
0s - loss:  0.6049 - binary_crossentropy:  0.6049 - auc:  1.0000
Epoch 5/10
0s - loss:  0.5478 - binary_crossentropy:  0.5478 - auc:  1.0000
Epoch 6/10
0s - loss:  0.4616 - binary_crossentropy:  0.4616 - auc:  1.0000
Epoch 7/10
0s - loss:  0.3638 - binary_crossentropy:  0.3638 - auc:  1.0000
Epoch 8/10
0s - loss:  0.2887 - binary_crossentropy:  0.2887 - auc:  1.0000
Epoch 9/10
0s - loss:  0.2566 - binary_crossentropy:  0.2566 - auc:  1.0000
Epoch 10/10
0s - loss:  0.2420 - binary_crossentropy:  0.2420 - auc:  1.0000
Please check the latest version manually on https://pypi.org/project/deepctr-torch/#history
