In [1]:
import numpy as np
from urllib.request import urlopen
import pandas as pd
from deepctr.models import DIN,DeepFM
from deepctr.inputs import SparseFeat,VarLenSparseFeat,DenseFeat,get_feature_names
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

In [2]:
# Fetch the recommendation samples as CSV and load them into a DataFrame.
# urlopen() only accepts absolute URLs, so the scheme has to be spelled out:
# a scheme-less "www..." string raises ValueError("unknown url type").
url = "https://www.baidu.com/recommendation?count={}"
url = url.format(1000)
# Use the response as a context manager so the connection is closed as soon
# as pandas has finished reading from it.
with urlopen(url) as data_source:
    # keep_date_col only has an effect together with parse_dates (and is
    # deprecated in recent pandas), so it is not passed here.
    data = pd.read_csv(data_source)

In [3]:
# Peek at the raw (not yet encoded) age column; .head() keeps the output
# short regardless of how many rows were fetched.
data['age'].head()

0    3.0
1    4.0
2    NaN
3    4.0
4    5.0
Name: age, dtype: float64

In [4]:
# Per-feature inputs for model.fit, keyed by feature name; filled in by the
# feature-preparation cells.
model_input = dict()

In [5]:
# Single-valued categorical features: name -> [vocabulary_size, embedding_dim].
# A vocabulary_size of -1 means "infer it from the data".
fixed_sparse_dict = {
    'gender': [2, 20],
    'age': [5+1, 20],
    'born_place': [34+1, 20],
    'recipe': [-1, 20]
}
for feat, value in fixed_sparse_dict.items():
    # Missing values become their own category (-1) before encoding.
    data[feat] = data[feat].fillna(-1)
    lbe = LabelEncoder()
    # LabelEncoder maps the distinct values to contiguous ids 0..n_classes-1.
    data[feat] = lbe.fit_transform(data[feat])
    # The embedding table has to cover every encoded id. Grow the configured
    # size when it is the -1 placeholder or simply too small (e.g. a column
    # whose NaNs added an extra class); larger configured sizes are kept.
    n_classes = data[feat].nunique()
    if value[0] < n_classes:
        fixed_sparse_dict[feat][0] = n_classes

In [6]:
# One SparseFeat per single-valued categorical feature, using the
# [vocabulary_size, embedding_dim] pair stored in fixed_sparse_dict.
fixed_sparse_feature_columns = [
    SparseFeat(name, vocabulary_size=spec[0], embedding_dim=spec[1])
    for name, spec in fixed_sparse_dict.items()
]
# Register the encoded columns as model inputs under the same names.
model_input.update({name: data[name] for name in fixed_sparse_dict})

In [8]:
# Inspect the frame after the categorical columns were label-encoded;
# .head() avoids dumping the whole DataFrame into the notebook.
data.head()

Unnamed: 0,gender,age,born_place,preference,hist_recipe,recipe,recipe_taste,recipe_style,ingredient,seasoning
0,0,1,0,1|4|5,201|302|1|204,0,2|4,3|6|8,1|2|6,2|5|10
1,1,2,0,1|7|8,100|300|3|206,1,,3|6|10,1|2|5,2|5|10
2,0,0,0,2|3|6,202|303|2|205,2,2|4,3|6|8,2|2|0,2|5|10
3,0,2,0,7|8|13,199|198|197|196,3,2|4,,1|2|5,2|5|10
4,1,3,0,12|1|5,192|168|1|1,4,2|4,3|6|8,7|3|5,2|5|10


In [None]:
# Continuous features to be scaled into [0, 1], keyed by column name
# (the dict values are not used). Currently empty.
dense_dict = {
}
dense_feature_columns = []

for feat in dense_dict:
    # fillna returns a new Series, so the result must be assigned back;
    # otherwise the NaNs would still be present when scaling.
    data[feat] = data[feat].fillna(0)
    mms = MinMaxScaler(feature_range=(0, 1))
    # MinMaxScaler expects 2-D input (n_samples, n_features): pass the column
    # as a one-column frame and flatten the scaled result back to 1-D.
    data[feat] = mms.fit_transform(data[[feat]]).ravel()
    dense_feature_columns.append(DenseFeat(feat))
    model_input[feat] = data[feat]


In [7]:
# Multi-valued (variable-length) features:
#   name -> [maxlen, vocabulary_size, embedding_dim, embedding_name]
# maxlen and vocabulary_size are -1 placeholders that the encoding loop
# replaces with values derived from the data.
# NOTE(review): 'hist_recipe' reuses the embedding named 'recipe', but its ids
# come from the per-feature key2index vocabulary while the 'recipe' column is
# encoded with LabelEncoder -- confirm the two id spaces are meant to line up.
var_sparse_dict ={
    'preference': [-1,-1,20,'preference'],
    'hist_recipe' : [-1,-1,20,'recipe'],
    'recipe_taste': [-1,-1,20,'recipe_taste'],
    'recipe_style': [-1,-1,20,'recipe_recipe_style'],
    'ingredient': [-1,-1,20,'recipe_ingredient'],
    'seasoning': [-1,-1,20,'recipe_recipe_seasoning']
}
# Collects one VarLenSparseFeat per entry above.
var_sparse_feature_columns = []

In [8]:
# For multi-value features, '-1' denotes a missing value (NA) and 0 denotes padding.
# Valid feature indices therefore start from 1.

In [9]:
def split(x):
    """Encode a '|'-separated string as a list of integer ids.

    Tokens are looked up in the module-level ``key2index`` dict; a token that
    has not been seen yet is assigned the next free id. Ids start at 1 because
    0 is reserved as the padding value for sequence inputs.

    Args:
        x: String of tokens separated by '|'.

    Returns:
        List with one integer id per token, in the original token order.
    """
    tokens = x.split('|')
    for token in tokens:
        # setdefault only stores the new id when the token is still unknown.
        key2index.setdefault(token, len(key2index) + 1)
    return [key2index[token] for token in tokens]

In [10]:
# Start from an empty list so that re-running this cell does not append a
# second copy of every feature column.
var_sparse_feature_columns = []
for feat, value in var_sparse_dict.items():
    # Fresh vocabulary per feature; split() reads and extends this global.
    key2index = {}
    # Missing cells become the literal token '-1', which split() encodes like
    # any other token.
    data[feat] = data[feat].fillna('-1')
    l_values = [split(cell) for cell in data[feat].values]
    longest = max(map(len, l_values))
    # -1 means "infer from the data"; an explicitly configured maxlen is kept
    # and actually used for padding and for the feature column below.
    if value[0] == -1:
        var_sparse_dict[feat][0] = longest
    # Ids run from 1 to len(key2index) and 0 is padding, so the embedding
    # table needs len(key2index) + 1 rows. Grow the configured size when it is
    # the -1 placeholder or too small to hold every id.
    required_vocab = len(key2index) + 1
    if value[1] < required_vocab:
        var_sparse_dict[feat][1] = required_vocab
    max_len = var_sparse_dict[feat][0]
    # Pads with trailing zeros; sequences longer than a configured maxlen are
    # truncated by pad_sequences.
    l_values = pad_sequences(l_values, maxlen=max_len, padding='post')
    var_sparse_feature_columns.append(
        VarLenSparseFeat(feat, maxlen=max_len,
                         vocabulary_size=var_sparse_dict[feat][1],
                         embedding_dim=var_sparse_dict[feat][2],
                         embedding_name=var_sparse_dict[feat][3]))
    model_input[feat] = l_values


In [11]:
# Display every prepared input: encoded Series for the single-valued features
# and zero-padded integer arrays for the multi-valued ones.
model_input

{'gender': 0    0
 1    1
 2    0
 3    0
 4    1
 Name: gender, dtype: int64, 'age': 0    1
 1    2
 2    0
 3    2
 4    3
 Name: age, dtype: int64, 'born_place': 0    0
 1    0
 2    0
 3    0
 4    0
 Name: born_place, dtype: int64, 'recipe': 0    0
 1    1
 2    2
 3    3
 4    4
 Name: recipe, dtype: int64, 'preference': array([[ 1,  2,  3],
        [ 1,  4,  5],
        [ 6,  7,  8],
        [ 4,  5,  9],
        [10,  1,  3]]), 'hist_recipe': array([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [13, 14, 15, 16],
        [17, 18,  3,  3]]), 'recipe_taste': array([[1, 2],
        [3, 0],
        [1, 2],
        [1, 2],
        [1, 2]]), 'recipe_style': array([[1, 2, 3],
        [1, 2, 4],
        [1, 2, 3],
        [5, 0, 0],
        [1, 2, 3]]), 'ingredient': array([[1, 2, 3],
        [1, 2, 4],
        [2, 2, 5],
        [1, 2, 4],
        [6, 7, 4]]), 'seasoning': array([[1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
        [1, 2, 3],
       

In [12]:
# Features whose behaviour history is passed to DIN as its second argument.
# NOTE(review): DeepCTR's DIN is expected to pair each name here with a
# 'hist_<name>' sequence feature (here: 'hist_recipe') -- confirm against the
# installed DeepCTR version.
behavior_feature_list = ['recipe']

In [13]:
# All feature columns handed to the model. The dense columns are included so
# that any feature added to dense_dict (and therefore to model_input) is also
# declared to the model; with the current empty dense_dict this is a no-op.
feature_columns = (
    fixed_sparse_feature_columns
    + var_sparse_feature_columns
    + dense_feature_columns
)

In [14]:
# Build the DIN network from the feature columns and the behaviour features.
model = DIN(feature_columns, behavior_feature_list)
# Adam optimiser with binary cross-entropy used both as the loss and as the
# reported metric.
compile_options = dict(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_crossentropy'],
)
model.compile(**compile_options)

In [15]:
# Placeholder target: every sample is labelled positive (1).
# The label vector is sized from the data instead of being hard-coded to five
# entries, so it always matches the number of input rows, and it is passed as
# a NumPy array rather than a plain Python list.
labels = np.ones(len(data), dtype='float32')
history = model.fit(model_input, labels, verbose=1, epochs=10, validation_split=0)

Train on 5 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [16]:
# Show the resolved feature-column definitions (vocabulary sizes, maxlen,
# embedding names) that were passed to the model.
feature_columns

[SparseFeat(name='gender', vocabulary_size=2, embedding_dim=20, use_hash=False, dtype='int32', embedding_name='gender', group_name='default_group'),
 SparseFeat(name='age', vocabulary_size=6, embedding_dim=20, use_hash=False, dtype='int32', embedding_name='age', group_name='default_group'),
 SparseFeat(name='born_place', vocabulary_size=35, embedding_dim=20, use_hash=False, dtype='int32', embedding_name='born_place', group_name='default_group'),
 SparseFeat(name='recipe', vocabulary_size=5, embedding_dim=20, use_hash=False, dtype='int32', embedding_name='recipe', group_name='default_group'),
 VarLenSparseFeat(name='preference', maxlen=3, vocabulary_size=10, embedding_dim=20, combiner='mean', use_hash=False, dtype='float32', length_name=None, weight_name=None, embedding_name='preference', group_name='default_group'),
 VarLenSparseFeat(name='hist_recipe', maxlen=4, vocabulary_size=18, embedding_dim=20, combiner='mean', use_hash=False, dtype='float32', length_name=None, weight_name=None, 