In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Layer, Dropout
from tensorflow.keras.regularizers import l2

In [8]:
class Linear(Layer):
    """Wide (linear) component: one learnable scalar weight per feature index."""

    def __init__(self, feature_length, w_reg=1e-6):
        """
        Linear Part
        :param feature_length: A scalar. Number of rows in the weight table,
            i.e. the size of the index space that `call` looks up into.
        :param w_reg: A scalar. L2 regularization coefficient applied to `w`.
        """
        super().__init__()
        self.w_reg = w_reg
        self.feature_length = feature_length

    def build(self, input_shape):
        """Create the weight table `w` of shape (feature_length, 1)."""
        self.w = self.add_weight(
            name="w",
            shape=(self.feature_length, 1),
            regularizer=l2(self.w_reg),
            trainable=True,
        )

    def call(self, inputs, **kwargs):
        """
        Sum the weights selected by the integer indices in `inputs`.
        :param inputs: Index tensor into `w`. For a 2-D input of shape
            (batch_size, num_fields) the result has shape (batch_size, 1).
        :return: The looked-up weights summed over axis 1.
        """
        # Each index selects one row of w, adding a trailing dimension of size 1.
        selected_weights = tf.nn.embedding_lookup(self.w, inputs)
        return tf.reduce_sum(selected_weights, axis=1)
    
class DNN(Layer):
    """Stack of fully connected layers followed by a single dropout layer."""

    def __init__(self, hidden_units, activation='relu', dropout=0.):
        """Deep Neural Network
        :param hidden_units: A list. Number of units of each Dense layer, in order.
        :param activation: A string. Activation function used by every Dense layer.
        :param dropout: A scalar. Rate passed to the Dropout layer.
        """
        super().__init__()
        dense_stack = []
        for n_units in hidden_units:
            dense_stack.append(Dense(units=n_units, activation=activation))
        self.dnn_network = dense_stack
        self.dropout = Dropout(dropout)

    def call(self, inputs, **kwargs):
        """
        Feed `inputs` through every Dense layer in order, then apply dropout.
        :param inputs: Input tensor of the first Dense layer.
        :return: Output of the last Dense layer after dropout.
        """
        hidden = inputs
        for dense_layer in self.dnn_network:
            hidden = dense_layer(hidden)
        # Dropout is applied once, after the last Dense layer (not between layers).
        return self.dropout(hidden)


# class Linear(Layer):
#     def __init__(self, feature_length, w_reg=1e-6):
#         """
#         Linear Part
#         :param feature_length: A scalar. The length of features.
#         :param w_reg: A scalar. The regularization coefficient of parameter w.
#         """
#         super(Linear, self).__init__()
#         self.feature_length = feature_length
#         self.w_reg = w_reg

#     def build(self, input_shape):
#         self.w = self.add_weight(name="w",
#                                  shape=(self.feature_length, 1),
#                                  regularizer=l2(self.w_reg),
#                                  trainable=True)

#     def call(self, inputs, **kwargs):
#         result = tf.reduce_sum(tf.nn.embedding_lookup(self.w, inputs), axis=1)  # (batch_size, 1)
#         return result


# class DNN(Layer):
#     def __init__(self, hidden_units, activation='relu', dropout=0.):
#         """Deep Neural Network
# 		:param hidden_units: A list. Neural network hidden units.
# 		:param activation: A string. Activation function of dnn.
# 		:param dropout: A scalar. Dropout number.
# 		"""
#         super(DNN, self).__init__()
#         self.dnn_network = [Dense(units=unit, activation=activation) for unit in hidden_units]
#         self.dropout = Dropout(dropout)

#     def call(self, inputs, **kwargs):
#         x = inputs
#         for dnn in self.dnn_network:
#             x = dnn(x)
#         x = self.dropout(x)
#         return x

In [9]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Embedding, Dropout, Input
from tensorflow.keras.regularizers import l2

class WideDeep(Model):
    """Wide&Deep model over sparse (categorical) features.

    The wide branch is a `Linear` layer over globally offset feature indices;
    the deep branch embeds every field, concatenates the embeddings and feeds
    them through a `DNN` followed by a single-unit Dense layer.
    """

    def __init__(self,feature_columns,hidden_units,activation='relu',
              dnn_dropout=0.,embed_reg=1e-6,w_reg=1e-6):
        """
        Wide&Deep
        :param feature_columns: A list. sparse column feature information; each
            entry is a mapping with the keys 'feat_num' and 'embed_dim'.
        :param hidden_units: A list. Neural network hidden units.
        :param activation: A string. Activation function of dnn.
        :param dnn_dropout: A scalar. Dropout of dnn.
        :param embed_reg: A scalar. The regularizer of embedding.
        :param w_reg: A scalar. The regularizer of Linear.
        """
        super(WideDeep, self).__init__()
        self.sparse_feature_columns = feature_columns
        # One embedding table per field, keyed 'embed_<field position>'.
        self.embed_layers = {
            'embed_' + str(i): Embedding(input_dim=feat['feat_num'],
                                         input_length=1,
                                         output_dim=feat['embed_dim'],
                                         embeddings_initializer='random_uniform',
                                         embeddings_regularizer=l2(embed_reg))
            for i, feat in enumerate(self.sparse_feature_columns)
        }
        # index_mapping[i] is the cumulative 'feat_num' of all fields before
        # field i; adding it to a field-local index gives a unique row in the
        # shared weight table of the wide branch. feature_length ends up as
        # the total number of rows.
        self.index_mapping = []
        self.feature_length = 0
        for feat in self.sparse_feature_columns:
            self.index_mapping.append(self.feature_length)
            self.feature_length += feat['feat_num']
        self.dnn_network = DNN(hidden_units, activation, dnn_dropout)
        self.linear = Linear(self.feature_length, w_reg=w_reg)
        self.final_dense = Dense(1, activation=None)

    def call(self, inputs, **kwargs):
        """
        Forward pass.
        :param inputs: 2-D integer tensor (batch_size, num_fields) holding one
            field-local index per sparse field.
        :return: Sigmoid of the equally weighted sum of wide and deep outputs.
        """
        # Embed each field's column separately and concatenate along the last
        # axis: (batch_size, sum of embed_dim over fields).
        sparse_embed = tf.concat([self.embed_layers['embed_{}'.format(i)](inputs[:, i])
                                  for i in range(inputs.shape[1])], axis=-1)
        x = sparse_embed

        # Wide: shift field-local indices into the global index space. The
        # offsets are cast to the dtype of `inputs` so that int64 inputs do
        # not fail with a dtype mismatch (the cast is a no-op for int32).
        offsets = tf.cast(tf.convert_to_tensor(self.index_mapping), inputs.dtype)
        wide_inputs = inputs + offsets
        wide_out = self.linear(wide_inputs)

        # Deep
        deep_out = self.dnn_network(x)
        deep_out = self.final_dense(deep_out)

        # Output: average both branches with equal weight, then squash.
        outputs = tf.nn.sigmoid(0.5*wide_out+0.5*deep_out)
        return outputs

    def summary(self, **kwargs):
        """
        Print a layer-by-layer summary of the model.

        A functional model is built around `call` with a symbolic int32 input
        so the summary lists the individual layers and their connections.
        :param kwargs: Forwarded unchanged to `Model.summary`
            (e.g. `line_length`, `print_fn`).
        """
        sparse_inputs = Input(shape=(len(self.sparse_feature_columns),), dtype=tf.int32)
        Model(inputs=sparse_inputs, outputs=self.call(sparse_inputs)).summary(**kwargs)
        

In [10]:
import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import AUC

import sys
sys.path.append('..')
from data_process.criteo import create_criteo_dataset
import datetime
import os


In [12]:
if __name__ == '__main__':
    # NOTE: this cell relies on `feature_columns`, `train`, `test` and the
    # hyper-parameters (`hidden_units`, `dnn_dropout`, `learning_rate`,
    # `batch_size`, `epochs`) created by the configuration / dataset cell,
    # which must have been executed beforehand.
    train_X, train_y = train
    test_X, test_y = test

    # ============================ Build ==============================
    model = WideDeep(feature_columns, hidden_units=hidden_units, dnn_dropout=dnn_dropout)
    model.summary()
    model.compile(loss=binary_crossentropy, optimizer=Adam(learning_rate=learning_rate),
                  metrics=[AUC()])

    # ============================ Train ==============================
    t1 = datetime.datetime.now()
    model.fit(
        train_X,
        train_y,
        epochs=epochs,
        # Stop once val_loss has not improved for 2 epochs and roll back to
        # the best weights seen so far.
        callbacks=[EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)],
        batch_size=batch_size,
        validation_split=0.1
    )
    t2 = datetime.datetime.now()
    # Elapsed training time in whole seconds. total_seconds() covers the full
    # duration, whereas the `.seconds` attribute is only the seconds component
    # of the timedelta and silently drops whole days.
    print("时间为:%d" % (t2 - t1).total_seconds())

    # ============================ Test ===============================
    # evaluate() returns [loss, AUC] for the metrics compiled above.
    print('test AUC: %f' % model.evaluate(test_X, test_y, batch_size=batch_size)[1])
    

Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 39)]         0                                            
__________________________________________________________________________________________________
tf_op_layer_strided_slice_78 (T [(None,)]            0           input_3[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_79 (T [(None,)]            0           input_3[0][0]                    
__________________________________________________________________________________________________
tf_op_layer_strided_slice_80 (T [(None,)]            0           input_3[0][0]                    
____________________________________________________________________________________________

                                                                 embedding_96[0][0]               
                                                                 embedding_97[0][0]               
                                                                 embedding_98[0][0]               
                                                                 embedding_99[0][0]               
                                                                 embedding_100[0][0]              
                                                                 embedding_101[0][0]              
                                                                 embedding_102[0][0]              
                                                                 embedding_103[0][0]              
                                                                 embedding_104[0][0]              
                                                                 embedding_105[0][0]              
          

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 2/10
Epoch 3/10
时间为:321
test AUC: 0.782312


In [6]:


# ========================== Data settings ========================
file = '../data/Criteo/train.txt'  # Criteo training file, relative to the notebook
read_part = True                   # forwarded to create_criteo_dataset (presumably: read only part of the file -- confirm there)
sample_num = 5_000_000             # forwarded to create_criteo_dataset (presumably: number of rows to read -- confirm there)
test_size = 0.2                    # forwarded to create_criteo_dataset (presumably: held-out test fraction -- confirm there)

# ========================== Model settings =======================
embed_dim = 8                      # embedding size handed to the dataset builder
dnn_dropout = 0.5                  # dropout rate of the deep branch
hidden_units = [256, 128, 64]      # units of each Dense layer of the deep branch

# ========================== Training settings ====================
learning_rate = 0.001              # Adam learning rate
batch_size = 4096                  # used for both fit() and evaluate()
epochs = 10                        # upper bound; early stopping may end training sooner

# ========================== Create dataset =======================
feature_columns, train, test = create_criteo_dataset(
    file=file,
    embed_dim=embed_dim,
    read_part=read_part,
    sample_num=sample_num,
    test_size=test_size,
)