In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.layers import  *
from keras.models import  *
from keras import backend as K
from keras import initializers


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Load the two splits from disk; the validation file (val.csv) is bound to `test`.
train = pd.read_csv(filepath_or_buffer='../data/train.csv')
test = pd.read_csv(filepath_or_buffer='../data/val.csv')

In [3]:
# Start from every column label of the training frame.
embedding_list = train.columns.tolist()

In [4]:
# Drop the label column so that only model input features remain.
# The list is rebuilt instead of mutated with .remove(): a second run of this
# cell would otherwise raise ValueError because 'is_trade' is already gone.
embedding_list = [col for col in embedding_list if col != 'is_trade']


In [5]:
# Feature frames: only the columns named in embedding_list, for each split.
train_x, test_x = (frame[embedding_list] for frame in (train, test))

# Targets: the 'is_trade' column of each split.
train_y, test_y = (frame['is_trade'] for frame in (train, test))

In [6]:
# Embedding width hyperparameter.
# NOTE(review): presumably passed as `emb_size` to the DCN constructor defined
# below; no call site is visible in this part of the notebook -- confirm.
emb_size = 20

In [None]:
class CrossLayer(Layer):
    """DCN's cross network, implemented as a single Keras layer.

    Stacks `num_layers` cross steps. With `x_0` the layer input and `x_l`
    the output of step `l`, every step computes

        x_{l+1} = x_0 * <x_l, w_l> + b_l + x_l

    where `<x_l, w_l>` is a per-sample scalar, `w_l` is the step's kernel and
    `b_l` its (optional) bias.

    # Arguments
        num_layers: Positive integer, layer depth of CrossLayer.
        kernel_initializer: Initializer for the `kernel` weights
            (see [initializers](../initializers.md)).
        bias_initializer: Initializer for the bias vector
            (see [initializers](../initializers.md)).
        use_bias: Boolean, whether each cross step adds a bias vector.
            Placed after the initializers so existing positional calls
            keep their meaning.

    # Input shape
        a 3D input with shape `(batch_size, 1, input_dim)`.

    # Output shape
        2D tensor with shape: `(batch_size, input_dim)`.

    # Raises
        ValueError: if `num_layers` is smaller than 1.
    """

    def __init__(self, num_layers,
                 kernel_initializer='glorot_uniform',
                 bias_initializer='zeros',
                 use_bias=True,
                 **kwargs):
        super(CrossLayer, self).__init__(**kwargs)
        if num_layers < 1:
            # With zero steps `call` would have nothing to return.
            raise ValueError('num_layers must be a positive integer, got %r'
                             % (num_layers,))
        self.num_layer = num_layers
        self.use_bias = use_bias
        # Resolve names/configs to initializer objects once, so that they can
        # be serialized again in get_config().
        self.kernel_initializer = initializers.get(kernel_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        """Create one kernel (and, if enabled, one bias) of shape (1, input_dim) per step."""
        assert len(input_shape) == 3
        # The final reshape to (batch, input_dim) is only valid for a middle axis of 1.
        assert input_shape[1] == 1
        assert input_shape[-1]
        self.input_dim = input_shape[-1]
        self.W = []
        self.bias = []
        for i in range(self.num_layer):
            self.W.append(self.add_weight(shape=(1, self.input_dim),
                                          initializer=self.kernel_initializer,
                                          name='kernel_' + str(i),
                                          trainable=True))
            if self.use_bias:
                self.bias.append(self.add_weight(shape=(1, self.input_dim),
                                                 initializer=self.bias_initializer,
                                                 name='bias_' + str(i),
                                                 trainable=True))

        self.built = True

    def call(self, input):
        """Apply all cross steps to `input` and return a `(batch, input_dim)` tensor."""
        x0 = input
        xl = x0
        for i in range(self.num_layer):
            # <x_l, w_l>: contract the current state with the kernel over the
            # feature axis -> (batch, 1, 1). The kernel has to weight x_l
            # *before* the reduction; scaling x_0 elementwise by the kernel and
            # summing x_l unweighted is not the DCN cross term. Reducing first
            # also avoids materialising the (batch, d, d) outer product.
            projection = K.sum(xl * self.W[i], axis=-1, keepdims=True)
            nxt = x0 * projection + xl
            if self.use_bias:
                nxt = nxt + self.bias[i]
            xl = nxt

        # Drop the singleton middle axis: (batch, 1, d) -> (batch, d).
        return K.reshape(xl, (-1, self.input_dim))

    def compute_output_shape(self, input_shape):
        """Map `(batch, 1, input_dim)` to `(batch, input_dim)`."""
        assert input_shape and len(input_shape) == 3
        assert input_shape[-1]
        output_shape = [input_shape[0], input_shape[2]]
        return tuple(output_shape)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created from its config."""
        config = {
            'num_layers': self.num_layer,
            'use_bias': self.use_bias,
            'kernel_initializer': initializers.serialize(self.kernel_initializer),
            'bias_initializer': initializers.serialize(self.bias_initializer),
        }
        base_config = super(CrossLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

## DCN model

In [None]:
class DCN():
    """Embedding-based binary classifier built with the Keras functional API.

    Every feature named in `embedding_list` gets its own integer input, one
    `emb_size`-wide embedding and one 1-wide embedding. The network combines a
    dense branch over the concatenated embeddings with a dense branch over the
    pairwise embedding dot products plus the 1-wide embeddings, and ends in a
    single sigmoid unit.

    NOTE(review): despite the class name, `builtModel` does not contain a
    cross network (the `CrossLayer` class is not used here); the architecture
    is an embedding DNN plus FM-style interactions -- confirm the intent.

    # Arguments
        embedding_list: list of column names used as model inputs.
        emb_size: width of the per-feature embedding.
        train_x, test_x: column-indexable data (e.g. DataFrames) holding the
            features; values are used as embedding indices.
        train_y, test_y: training / evaluation targets. `test_y` is stored
            but not used by any method of this class.
    """

    def __init__(self,embedding_list,emb_size,train_x,train_y,test_x,test_y):
        self.embedding_list = embedding_list
        self.emb_size = emb_size
        self.train_x = train_x
        self.train_y = train_y
        self.test_x  = test_x
        self.test_y = test_y
        # Set by train(); lets predict() detect that no model has been fitted yet.
        self.model = None
        self.mxlen_set = self.get_mxlen_set()

    @staticmethod
    def _merge(tensors):
        """Concatenate `tensors`; a single tensor is returned unchanged.

        `concatenate` needs at least two inputs, which a one-feature
        `embedding_list` cannot provide.
        """
        if len(tensors) == 1:
            return tensors[0]
        return concatenate(tensors)

    def builtModel(self):
        """Build and compile the network; returns the compiled Keras `Model`."""
        emb_list = []
        inp_list = []
        fm_list = []

        ### embedding part and fm part
        for feat in self.embedding_list:
            inp_temp = Input(shape=[1],name=feat)
            # emb_size-wide embedding feeds the DNN and the pairwise products;
            # the 1-wide embedding is a per-value scalar (first-order) term.
            emb_temp = Flatten()(Embedding(self.mxlen_set[feat],self.emb_size)(inp_temp))
            fm_temp = Flatten()(Embedding(self.mxlen_set[feat],1)(inp_temp))
            inp_list.append(inp_temp)
            emb_list.append(emb_temp)
            fm_list.append(fm_temp)

        ## fm product part: one dot product per unordered pair of features
        product_list = [
            dot([emb_list[i],emb_list[j]],axes=1)
            for i in range(len(emb_list))
            for j in range(i+1,len(emb_list))
        ]

        ## dnn part
        dnn_part = Dense(512,activation='relu')(self._merge(emb_list))

        ## fm_part
        fm_part = Dense(512,activation='relu')(self._merge(product_list+fm_list))

        inp = Dense(64,activation='relu')(concatenate([dnn_part,fm_part],axis=1))

        outp = Dense(1,activation='sigmoid')(inp)

        model = Model(inputs=inp_list,outputs=outp)

        model.compile(loss="binary_crossentropy", optimizer="adam",metrics=['accuracy'])

        return model

    def get_mxlen_set(self):
        """Return `{feature: vocabulary size}` for every feature.

        The size is the largest value seen in either split plus one, so every
        observed index is valid for the embedding. Values are plain `int`s
        rather than numpy scalars.
        """
        return {
            ebd: int(np.max([self.train_x[ebd].max(),self.test_x[ebd].max()]))+1
            for ebd in self.embedding_list
        }

    def get_kears_data(self,data):
        """Convert `data` into the `{input name: array}` dict the model is fed with."""
        return {ebd: np.array(data[ebd]) for ebd in self.embedding_list}

    # Correctly spelled alias; the original name is kept for existing callers.
    get_keras_data = get_kears_data

    def train(self,batch_size,epochs):
        """Build a fresh model and fit it on the training split."""
        self.model = self.builtModel()
        X_train = self.get_kears_data(self.train_x)
        # Hand Keras a plain array, consistent with how the inputs are converted.
        y_train = np.asarray(self.train_y)
        # NOTE(review): Keras documents verbose as 0, 1 or 2; 10 is kept as found.
        self.model.fit(X_train,y_train,batch_size=batch_size,epochs=epochs,verbose=10)

    def predict(self,batch_size):
        """Return the predicted probabilities for the test split as a 1-D array.

        # Raises
            RuntimeError: if called before `train`.
        """
        if self.model is None:
            raise RuntimeError("DCN.predict() called before DCN.train(): no fitted model available")
        X_val = self.get_kears_data(self.test_x)
        pred = self.model.predict(X_val,batch_size=batch_size)[:,0]
        return pred
    