In [167]:
# https://www.jianshu.com/p/77719fc252fa
# https://blog.csdn.net/roguesir/article/details/79777635
# https://arxiv.org/abs/1708.05123 -Deep & Cross Network for Ad Click Predictions

In [3]:
import numpy as np
import pandas as pd

In [8]:
import tensorflow as tf
from time import time
from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.metrics import roc_auc_score

In [79]:
from sklearn.model_selection import StratifiedKFold

In [9]:
# config.py
TRAIN_FILE = r"F:\Data\recsys-data\dcn\train.csv"
TEST_FILE = r"F:\Data\recsys-data\dcn\test.csv"

SUB_DIR = "output"


NUM_SPLITS = 3
RANDOM_SEED = 2017

# types of columns of the dataset dataframe
CATEGORICAL_COLS = [
    'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
    'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
    'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
    'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
    'ps_car_10_cat', 'ps_car_11_cat',
]

NUMERIC_COLS = [
    # # binary
    # "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
    # "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
    # "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
    # "ps_ind_17_bin", "ps_ind_18_bin",
    # "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    # "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
    # numeric
    "ps_reg_01", "ps_reg_02", "ps_reg_03",
    "ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",

    # feature engineering
    "missing_feat", "ps_car_13_x_ps_reg_03",
]

IGNORE_COLS = [
    "id", "target",
    "ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
    "ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
    "ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
    "ps_calc_13", "ps_calc_14",
    "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
    "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]

In [25]:
class FeatureDictionary(object):
    def __init__(self,trainfile=None,testfile=None,numeric_cols = [],ignore_cols = [],cate_cols = []):
        self.trainfile = trainfile
        self.testfile = testfile
        self.cate_cols = cate_cols
        self.numeric_cols = numeric_cols
        self.ignore_cols = ignore_cols
        self.gen_feat_dict()
        # feat_dict 类别字典与索引，feat_dim类别维度
    def gen_feat_dict(self):
        df = pd.concat([self.trainfile,self.testfile])
        self.feat_dict = {}
        self.feat_len = {}
        tc = 0
        for col in df.columns:
            if col in self.ignore_cols or col in self.numeric_cols:
                continue
            else:
                # 获取每一列的类别树
                us = df[col].unique()
                # 获取每一列的类别对应的维度
                self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
                tc += len(us)
        self.feat_dim = tc

In [59]:
class DataParser(object):
    def __init__(self,feat_dict):
        self.feat_dict = feat_dict
        
    def parse(self,infile=None,df=None,has_label=False):
        assert not ((infile is None) and (df is None))
        assert not ((infile is not None) and (df is not None))
        if infile is None:
            dfi = df.copy()
        else:
            dfi = pd.read_csv(infile)
        if has_label:
            y = dfi["target"].values.tolist()
            dfi.drop(['id','target'],axis = 1,inplace=True)
        else:
            ids = dfi['id'].values.tolist()
            dfi.drop(['id'],axis = 1,inplace = True)
        # 获取数值型特征 numeric_Xv
        numeric_Xv = dfi[self.feat_dict.numeric_cols].values.tolist()
        dfi.drop(self.feat_dict.numeric_cols,axis = 1,inplace=True)
        # 获取类别性特征 cate_Xi
        dfv = dfi.copy()
        for col in dfi.columns:
            if col in self.feat_dict.ignore_cols:
                dfi.drop(col,axis=1,inplace=True)
                dfv.drop(col,axis=1,inplace=True)
                continue
            else:
                #  list of list of feature indices of each sample in the dataset
                dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
                dfv[col] = 1.0
        #  list of list of feature indices of each sample in the dataset
        cate_Xi = dfi.values.tolist()
        # 获取除了忽略特征的其他特征值对应的值
        cate_Xv = dfv.values.tolist()
        if has_label:
            return cate_Xi,cate_Xv,numeric_Xv,y
        else:
            return cate_Xi,cate_Xv,numeric_Xv
    

## model 

# 模型输入

In [159]:
class DCN(BaseEstimator,TransformerMixin):
    def __init__(self, cate_feature_size,field_size, numeric_feature_size,
                embedding_size=8,
                deep_layers=[32, 32],dropout_deep=[0.5, 0.5, 0.5],
                deep_layers_activation=tf.nn.relu,
                epoch=10,batch_size=256,
                learning_rate=0.001,optimizer_type="adam",
                batch_norm=0,batch_norm_decay=0.995,
                verbose=False,random_seed=2016,
                loss_type='logloss',eval_metric=roc_auc_score,
                l2_reg=0.0,greater_is_better=True,cross_layer_num=3):
        assert loss_type in ['logloss','mse'] # 分类用logloss，预测用mse
        self.cate_feature_size = cate_feature_size
        self.numeric_feature_size = numeric_feature_size
        self.field_size = field_size
        self.embedding_size = embedding_size
        self.total_size = self.field_size * self.embedding_size +self.numeric_feature_size
        self.deep_layers = deep_layers
        self.cross_layer_num = cross_layer_num
        self.dropout_dep = dropout_deep
        self.deep_layers_activation =deep_layers_activation
        self.l2_reg = l2_reg
        
        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type

        self.batch_norm = batch_norm
        self.batch_norm_decay = batch_norm_decay

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self.greater_is_better = greater_is_better
        
        self.train_result,self.valid_result = [],[]
        self._init_graph()
    
    def _init_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.random_seed)
            
        self.feat_index = tf.placeholder(tf.int32,shape=[None,None],name='feat_index')
        self.feat_value = tf.placeholder(tf.float32,shape=[None,None],name='feat_value')

        self.numeric_value = tf.placeholder(tf.float32,shape=[None,None],name='num_value')

        self.label = tf.placeholder(tf.float32,shape=[None,1],name='label')
        self.dropout_keep_deep = tf.placeholder(tf.float32,shape=[None],name='dropout_keep_deep')
        # 输入有五部分，feat_index是离散特征
        self.train_phase = tf.placeholder(tf.bool,name='train_pahse')
        # 初始化参数
        self.weights = self._initialize_weights()
        
        #model 将所有的category特征的全部组成字典，不区分每一列特征的具体类型，都当做字典中的一个字。
        # 这里采用 embedding_lookup的方式处理category feature. cate_dim * embeddings_size=> field_size * emebdings_size 对应公式（1）
        self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'],self.feat_index) # N* F* K 提取特征索引对应的参数
        feat_value = tf.reshape(self.feat_value,shape=[-1,self.field_size,1]) # field_size*1
        self.embeddings = tf.multiply(self.embeddings,feat_value)  # field_size * emebddings_size 
        # numeric_value 此处将embeddings的维度进行了延展对应公式（2）
        self.x0 = tf.concat([self.numeric_value,tf.reshape(self.embeddings,shape=[-1,self.field_size*self.embedding_size])],axis=1)
        
        # deep part  
        # dropout_keep_deep表示随机概率的列表
        self.y_deep = tf.nn.dropout(self.x0,self.dropout_keep_deep[0])
        # 全连接层 deep_layers代表2个全连接层，每层32维度[32,32]
        for i in range(0,len(self.deep_layers)):
            # 对应公式（4）
            self.y_deep = tf.add(tf.matmul(self.y_deep,self.weights['deep_layer_%d'%i]),self.weights['deep_bias_%d'%i])
            self.y_deep = self.deep_layers_activation(self.y_deep)
            self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[i+1])
            
        # cross part
        # total_size表示输入的总长度
        self._x0 = tf.reshape(self.x0, (-1,self.total_size,1))
        x_1 = self._x0
        
        for l in range(self.cross_layer_num):
            # 对应公式（3）
            x_1 = tf.tensordot(tf.matmul(self._x0, x_1,transpose_b=True),
                               self.weights['cross_layer_%d'%l],1) + self.weights['cross_bias_%d'%l] +x_1
            # 
        self.cross_network_out = tf.reshape(x_1, (-1,self.total_size))
        # concat part 
        # 对应公式（5）
        concat_input = tf.concat([self.cross_network_out,self.y_deep],axis=1)
        self.out = tf.add(tf.matmul(concat_input,self.weights['concat_projection']),self.weights['concat_bias'])
        
        # loss  
        if self.loss_type == 'logloss':
            self.out = tf.nn.sigmoid(self.out)
            self.loss = tf.losses.log_loss(self.label, self.out)
        elif self.loss_type == 'mse':
            self.loss = tf.nn.l2_loss(tf.subtact(self.label,self.out))
            
        # l1 regularization on weights
        if self.l2_reg > 0:
            self.loss += tf.contrib.layers.l2_regularizer(
            self.l2_reg)(self.weights['concat_projection'])
            for i in range(len(self.deep_layers)):
                self.loss += tf.contrib.layers.l2_regularizer(
                self.l2_reg)(self.weights['deep_layer_%d'%i])
            for i in range(self.cross_layer_num):
                self.loss += tf.contrib.layers.l2_regularizer(
                self.l2_reg)(self.weights['cross_layer_%d'%i])
        
        if self.optimizer_type == 'adam':
            self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,beta1=0.9,beta2=0.999,epsilon=1e-8).minimize(self.loss)
        if self.optimizer_type == 'adagrad':
            self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate,initial_accumulator_value=1e-8).minimize(self.loss)
        if self.optimizer_type == 'gd':
            self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
        if self.optimizer_type == 'momentum':
            self.optimizer = tf.train.MomentOptimizer(learning_rate=self.learning_rate,momentum=0.95).minimize(self.loss)
        
        # init 
        self.saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        self.sess = tf.Session()
        self.sess.run(init)
        
        # number of params
        total_parameters = 0
        for variable in self.weights.values():
            shape = variable.get_shape()
            print("shape:",shape)
            variable_parameters = 1
            for dim in shape:
                variable_parameters *= dim.value
            total_parameters += variable_parameters
        if self.verbose > 0:
            print("#params:%d" % total_parameters)
    def _initialize_weights(self):
        weights = dict()
        
        # embenddings cate_feature_size为category词典个数，embeddingsize是embedding后的维度
        weights['feature_embeddings'] = tf.Variable(
        tf.random_normal([self.cate_feature_size,self.embedding_size],0.0,0.01),name='feature_embeddings')
        weights['feature_bias'] = tf.Variable(tf.random_normal([self.cate_feature_size,1],0.0,0.01,name='feature_bias'))
        
        # deep layers
        num_layer = len(self.deep_layers)
        glorot = np.sqrt(2.0/(self.total_size + self.deep_layers[0]))
        
        weights['deep_layer_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,self.deep_layers[0])),dtype=np.float32)
        
        weights['deep_bias_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[0])),dtype=np.float32)
        
        for i in range(1,num_layer):
            glorot = np.sqrt(2.0/(self.deep_layers[i-1] + self.deep_layers[i]))
            # layers[i-1] * layers[i]
            weights['deep_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.deep_layers[i-1],self.deep_layers[i])),dtype=np.float32)
            # 1* layer[i]
            weights['deep_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[i])),dtype=np.float32)
        
        # cross layers
        for i in range(self.cross_layer_num):
            weights['cross_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)),dtype=np.float32)
            # 1* layer[i]
            weights['cross_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)),dtype=np.float32)
        
        # final concat projection layer
        input_size = self.total_size + self.deep_layers[-1]
        
        glorot = np.sqrt(2.0/(input_size + 1))
        weights['concat_projection'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,1)),dtype=np.float32)
        weights['concat_bias'] = tf.Variable(tf.constant(0.01),dtype=np.float32)
        
        return weights
    # 获取batch的数据
    def get_batch(self,Xi,Xv,Xv2,y,batch_size,index):
        start = index * batch_size
        end = (index+1)* batch_size
        end =end if end < len(y) else len(y)
        return Xi[start:end],Xv[start:end],Xv2[start:end],[[y_] for y_ in y[start:end]]
    
    # shuffle three lists simutaneously
    def shuffle_in_unison_scary(self,a,b,c,d):
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)
        np.random.set_state(rng_state)
        np.random.shuffle(d)
        np.random.set_state(rng_state)
    
    def predict(self,Xi,Xv,Xv2,y):
        """
        :param Xi: list of list of feature indices of each sample in the dataset
        :param Xv: list of list of feature values of each sample in the dataset
        :return: predicted probability of each sample
        """
        
        feed_dict = {self.feat_index: Xi,
                     self.feat_value: Xv,
                     self.numeric_value: Xv2,
                     self.label: y,
                     self.dropout_keep_deep: [1.0] * len(self.dropout_dep),
                     self.train_phase: True}
        loss = self.sess.run([self.loss],feed_dict=feed_dict)
        
        return loss
    
    # 在每个batch上训练
    def fit_on_batch(self,Xi,Xv,Xv2,y):
        feed_dict = {self.feat_index:Xi,
                     self.feat_value:Xv,
                     self.numeric_value:Xv2,
                     self.label:y,
                     self.dropout_keep_deep:self.dropout_dep,
                     self.train_phase:True}

        loss,opt = self.sess.run([self.loss,self.optimizer],feed_dict=feed_dict)

        return loss
    
    def fit(self,cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train,cate_Xi_valid=None,cate_Xv_valid=None,numeric_Xv_valid=None,y_valid=None,early_stopping=False,refit=False):
        """
        :param Xi_train: [[ind1_1, ind1_2, ...], [ind2_1, ind2_2, ...], ..., [indi_1, indi_2, ..., indi_j, ...], ...]
                         indi_j is the feature index of feature field j of sample i in the training set
        :param Xv_train: [[val1_1, val1_2, ...], [val2_1, val2_2, ...], ..., [vali_1, vali_2, ..., vali_j, ...], ...]
                         vali_j is the feature value of feature field j of sample i in the training set
                         vali_j can be either binary (1/0, for binary/categorical features) or float (e.g., 10.24, for numerical features)
        :param y_train: label of each sample in the training set
        :param Xi_valid: list of list of feature indices of each sample in the validation set
        :param Xv_valid: list of list of feature values of each sample in the validation set
        :param y_valid: label of each sample in the validation set
        :param early_stopping: perform early stopping or not
        :param refit: refit the model on the train+valid dataset or not
        :return: None
        """
        print(len(cate_Xi_train))
        print(len(cate_Xv_train))
        print(len(numeric_Xv_train))
        print(len(y_train))
        
        has_valid = cate_Xv_valid is not None
        for epoch in range(self.epoch):
            t1 = time()
            self.shuffle_in_unison_scary(cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train)
            total_batch = int(len(y_train)/self.batch_size)
            for i in range(total_batch):
                cate_Xi_batch, cate_Xv_batch,numeric_Xv_batch, y_batch = self.get_batch(cate_Xi_train, cate_Xv_train, numeric_Xv_train,y_train, self.batch_size, i)
                
                self.fit_on_batch(cate_Xi_batch, cate_Xv_batch,numeric_Xv_batch, y_batch)
                
            if has_valid:
                y_valid = np.array(y_valid).reshape((-1,1))
                loss = self.predict(cate_Xi_valid, cate_Xv_valid, numeric_Xv_valid, y_valid)
                print("epoch",epoch,"loss",loss)

# 数据处理

In [33]:
def load_data():
    dfTrain = pd.read_csv(TRAIN_FILE)
    dfTest = pd.read_csv(TEST_FILE)
    
    def preprocess(df):
        cols = [c for c in df.columns if c not in ['id','target']]
        df['missing_feat'] = np.sum((df[cols]==-1).values,axis=1)
        df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03']
        return df
    dfTrain = preprocess(dfTrain)
    dfTest = preprocess(dfTest)
    
    cols = [c for c in dfTrain.columns if c not in ['id','target']]
    cols = [c for c in cols if (not c in IGNORE_COLS)]
    
    X_train = dfTrain[cols].values
    y_train = dfTrain['target'].values
    
    X_test = dfTest[cols].values
    ids_test = dfTest['id'].values
    
    return dfTrain, dfTest,X_train,y_train,X_test,ids_test

In [34]:
dfTrain, dfTest,X_train,y_train,X_test,ids_test = load_data()

In [35]:
dfTrain.columns

Index(['id', 'target', 'ps_ind_01', 'ps_ind_02_cat', 'ps_ind_03',
       'ps_ind_04_cat', 'ps_ind_05_cat', 'ps_ind_06_bin', 'ps_ind_07_bin',
       'ps_ind_08_bin', 'ps_ind_09_bin', 'ps_ind_10_bin', 'ps_ind_11_bin',
       'ps_ind_12_bin', 'ps_ind_13_bin', 'ps_ind_14', 'ps_ind_15',
       'ps_ind_16_bin', 'ps_ind_17_bin', 'ps_ind_18_bin', 'ps_reg_01',
       'ps_reg_02', 'ps_reg_03', 'ps_car_01_cat', 'ps_car_02_cat',
       'ps_car_03_cat', 'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
       'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat', 'ps_car_10_cat',
       'ps_car_11_cat', 'ps_car_11', 'ps_car_12', 'ps_car_13', 'ps_car_14',
       'ps_car_15', 'ps_calc_01', 'ps_calc_02', 'ps_calc_03', 'ps_calc_04',
       'ps_calc_05', 'ps_calc_06', 'ps_calc_07', 'ps_calc_08', 'ps_calc_09',
       'ps_calc_10', 'ps_calc_11', 'ps_calc_12', 'ps_calc_13', 'ps_calc_14',
       'ps_calc_15_bin', 'ps_calc_16_bin', 'ps_calc_17_bin', 'ps_calc_18_bin',
       'ps_calc_19_bin', 'ps_calc_20_bin', '

In [15]:
X_train.shape

(10000, 39)

In [26]:
fd = FeatureDictionary(dfTrain,dfTest,numeric_cols = NUMERIC_COLS,ignore_cols=IGNORE_COLS,cate_cols=CATEGORICAL_COLS)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=True'.


  # This is added back by InteractiveShellApp.init_path()


In [40]:
data_parser = DataParser(feat_dict=fd)

In [55]:
fd.feat_dict

{'ps_car_01_cat': {10: 0,
  11: 1,
  7: 2,
  6: 3,
  9: 4,
  5: 5,
  4: 6,
  8: 7,
  3: 8,
  0: 9,
  2: 10,
  1: 11,
  -1: 12},
 'ps_car_02_cat': {1: 13, 0: 14},
 'ps_car_03_cat': {-1: 15, 0: 16, 1: 17},
 'ps_car_04_cat': {0: 18,
  1: 19,
  8: 20,
  9: 21,
  2: 22,
  6: 23,
  3: 24,
  7: 25,
  4: 26,
  5: 27},
 'ps_car_05_cat': {1: 28, -1: 29, 0: 30},
 'ps_car_06_cat': {4: 31,
  11: 32,
  14: 33,
  13: 34,
  6: 35,
  15: 36,
  3: 37,
  0: 38,
  1: 39,
  10: 40,
  12: 41,
  9: 42,
  17: 43,
  7: 44,
  8: 45,
  5: 46,
  2: 47,
  16: 48},
 'ps_car_07_cat': {1: 49, -1: 50, 0: 51},
 'ps_car_08_cat': {0: 52, 1: 53},
 'ps_car_09_cat': {0: 54, 2: 55, 3: 56, 1: 57, -1: 58, 4: 59},
 'ps_car_10_cat': {1: 60, 0: 61, 2: 62},
 'ps_car_11': {2: 63, 3: 64, 1: 65, 0: 66},
 'ps_car_11_cat': {12: 67,
  19: 68,
  60: 69,
  104: 70,
  82: 71,
  99: 72,
  30: 73,
  68: 74,
  20: 75,
  36: 76,
  101: 77,
  103: 78,
  41: 79,
  59: 80,
  43: 81,
  64: 82,
  29: 83,
  95: 84,
  24: 85,
  5: 86,
  28: 87,
  87:

In [100]:
cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)

In [101]:
cate_Xi_test, cate_Xv_test, numeric_Xv_test= data_parser.parse(df=dfTest)

In [102]:
dfTrain[[col for col in dfTrain.columns if (col not in IGNORE_COLS) and (col not in NUMERIC_COLS)]].head()

Unnamed: 0,ps_ind_01,ps_ind_02_cat,ps_ind_03,ps_ind_04_cat,ps_ind_05_cat,ps_ind_06_bin,ps_ind_07_bin,ps_ind_08_bin,ps_ind_09_bin,ps_ind_10_bin,...,ps_car_03_cat,ps_car_04_cat,ps_car_05_cat,ps_car_06_cat,ps_car_07_cat,ps_car_08_cat,ps_car_09_cat,ps_car_10_cat,ps_car_11_cat,ps_car_11
0,2,2,5,1,0,0,1,0,0,0,...,-1,0,1,4,1,0,0,1,12,2
1,1,1,7,0,0,0,0,1,0,0,...,-1,0,-1,11,1,1,2,1,19,3
2,5,4,9,1,0,0,0,1,0,0,...,-1,0,-1,14,1,1,2,1,60,1
3,0,1,2,0,0,1,0,0,0,0,...,0,0,1,11,1,1,3,1,104,1
4,0,2,0,1,0,1,0,0,0,0,...,-1,0,-1,14,1,1,2,1,82,3


In [166]:
len(cate_Xi_train[0])

30

In [165]:
fd.feat_dim

12056

In [71]:
dcn_params = {
    "embedding_size": 8,
    "deep_layers": [32, 32],
    "dropout_deep": [0.5, 0.5, 0.5],
    "deep_layers_activation": tf.nn.relu,
    "epoch": 30,
    "batch_size": 1024,
    "learning_rate": 0.001,
    "optimizer_type": "adam",
    "batch_norm": 1,
    "batch_norm_decay": 0.995,
    "l2_reg": 0.01,
    "verbose": True,
    "random_seed": RANDOM_SEED,
    "cross_layer_num":3
}

In [164]:
dcn_params['cate_feature_size'] = fd.feat_dim # category feature的中不同类别的总个数，相当于词字典的词数量。
dcn_params['field_size'] = len(cate_Xi_train[0]) # field为有多少个category feature 每一列feature当做一个field。相当于有30个field的12056词。
dcn_params['numeric_feature_size'] = len(NUMERIC_COLS) # numeric feature的个数

# 获取验证集与训练集

In [76]:
# 定义匿名函数，目的获取已知索引的对应特征，获取训练集与验证集
_get = lambda x, l:[x[i] for i in l]

In [80]:
folds = list(StratifiedKFold(n_splits=NUM_SPLITS, shuffle=True,random_state=RANDOM_SEED).split(X_train, y_train))

In [160]:
for i, (train_idx, valid_idx) in enumerate(folds):
    cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_, y_train_ = _get(cate_Xi_train,train_idx), _get(cate_Xv_train,train_idx),_get(numeric_Xv_train,train_idx), _get(y_train,train_idx)
    cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_, y_valid_ = _get(cate_Xi_train,valid_idx), _get(cate_Xv_train,valid_idx),_get(numeric_Xv_train,valid_idx), _get(y_train,valid_idx)
    
    dcn =  DCN(**dcn_params)
    dcn.fit(cate_Xi_train_, cate_Xv_train_, numeric_Xv_train_,y_train_, cate_Xi_valid_, cate_Xv_valid_, numeric_Xv_valid_,y_valid_)

#params:119336
6666
6666
6666
6666
epoch 0 loss [0.7339406]
epoch 1 loss [0.689344]
epoch 2 loss [0.6579944]
epoch 3 loss [0.6139721]
epoch 4 loss [0.57330567]
epoch 5 loss [0.5425892]
epoch 6 loss [0.51661056]
epoch 7 loss [0.49185112]
epoch 8 loss [0.46937397]
epoch 9 loss [0.44965577]
epoch 10 loss [0.43133047]
epoch 11 loss [0.41413397]
epoch 12 loss [0.39858332]
epoch 13 loss [0.38438743]
epoch 14 loss [0.37130785]
epoch 15 loss [0.3591925]
epoch 16 loss [0.3479624]
epoch 17 loss [0.33759972]
epoch 18 loss [0.32794872]
epoch 19 loss [0.31895694]
epoch 20 loss [0.31060877]
epoch 21 loss [0.30281362]
epoch 22 loss [0.2955329]
epoch 23 loss [0.28878072]
epoch 24 loss [0.28245845]
epoch 25 loss [0.2765365]
epoch 26 loss [0.27101922]
epoch 27 loss [0.26581767]
epoch 28 loss [0.2610159]
epoch 29 loss [0.2565304]
#params:119336
6667
6667
6667
6667
epoch 0 loss [1.0582225]
epoch 1 loss [0.7997861]
epoch 2 loss [0.66108996]
epoch 3 loss [0.5902871]
epoch 4 loss [0.55408746]
epoch 5 loss [0