In [29]:
import tensorflow as tf
from tensorflow import keras
import numpy
from tensorflow.keras import layers
#from pyspark.sql import SQLContext
#from pyspark import SparkCoeeeetext
from pyspark.sql import SparkSession
import pyspark.sql.functions as f
import pandas as pd
import numpy as np
import time

In [30]:
print(tf.__version__)
print("GPU Available: ", tf.test.is_gpu_available())


2.0.0
GPU Available:  True


In [31]:
a = tf.convert_to_tensor([0.1, 0.2, 0.7])
b = tf.convert_to_tensor([[1.,2.,3.], [2.,3.,4.], [3., 5., 4.]])
a * b

<tf.Tensor: id=5, shape=(3, 3), dtype=float32, numpy=
array([[0.1, 0.4, 2.1],
       [0.2, 0.6, 2.8],
       [0.3, 1. , 2.8]], dtype=float32)>

# 1. Model
## (1) AUGRU:

$$
u_t = \sigma (W^{\mu}i_{t} + U^{\mu}h_{t-1}+b^{\mu})
$$


$$
\tilde{u_{t}'} = a_{t} \ast u_{t}'
$$

$$
r_{t} = \sigma (W^{r}i_{t} + U^{r}h_{t-1}+b^{r})
$$


$$
\tilde{h_t} = tanh(W^{h}i_{t} + r_{t} \circ U^{h}h_{t-1}+b^{h})
$$


$$
h_{t}' = (1-\tilde{u_{t}'}) \circ h_{t-1}' + \tilde{u_{t}'} \circ \tilde{h_t'}
$$

In [32]:
class GRU_GATES(tf.keras.layers.Layer):
    def __init__(self, units):
        super(GRU_GATES, self).__init__()
        self.linear_act = layers.Dense(units, activation=None, use_bias=True)
        self.linear_noact = layers.Dense(units, activation=None, use_bias=False)

    def call(self, a, b, gate_b=None):
        if gate_b is None:
            return tf.keras.activations.sigmoid(self.linear_act(a) + self.linear_noact(b))
        else:
            return tf.keras.activations.tanh(self.linear_act(a) + tf.math.multiply(gate_b, self.linear_noact(b)))

In [33]:
class AUGRU(layers.Layer):
    def __init__(self, units):
        super(AUGRU, self).__init__()
        self.u_gate = GRU_GATES(units)
        self.r_gate = GRU_GATES(units)
        self.c_memo = GRU_GATES(units)

    def call(self, inputs, state, att_score):
        u = self.u_gate(inputs, state) #u_t
        r = self.r_gate(inputs, state) #r_t
        c = self.c_memo(inputs, state, r) #\tilde{h_t}
        u_= att_score * u #\tilde{u_{t}'} [AUGRU Add]
        state_next = (1 - u_) * state + u_ * c #h_t [AUGRU change u_t on output]
        return state_next

## (2) GRU Self_Defination

In [34]:
class GRU(layers.Layer):
    def __init__(self, units):
        super(GRU, self).__init__()
        self.u_gate = GRU_GATES(units)
        self.r_gate = GRU_GATES(units)
        self.c_memo = GRU_GATES(units)
    
    def call(self, inputs, state):
        u = self.u_gate(inputs, state)
        r = self.r_gate(inputs, state)
        c = self.c_memo(inputs, state, r)
        state_next = (1 - u) * state + u * c
        return state_next 

## (3) Dice Activation Function

In [35]:
class Dice(tf.keras.layers.Layer):
    def __init__(self):
        super(Dice, self).__init__()
        self.bn = tf.keras.layers.BatchNormalization(center=False, scale=False)
        self.alpha = self.add_weight(shape=(), dtype=tf.float32, name='alpha')

    def call(self, x):
        x_normed = self.bn(x)
        x_p = tf.sigmoid(x_normed)
        return self.alpha * (1.0 - x_p) * x + x_p * x

## (4) Attention Layer

In [36]:
class AttentionLayer(layers.Layer):
    def __init__(self, attention_size, drop_rate):
        super().__init__()
        self.attention_size = attention_size
        self.dropout = Dropout(drop_rate, name = "rnn_attention_dropout")

    def build(self, input_shape):
        self.attention_w = self.add_weight(name = "atten_w", shape = (input_shape[-1], self.attention_size), initializer = tf.random_uniform_initializer(), dtype = "float32", trainable = True)
        self.attention_u = self.add_weight(name = "atten_u", shape = (self.attention_size,), initializer = tf.random_uniform_initializer(), dtype = "float32", trainable = True)
        self.attention_b = self.add_weight(name = "atten_b", shape = (self.attention_size,), initializer = tf.constant_initializer(0.1), dtype = "float32", trainable = True)    
        super().build(input_shape)

    def call(self, inputs, training):
        x = tf.tanh(tf.add(tf.tensordot(inputs, self.attention_w, axes = 1), self.attention_b))
        x = tf.tensordot(x, self.attention_u, axes = 1)
        x = tf.nn.softmax(x)
        weight_out = tf.multiply(tf.expand_dims(x, -1), inputs)
        final_out = tf.reduce_sum(weight_out, axis = 1) 
        drop_out = self.dropout(final_out, training = training)
        return drop_out


## (5) DIEN

In [37]:
class DIEN(tf.keras.Model):
    def __init__(self, cate_dim, cate_count, brand_dim, brand_count, cms_segid_dim, cms_segid_count, cms_group_dim, cms_group_count, gender_dim, gender_count, age_dim, age_count, pvalue_dim, pvalue_count, shopping_dim, shopping_count, occupation_dim, occupation_count, user_class_level_count, user_class_level_dim, bias_length, activation="Dice"):
        """
        Layers Init
        """
        super(DIEN, self).__init__()
        ####Init Embedding Layer Variables
        #cate_id, brand_id
        self.cate_dim = cate_dim #cate_id embedding output length
        self.brand_dim = brand_dim #user_id embedding output lengthlength)
        #user_profile embedding features
        self.cms_segid_dim = cms_segid_dim
        self.cms_group_dim = cms_group_dim
        self.gender_dim = gender_dim
        self.age_dim = age_dim
        self.pvalue_dim = pvalue_dim
        self.shopping_dim = shopping_dim
        self.occupation_dim = occupation_dim
        self.user_class_level_dim = user_class_level_dim
        
        ####Init Embedding layers
        #(input dim:词长, output dim:输出节点数)
        self.cate_emb = layers.Embedding(cate_count, cate_dim)
        self.brand_emb = layers.Embedding(brand_count, brand_dim)
        self.cms_segid_emb = layers.Embedding(cms_segid_count, cms_segid_dim)
        self.cms_group_emb = layers.Embedding(cms_group_count, cms_group_dim)
        self.gender_emb = layers.Embedding(gender_count, gender_dim)
        self.age_emb = layers.Embedding(age_count, age_dim)
        self.pvalue_emb = layers.Embedding(pvalue_count, pvalue_dim)
        self.shopping_emb = layers.Embedding(shopping_count, shopping_dim)
        self.occupation_emb = layers.Embedding(occupation_count, occupation_dim)
        self.user_class_level_emb = layers.Embedding(user_class_level_count, user_class_level_dim)

        ####Init GRU and AUGRU
        self.hist_gru = layers.GRU(brand_dim + cate_dim, return_sequences=True)
        self.hist_augru = AUGRU(brand_dim + cate_dim)

        ####Init Attention Layer
        #self.attention = AttentionLayer(10, 0.1)
        #self.attention = layers.Attention()([self.hist_gru, self.hist_gru])
        
        ####build fully connection layer
        self.item_bias= tf.Variable(tf.zeros([bias_length]), trainable=True)
        self.fc = tf.keras.Sequential()
        self.fc.add(layers.BatchNormalization())
        self.fc.add(layers.Dense(200, activation="sigmoid"))
        if activation == "Dice":
            self.fc.add(Dice())
        elif activation == "PReLU":
            self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(80, activation="sigmoid"))
        if activation == "Dice":
            self.fc.add(Dice()) 
        elif activation == "PReLU":
            self.fc.add(layers.PReLU(alpha_initializer='zeros', weights=None))
        self.fc.add(layers.Dense(2, activation=None))

    def get_emb(self, cate, brand, cate_hist_list, brand_hist_list, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level):
        """
        Define Embedding Layer
        """
        cate_emb = self.cate_emb(cate)
        brand_emb = self.brand_emb(brand)
        target_item_emb = tf.concat([cate_emb, brand_emb], -1)
        cate_hist_list_emb = self.cate_emb(cate_hist_list)
        brand_hist_list_emb = self.brand_emb(brand_hist_list)
        history_list_emb = tf.concat([cate_hist_list_emb, brand_hist_list_emb], -1)
        cms_segid_emb = self.cms_segid_emb(cms_segid)
        cms_group_emb = self.cms_group_emb(cms_group)
        gender_emb = self.gender_emb(gender)
        age_emb = self.age_emb(age)
        pvalue_emb = self.pvalue_emb(pvalue)
        shopping_emb = self.shopping_emb(shopping)
        occupation_emb = self.occupation_emb(occupation)
        user_class_level_emb = self.user_class_level_emb(user_class_level)
        item_bias= tf.gather(self.item_bias, cate)
        return target_item_emb, history_list_emb, cms_segid_emb, cms_group_emb, gender_emb, age_emb, pvalue_emb, shopping_emb, occupation_emb, user_class_level_emb, item_bias

    def auxiliary_loss(self, hidden_states, embedding_out):
        """
        Auxiliary Loss Function
        """
        x = tf.concat([hidden_states, embedding_out], -1)  
        return tf.reduce_mean(tf.math.log(tf.math.sigmoid(x)) + tf.math.log(1.0 - tf.math.sigmoid(x)))

    def call(self, cate, brand, cate_hist_list, brand_hist_list, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level):
        """
        Init DIEN Structure
        """
        ##########Embedding Layer########################
        ## hist_join_emb: user behavior sequence  embedding in paper
        ## user_emb: user profile embedding in paper
        ## item_join_emb: target_item embedding in paper
        #################################################
        target_item_emb, history_list_emb, cms_segid_emb, cms_group_emb, gender_emb, age_emb, pvalue_emb, shopping_emb, occupation_emb, user_class_level_emb, item_bias = self.get_emb(cate, brand, cate_hist_list, brand_hist_list, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level)

        ##########GRU Layer#########################
        ## GRU
        hist_gru_emb = self.hist_gru(history_list_emb)
        ## Auxiliary Loss
        aux_loss = self.auxiliary_loss(hist_gru_emb[:, :-1, :], history_list_emb[:, 1:, :])

        #######Attention Layer#################
        ## hist_mask = tf.sequence_mask(length, max(length), dtype = tf.bool)
        ## hist_mask = tf.tile(tf.expand_dims(hist_mask, -1), (1, 1, self.item_dim + self.cate_dim))
        #expand_target_item = tf.tile(tf.expand_dims(target_item_emb, axis=1), multiples=[1,hist_gru_emb.numpy().shape[1],1])
        #hist_att_input = tf.concat([hist_gru_emb, expand_target_item], axis=-1)
        hist_attn = layers.Attention()([hist_gru_emb, target_item_emb])

        #######AUGRU Layer#####################
        augru_hidden_state = tf.zeros_like(hist_gru_emb[:, 0, :])
        for in_emb, in_att in zip(tf.transpose(hist_gru_emb, [1, 0, 2]), tf.transpose(hist_attn, [1, 0, 2])):
            augru_hidden_state = self.hist_augru(in_emb, augru_hidden_state, in_att)
        
        ######Fully Connection Layer###########
        join_emb = tf.concat([cms_segid_emb, cms_group_emb, gender_emb,  age_emb, pvalue_emb, shopping_emb, occupation_emb, user_class_level_emb, augru_hidden_state], -1) 
        #concat& flatten
        #output = tf.squeeze(self.fc(join_emb)) + self.item_bias
        output = self.fc(join_emb)
        #print("output:" + str(output))
        
        ######Softmax Get Finaly Result########
        logit = tf.keras.activations.softmax(output)
        return aux_loss, output, logit

# 3. Model Train
## (1) Data Process

In [38]:
file_path = "/nfs/project/boweihan_2/DIEN/dien_tf2/"
def load_sql_file(sql_file):
    with open(sql_file, 'r') as isf:
        sql_txt = isf.readlines()
        return "".join(sql_txt)

In [39]:
def get_data(file_name):
    sql = load_sql_file(file_path + file_name)
    print(sql)
    rst = spark.sql(sql)
    return rst

In [40]:
train_data = get_data("get_train_data.sql")
print(train_data.dtypes)
train_data.show()

select
    *
from
    stg_gs.guide_dien_final_train_data
[('user', 'int'), ('adgroup_id', 'int'), ('time_stamp', 'int'), ('pid', 'string'), ('nonclk', 'int'), ('clk', 'int'), ('userid', 'int'), ('cms_segid', 'int'), ('cms_group_id', 'int'), ('final_gender_code', 'int'), ('age_level', 'int'), ('pvalue_level', 'int'), ('shopping_level', 'int'), ('occupation', 'int'), ('new_user_class_level', 'int'), ('cate_id', 'int'), ('campaign_id', 'int'), ('customer', 'int'), ('brand', 'string'), ('price', 'double'), ('show_cate', 'array<int>'), ('show_brand', 'array<int>'), ('show_rk', 'array<int>'), ('click_cate', 'array<int>'), ('click_brand', 'array<int>'), ('click_rk', 'array<int>')]
+----+----------+----------+-----------+------+---+------+---------+------------+-----------------+---------+------------+--------------+----------+--------------------+-------+-----------+--------+------+------+--------------------+--------------------+--------------------+--------------------+--------------------+

In [41]:
behavior_data = get_data("get_behavior_data.sql")
print(behavior_data.dtypes)
behavior_data.show()

select
    *
from
    stg_gs.guide_dien_behavior_list
[('user', 'int'), ('time_stamp', 'int'), ('btag', 'string'), ('cate', 'int'), ('brand', 'int'), ('RK', 'int')]
+----+----------+----+----+------+---+
|user|time_stamp|btag|cate| brand| RK|
+----+----------+----+----+------+---+
|  65|1492900231|  pv|6423|202844|  1|
|  65|1493001655|  pv|1535|  1933|  2|
|  65|1493001696|  pv|5144|221012|  3|
|  65|1493001718|  pv|5144|221012|  4|
|  65|1493001718|  pv|5144|221012|  4|
|  65|1493163026|  pv|4384| 83700|  6|
|  65|1493163115|  pv|4384|268509|  7|
|  65|1493163153|cart|4384|268509|  8|
|  65|1493163157|  pv|4384|268509|  9|
|  65|1493284569|  pv| 859|102030| 10|
|  65|1493337810|  pv|4384|268509| 11|
|  65|1493337870|  pv|4384|268509| 12|
|  65|1493383822|  pv|8233|211132| 13|
|  65|1493383863|  pv|8233|211132| 14|
|  65|1493383881|  pv|8233|211132| 15|
|  65|1493383893|  pv|8233|211132| 16|
|  65|1493383931|  pv|8233|211132| 17|
|  65|1493383933|  pv|8233|211132| 18|
|  65|1493383935

In [42]:
rst_1 = behavior_data.agg(f.countDistinct(behavior_data.brand).alias("brand"), f.countDistinct(behavior_data.cate).alias("cate"))
rst_1.show()

+------+-----+
| brand| cate|
+------+-----+
|460561|12968|
+------+-----+



In [43]:
rst_2 = train_data.agg(
    f.countDistinct(train_data.cms_segid).alias("cms_segid"), 
    f.countDistinct(train_data.cms_group_id).alias("cms_group_id"),
    f.countDistinct(train_data.final_gender_code).alias("final_gender_code"),
    f.countDistinct(train_data.age_level).alias("age_level"),
    f.countDistinct(train_data.pvalue_level).alias("pvalue_level"),
    f.countDistinct(train_data.shopping_level).alias("shopping_level"),
    f.countDistinct(train_data.occupation).alias("occupation"),
    f.countDistinct(train_data.new_user_class_level).alias("new_user_class_level")
)
rst_2.show()

+---------+------------+-----------------+---------+------------+--------------+----------+--------------------+
|cms_segid|cms_group_id|final_gender_code|age_level|pvalue_level|shopping_level|occupation|new_user_class_level|
+---------+------------+-----------------+---------+------------+--------------+----------+--------------------+
|       97|          13|                2|        7|           3|             3|         2|                   4|
+---------+------------+-----------------+---------+------------+--------------+----------+--------------------+



In [45]:
rst_1_pd = rst_1.toPandas()
rst_2_pd = rst_2.toPandas()

In [46]:
result = pd.concat([rst_1_pd, rst_2_pd], axis=1)
result.to_csv("/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/embedding_count.csv", index=False, header=True, sep=",", encoding="utf-8_sig")

In [48]:
result

Unnamed: 0,brand,cate,cms_segid,cms_group_id,final_gender_code,age_level,pvalue_level,shopping_level,occupation,new_user_class_level
0,460561,12968,97,13,2,7,3,3,2,4


In [47]:
def get_embedding_count(feature):
    return result[feature].values[0]
get_embedding_count("brand")

460561

## (2) Train Method

## a.使用少量数据调模型

In [49]:
train_path = "/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/train.csv"
test_path = "/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/test.csv"
test_data = pd.read_csv(test_path, sep = "\t")
test_data = test_data.fillna(0)
test_data

Unnamed: 0,guide_dien_final_train_data.user,guide_dien_final_train_data.adgroup_id,guide_dien_final_train_data.time_stamp,guide_dien_final_train_data.pid,guide_dien_final_train_data.nonclk,guide_dien_final_train_data.clk,guide_dien_final_train_data.userid,guide_dien_final_train_data.cms_segid,guide_dien_final_train_data.cms_group_id,guide_dien_final_train_data.final_gender_code,...,guide_dien_final_train_data.campaign_id,guide_dien_final_train_data.customer,guide_dien_final_train_data.brand,guide_dien_final_train_data.price,guide_dien_final_train_data.show_cate,guide_dien_final_train_data.show_brand,guide_dien_final_train_data.show_rk,guide_dien_final_train_data.click_cate,guide_dien_final_train_data.click_brand,guide_dien_final_train_data.click_rk
0,197045,782332,1494677497,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,181830,197002,0.0,148.0,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
1,197045,632432,1494674116,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,233866,28529,0.0,216.0,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
2,197045,85419,1494678677,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,86978,30214,95618.0,2.4,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
3,197045,792489,1494678677,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,80159,133346,0.0,48.0,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
4,198714,457325,1494662608,430539_1007,0,1,198714.0,2.0,1.0,2.0,...,16625,96110,337172.0,41.0,"[6428,6428,6428,6428,6428,6428,6432,6341,6341,...","[38480,38480,38480,38480,38480,38480,136135,13...","[1,2,3,4,4,6,7,8,9,10,11,12,13,14,15,16,17,18,...","[10809,6432,4512,4512,4512,4512,4512,4512,4512...","[78538,293023,218467,8370,218566,218467,322655...","[23,27,52,58,60,62,64,69,71,75,89,100,102,108,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,966232,601812,1494689190,430539_1007,1,0,966232.0,0.0,3.0,2.0,...,227059,212714,0.0,499.0,"[6932,6932,6932,1665,6261,4282,4284,4284,4283,...","[181477,181477,181477,364321,234984,23859,3739...","[1,2,3,4,5,6,7,8,9,10,12,13,14,15,15,17,17,19,...","[4262,1665,1665,1170,6426,7205,7225,133,5166,1...","[370203,322842,13483,219647,96276,409316,13287...","[11,34,46,105,114,129,146,164,201,203,244]"
996,966232,732113,1494690992,430539_1007,1,0,966232.0,0.0,3.0,2.0,...,118375,11131,110068.0,448.0,"[6932,6932,6932,1665,6261,4282,4284,4284,4283,...","[181477,181477,181477,364321,234984,23859,3739...","[1,2,3,4,5,6,7,8,9,10,12,13,14,15,15,17,17,19,...","[4262,1665,1665,1170,6426,7205,7225,133,5166,1...","[370203,322842,13483,219647,96276,409316,13287...","[11,34,46,105,114,129,146,164,201,203,244]"
997,966232,594425,1494690992,430539_1007,1,0,966232.0,0.0,3.0,2.0,...,34336,117293,73781.0,302.0,"[6932,6932,6932,1665,6261,4282,4284,4284,4283,...","[181477,181477,181477,364321,234984,23859,3739...","[1,2,3,4,5,6,7,8,9,10,12,13,14,15,15,17,17,19,...","[4262,1665,1665,1170,6426,7205,7225,133,5166,1...","[370203,322842,13483,219647,96276,409316,13287...","[11,34,46,105,114,129,146,164,201,203,244]"
998,966232,664714,1494678642,430539_1007,0,1,966232.0,0.0,3.0,2.0,...,367511,251098,0.0,296.0,"[6932,6932,6932,1665,6261,4282,4284,4284,4283,...","[181477,181477,181477,364321,234984,23859,3739...","[1,2,3,4,5,6,7,8,9,10,12,13,14,15,15,17,17,19,...","[4262,1665,1665,1170,6426,7205,7225,133,5166,1...","[370203,322842,13483,219647,96276,409316,13287...","[11,34,46,105,114,129,146,164,201,203,244]"


In [50]:
train_data = pd.read_csv(train_path, sep = "\t")
train_data = train_data.fillna(0)
train_data

Unnamed: 0,guide_dien_final_train_data.user,guide_dien_final_train_data.adgroup_id,guide_dien_final_train_data.time_stamp,guide_dien_final_train_data.pid,guide_dien_final_train_data.nonclk,guide_dien_final_train_data.clk,guide_dien_final_train_data.userid,guide_dien_final_train_data.cms_segid,guide_dien_final_train_data.cms_group_id,guide_dien_final_train_data.final_gender_code,...,guide_dien_final_train_data.campaign_id,guide_dien_final_train_data.customer,guide_dien_final_train_data.brand,guide_dien_final_train_data.price,guide_dien_final_train_data.show_cate,guide_dien_final_train_data.show_brand,guide_dien_final_train_data.show_rk,guide_dien_final_train_data.click_cate,guide_dien_final_train_data.click_brand,guide_dien_final_train_data.click_rk
0,197045,607851,1494406826,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,332347,250055,181578.0,299.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
1,197045,642042,1494239072,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,233113,21240,36241.0,2.75,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
2,197045,696585,1494075937,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,401692,56209,188080.0,98.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
3,197045,743418,1494403500,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,81529,64774,0.0,169.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
4,197045,215350,1494599636,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,91018,33618,21271.0,39.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1041408,613221,1494389160,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,131055,7935,140633.0,688.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9996,1041408,621434,1494477822,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,183730,192403,196567.0,139.80,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9997,1041408,621692,1494389107,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,16001,92576,262440.0,188.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9998,1041408,621692,1494477822,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,16001,92576,262440.0,188.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."


In [75]:
train_data = train_data[train_data["guide_dien_final_train_data.click_cate"] != 0]

In [76]:
train_data = train_data[train_data["guide_dien_final_train_data.click_brand"] != 0]

In [77]:
train_data

Unnamed: 0,guide_dien_final_train_data.user,guide_dien_final_train_data.adgroup_id,guide_dien_final_train_data.time_stamp,guide_dien_final_train_data.pid,guide_dien_final_train_data.nonclk,guide_dien_final_train_data.clk,guide_dien_final_train_data.userid,guide_dien_final_train_data.cms_segid,guide_dien_final_train_data.cms_group_id,guide_dien_final_train_data.final_gender_code,...,guide_dien_final_train_data.campaign_id,guide_dien_final_train_data.customer,guide_dien_final_train_data.brand,guide_dien_final_train_data.price,guide_dien_final_train_data.show_cate,guide_dien_final_train_data.show_brand,guide_dien_final_train_data.show_rk,guide_dien_final_train_data.click_cate,guide_dien_final_train_data.click_brand,guide_dien_final_train_data.click_rk
0,197045,607851,1494406826,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,332347,250055,181578.0,299.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
1,197045,642042,1494239072,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,233113,21240,36241.0,2.75,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
2,197045,696585,1494075937,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,401692,56209,188080.0,98.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
3,197045,743418,1494403500,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,81529,64774,0.0,169.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
4,197045,215350,1494599636,430548_1007,1,0,197045.0,5.0,2.0,2.0,...,91018,33618,21271.0,39.00,"[1245,1245,1245,6894,6894,6894,11175,11175,124...","[236225,351558,351558,103480,103480,103480,304...","[1,2,3,5,6,8,9,10,11,12,13,14,15,16,17,18,19,2...","[1245,6894,4281,6806,1570,4314,1570,9093,9093,...","[351558,103480,424633,284368,3180,42941,3180,2...","[4,7,53,64,256,322,335,397,399,516,523,526,532..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,1041408,613221,1494389160,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,131055,7935,140633.0,688.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9996,1041408,621434,1494477822,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,183730,192403,196567.0,139.80,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9997,1041408,621692,1494389107,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,16001,92576,262440.0,188.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."
9998,1041408,621692,1494477822,430539_1007,1,0,1041408.0,0.0,3.0,2.0,...,16001,92576,262440.0,188.00,"[5957,4284,6261,6261,4520,4520,4280,4280,5920,...","[261632,261632,261632,261632,22959,182215,1700...","[1,2,3,4,5,6,7,8,9,10,11,11,13,13,15,16,17,18,...","[619,11849,5920,4520,4284,4280,133,7353,6261,6...","[240323,364046,270824,414896,414896,139558,280...","[19,25,29,37,39,148,221,232,365,380,462,543,55..."


In [83]:
from sklearn.preprocessing import LabelEncoder


In [84]:
len(set(train_data["guide_dien_final_train_data.shopping_level"].values))

4

In [85]:
train_data.columns

Index(['guide_dien_final_train_data.user',
       'guide_dien_final_train_data.adgroup_id',
       'guide_dien_final_train_data.time_stamp',
       'guide_dien_final_train_data.pid', 'guide_dien_final_train_data.nonclk',
       'guide_dien_final_train_data.clk', 'guide_dien_final_train_data.userid',
       'guide_dien_final_train_data.cms_segid',
       'guide_dien_final_train_data.cms_group_id',
       'guide_dien_final_train_data.final_gender_code',
       'guide_dien_final_train_data.age_level',
       'guide_dien_final_train_data.pvalue_level',
       'guide_dien_final_train_data.shopping_level',
       'guide_dien_final_train_data.occupation',
       'guide_dien_final_train_data.new_user_class_level',
       'guide_dien_final_train_data.cate_id',
       'guide_dien_final_train_data.campaign_id',
       'guide_dien_final_train_data.customer',
       'guide_dien_final_train_data.brand',
       'guide_dien_final_train_data.price',
       'guide_dien_final_train_data.show_cate',
     

In [86]:
def parse_row(df, row_num):
    row = df.loc[row_num]
    label = row["guide_dien_final_train_data.clk"]
    target_cate = row["guide_dien_final_train_data.cate_id"]
    target_brand = row["guide_dien_final_train_data.brand"]
    hist_behavior_clk_cate = row["guide_dien_final_train_data.click_cate"]
    hist_behavior_clk_brand = row["guide_dien_final_train_data.click_brand"]
    hist_behavior_show_cate = row["guide_dien_final_train_data.show_cate"]
    hist_behavior_show_brand = row["guide_dien_final_train_data.show_brand"]
    return tf.convert_to_tensor(label), tf.convert_to_tensor(target_cate), tf.convert_to_tensor(target_brand), tf.convert_to_tensor(hist_behavior_clk_cate), tf.convert_to_tensor(hist_behavior_clk_brand), tf.convert_to_tensor(hist_behavior_show_cate), tf.convert_to_tensor(hist_behavior_show_brand)

def data_generator():
    df = pd.read_csv("/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/train.csv")
    n = len(df)
    for i in range(n):
        yield parse_row(df, i)

In [55]:
def get_normal_data(data, col):
    return data[col].values

def get_sequence_data(data, col):
    rst = []
    max_length = 0
    for i in data[col].values:
        temp = len(list(map(eval,i[1:-1].split(","))))
        if temp > max_length:
            max_length = temp

    for i in data[col].values:
        temp = list(map(eval,i[1:-1].split(",")))
        padding = np.zeros(max_length - len(temp))
        rst.append(list(np.append(np.array(temp), padding)))
    return rst

def convert_tensor(data):
    return tf.convert_to_tensor(data)

In [87]:
def get_batch_data(data, min_batch, batch=100):
    batch_data = None
    if min_batch + batch <= len(train_data):
        batch_data = data.loc[min_batch:min_batch + batch - 1]
    else:
        batch_data = data.loc[min_batch:]
    min_batch += batch
    click = get_normal_data(batch_data, "guide_dien_final_train_data.clk")
    no_click = get_normal_data(batch_data, "guide_dien_final_train_data.nonclk")
    label = [click, no_click]
    target_cate = get_normal_data(batch_data, "guide_dien_final_train_data.cate_id")
    target_brand = get_normal_data(batch_data, "guide_dien_final_train_data.brand")
    cms_segid = get_normal_data(batch_data, "guide_dien_final_train_data.cms_segid")
    cms_group = get_normal_data(batch_data, "guide_dien_final_train_data.cms_group_id")
    gender = get_normal_data(batch_data, "guide_dien_final_train_data.final_gender_code")
    age = get_normal_data(batch_data, "guide_dien_final_train_data.age_level")
    pvalue = get_normal_data(batch_data, "guide_dien_final_train_data.pvalue_level")
    shopping = get_normal_data(batch_data, "guide_dien_final_train_data.shopping_level")
    occupation = get_normal_data(batch_data, "guide_dien_final_train_data.occupation")
    user_class_level = get_normal_data(batch_data, "guide_dien_final_train_data.new_user_class_level")
    hist_brand_behavior_clk = get_sequence_data(batch_data, "guide_dien_final_train_data.click_brand")
    hist_cate_behavior_clk = get_sequence_data(batch_data, "guide_dien_final_train_data.click_cate")
    hist_brand_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_brand")
    hist_cate_behavior_show = get_sequence_data(batch_data, "guide_dien_final_train_data.show_cate")

    return tf.reshape(convert_tensor(label), (batch, 2)), convert_tensor(target_cate), convert_tensor(target_brand), convert_tensor(cms_segid), convert_tensor(cms_group), convert_tensor(gender), convert_tensor(age), convert_tensor(pvalue), convert_tensor(shopping), convert_tensor(occupation), convert_tensor(user_class_level), convert_tensor(hist_brand_behavior_clk), convert_tensor(hist_cate_behavior_clk), convert_tensor(hist_brand_behavior_show), convert_tensor(hist_cate_behavior_show), min_batch + batch

In [88]:
brand_count = 500000#get_embedding_count("brand")
cate_count = 15000#get_embedding_count("cate")
cms_segid_count = get_embedding_count("cms_segid")
cms_group_id_count = get_embedding_count("cms_group_id")
gender_count = 3#get_embedding_count("final_gender_code")
age_count = get_embedding_count("age_level")
pvalue_count = get_embedding_count("pvalue_level")
shopping_count = 4#get_embedding_count("shopping_level")
occupation_count = 5#get_embedding_count("occupation")
user_class_level_count = 5#get_embedding_count("new_user_class_level")

In [89]:
cate_dim = 8
brand_dim = 8
cms_segid_dim = 8
cms_group_dim = 8
gender_dim = 8
age_dim = 8
pvalue_dim = 8
shopping_dim = 8
occupation_dim = 8
user_class_level_dim = 8
bias_length = 80
alpha = 0.2
epochs = 10

In [90]:
model = DIEN(
    cate_dim = cate_dim, 
    cate_count = cate_count, 
    brand_dim = brand_dim, 
    brand_count = brand_count, 
    cms_segid_dim = cms_segid_dim, 
    cms_segid_count = cms_segid_count, 
    cms_group_dim = cms_group_dim, 
    cms_group_count = cms_group_id_count, 
    gender_dim = gender_dim, 
    gender_count = gender_count,
    age_dim = age_dim, 
    age_count = age_count, 
    pvalue_dim = pvalue_dim, 
    pvalue_count = pvalue_count, 
    shopping_dim = shopping_dim, 
    shopping_count = shopping_count, 
    occupation_dim = occupation_dim, 
    occupation_count = occupation_count, 
    user_class_level_count = user_class_level_count, 
    user_class_level_dim = user_class_level_dim, 
    bias_length = bias_length)

In [91]:
log_path = "/nfs/project/boweihan_2/DIEN/dien_tf2/train_log/"
train_summary_writer = tf.summary.create_file_writer(log_path)

In [92]:
optimizer = tf.keras.optimizers.SGD(learning_rate=0.5, momentum=0.0)
loss_metric = tf.keras.metrics.Sum()
auc_metric = tf.keras.metrics.AUC()

In [93]:
model.layers

[<tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffc5c890>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffc57490>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffc574d0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffc578d0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f7504b0ad90>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffb452d0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffb45ad0>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffcd2990>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffb90310>,
 <tensorflow.python.keras.layers.embeddings.Embedding at 0x7f74ffb45d50>,
 <tensorflow.python.keras.layers.recurrent_v2.GRU at 0x7f74ffb90b50>,
 <__main__.AUGRU at 0x7f74f860d650>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x7f74f860d550>]

In [94]:
def train_one_step(target_cate, target_brand, hist_cate_behavior_clk, hist_brand_behavior_clk, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, label):
    with tf.GradientTape() as tape:
        aux_loss, output, logit = model(target_cate, target_brand, hist_cate_behavior_clk, hist_brand_behavior_clk, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level)
        target_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output,labels=tf.cast(label,dtype=tf.float32)))
        final_loss = target_loss + alpha * aux_loss
        print("aux_loss=" + str(aux_loss))
        print("target_loss=" + str(target_loss))
        print("final_loss=" + str(final_loss))
    gradient = tape.gradient(final_loss, model.trainable_variables)
    clip_gradient, _ = tf.clip_by_global_norm(gradient, 5.0)
    optimizer.apply_gradients(zip(clip_gradient, model.trainable_variables))
    loss_metric(final_loss)


In [64]:
for epoch in range(epochs):
    min_batch = 0
    for i in range(len(train_data)):
        label, target_cate, target_brand, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, hist_brand_behavior_clk, hist_cate_behavior_clk, hist_brand_behavior_show, hist_cate_behavior_show, min_batch= get_batch_data(train_data, min_batch)
        train_one_step(target_cate, target_brand, hist_cate_behavior_clk, hist_brand_behavior_clk, cms_segid, cms_group, gender, age, pvalue, shopping, occupation, user_class_level, label)

read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read data error
read dat

In [None]:
----------------

In [None]:
train_data

In [None]:
aux_loss

In [None]:
expand_target_item = tf.tile(tf.expand_dims(target_item_emb, axis=1), multiples=[1,hist_gru_emb.numpy().shape[1],1])
tf.concat([hist_gru_emb, expand_target_item], axis=-1)


In [None]:
target_item_emb

In [None]:
a = tf.expand_dims(target_item_emb, axis=1)
a

In [None]:
b = tf.tile(a, multiples=[1,190,1])
b

In [None]:
tf.concat([hist_gru_emb, b], axis=-1)

In [None]:
hist_gru_emb.numpy().shape[1]

In [None]:
tf.keras.activations.softmax(tf.convert_to_tensor([1,2,3]), axis=-1)

In [None]:
gender_dim

In [None]:
tf.concat([tf.convert_to_tensor([1,2,3]), tf.convert_to_tensor([[2,4],[1,3],[3,4]])], -1)

In [None]:
epochs = 10
best_loss = 0.
best_auc = 0.
start_time = time.time()
for epoch in range(epochs):
    for 

In [None]:
model.layers

In [None]:
filename = os.listdir("/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/")
filelist = [os.path.join("/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/",file) for file in filename]
example_batch,label_batch = csvread(filelist)

In [None]:
filename = os.listdir("/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/")
[os.path.join('/nfs/project/boweihan_2/DIEN/dien_tf2/self_data/',file) for file in filename]

In [None]:
for i in enumerate(train_data, start = 1):
    print(i)

In [None]:
train_data.values

In [None]:
dataset.take(2)

## b. 使用hdfs全量数据训练

In [None]:
!head {"/nfs/project/boweihan_2/DIEN/dien_tf2/taobao_data/raw_sample.csv"}

In [None]:
LABEL_COLUMN = 'clk'
LABELS = [0, 1]

In [None]:
def get_dataset(file_path):
    dataset = tf.data.experimental.make_csv_dataset(
    file_path,
    batch_size=10, # 为了示例更容易展示，手动设置较小的值
    label_name=LABEL_COLUMN,
    na_value="",
    num_epochs=1,
    ignore_errors=False)
    return dataset

In [None]:
train_data = get_dataset("/nfs/project/boweihan_2/DIEN/dien_tf2/taobao_data/raw_sample.csv")
train_data

In [None]:
dataset = tf.data.experimental.CsvDataset(["/nfs/project/boweihan_2/DIEN/dien_tf2/taobao_data/raw_sample.csv"])
dataset

In [None]:
1

In [None]:
record_default = [[0.0]] * 12
ds = tf.data.experimental.CsvDataset("/nfs/project/boweihan_2/DIEN/dien_tf2/taobao_data/raw_sample.csv", record_default, header=True)
ds = ds.map(lambda *items: tf.stack(items))
for val in ds.take(1):
    print(val)


In [None]:
dataset = (tf.data.Dataset.from_tensor_slices("/nfs/project/boweihan_2/DIEN/dien_tf2/taobao_data/raw_sample.csv").interleave(lambda x:TextLineDataset(x).map(parse_fn, num_parallel_calls=1),cycle_length=4, block_length=16))

In [None]:
!pwd

In [None]:
dataset = dataset.flat_map(lambda x: tf.data.Dataset.from_tensor_slices(x))

In [None]:
!pwd

In [None]:
tensor = tf.constant([[[1, 2, 3], [4, 5, 6], [2, 3, 5]]])
tensor[:, :, 0]

In [None]:
x = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.transpose(x) 

In [None]:
x

In [None]:
tf.transpose(x, perm=[1, 0])

In [None]:
x = tf.constant([])

In [None]:
layers.PReLU

In [None]:
tf.reduce_mean(tf.constant([1,2,3,4]))

In [None]:
!pwd

In [None]:
import tensorflow as tf
import numpy as np
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(1000, 64, input_length=10))
# The model will take as input an integer matrix of size (batch,
# input_length), and the largest integer (i.e. word index) in the input
# should be no larger than 999 (vocabulary size).
# Now model.output_shape is (None, 10, 64), where `None` is the batch
# dimension.
input_array = np.random.randint(9, size=(32, 10))
model.compile('rmsprop', 'mse')
output_array = model.predict(input_array)
print(output_array.shape)

In [None]:
input_array

In [None]:
output_array

In [None]:
!ls

In [None]:
tensor = tf.constant([[[1, 2, 3], [4, 5, 6], [2, 3, 5]]])

In [None]:
tensor

In [None]:
tensor[:, 3]