In [1]:
import numpy as np

import gc
import os
import matplotlib.pyplot as plt

import tensorflow as tf
import tensorflow.keras.backend as K

from tensorflow.keras.layers import *
from tensorflow.python.keras.layers import Layer
from tensorflow.keras import regularizers

from tensorflow.keras.models import Model,load_model
from tensorflow.keras.utils import plot_model
from tensorflow.keras.callbacks import ModelCheckpoint,ReduceLROnPlateau,EarlyStopping

from tensorflow.keras import optimizers,initializers
from tensorflow.python.keras.initializers import glorot_normal
import joblib
import json

from tensorflow.keras import layers, Model
import utils
from utils import *
import importlib
importlib.reload(utils)

from features import *
from tensorflow.keras.callbacks import LambdaCallback


In [2]:
train = joblib.load('./data_and_feature/train.txt')
val = joblib.load('./data_and_feature/val.txt')
test = joblib.load('./data_and_feature/test.txt')
encoder = joblib.load('./data_and_feature/encoder.txt')

train_num = len(train)

In [3]:
embedding_feat_dict=read_json_file('/Users/bytedance/Desktop/wechat_multi_task_learning/config/embedding_feat_dict.json')

In [4]:
embedding_feat_dict

{'dense': ['videoplayseconds'],
 'sparse': {'userid': {'vocab_size': 20001,
   'embedding_dim': 14,
   'dtype': 'int64'},
  'feedid': {'vocab_size': 99172, 'embedding_dim': 16, 'dtype': 'int64'},
  'authorid': {'vocab_size': 18624, 'embedding_dim': 14, 'dtype': 'int64'},
  'bgm_song_id': {'vocab_size': 23740, 'embedding_dim': 14, 'dtype': 'int64'},
  'bgm_singer_id': {'vocab_size': 16603,
   'embedding_dim': 14,
   'dtype': 'int64'}},
 'sequence': {'manual_tag_list': {'vocab_size': 12, 'embedding_dim': 4},
  'manual_keyword_list': {'vocab_size': 19, 'embedding_dim': 4}}}

In [5]:
train.head()

Unnamed: 0,videoplayseconds,userid,feedid,authorid,bgm_song_id,bgm_singer_id,manual_tag_list,manual_keyword_list,read_comment,like,click_avatar,forward
1493914,1.503301,4095,6889,3254,749,666,"[44, 32, 9, 2, 0, 0, 0, 0, 0, 0, 0]","[1715, 1100, 707, 779, 0, 0, 0, 0, 0, 0, 0, 0,...",0.0,0.0,0.0,0.0
3165952,1.439569,8673,49911,4996,20,20,"[208, 90, 9, 2, 0, 0, 0, 0, 0, 0, 0]","[1176, 13155, 906, 13156, 0, 0, 0, 0, 0, 0, 0,...",0.0,0.0,0.0,0.0
6882393,1.63137,18789,11461,1828,2,2,"[182, 72, 9, 2, 0, 0, 0, 0, 0, 0, 0]","[183, 4224, 4225, 4226, 0, 0, 0, 0, 0, 0, 0, 0...",0.0,0.0,0.0,0.0
1659385,1.327761,4533,11871,2576,3734,2978,"[5, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0.0,0.0,0.0
3597267,1.248441,9815,20583,6880,6356,4961,"[5, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0.0,0.0,0.0,0.0


In [6]:
train.columns

Index(['videoplayseconds', 'userid', 'feedid', 'authorid', 'bgm_song_id',
       'bgm_singer_id', 'manual_tag_list', 'manual_keyword_list',
       'read_comment', 'like', 'click_avatar', 'forward'],
      dtype='object')

In [7]:
target = ["read_comment", "like", "click_avatar", "forward"]
sparse_features = ['userid', 'feedid', 'authorid', 'bgm_song_id', 'bgm_singer_id']
varlen_features = ['manual_tag_list','manual_keyword_list']
dense_features = ['videoplayseconds']

# 生成输入特征
# sparse_max_len = {f:len(encoder[f]) + 1 for f in sparse_features}
# varlens_max_len = {f:len(encoder[f]) + 1 for f in varlen_features}
feature_names = sparse_features+varlen_features+dense_features

# 构建输入数据
train_model_input = {name: train[name] if name not in varlen_features else np.stack(train[name]) for name in feature_names } #训练模型的输入，字典类型。名称和具体值
val_model_input = {name: val[name] if name not in varlen_features else np.stack(val[name]) for name in feature_names }
test_model_input = {name: test[name] if name not in varlen_features else np.stack(test[name]) for name in feature_names}

train_labels = [train[y].values for y in target]
val_labels = [val[y].values for y in target]



In [8]:
sparse_features = embedding_feat_dict['sparse']

sequence_features = embedding_feat_dict['sequence']

dense_features = embedding_feat_dict['dense']

task_names = ["read_comment", "like", "click_avatar", "forward"]


In [15]:
from tensorflow.keras.layers import *
from tensorflow.keras.initializers import TruncatedNormal


def MMoE_model(dense_features, sparse_features, varlen_features, encoder, task_names):
    # 输入层定义
    inputs = {}
    
    # 稠密特征输入
    for feat in dense_features:
        inputs[feat] = Input(shape=(1,), name=feat)
    
    # 稀疏特征输入
    for feat in sparse_features:
        inputs[feat] = Input(shape=(1,), name=feat)
    
    # 变长序列特征输入
    for feat in varlen_features:
        max_len=embedding_feat_dict['sequence'][feat]['vocab_size']-1
        inputs[feat] = Input(shape=(max_len,), name=feat)
    
    # 特征处理
    ## 稠密特征处理
    dense_embeddings = []
    for feat in dense_features:
        emb = Reshape((1,))(inputs[feat])  # 保持形状一致
        dense_embeddings.append(emb)
    
    ## 稀疏特征嵌入
    sparse_embeddings = []
    for feat in sparse_features:
        vocab_size = len(encoder[feat]) + 1
        emb_dim = min(6, int(np.sqrt(vocab_size)))  # 自适应嵌入维度
        emb = Embedding(vocab_size, emb_dim, name=f'emb_{feat}')(inputs[feat])
        emb = Reshape((emb_dim,))(emb)
        sparse_embeddings.append(emb)
    
    ## 变长序列特征处理
    varlen_embeddings = []
    for feat in varlen_features:
        vocab_size = len(encoder[feat]) + 1 if feat in encoder else 100
        emb_dim = min(6, int(np.sqrt(vocab_size)))
        emb = Embedding(vocab_size, emb_dim, name=f'emb_{feat}')(inputs[feat])
        emb = GlobalAveragePooling1D()(emb)  # 对序列做平均池化
        varlen_embeddings.append(emb)
    
    # 拼接所有特征
    concat_features = Concatenate()(dense_embeddings + sparse_embeddings + varlen_embeddings)
    
    # 专家网络参数
    num_experts = 4  # 专家数量
    expert_units = 64  # 每个专家的隐藏单元数
    experts = []
    
    # 创建专家网络
    for i in range(num_experts):
        expert = Dense(expert_units, activation='relu', 
                      kernel_initializer=TruncatedNormal(stddev=0.02),
                      name=f'expert_{i}')(concat_features)
        expert = Dense(expert_units, activation='relu',
                      kernel_initializer=TruncatedNormal(stddev=0.02),
                      name=f'expert_{i}_2')(expert)
        experts.append(expert)
    
    # 任务特定参数
    task_outputs = []
    for task_name in task_names:
        # 创建任务特定的门控网络
        gate = Dense(num_experts, activation='softmax', 
                     name=f'gate_{task_name}')(concat_features)
        
        # 加权专家输出
        weighted_experts = []
        for i in range(num_experts):
            weighted_expert = Lambda(lambda x: x[0] * x[1][:, i:i+1], 
                                   name=f'weighted_expert_{task_name}_{i}')([experts[i], gate])
            weighted_experts.append(weighted_expert)
        
        # 合并加权专家
        task_input = Add()(weighted_experts)
        
        # 任务特定塔网络
        tower = Dense(32, activation='relu',
                     kernel_initializer=TruncatedNormal(stddev=0.02),
                     name=f'tower_{task_name}_1')(task_input)
        tower = Dropout(0.2)(tower)
        tower = Dense(16, activation='relu',
                     kernel_initializer=TruncatedNormal(stddev=0.02),
                     name=f'tower_{task_name}_2')(tower)
        
        # 任务输出层
        task_output = Dense(1, activation='sigmoid',
                           name=task_name)(tower)
        task_outputs.append(task_output)
    
    # 创建模型
    model = Model(inputs=list(inputs.values()), outputs=task_outputs)
    
    return model

In [19]:
model = MMoE_model(dense_features, sparse_features, varlen_features, encoder, task_names)
from tensorflow.keras.optimizers import Adam

model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss={task: 'binary_crossentropy' for task in target},
    metrics={task: ['AUC'] for task in target}
)

model.summary()


In [22]:
model.fit(
    train_model_input,
    train_labels,
    batch_size=256,
    epochs=1,
    validation_split=0.1,
    verbose=1)

[1m23586/23586[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m128s[0m 5ms/step - click_avatar_AUC: 0.6944 - click_avatar_loss: 0.0425 - forward_AUC: 0.6428 - forward_loss: 0.0252 - like_AUC: 0.7888 - like_loss: 0.1016 - loss: 0.2720 - read_comment_AUC: 0.8989 - read_comment_loss: 0.1027 - val_click_avatar_AUC: 0.8238 - val_click_avatar_loss: 0.0362 - val_forward_AUC: 0.7923 - val_forward_loss: 0.0207 - val_like_AUC: 0.8559 - val_like_loss: 0.0896 - val_loss: 0.2417 - val_read_comment_AUC: 0.9370 - val_read_comment_loss: 0.0952


<keras.src.callbacks.history.History at 0x35f2ddfc0>