In [None]:
import warnings
warnings.filterwarnings("ignore")
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import tensorflow as tf
import random
from tensorflow.keras import backend as K
from tensorflow.keras.utils import plot_model # type: ignore

# Query the physical devices only. tf.test.is_gpu_available() is deprecated in
# favour of tf.config.list_physical_devices('GPU'); NOTE(review): the deprecated
# call is also reported to initialise the GPU runtime, which would happen before
# the allow_growth session below is created - confirm against the TF docs.
gpus = tf.config.list_physical_devices('GPU')
print(bool(gpus))
print(gpus)
# TF1-compat session that asks the GPU allocator to grow on demand.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.compat.v1.Session(config=config)

def seed_tensorflow(seed):
    """Seed every RNG used by the notebook and install a single-threaded session.

    Seeds Python's ``random``, NumPy and TensorFlow with ``seed``, exports
    ``PYTHONHASHSEED``, then registers a TF1-compat session limited to one
    intra-op and one inter-op thread as the Keras backend session.

    Args:
        seed: value passed to each seeding function (also stringified for
            the environment variable).
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

    single_thread_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1,
    )
    tf.compat.v1.keras.backend.set_session(
        tf.compat.v1.Session(config=single_thread_conf)
    )

In [None]:
# Target column to train and evaluate on. The label columns kept from
# action.csv by the data-loading cell are:
#   binary_train_label, short_play, effective_play, long_play, complete_play
select_label = 'effective_play'

# Checkpoint path for the prototype (CPF) model; the file name embeds the label.
# base_model = "./model/kuairand_backbone/mlp_{}.h5".format(select_label)
CPF_model = "./model/kuairand_CPF/mlp_{}.h5".format(select_label)

In [None]:
# Experiment configuration shared by the cells below.
# NOTE(review): several entries are not read by any code visible in this
# notebook (marked "not read below"); the corresponding values are hard-coded
# at their point of use instead.
para = {'data_path':'./kuai_input/',   # directory containing action.csv and embeddings.npy
        'embedding_dim':64,            # output width of every ID embedding built in mlp_bias()
        'seed':0,                      # passed to seed_tensorflow()
        'lr':1e-5,                     # Adam learning rate
        'batch_size':512,              # not read below: model.fit() hard-codes batch_size=512
        'epochs':64,                   # not read below: model.fit() hard-codes epochs=100
        'verbose':1,                   # forwarded to model.fit(verbose=...)
        'callback':{
                     'monitor':'val_auc',   # not read below: EarlyStopping monitors 'val_loss'
                     'patience':10,         # EarlyStopping patience (epochs)
                     'CPF_model':CPF_model  # ModelCheckpoint / load_weights path
                     },
        'mlp_dims':[256,128,64],       # not read below: mlp_bias() hard-codes 256/128/64
        'mlp_act':'relu',              # not read below
        'mlp_dps':[.5,.5,.5],          # not read below: mlp_bias() applies Dropout(0.5) twice
       }


In [None]:
# Load the interaction log and the pretrained embedding matrix.
action = pd.read_csv(para['data_path']+"action.csv")
feed_emb = np.load(para['data_path']+"embeddings.npy")

# Keep only the ID features and the candidate label columns.
action = action[['user_id','video_id','duration_level','tab','user_active_degree','author_id','music_id',
                 'short_play', 'binary_train_label', 'effective_play',
                 'long_play', 'complete_play']]
categories_to_keep = ['full_active', 'high_active', 'middle_active','low_active']

# .copy() makes the filtered frame independent of the original, so the column
# assignments below do not write into a slice (SettingWithCopy pattern).
action = action[action['user_active_degree'].isin(categories_to_keep)].copy()

# Ordinal encoding of the activity level (low -> full).
category_mapping = {
    'low_active': 0,
    'middle_active': 1,
    'high_active': 2,
    'full_active': 3
}

action['user_active_degree_encoded'] = action['user_active_degree'].map(category_mapping)
# NOTE(review): pd.factorize encodes missing values as -1, which is not a valid
# embedding index - confirm music_id has no NaN.
action['music_id_encoded'], _ = pd.factorize(action['music_id'])

In [None]:
# Per-user 60/20/20 split by row position: within each user's rows (in the
# order they appear in `action`), the first 60% go to train, the next 20% to
# validation and the remainder to test.
per_user = action.groupby('user_id')
train = per_user.apply(lambda rows: rows.iloc[:int(len(rows) * 0.6)]).reset_index(drop=True)
valid = per_user.apply(lambda rows: rows.iloc[int(len(rows) * 0.6):int(len(rows) * 0.8)]).reset_index(drop=True)
test = per_user.apply(lambda rows: rows.iloc[int(len(rows) * 0.8):]).reset_index(drop=True)

In [None]:
seed_tensorflow(seed=para['seed'])

# Columns fed to the model, in the exact order of the model inputs created by
# mlp_bias(): seven trainable ID embeddings, then 'video_id' a second time as
# the index into the frozen pretrained embedding (feed_emb), and finally the
# selected label column, which feeds the model's 'labels' input.
features = [
    'user_id',
    'video_id',
    'duration_level',
    'tab',
    'user_active_degree_encoded',
    'author_id',
    'music_id_encoded',
    'video_id',
    select_label,
]

# # short_play binary_train_label effective_play long_play complete_play
def get_input(df,is_test=False):
    """Turn a split DataFrame into Keras-style input and target lists.

    Args:
        df: frame containing every column named in the global ``features``
            list and the global ``select_label`` column.
        is_test: accepted for call-site symmetry; not used by the body.

    Returns:
        ``(X, y)`` where ``X`` holds one ``(n_rows, 1)`` array per entry of
        ``features`` (same order) and ``y`` is a one-element list with the
        ``(n_rows, 1)`` label array.
    """
    def as_column(name):
        # Column values as a 2-D array with a single column.
        return df[name].values.reshape(-1, 1)

    inputs = [as_column(name) for name in features]
    targets = [as_column(select_label)]
    return inputs, targets

# Materialise model inputs / targets for each split.
# NOTE: get_input() accepts is_test but its body does not use it, so all three
# calls build their arrays the same way.
X_train,y_train = get_input(train,is_test=False)
X_valid,y_valid = get_input(valid,is_test=False)
X_test,y_test = get_input(test,is_test=True)

In [None]:
seed_tensorflow(seed=para['seed'])

def get_layer(shape,name,dtype='int32',d1=None,d2=None,pretrain=None,trainable=False):
    """Build a tiny Sequential model: one Input followed by one Embedding.

    Args:
        shape: per-sample input shape handed to ``tf.keras.layers.Input``.
        name: prefix; the layers are named ``<name>_input`` and
            ``<name>_embedding``.
        dtype: dtype of the input layer.
        d1: embedding vocabulary size. When ``None`` both sizes are taken
            from ``pretrain.shape``.
        d2: embedding width. When ``None`` it falls back to
            ``pretrain.shape[1]``.
        pretrain: optional 2-D weight matrix used to initialise the embedding.
        trainable: whether the embedding weights are updated during training.

    Returns:
        The ``tf.keras.Sequential`` model.

    Raises:
        ValueError: if the embedding dimensions cannot be determined because
            neither explicit sizes nor ``pretrain`` were supplied.
    """
    if d1 is None:
        if pretrain is None:
            raise ValueError(
                "get_layer('{}'): pass either d1/d2 or a pretrain matrix".format(name))
        # Both sizes come from the pretrained matrix (overrides any d2 passed in).
        d1, d2 = pretrain.shape[0], pretrain.shape[1]
    elif d2 is None:
        if pretrain is None:
            raise ValueError(
                "get_layer('{}'): d2 is required when no pretrain matrix is given".format(name))
        d2 = pretrain.shape[1]

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Input(shape=shape, name=name+'_input', dtype=dtype))
    model.add(tf.keras.layers.Embedding(input_dim=int(d1),
                       output_dim=int(d2),
                       weights=[pretrain] if pretrain is not None else None,
                       trainable=trainable,
                       name=name+'_embedding'))
    return model
#'user_id','video_id','duration_level','tab','user_active_degree','author_id','music_id'
def mlp_bias(TRAIN):
    """Build the embedding + MLP backbone that outputs a 64-d instance vector.

    Seven ID features each get their own embedding of width
    ``para['embedding_dim']`` (vocabulary size = max ID in ``action`` + 1),
    plus a frozen embedding initialised from ``feed_emb``. The embeddings are
    concatenated and passed through Dense(256) -> Dropout(0.5) -> Dense(128)
    -> Dropout(0.5) -> Dense(64, name='instance').

    The returned model also declares a ``labels`` input that is not connected
    to the MLP; callers fetch it with ``get_layer("labels")``.

    Args:
        TRAIN: whether the seven ID embeddings are trainable.

    Returns:
        ``tf.keras.Model`` whose inputs are the eight embedding inputs followed
        by ``labels`` and whose single output is the ``instance`` tensor.
    """
    emb_dim = int(para['embedding_dim'])

    # (layer-name prefix, source column) in model-input order. The prefixes are
    # part of the layer names (e.g. 'duration_id_input'), so they must not change.
    id_features = [
        ('uid', 'user_id'),
        ('video_id', 'video_id'),
        ('duration_id', 'duration_level'),
        ('tab', 'tab'),
        ('user_active_degree_encoded', 'user_active_degree_encoded'),
        ('author_id', 'author_id'),
        ('music_id_encoded', 'music_id_encoded'),
    ]
    # Series.max() is vectorised; the builtin max() walks the column in Python.
    Lay_bi = [
        get_layer((1,), prefix, d1=action[column].max() + 1, d2=emb_dim, trainable=TRAIN)
        for prefix, column in id_features
    ]
    # Frozen embedding initialised from the pretrained matrix.
    Lay_bi.append(get_layer((1,), 'pretrain', pretrain=feed_emb, trainable=False))
    label_input = tf.keras.layers.Input(shape=(1,), name='labels')

    lay_bi_outs = [out for lay in Lay_bi for out in lay.outputs]

    vec = tf.keras.layers.concatenate(lay_bi_outs, axis=-1)
    # Each embedding output carries a length-1 axis from its (1,) input; drop it.
    vec = tf.squeeze(vec, axis=1)

    for idx, units in enumerate((256, 128)):
        vec = tf.keras.layers.Dense(units,
                                    activation='relu',
                                    name='mlp_dense{}'.format(idx))(vec)
        vec = tf.keras.layers.Dropout(0.5)(vec)

    instance = tf.keras.layers.Dense(64,
                                     activation='relu',
                                     name='instance')(vec)

    model_inputs = [inp for lay in Lay_bi for inp in lay.inputs]
    model_inputs += [label_input]
    model = tf.keras.Model(inputs=model_inputs, outputs=[instance])

    return model  # the assembled backbone model


In [None]:
from tensorflow.keras.models import Model
seed_tensorflow(seed=para['seed'])
# Backbone with trainable ID embeddings.
base_model_backbone = mlp_bias(True)


# Backbone output: the 64-unit 'instance' Dense layer, i.e. (batch, 64).
vec = base_model_backbone.outputs[0] # instance     dim=64
# Single sigmoid unit on top of the instance vector.
final_output = tf.keras.layers.Dense(1,"sigmoid")(vec)

# Baseline model: backbone inputs -> sigmoid prediction.
# NOTE(review): the name `model` is reassigned by the later cell that builds
# the Prototypes model, and this baseline is not compiled or fitted here.
model = Model(inputs=base_model_backbone.inputs, outputs=final_output)

model.summary()


# Generate Prototypes

In [None]:
# Initial prototype values drawn from N(0, 1), shape (10, 64); passed as
# init_prototypes to Prototypes(k=10, ...) in a later cell.
proto_tensor = tf.random.normal([10, 64], mean=0.0, stddev=1.0, dtype=tf.float32)

# prototype generation

In [None]:
class Prototypes(tf.keras.layers.Layer):
    """Prototype head that scores an instance against (negative, positive) prototype pairs.

    The layer owns ``k`` trainable prototype vectors laid out as ``k // 2``
    consecutive pairs; inside a pair, row 0 is the negative prototype and
    row 1 the positive one (see ``get_alpha`` and ``prototype_loss``). There is
    one single-unit sigmoid classifier per pair.

    ``call`` takes ``[vec, duration, label]``. As wired in this notebook these
    are the (batch, 64) instance vector, the (batch, 1) integer duration input
    and the (batch, 1) label input. The prediction is the softmax weight of
    the positive prototype of the pair selected by ``duration``, multiplied by
    that prototype's classifier score. ``label`` is only used for the loss
    terms registered through ``add_loss``.

    Args:
        k: number of prototypes; must be a positive even number.
        beta1, beta2, beta3: stored on the instance; not read by any method
            of this class.
        init_prototypes: optional ``(k, dim)`` array/tensor of initial
            prototype values. When omitted, prototypes are drawn from N(0, 1)
            with ``dim = 64``.
        **kwargs: forwarded to ``tf.keras.layers.Layer``; ``name`` defaults to
            ``'prototypes'``.
    """

    def __init__(self,
                 k ,
                 beta1=0.0,
                 beta2=0.0,
                 beta3=0.0,
                 init_prototypes = None,
                 **kwargs):
        # Name the layer through the regular Keras argument instead of a class
        # attribute, which would shadow the Layer.name property.
        kwargs.setdefault('name', 'prototypes')
        super(Prototypes, self).__init__(**kwargs)
        if k <= 0 or k % 2 != 0:
            raise ValueError("k must be a positive even number, got {}".format(k))
        self.k = k
        self.beta1, self.beta2, self.beta3 = beta1, beta2, beta3
        self.init_prototypes = init_prototypes
        # Prototype width: taken from the initial values, else the 64 that the
        # backbone's 'instance' layer produces.
        self.proto_dim = 64 if init_prototypes is None else int(init_prototypes.shape[-1])
        # One sigmoid classifier per (negative, positive) prototype pair.
        self.binary_classifiers = []
        for _ in range(self.k // 2):
            mlp = tf.keras.Sequential([
                tf.keras.layers.Dense(1, activation='sigmoid')
            ])
            mlp.build((None, self.proto_dim))
            self.binary_classifiers.append(mlp)

    def build(self, input_shape):
        """Create the trainable ``(1, k, dim)`` prototype weight."""
        if self.init_prototypes is None:
            # Same distribution the notebook uses for its explicit initial tensor.
            initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0)
        else:
            initializer = tf.keras.initializers.Constant(self.init_prototypes[tf.newaxis, :, :])
        self.prototypes = self.add_weight(
            name='prototypes',
            shape=(1, self.k, self.proto_dim),
            initializer=initializer,
            trainable=True
        )
        super(Prototypes, self).build(input_shape)

    def orthogonality_loss(self,prototypes, k):
        """Mean squared cosine similarity over all distinct prototype pairs.

        Args:
            prototypes: tensor reshapeable to ``(k, dim)``.
            k: number of prototypes; must be even.
        """
        # The caller must pass an even k.
        assert k % 2 == 0, "k must be even."
        D = k // 2

        prototypes = tf.reshape(prototypes, [k, -1])
        prototypes = tf.nn.l2_normalize(prototypes, axis=1)

        cosine_similarity_matrix = tf.matmul(prototypes, prototypes, transpose_b=True)

        # Strict upper triangle: upper band minus the diagonal.
        upper_triangular_part = tf.linalg.band_part(cosine_similarity_matrix, 0, -1)
        upper_triangular_part -= tf.linalg.band_part(cosine_similarity_matrix, 0, 0)

        loss = tf.reduce_sum(tf.square(upper_triangular_part))
        # D * (2D - 1) == k * (k - 1) / 2, the number of distinct pairs.
        normalization_factor = D * (2 * D - 1)
        loss /= normalization_factor

        return loss

    def contrastive_loss(self, vec, labels, duration):
        """Contrastive term over in-batch pairs sharing both label and duration.

        Pairwise cosine similarities are exponentiated and normalised by the
        sum over the WHOLE similarity matrix (not per row); the loss is the
        negative mean log of that quantity over the masked pairs. The mask
        includes the diagonal (every sample matches itself).
        """
        vec_norm = tf.nn.l2_normalize(vec, axis=1)
        sim_matrix = tf.matmul(vec_norm, vec_norm, transpose_b=True)

        # (batch, batch) mask of pairs with identical label and duration.
        label_eq = tf.equal(labels, tf.transpose(labels))
        duration_eq = tf.equal(duration, tf.transpose(duration))
        mask = tf.cast(tf.logical_and(label_eq, duration_eq), dtype=tf.float32)

        exp_sim = tf.exp(sim_matrix)
        total_sum = tf.reduce_sum(exp_sim)
        normal_sim = exp_sim / total_sum

        log_normal_sim = tf.math.log(normal_sim)
        weighted_log_prob = mask * log_normal_sim

        num_ones = tf.reduce_sum(mask)

        loss = -tf.reduce_sum(weighted_log_prob)
        return loss/num_ones

    def assign_loss(self,output, vec):
        """Return ``1 - mean cosine similarity`` between rows of ``output`` and ``vec``.

        NOTE(review): not used by ``call``.
        """
        output_norm = tf.nn.l2_normalize(output, axis=1)
        vec_norm = tf.nn.l2_normalize(vec, axis=1)

        cosine_similarity = tf.reduce_sum(tf.multiply(output_norm, vec_norm), axis=1)
        loss = 1 - tf.reduce_mean(cosine_similarity)
        return loss

    def get_alpha(self,n_batch,duration_one_hot_expanded,alpha_product):
        """Pick the (negative, positive) softmax weights of each sample's duration pair.

        Args:
            n_batch: batch size (scalar tensor or int).
            duration_one_hot_expanded: ``(n, k // 2, 1)`` one-hot pair selector.
            alpha_product: ``(n, k, 1)`` per-prototype softmax weights.

        Returns:
            ``(alpha_neg, alpha_pos)``, each of shape ``(n, 1)``.
        """
        expanded_alpha = tf.reshape(alpha_product, [n_batch, self.k // 2, 2, 1])
        masked = expanded_alpha * tf.cast(duration_one_hot_expanded, tf.float32)[:, :, :, tf.newaxis]
        # Summing over the pair axis keeps only the selected pair: (n, 2, 1).
        alpha_para = tf.reduce_sum(masked, axis=1)
        alpha_neg = alpha_para[:, 0, :]  # first element of the pair, last axis kept
        alpha_pos = alpha_para[:, 1, :]  # second element of the pair, last axis kept
        return alpha_neg,alpha_pos

    def mse_loss(self,vec,label):
        """Weighted MSE (0.7 positive / 0.3 negative) between samples and class-mean prototypes.

        NOTE(review): reads ``self.pos_mean_prototype`` and
        ``self.neg_mean_prototype``, which are not assigned anywhere in this
        class, so calling this raises ``AttributeError`` unless they are set
        externally. Not used by ``call``.
        """
        vec = tf.convert_to_tensor(vec, dtype=tf.float32)

        label = tf.convert_to_tensor(label, dtype=tf.float32)

        squeezed_label = tf.squeeze(label)

        positive_indices = tf.where(squeezed_label == 1)
        negative_indices = tf.where(squeezed_label == 0)

        positive_samples = tf.gather(vec, positive_indices)
        negative_samples = tf.gather(vec, negative_indices)
        positive_samples = tf.squeeze(positive_samples,axis=1)
        negative_samples = tf.squeeze(negative_samples,axis=1)
        positive_loss = tf.reduce_mean(tf.square(positive_samples - self.pos_mean_prototype))
        negative_loss = tf.reduce_mean(tf.square(negative_samples - self.neg_mean_prototype))

        total_loss = positive_loss * 0.7 + negative_loss * 0.3

        return total_loss

    def binary_loss(self,alpha, labels):
        """Keras binary cross-entropy of probabilities ``alpha`` against ``labels``.

        NOTE(review): not used by ``call``.
        """
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        loss = bce(labels, alpha)
        return loss

    def create_mask(self,duration, batch_size, num_classes=40, bucket_size=8):
        """Stack, per sample, a ``(num_classes, 1)`` column with ones in the sample's bucket.

        The ones occupy rows ``[duration * bucket_size, duration * bucket_size
        + bucket_size)``. Iterates with a Python loop over ``batch_size``.
        NOTE(review): not used by ``call``.
        """
        start_indices = duration * bucket_size
        masks = []
        for i in range(batch_size):
            mask = tf.concat([
                tf.zeros((start_indices[i, 0], 1), dtype=tf.float32),
                tf.ones((bucket_size, 1), dtype=tf.float32),
                tf.zeros((num_classes - start_indices[i, 0] - bucket_size, 1), dtype=tf.float32)
            ], axis=0)
            masks.append(mask)
        return tf.stack(masks)

    def binary_crossentropy_manual(self, y_true, y_pred,para):
        """Mean binary cross-entropy with positives weighted by ``para``.

        Args:
            y_true: 0/1 targets.
            y_pred: predicted probabilities (clipped away from 0 and 1).
            para: weight applied to samples whose target equals 1; all other
                samples have weight 1.0.
        """
        y_true = tf.convert_to_tensor(y_true, dtype=tf.float32)
        y_pred = tf.convert_to_tensor(y_pred, dtype=tf.float32)

        # Guard against log(0).
        epsilon = tf.keras.backend.epsilon()
        y_pred = tf.clip_by_value(y_pred, epsilon, 1. - epsilon)

        loss = -y_true * tf.math.log(y_pred) - (1 - y_true) * tf.math.log(1 - y_pred)

        weights = tf.where(y_true == 1, para, 1.0)
        weighted_loss = loss * weights

        return tf.reduce_mean(weighted_loss)

    def prototype_loss(self,prototypes_output):
        """Binary cross-entropy pushing even prototypes to 0 and odd prototypes to 1.

        Args:
            prototypes_output: ``(k, 1)`` classifier scores, one per prototype.
        """
        bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
        # Alternating targets 0, 1, 0, 1, ... (negative, positive per pair).
        labels = tf.constant([[0.0], [1.0]] * (self.k // 2), dtype=tf.float32)
        loss = bce(labels, prototypes_output)
        return loss

    def call(self, x, training=None):
        """Compute the prediction and register the auxiliary losses.

        Args:
            x: ``[vec, duration, label]`` (see the class docstring).
            training: unused.

        Returns:
            ``(batch, 1)`` tensor: positive-prototype softmax weight of the
            selected pair times that prototype's classifier score.
        """
        vec, duration, label = x
        n_batch = tf.shape(vec)[0]
        n_pairs = self.k // 2

        # Cosine similarity between every instance and every prototype.
        # l2_normalize clamps the squared norm from below, so an all-zero
        # instance (possible after the backbone's ReLU) gives similarity 0
        # instead of the NaN produced by dividing by a zero norm.
        inst_unit = tf.nn.l2_normalize(tf.expand_dims(vec, -2), axis=-1)    # (n, 1, dim)
        proto_unit = tf.nn.l2_normalize(self.prototypes, axis=-1)           # (1, k, dim)
        cos_similarity = tf.reduce_sum(inst_unit * proto_unit, axis=-1, keepdims=True)  # (n, k, 1)

        # Temperature-scaled softmax inside each (negative, positive) pair.
        temperature = 0.05
        pair_logits = tf.reshape(cos_similarity, (n_batch, n_pairs, 2)) / temperature
        pair_probs = tf.nn.softmax(pair_logits, axis=2)
        alpha_product = tf.reshape(pair_probs, (n_batch, self.k, 1))        # (n, k, 1)

        # One-hot selector of each sample's pair, e.g. 1 -> [0, 1, 0, 0, 0].
        duration_one_hot = tf.one_hot(duration, depth=n_pairs)
        duration_one_hot_expanded = tf.expand_dims(duration_one_hot, -1)
        duration_one_hot_expanded = tf.squeeze(duration_one_hot_expanded, axis=[1])  # (n, k // 2, 1)

        _, alpha_pos = self.get_alpha(n_batch, duration_one_hot_expanded, alpha_product)

        # Score every prototype with the classifier of the pair it belongs to.
        proto_matrix = tf.squeeze(self.prototypes, axis=0)                  # (k, dim)
        classifier_outputs = [clf(proto_matrix) for clf in self.binary_classifiers]
        prototype_output = tf.concat(
            [tf.gather(classifier_outputs[idx // 2], idx, axis=0) for idx in range(self.k)],
            axis=0)
        prototype_output = tf.expand_dims(prototype_output, axis=1)         # (k, 1)

        # Classifier score of the positive prototype in each sample's pair.
        positive_indices = tf.clip_by_value(2 * duration + 1, 0, self.k - 1)
        positive_values = tf.gather(prototype_output, positive_indices)     # (n, 1, 1)
        output = alpha_pos * tf.squeeze(positive_values, axis=-1)           # (n, 1)

        # Loss terms; weights are kept exactly as in the original notebook.
        binary_loss = self.binary_crossentropy_manual(label, output, 1.0)
        self.add_loss(1.0 * binary_loss)

        assign = self.binary_crossentropy_manual(label, alpha_pos, 1.0)
        self.add_loss(0.1 * assign)

        ortho_loss = self.orthogonality_loss(self.prototypes, self.k)
        self.add_loss(0.3 * ortho_loss)

        contrastive_loss = self.contrastive_loss(vec, label, duration)
        self.add_loss(0.12 * contrastive_loss)

        proto_loss = self.prototype_loss(prototype_output)
        self.add_loss(0.1 * proto_loss)

        return output

In [None]:
from tensorflow.keras.models import Model
seed_tensorflow(seed=para['seed'])
# Fresh backbone with trainable ID embeddings.
backbone = mlp_bias(True)
#backbone.load_weights("./model/kuairand_CPF/mlp_effect_backbone.h5")

# Tensors of the backbone's duration and label inputs, looked up by layer name
# ('duration_id_input' and 'labels' are created inside mlp_bias()).
duration_id_input_output = backbone.get_layer("duration_id_input").output
labels_output = backbone.get_layer("labels").output

# flag = 1 -> Prototypes head; flag = 0 -> plain single-unit sigmoid head.
flag = 1
if flag:
    prototypes_layer = Prototypes(k=10,init_prototypes = proto_tensor
                                  )

    # Inputs to the head: instance vector, duration input, label input.
    output = prototypes_layer([backbone.outputs[0],duration_id_input_output,labels_output])
    #output.shape
else:
    output = tf.keras.layers.Dense(1, activation='sigmoid')(backbone.outputs[0])


model = tf.keras.Model(inputs=backbone.inputs, outputs=[output])

seed_tensorflow(seed=para['seed'])

adam=tf.keras.optimizers.Adam(learning_rate=para['lr'])
# No `loss=` is passed: with the Prototypes head the objective is the sum of
# the terms the layer registers through add_loss().
# NOTE(review): the flag = 0 branch would have no loss at all - confirm before using it.
model.compile(optimizer=adam,
              metrics=['AUC'])




In [None]:
para['callback']['CPF_model']

In [None]:
seed_tensorflow(seed=para['seed'])

# NOTE(review): early stopping watches 'val_loss' while
# para['callback']['monitor'] is 'val_auc' and is not read here - confirm which
# metric is intended.
es_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',  # monitor the validation loss
    mode='min',          # lower validation loss is better
    patience=para['callback']['patience']  # epochs to wait without improvement
)

checkpoint_auc = tf.keras.callbacks.ModelCheckpoint(
    filepath=para['callback']['CPF_model'],  # where the weights are written
    monitor='val_auc',  # monitor the validation AUC
    mode='max',          # keep the weights with the highest validation AUC
    save_weights_only=True,  # store weights only, not the full model
    save_best_only=True  # overwrite only when the monitored value improves
)

# NOTE(review): epochs and batch_size are hard-coded here; para['epochs'] (64)
# and para['batch_size'] (512) are not used.
hist = model.fit(X_train,
                 y_train,
                 epochs=100,
                 batch_size=512,
                 shuffle=True,
                 verbose=para['verbose'],
                 callbacks=[checkpoint_auc,es_callback],
                 validation_data=(X_valid,y_valid))


# Test


In [None]:

# Inference on the test split.
# The bare string below is an inert expression; it appears to record the
# metrics of an earlier run - TODO confirm.
"""
auc: 0.7899478
logloss: 0.51047784
"""

seed_tensorflow(seed=para['seed'])
# Restore the checkpoint written by ModelCheckpoint (best validation AUC).
model.load_weights(para['callback']['CPF_model'])
# NOTE(review): X_test includes the label column because `features` ends with
# select_label and the model has a 'labels' input; in the Prototypes layer the
# label only enters the add_loss terms, not the returned prediction.
y_test_pred = model.predict(X_test,batch_size=10000)
auc_metric = tf.keras.metrics.AUC()
auc_metric.update_state(y_test[0], y_test_pred)
auc = auc_metric.result().numpy()

logloss_metric = tf.keras.metrics.BinaryCrossentropy()

logloss_metric.update_state(y_test[0], y_test_pred)
log_loss = logloss_metric.result().numpy()
print('auc:',auc)
print('logloss:',log_loss)