In [1]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.


from os.path import join
import abc
import time
from tqdm import tqdm

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
from layer import cal_metric
from layer import SelfAttention

from layer import (
    AttLayer2,
    ComputeMasking,
    OverwriteMasking,
)

class BaseModel:
    """Basic class of models

    Attributes:
        hparams (obj): A tf.contrib.training.HParams object, hold the entire set of hyperparameters.
        iterator_creator_train (obj): An iterator to load the data in training steps.
        iterator_creator_train (obj): An iterator to load the data in testing steps.
        graph (obj): An optional graph.
        seed (int): Random seed.
    """

    def __init__(
        self,
        hparams,
        iterator_creator,
        seed=None,
    ):
        """Initializing the model. Create common logics which are needed by all deeprec models, such as loss function,
        parameter set.

        Args:
            hparams (obj): A tf.contrib.training.HParams object, hold the entire set of hyperparameters.
            iterator_creator_train (obj): An iterator to load the data in training steps.
            iterator_creator_train (obj): An iterator to load the data in testing steps.
            graph (obj): An optional graph.
            seed (int): Random seed.
        """
        self.seed = seed
        tf.compat.v1.set_random_seed(seed)
        np.random.seed(seed)

        self.train_iterator = iterator_creator(
            hparams,
            hparams.npratio,
            col_spliter="\t",
        )
        self.test_iterator = iterator_creator(
            hparams,
            col_spliter="\t",
        )

        self.hparams = hparams
        self.support_quick_scoring = hparams.support_quick_scoring

        # set GPU use with on demand growth
        gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        sess = tf.compat.v1.Session(
            config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        )

        # set this TensorFlow session as the default session for Keras
        tf.compat.v1.keras.backend.set_session(sess)

        # IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
        # Otherwise, their weights will be unavailable in the threads after the session there has been set
        self.model, self.scorer = self._build_graph()

        self.loss = self._get_loss()
        self.train_optimizer = self._get_opt()

        self.model.compile(loss=self.loss, optimizer=self.train_optimizer)

    def _init_embedding(self, file_path):
        """Load pre-trained embeddings as a constant tensor.

        Args:
            file_path (str): the pre-trained glove embeddings file path.

        Returns:
            np.array: A constant numpy array.
        """

        return np.load(file_path)

    @abc.abstractmethod
    def _build_graph(self):
        """Subclass will implement this."""
        pass

    @abc.abstractmethod
    def _get_input_label_from_iter(self, batch_data):
        """Subclass will implement this"""
        pass

    def _get_loss(self):
        """Make loss function, consists of data loss and regularization loss

        Returns:
            obj: Loss function or loss function name
        """
        if self.hparams.loss == "cross_entropy_loss":
            data_loss = "categorical_crossentropy"
        elif self.hparams.loss == "log_loss":
            data_loss = "binary_crossentropy"
        else:
            raise ValueError("this loss not defined {0}".format(self.hparams.loss))
        return data_loss

    def _get_opt(self):
        """Get the optimizer according to configuration. Usually we will use Adam.
        Returns:
            obj: An optimizer.
        """
        lr = self.hparams.learning_rate
        optimizer = self.hparams.optimizer

        if optimizer == "adam":
            train_opt = keras.optimizers.Adam(lr=lr)

        return train_opt

    def _get_pred(self, logit, task):
        """Make final output as prediction score, according to different tasks.

        Args:
            logit (obj): Base prediction value.
            task (str): A task (values: regression/classification)

        Returns:
            obj: Transformed score
        """
        if task == "regression":
            pred = tf.identity(logit)
        elif task == "classification":
            pred = tf.sigmoid(logit)
        else:
            raise ValueError(
                "method must be regression or classification, but now is {0}".format(
                    task
                )
            )
        return pred

    def train(self, train_batch_data):
        """Go through the optimization step once with training data in feed_dict.

        Args:
            sess (obj): The model session object.
            feed_dict (dict): Feed values to train the model. This is a dictionary that maps graph elements to values.

        Returns:
            list: A list of values, including update operation, total loss, data loss, and merged summary.
        """
        train_input, train_label = self._get_input_label_from_iter(train_batch_data)
        rslt = self.model.train_on_batch(train_input, train_label)
        return rslt

    def eval(self, eval_batch_data):
        """Evaluate the data in feed_dict with current model.

        Args:
            sess (obj): The model session object.
            feed_dict (dict): Feed values for evaluation. This is a dictionary that maps graph elements to values.

        Returns:
            list: A list of evaluated results, including total loss value, data loss value,
                predicted scores, and ground-truth labels.
        """
        eval_input, eval_label = self._get_input_label_from_iter(eval_batch_data)
        imp_index = eval_batch_data["impression_index_batch"]

        pred_rslt = self.scorer.predict_on_batch(eval_input)

        return pred_rslt, eval_label, imp_index

    def fit(
        self,
        train_news_file,
        train_behaviors_file,
        valid_news_file,
        valid_behaviors_file,
        test_news_file=None,
        test_behaviors_file=None,
    ):
        """Fit the model with train_file. Evaluate the model on valid_file per epoch to observe the training status.
        If test_news_file is not None, evaluate it too.

        Args:
            train_file (str): training data set.
            valid_file (str): validation set.
            test_news_file (str): test set.

        Returns:
            obj: An instance of self.
        """
        
        train_losses=[]
        val_losses=[]
        val_result=[]
        for epoch in range(1, self.hparams.epochs + 1):
            step = 0
            self.hparams.current_epoch = epoch
            epoch_loss = 0
            train_start = time.time()

            tqdm_util = tqdm(
                self.train_iterator.load_data_from_file(
                    train_news_file, train_behaviors_file
                )
            )

            for batch_data_input in tqdm_util:

                step_result = self.train(batch_data_input)
                step_data_loss = step_result

                epoch_loss += step_data_loss
                step += 1
                if step % self.hparams.show_step == 0:
                    tqdm_util.set_description(
                        "step {0:d} , total_loss: {1:.4f}, data_loss: {2:.4f}".format(
                            step, epoch_loss / step, step_data_loss
                        )
                    )
                    
            train_losses.append(epoch_loss/step)
            train_end = time.time()
            train_time = train_end - train_start

            eval_start = time.time()

            train_info = ",".join(
                [
                    str(item[0]) + ":" + str(item[1])
                    for item in [("logloss loss", epoch_loss / step)]
                ]
            )

            eval_res = self.run_eval(valid_news_file, valid_behaviors_file)
            
            val_result.append(eval_res)
            
            eval_info = ", ".join(
                [
                    str(item[0]) + ":" + str(item[1])
                    for item in sorted(eval_res.items(), key=lambda x: x[0])
                ]
            )
            if test_news_file is not None:
                test_res = self.run_eval(test_news_file, test_behaviors_file)
                test_info = ", ".join(
                    [
                        str(item[0]) + ":" + str(item[1])
                        for item in sorted(test_res.items(), key=lambda x: x[0])
                    ]
                )
            eval_end = time.time()
            eval_time = eval_end - eval_start

            if test_news_file is not None:
                print(
                    "at epoch {0:d}".format(epoch)
                    + "\ntrain info: "
                    + train_info
                    + "\neval info: "
                    + eval_info
                    + "\ntest info: "
                    + test_info
                )
            else:
                print(
                    "at epoch {0:d}".format(epoch)
                    + "\ntrain info: "
                    + train_info
                    + "\neval info: "
                    + eval_info
                )
            print(
                "at epoch {0:d} , train time: {1:.1f} eval time: {2:.1f}".format(
                    epoch, train_time, eval_time
                )
            )
            
            if epoch%5==0:
                self.model.save_weights(os.path.join('data/model_newlr', "lstur_lr3e-4_{}".format(epoch)))
        return train_losses, val_result

    def group_labels(self, labels, preds, group_keys):
        """Devide labels and preds into several group according to values in group keys.

        Args:
            labels (list): ground truth label list.
            preds (list): prediction score list.
            group_keys (list): group key list.

        Returns:
            all_labels: labels after group.
            all_preds: preds after group.

        """

        all_keys = list(set(group_keys))
        all_keys.sort()
        group_labels = {k: [] for k in all_keys}
        group_preds = {k: [] for k in all_keys}

        for l, p, k in zip(labels, preds, group_keys):
            group_labels[k].append(l)
            group_preds[k].append(p)

        all_labels = []
        all_preds = []
        for k in all_keys:
            all_labels.append(group_labels[k])
            all_preds.append(group_preds[k])

        return all_keys, all_labels, all_preds

    def run_eval(self, news_filename, behaviors_file):
        """Evaluate the given file and returns some evaluation metrics.

        Args:
            filename (str): A file name that will be evaluated.

        Returns:
            dict: A dictionary contains evaluation metrics.
        """

        if self.support_quick_scoring:
            _, group_labels, group_preds = self.run_fast_eval(
                news_filename, behaviors_file
            )
        else:
            _, group_labels, group_preds = self.run_slow_eval(
                news_filename, behaviors_file
            )
        res = cal_metric(group_labels, group_preds, self.hparams.metrics)

        return res

    def user(self, batch_user_input):
        user_input = self._get_user_feature_from_iter(batch_user_input)
        user_vec = self.userencoder.predict_on_batch(user_input)
        user_index = batch_user_input["impr_index_batch"]

        return user_index, user_vec

    def news(self, batch_news_input):
        news_input = self._get_news_feature_from_iter(batch_news_input)
        news_vec = self.newsencoder.predict_on_batch(news_input)
        news_index = batch_news_input["news_index_batch"]

        return news_index, news_vec

    def run_user(self, news_filename, behaviors_file):
        if not hasattr(self, "userencoder"):
            raise ValueError("model must have attribute userencoder")

        user_indexes = []
        user_vecs = []
        for batch_data_input in tqdm(
            self.test_iterator.load_user_from_file(news_filename, behaviors_file)
        ):
            user_index, user_vec = self.user(batch_data_input)
            user_indexes.extend(np.reshape(user_index, -1))
            user_vecs.extend(user_vec)

        return dict(zip(user_indexes, user_vecs))

    def run_news(self, news_filename):
        if not hasattr(self, "newsencoder"):
            raise ValueError("model must have attribute newsencoder")

        news_indexes = []
        news_vecs = []
        for batch_data_input in tqdm(
            self.test_iterator.load_news_from_file(news_filename)
        ):
            news_index, news_vec = self.news(batch_data_input)
            news_indexes.extend(np.reshape(news_index, -1))
            news_vecs.extend(news_vec)

        return dict(zip(news_indexes, news_vecs))

    def run_slow_eval(self, news_filename, behaviors_file):
        preds = []
        labels = []
        imp_indexes = []

        for batch_data_input in tqdm(
            self.test_iterator.load_data_from_file(news_filename, behaviors_file)
        ):
            step_pred, step_labels, step_imp_index = self.eval(batch_data_input)
            preds.extend(np.reshape(step_pred, -1))
            labels.extend(np.reshape(step_labels, -1))
            imp_indexes.extend(np.reshape(step_imp_index, -1))

        group_impr_indexes, group_labels, group_preds = self.group_labels(
            labels, preds, imp_indexes
        )
        return group_impr_indexes, group_labels, group_preds

    def run_fast_eval(self, news_filename, behaviors_file):
        news_vecs = self.run_news(news_filename)
        user_vecs = self.run_user(news_filename, behaviors_file)

        self.news_vecs = news_vecs
        self.user_vecs = user_vecs

        group_impr_indexes = []
        group_labels = []
        group_preds = []

        for (
            impr_index,
            news_index,
            user_index,
            label,
        ) in tqdm(self.test_iterator.load_impression_from_file(behaviors_file)):
            pred = np.dot(
                np.stack([news_vecs[i] for i in news_index], axis=0),
                user_vecs[impr_index],
            )
            group_impr_indexes.append(impr_index)
            group_labels.append(label)
            group_preds.append(pred)

        return group_impr_indexes, group_labels, group_preds




In [2]:
class LSTURModel(BaseModel):
    """LSTUR model(Neural News Recommendation with Multi-Head Self-Attention)

    Mingxiao An, Fangzhao Wu, Chuhan Wu, Kun Zhang, Zheng Liu and Xing Xie: 
    Neural News Recommendation with Long- and Short-term User Representations, ACL 2019

    Attributes:
        word2vec_embedding (numpy.array): Pretrained word embedding matrix.
        hparam (obj): Global hyper-parameters.
    """

    def __init__(self, hparams, iterator_creator, seed=None):
        """Initialization steps for LSTUR.
        Compared with the BaseModel, LSTUR need word embedding.
        After creating word embedding matrix, BaseModel's __init__ method will be called.
        
        Args:
            hparams (obj): Global hyper-parameters. Some key setttings such as type and gru_unit are there.
            iterator_creator_train(obj): LSTUR data loader class for train data.
            iterator_creator_test(obj): LSTUR data loader class for test and validation data
        """

        self.word2vec_embedding = self._init_embedding(hparams.wordEmb_file)
        self.hparam = hparams

        super().__init__(hparams, iterator_creator, seed=seed)

    def _get_input_label_from_iter(self, batch_data):
        input_feat = [
            batch_data["user_index_batch"],
            batch_data["clicked_title_batch"],
            batch_data["candidate_title_batch"],
        ]
        input_label = batch_data["labels"]
        return input_feat, input_label

    def _get_user_feature_from_iter(self, batch_data):
        return [batch_data["clicked_title_batch"], batch_data["user_index_batch"]]

    def _get_news_feature_from_iter(self, batch_data):
        return batch_data["candidate_title_batch"]

    def _build_graph(self):
        """Build LSTUR model and scorer.

        Returns:
            obj: a model used to train.
            obj: a model used to evaluate and inference.
        """

        model, scorer = self._build_lstur()
        return model, scorer

    def _build_userencoder(self, titleencoder, type="ini"):
        """The main function to create user encoder of LSTUR.

        Args:
            titleencoder(obj): the news encoder of LSTUR. 

        Return:
            obj: the user encoder of LSTUR.
        """
        hparams = self.hparams
        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        user_indexes = keras.Input(shape=(1,), dtype="int32")

        user_embedding_layer = layers.Embedding(
            len(self.train_iterator.uid2index),
            hparams.gru_unit,
            trainable=True,
            embeddings_initializer="zeros",
        )

        long_u_emb = layers.Reshape((hparams.gru_unit,))(
            user_embedding_layer(user_indexes)
        )
        click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)

        if type == "ini":
            user_present = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(
                layers.Masking(mask_value=0.0)(click_title_presents),
                initial_state=[long_u_emb],
            )
        elif type == "con":
            short_uemb = layers.GRU(
                hparams.gru_unit,
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                recurrent_initializer=keras.initializers.glorot_uniform(seed=self.seed),
                bias_initializer=keras.initializers.Zeros(),
            )(layers.Masking(mask_value=0.0)(click_title_presents))

            user_present = layers.Concatenate()([short_uemb, long_u_emb])
            user_present = layers.Dense(
                hparams.gru_unit,
                bias_initializer=keras.initializers.Zeros(),
                kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
            )(user_present)

        model = keras.Model(
            [his_input_title, user_indexes], user_present, name="user_encoder"
        )
        return model

    def _build_newsencoder(self, embedding_layer):
        """The main function to create news encoder of LSTUR.

        Args:
            embedding_layer(obj): a word embedding layer.
        
        Return:
            obj: the news encoder of LSTUR.
        """
        hparams = self.hparams
        sequences_input_title = keras.Input(shape=(hparams.title_size,), dtype="int32")
        embedded_sequences_title = embedding_layer(sequences_input_title)

        y = layers.Dropout(hparams.dropout)(embedded_sequences_title)
        y = layers.Conv1D(
            hparams.filter_num,
            hparams.window_size,
            activation=hparams.cnn_activation,
            padding="same",
            bias_initializer=keras.initializers.Zeros(),
            kernel_initializer=keras.initializers.glorot_uniform(seed=self.seed),
        )(y)
        print(y)
        y = layers.Dropout(hparams.dropout)(y)
        y = layers.Masking()(
            OverwriteMasking()([y, ComputeMasking()(sequences_input_title)])
        )
        pred_title = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y)
        print(pred_title)
        model = keras.Model(sequences_input_title, pred_title, name="news_encoder")
        return model

    def _build_lstur(self):
        """The main function to create LSTUR's logic. The core of LSTUR
        is a user encoder and a news encoder.
        
        Returns:
            obj: a model used to train.
            obj: a model used to evaluate and inference.
        """
        hparams = self.hparams

        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        pred_input_title = keras.Input(
            shape=(hparams.npratio + 1, hparams.title_size), dtype="int32"
        )
        pred_input_title_one = keras.Input(
            shape=(1, hparams.title_size,), dtype="int32"
        )
        pred_title_reshape = layers.Reshape((hparams.title_size,))(pred_input_title_one)
        user_indexes = keras.Input(shape=(1,), dtype="int32")

        embedding_layer = layers.Embedding(
            self.word2vec_embedding.shape[0],
            hparams.word_emb_dim,
            weights=[self.word2vec_embedding],
            trainable=True,
        )

        titleencoder = self._build_newsencoder(embedding_layer)
        self.userencoder = self._build_userencoder(titleencoder, type=hparams.type)
        self.newsencoder = titleencoder

        user_present = self.userencoder([his_input_title, user_indexes])
        news_present = layers.TimeDistributed(self.newsencoder)(pred_input_title)
        news_present_one = self.newsencoder(pred_title_reshape)

        preds = layers.Dot(axes=-1)([news_present, user_present])
        preds = layers.Activation(activation="softmax")(preds)

        pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
        pred_one = layers.Activation(activation="sigmoid")(pred_one)

        model = keras.Model([user_indexes, his_input_title, pred_input_title], preds)
        scorer = keras.Model(
            [user_indexes, his_input_title, pred_input_title_one], pred_one
        )

        return model, scorer


In [3]:
import sys
sys.path.append("../../")
import os
import numpy as np
import zipfile
from tqdm import tqdm
# import scrapbook as sb
from tempfile import TemporaryDirectory
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from utils import download_deeprec_resources 
from utils import prepare_hparams
from utils import get_mind_data_set
from iterator import MINDIterator

tf.__version__

'1.15.4'

In [4]:
epochs = 50
seed = 40
batch_size = 32

# Options: demo, small, large
MIND_type = 'large'

In [5]:
# tmpdir = TemporaryDirectory()
# data_path = tmpdir.name
data_path='data'


train_news_file = os.path.join(data_path, 'train', r'news.tsv')
train_behaviors_file = os.path.join(data_path, 'train', r'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
valid_behaviors_file = os.path.join(data_path, 'valid', r'behaviors.tsv')
wordEmb_file = os.path.join(data_path, "utils", "embedding_all.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict_all.pkl")
yaml_file = os.path.join(data_path, "utils", r'lstur.yaml')

mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)


In [6]:
hparams = prepare_hparams(yaml_file, 
                          wordEmb_file=wordEmb_file,
                          wordDict_file=wordDict_file, 
                          userDict_file=userDict_file,
                          batch_size=batch_size,
                          epochs=epochs)

hparams.learning_rate=3e-4
print(hparams)

data_format=news,iterator_type=None,support_quick_scoring=True,wordEmb_file=data/utils/embedding_all.npy,wordDict_file=data/utils/word_dict_all.pkl,userDict_file=data/utils/uid2index.pkl,vertDict_file=None,subvertDict_file=None,title_size=30,body_size=None,word_emb_dim=300,word_size=None,user_num=None,vert_num=None,subvert_num=None,his_size=50,npratio=4,dropout=0.2,attention_hidden_dim=200,head_num=4,head_dim=100,cnn_activation=relu,dense_activation=None,filter_num=400,window_size=3,vert_emb_dim=100,subvert_emb_dim=100,gru_unit=400,type=ini,user_emb_dim=50,learning_rate=0.0003,loss=cross_entropy_loss,optimizer=adam,epochs=50,batch_size=32,show_step=100000,metrics=['group_auc', 'mean_mrr', 'ndcg@5;10']


In [7]:
iterator = MINDIterator
model = LSTURModel(hparams, iterator, seed=seed)

Tensor("conv1d/Relu:0", shape=(?, 30, 400), dtype=float32)
Tensor("att_layer2/Sum_1:0", shape=(?, 400), dtype=float32)


In [8]:
%%time
train_results=model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)

1086it [03:15,  5.54it/s]
586it [00:02, 238.23it/s]
236it [00:10, 21.47it/s]
7538it [00:02, 2562.42it/s]
1it [00:00,  5.58it/s]

at epoch 1
train info: logloss loss:1.4713537685339841
eval info: group_auc:0.6127, mean_mrr:0.2745, ndcg@10:0.3648, ndcg@5:0.3004
at epoch 1 , train time: 195.9 eval time: 25.0


1086it [03:02,  5.97it/s]
586it [00:01, 356.59it/s]
236it [00:09, 24.01it/s]
7538it [00:02, 2848.88it/s]
1it [00:00,  5.84it/s]

at epoch 2
train info: logloss loss:1.367838222732421
eval info: group_auc:0.6317, mean_mrr:0.2906, ndcg@10:0.3801, ndcg@5:0.3175
at epoch 2 , train time: 182.0 eval time: 23.7


1086it [03:01,  5.98it/s]
586it [00:01, 344.45it/s]
236it [00:09, 23.85it/s]
7538it [00:02, 2871.67it/s]
1it [00:00,  5.73it/s]

at epoch 3
train info: logloss loss:1.2906187642125575
eval info: group_auc:0.6467, mean_mrr:0.2962, ndcg@10:0.391, ndcg@5:0.3271
at epoch 3 , train time: 181.6 eval time: 22.6


1086it [03:01,  5.97it/s]
586it [00:01, 345.79it/s]
236it [00:09, 23.97it/s]
7538it [00:02, 3391.10it/s]
1it [00:00,  6.06it/s]

at epoch 4
train info: logloss loss:1.2036339394185644
eval info: group_auc:0.6268, mean_mrr:0.2839, ndcg@10:0.3751, ndcg@5:0.3095
at epoch 4 , train time: 182.0 eval time: 23.1


1086it [02:59,  6.06it/s]
586it [00:01, 329.14it/s]
236it [00:09, 24.09it/s]
7538it [00:02, 3374.43it/s]


at epoch 5
train info: logloss loss:1.1289678005223776
eval info: group_auc:0.6394, mean_mrr:0.2955, ndcg@10:0.3883, ndcg@5:0.3205
at epoch 5 , train time: 179.1 eval time: 23.3


1086it [02:59,  6.06it/s]
586it [00:01, 320.87it/s]
236it [00:09, 24.05it/s]
7538it [00:02, 3243.25it/s]
1it [00:00,  5.65it/s]

at epoch 6
train info: logloss loss:1.0540410205584643
eval info: group_auc:0.6336, mean_mrr:0.2918, ndcg@10:0.3832, ndcg@5:0.3159
at epoch 6 , train time: 179.3 eval time: 22.8


1086it [02:57,  6.10it/s]
586it [00:01, 320.82it/s]
236it [00:09, 24.39it/s]
7538it [00:02, 3294.87it/s]
1it [00:00,  6.06it/s]

at epoch 7
train info: logloss loss:0.9804196330833611
eval info: group_auc:0.6294, mean_mrr:0.2893, ndcg@10:0.3814, ndcg@5:0.3162
at epoch 7 , train time: 178.0 eval time: 22.8


1086it [02:59,  6.04it/s]
586it [00:01, 364.29it/s]
236it [00:09, 23.96it/s]
7538it [00:02, 2705.33it/s]
1it [00:00,  5.60it/s]

at epoch 8
train info: logloss loss:0.8981508883488113
eval info: group_auc:0.6237, mean_mrr:0.2838, ndcg@10:0.3722, ndcg@5:0.3067
at epoch 8 , train time: 179.8 eval time: 23.3


1086it [03:00,  6.03it/s]
586it [00:01, 326.32it/s]
236it [00:09, 24.31it/s]
7538it [00:01, 3943.09it/s]
1it [00:00,  6.03it/s]

at epoch 9
train info: logloss loss:0.8255947938610836
eval info: group_auc:0.6358, mean_mrr:0.2936, ndcg@10:0.385, ndcg@5:0.32
at epoch 9 , train time: 180.0 eval time: 22.3


1086it [02:57,  6.12it/s]
586it [00:01, 350.01it/s]
236it [00:09, 24.19it/s]
7538it [00:01, 3981.74it/s]


at epoch 10
train info: logloss loss:0.7465287481004143
eval info: group_auc:0.6298, mean_mrr:0.2875, ndcg@10:0.3785, ndcg@5:0.3157
at epoch 10 , train time: 177.4 eval time: 22.3


1086it [02:56,  6.15it/s]
586it [00:01, 323.66it/s]
236it [00:09, 24.43it/s]
7538it [00:01, 4041.43it/s]
1it [00:00,  5.93it/s]

at epoch 11
train info: logloss loss:0.6813034349657993
eval info: group_auc:0.6234, mean_mrr:0.2829, ndcg@10:0.3717, ndcg@5:0.3079
at epoch 11 , train time: 176.7 eval time: 22.3


1086it [02:57,  6.12it/s]
586it [00:01, 309.30it/s]
236it [00:09, 24.31it/s]
7538it [00:01, 4011.44it/s]
1it [00:00,  5.56it/s]

at epoch 12
train info: logloss loss:0.6245043806827749
eval info: group_auc:0.6234, mean_mrr:0.2851, ndcg@10:0.3741, ndcg@5:0.3083
at epoch 12 , train time: 177.5 eval time: 22.3


1086it [02:57,  6.12it/s]
586it [00:01, 339.47it/s]
236it [00:09, 24.28it/s]
7538it [00:01, 3916.82it/s]
1it [00:00,  6.26it/s]

at epoch 13
train info: logloss loss:0.56537083806693
eval info: group_auc:0.619, mean_mrr:0.284, ndcg@10:0.373, ndcg@5:0.3075
at epoch 13 , train time: 177.5 eval time: 22.2


1086it [03:00,  6.02it/s]
586it [00:01, 323.16it/s]
236it [00:09, 23.89it/s]
7538it [00:02, 2778.86it/s]
1it [00:00,  6.14it/s]

at epoch 14
train info: logloss loss:0.5183553358687442
eval info: group_auc:0.6252, mean_mrr:0.2859, ndcg@10:0.3756, ndcg@5:0.311
at epoch 14 , train time: 180.3 eval time: 22.8


1086it [03:01,  5.98it/s]
586it [00:01, 331.55it/s]
236it [00:09, 23.92it/s]
7538it [00:01, 4421.24it/s]


at epoch 15
train info: logloss loss:0.47882760726395673
eval info: group_auc:0.6263, mean_mrr:0.2857, ndcg@10:0.3738, ndcg@5:0.31
at epoch 15 , train time: 181.6 eval time: 22.4


1086it [03:01,  5.99it/s]
586it [00:01, 337.95it/s]
236it [00:09, 23.98it/s]
7538it [00:02, 2914.47it/s]
1it [00:00,  5.47it/s]

at epoch 16
train info: logloss loss:0.4328786233031838
eval info: group_auc:0.6208, mean_mrr:0.2839, ndcg@10:0.3734, ndcg@5:0.3084
at epoch 16 , train time: 181.3 eval time: 22.7


1086it [02:58,  6.07it/s]
586it [00:01, 323.23it/s]
236it [00:09, 24.01it/s]
7538it [00:02, 3426.91it/s]
1it [00:00,  6.13it/s]

at epoch 17
train info: logloss loss:0.3988796986200423
eval info: group_auc:0.6233, mean_mrr:0.2881, ndcg@10:0.3765, ndcg@5:0.3128
at epoch 17 , train time: 178.8 eval time: 23.2


1086it [02:59,  6.06it/s]
586it [00:01, 340.22it/s]
236it [00:09, 24.12it/s]
7538it [00:02, 3576.28it/s]
1it [00:00,  5.72it/s]

at epoch 18
train info: logloss loss:0.37780462335074805
eval info: group_auc:0.6237, mean_mrr:0.2873, ndcg@10:0.3748, ndcg@5:0.3121
at epoch 18 , train time: 179.1 eval time: 22.6


1086it [03:01,  6.00it/s]
586it [00:01, 341.82it/s]
236it [00:09, 24.10it/s]
7538it [00:02, 2748.08it/s]
1it [00:00,  6.07it/s]

at epoch 19
train info: logloss loss:0.34994708079138076
eval info: group_auc:0.6198, mean_mrr:0.285, ndcg@10:0.3727, ndcg@5:0.3091
at epoch 19 , train time: 181.0 eval time: 22.6


1086it [03:01,  5.99it/s]
586it [00:01, 330.71it/s]
236it [00:09, 23.76it/s]
7538it [00:01, 3867.75it/s]


at epoch 20
train info: logloss loss:0.32976444670673233
eval info: group_auc:0.6204, mean_mrr:0.2862, ndcg@10:0.3743, ndcg@5:0.3111
at epoch 20 , train time: 181.4 eval time: 23.1


1086it [03:02,  5.95it/s]
586it [00:01, 421.74it/s]
236it [00:09, 24.29it/s]
7538it [00:02, 2613.10it/s]
1it [00:00,  5.87it/s]

at epoch 21
train info: logloss loss:0.30749534936981965
eval info: group_auc:0.622, mean_mrr:0.2844, ndcg@10:0.3747, ndcg@5:0.3097
at epoch 21 , train time: 182.4 eval time: 22.9


1086it [03:02,  5.94it/s]
586it [00:01, 452.06it/s]
236it [00:09, 23.94it/s]
7538it [00:02, 2648.04it/s]
1it [00:00,  5.81it/s]

at epoch 22
train info: logloss loss:0.28618806476051306
eval info: group_auc:0.6202, mean_mrr:0.2859, ndcg@10:0.3739, ndcg@5:0.3105
at epoch 22 , train time: 182.7 eval time: 23.8


1086it [03:04,  5.88it/s]
586it [00:01, 372.02it/s]
236it [00:09, 24.13it/s]
7538it [00:03, 2426.30it/s]
1it [00:00,  5.75it/s]

at epoch 23
train info: logloss loss:0.2790450444084834
eval info: group_auc:0.6172, mean_mrr:0.2826, ndcg@10:0.3697, ndcg@5:0.3078
at epoch 23 , train time: 184.7 eval time: 24.9


1086it [03:04,  5.87it/s]
586it [00:01, 346.72it/s]
236it [00:10, 23.13it/s]
7538it [00:03, 2048.00it/s]
1it [00:00,  5.43it/s]

at epoch 24
train info: logloss loss:0.26224854398813807
eval info: group_auc:0.6184, mean_mrr:0.2824, ndcg@10:0.3708, ndcg@5:0.3062
at epoch 24 , train time: 185.0 eval time: 25.9


1086it [03:04,  5.88it/s]
586it [00:01, 346.20it/s]
236it [00:10, 23.36it/s]
7538it [00:03, 2016.75it/s]


at epoch 25
train info: logloss loss:0.2517746432373921
eval info: group_auc:0.6149, mean_mrr:0.2824, ndcg@10:0.3703, ndcg@5:0.3058
at epoch 25 , train time: 184.6 eval time: 26.1


1086it [03:05,  5.86it/s]
586it [00:01, 356.53it/s]
236it [00:10, 23.29it/s]
7538it [00:04, 1724.07it/s]
1it [00:00,  5.45it/s]

at epoch 26
train info: logloss loss:0.24468194278948435
eval info: group_auc:0.6259, mean_mrr:0.2885, ndcg@10:0.3778, ndcg@5:0.3142
at epoch 26 , train time: 185.3 eval time: 26.8


1086it [03:06,  5.84it/s]
586it [00:01, 357.63it/s]
236it [00:09, 23.88it/s]
7538it [00:04, 1650.18it/s]
1it [00:00,  5.32it/s]

at epoch 27
train info: logloss loss:0.22973876987613093
eval info: group_auc:0.6197, mean_mrr:0.2858, ndcg@10:0.3741, ndcg@5:0.3108
at epoch 27 , train time: 186.1 eval time: 26.9


1086it [03:06,  5.83it/s]
586it [00:01, 360.35it/s]
236it [00:10, 23.37it/s]
7538it [00:03, 2025.21it/s]
1it [00:00,  5.77it/s]

at epoch 28
train info: logloss loss:0.22270132720806596
eval info: group_auc:0.6231, mean_mrr:0.2882, ndcg@10:0.3773, ndcg@5:0.3138
at epoch 28 , train time: 186.2 eval time: 26.1


1086it [03:05,  5.85it/s]
586it [00:01, 331.22it/s]
236it [00:09, 24.04it/s]
7538it [00:02, 2978.77it/s]
1it [00:00,  5.93it/s]

at epoch 29
train info: logloss loss:0.21212992040032236
eval info: group_auc:0.6204, mean_mrr:0.2868, ndcg@10:0.3753, ndcg@5:0.3117
at epoch 29 , train time: 185.7 eval time: 22.9


1086it [03:04,  5.89it/s]
586it [00:01, 351.47it/s]
236it [00:10, 23.47it/s]
7538it [00:03, 1967.92it/s]


at epoch 30
train info: logloss loss:0.2071587186860706
eval info: group_auc:0.6181, mean_mrr:0.2829, ndcg@10:0.3708, ndcg@5:0.3074
at epoch 30 , train time: 184.3 eval time: 25.9


1086it [03:04,  5.89it/s]
586it [00:01, 389.35it/s]
236it [00:10, 23.48it/s]
7538it [00:03, 1909.60it/s]
1it [00:00,  5.83it/s]

at epoch 31
train info: logloss loss:0.20428568221848203
eval info: group_auc:0.6187, mean_mrr:0.2863, ndcg@10:0.3736, ndcg@5:0.3118
at epoch 31 , train time: 184.4 eval time: 26.3


1086it [03:04,  5.89it/s]
586it [00:01, 381.83it/s]
236it [00:10, 23.40it/s]
7538it [00:03, 1944.24it/s]
1it [00:00,  5.81it/s]

at epoch 32
train info: logloss loss:0.19049582071602345
eval info: group_auc:0.6225, mean_mrr:0.2882, ndcg@10:0.3764, ndcg@5:0.3136
at epoch 32 , train time: 184.5 eval time: 26.3


1086it [03:05,  5.87it/s]
586it [00:01, 397.72it/s]
236it [00:10, 23.49it/s]
7538it [00:03, 2055.53it/s]
1it [00:00,  5.41it/s]

at epoch 33
train info: logloss loss:0.18443211014030936
eval info: group_auc:0.6171, mean_mrr:0.2829, ndcg@10:0.3705, ndcg@5:0.3074
at epoch 33 , train time: 185.1 eval time: 25.9


1086it [03:05,  5.86it/s]
586it [00:01, 382.01it/s]
236it [00:09, 23.72it/s]
7538it [00:04, 1621.58it/s]
1it [00:00,  5.12it/s]

at epoch 34
train info: logloss loss:0.18468079963205022
eval info: group_auc:0.6182, mean_mrr:0.2854, ndcg@10:0.372, ndcg@5:0.3112
at epoch 34 , train time: 185.2 eval time: 27.1


1086it [03:05,  5.87it/s]
586it [00:01, 395.24it/s]
236it [00:09, 24.08it/s]
7538it [00:02, 2761.24it/s]


at epoch 35
train info: logloss loss:0.1782173028251784
eval info: group_auc:0.6211, mean_mrr:0.2866, ndcg@10:0.3743, ndcg@5:0.3129
at epoch 35 , train time: 185.2 eval time: 24.6


1086it [03:03,  5.92it/s]
586it [00:01, 380.34it/s]
236it [00:10, 23.47it/s]
7538it [00:03, 1922.11it/s]
1it [00:00,  5.39it/s]

at epoch 36
train info: logloss loss:0.17686548285807172
eval info: group_auc:0.6197, mean_mrr:0.2843, ndcg@10:0.3732, ndcg@5:0.3081
at epoch 36 , train time: 183.5 eval time: 26.2


1086it [03:03,  5.91it/s]
586it [00:01, 353.43it/s]
236it [00:09, 23.91it/s]
7538it [00:03, 2422.31it/s]
1it [00:00,  6.17it/s]

at epoch 37
train info: logloss loss:0.1688809787261113
eval info: group_auc:0.617, mean_mrr:0.2822, ndcg@10:0.3698, ndcg@5:0.3091
at epoch 37 , train time: 183.9 eval time: 24.7


1086it [03:05,  5.85it/s]
586it [00:01, 351.15it/s]
236it [00:10, 23.24it/s]
7538it [00:04, 1684.00it/s]
1it [00:00,  5.53it/s]

at epoch 38
train info: logloss loss:0.1691346126625702
eval info: group_auc:0.6164, mean_mrr:0.2824, ndcg@10:0.3709, ndcg@5:0.3067
at epoch 38 , train time: 185.7 eval time: 27.0


1086it [03:06,  5.82it/s]
586it [00:01, 368.56it/s]
236it [00:10, 23.26it/s]
7538it [00:04, 1790.62it/s]
1it [00:00,  5.70it/s]

at epoch 39
train info: logloss loss:0.1651448489657327
eval info: group_auc:0.6203, mean_mrr:0.2879, ndcg@10:0.3751, ndcg@5:0.3126
at epoch 39 , train time: 186.8 eval time: 25.6


1086it [03:05,  5.87it/s]
586it [00:01, 340.10it/s]
236it [00:09, 23.67it/s]
7538it [00:03, 2336.89it/s]


at epoch 40
train info: logloss loss:0.16401585997262383
eval info: group_auc:0.6174, mean_mrr:0.283, ndcg@10:0.3699, ndcg@5:0.3071
at epoch 40 , train time: 185.1 eval time: 24.2


1086it [03:02,  5.97it/s]
586it [00:01, 426.19it/s]
236it [00:09, 23.67it/s]
7538it [00:03, 1980.34it/s]
1it [00:00,  5.31it/s]

at epoch 41
train info: logloss loss:0.15777124081091684
eval info: group_auc:0.6203, mean_mrr:0.2858, ndcg@10:0.3747, ndcg@5:0.3113
at epoch 41 , train time: 182.0 eval time: 25.7


1086it [03:01,  5.98it/s]
586it [00:01, 344.02it/s]
236it [00:09, 23.88it/s]
7538it [00:03, 2426.74it/s]
1it [00:00,  5.71it/s]

at epoch 42
train info: logloss loss:0.15856884594090817
eval info: group_auc:0.6192, mean_mrr:0.2878, ndcg@10:0.3755, ndcg@5:0.3138
at epoch 42 , train time: 181.6 eval time: 25.0


1086it [03:03,  5.91it/s]
586it [00:01, 342.13it/s]
236it [00:09, 23.94it/s]
7538it [00:03, 2106.40it/s]
1it [00:00,  5.72it/s]

at epoch 43
train info: logloss loss:0.15467934830982108
eval info: group_auc:0.6206, mean_mrr:0.2872, ndcg@10:0.3743, ndcg@5:0.3121
at epoch 43 , train time: 183.9 eval time: 23.6


1086it [03:03,  5.91it/s]
586it [00:01, 342.43it/s]
236it [00:09, 23.70it/s]
7538it [00:03, 2457.76it/s]
1it [00:00,  5.88it/s]

at epoch 44
train info: logloss loss:0.1462821456797517
eval info: group_auc:0.6176, mean_mrr:0.2854, ndcg@10:0.3728, ndcg@5:0.3099
at epoch 44 , train time: 183.8 eval time: 23.4


1086it [03:03,  5.92it/s]
586it [00:01, 371.48it/s]
236it [00:09, 23.67it/s]
7538it [00:03, 2431.88it/s]


at epoch 45
train info: logloss loss:0.14724512847524623
eval info: group_auc:0.6168, mean_mrr:0.2835, ndcg@10:0.3704, ndcg@5:0.306
at epoch 45 , train time: 183.4 eval time: 23.9


1086it [03:04,  5.89it/s]
586it [00:01, 351.94it/s]
236it [00:09, 23.84it/s]
7538it [00:02, 2857.52it/s]
1it [00:00,  5.47it/s]

at epoch 46
train info: logloss loss:0.1438441412895575
eval info: group_auc:0.613, mean_mrr:0.2819, ndcg@10:0.3671, ndcg@5:0.3042
at epoch 46 , train time: 184.2 eval time: 24.3


1086it [03:03,  5.91it/s]
586it [00:01, 354.01it/s]
236it [00:09, 23.89it/s]
7538it [00:03, 2451.27it/s]
1it [00:00,  5.89it/s]

at epoch 47
train info: logloss loss:0.14403545845005813
eval info: group_auc:0.619, mean_mrr:0.2851, ndcg@10:0.373, ndcg@5:0.3088
at epoch 47 , train time: 183.9 eval time: 23.1


1086it [03:04,  5.90it/s]
586it [00:01, 358.19it/s]
236it [00:09, 24.05it/s]
7538it [00:03, 2300.31it/s]
1it [00:00,  5.46it/s]

at epoch 48
train info: logloss loss:0.14103643729910598
eval info: group_auc:0.6136, mean_mrr:0.2817, ndcg@10:0.3679, ndcg@5:0.3048
at epoch 48 , train time: 184.1 eval time: 25.4


1086it [03:03,  5.92it/s]
586it [00:01, 384.29it/s]
236it [00:09, 23.88it/s]
7538it [00:03, 2083.72it/s]
1it [00:00,  5.41it/s]

at epoch 49
train info: logloss loss:0.1400783404040718
eval info: group_auc:0.6138, mean_mrr:0.2801, ndcg@10:0.3669, ndcg@5:0.3026
at epoch 49 , train time: 183.4 eval time: 25.6


1086it [03:03,  5.92it/s]
586it [00:01, 361.17it/s]
236it [00:09, 23.82it/s]
7538it [00:03, 2429.76it/s]


at epoch 50
train info: logloss loss:0.14093992197423022
eval info: group_auc:0.6108, mean_mrr:0.2798, ndcg@10:0.3654, ndcg@5:0.3026
at epoch 50 , train time: 183.3 eval time: 25.0
CPU times: user 5h 7min 30s, sys: 1h 12min 13s, total: 6h 19min 43s
Wall time: 2h 52min 37s


In [9]:
%%time
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)

586it [00:01, 352.55it/s]
236it [00:09, 23.74it/s]
7538it [00:03, 2280.38it/s]


{'group_auc': 0.6108, 'mean_mrr': 0.2798, 'ndcg@5': 0.3026, 'ndcg@10': 0.3654}
CPU times: user 1min 13s, sys: 1min 1s, total: 2min 15s
Wall time: 24.8 s


In [10]:
with open('lstur_loss_lr3e-4.txt', 'w') as filehandle:
    for listitem in train_results:
        for item in listitem:
            filehandle.write('%s\n' % item)


In [11]:
model_path = os.path.join(data_path, "model_newlr")
os.makedirs(model_path, exist_ok=True)

model.model.save_weights(os.path.join(model_path, "lstur_lr3e-4_50"))