In [1]:
import sys
sys.path.append("../../")
import os
import numpy as np
import zipfile
from tqdm import tqdm
# import scrapbook as sb
from tempfile import TemporaryDirectory
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from utils import download_deeprec_resources 
from utils import prepare_hparams
from utils import get_mind_data_set
from iterator import MINDIterator
tf.__version__

'1.15.4'

In [2]:
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.


from os.path import join
import abc
import time
from tqdm import tqdm

import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import layers
from layer import cal_metric
from layer import SelfAttention

from layer import (
    AttLayer2,
    ComputeMasking,
    OverwriteMasking,
)

class BaseModel:
    """Basic class of models

    Attributes:
        hparams (obj): A tf.contrib.training.HParams object, hold the entire set of hyperparameters.
        iterator_creator_train (obj): An iterator to load the data in training steps.
        iterator_creator_train (obj): An iterator to load the data in testing steps.
        graph (obj): An optional graph.
        seed (int): Random seed.
    """

    def __init__(
        self,
        hparams,
        iterator_creator,
        seed=None,
    ):
        """Initializing the model. Create common logics which are needed by all deeprec models, such as loss function,
        parameter set.

        Args:
            hparams (obj): A tf.contrib.training.HParams object, hold the entire set of hyperparameters.
            iterator_creator_train (obj): An iterator to load the data in training steps.
            iterator_creator_train (obj): An iterator to load the data in testing steps.
            graph (obj): An optional graph.
            seed (int): Random seed.
        """
        self.seed = seed
        tf.compat.v1.set_random_seed(seed)
        np.random.seed(seed)

        self.train_iterator = iterator_creator(
            hparams,
            hparams.npratio,
            col_spliter="\t",
        )
        self.test_iterator = iterator_creator(
            hparams,
            col_spliter="\t",
        )

        self.hparams = hparams
        self.support_quick_scoring = hparams.support_quick_scoring

        # set GPU use with on demand growth
        gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
        sess = tf.compat.v1.Session(
            config=tf.compat.v1.ConfigProto(gpu_options=gpu_options)
        )

        # set this TensorFlow session as the default session for Keras
        tf.compat.v1.keras.backend.set_session(sess)

        # IMPORTANT: models have to be loaded AFTER SETTING THE SESSION for keras!
        # Otherwise, their weights will be unavailable in the threads after the session there has been set
        self.model, self.scorer = self._build_graph()

        self.loss = self._get_loss()
        self.train_optimizer = self._get_opt()

        self.model.compile(loss=self.loss, optimizer=self.train_optimizer)

    def _init_embedding(self, file_path):
        """Load pre-trained embeddings as a constant tensor.

        Args:
            file_path (str): the pre-trained glove embeddings file path.

        Returns:
            np.array: A constant numpy array.
        """

        return np.load(file_path)

    @abc.abstractmethod
    def _build_graph(self):
        """Subclass will implement this."""
        pass

    @abc.abstractmethod
    def _get_input_label_from_iter(self, batch_data):
        """Subclass will implement this"""
        pass

    def _get_loss(self):
        """Make loss function, consists of data loss and regularization loss

        Returns:
            obj: Loss function or loss function name
        """
        if self.hparams.loss == "cross_entropy_loss":
            data_loss = "categorical_crossentropy"
        elif self.hparams.loss == "log_loss":
            data_loss = "binary_crossentropy"
        else:
            raise ValueError("this loss not defined {0}".format(self.hparams.loss))
        return data_loss

    def _get_opt(self):
        """Get the optimizer according to configuration. Usually we will use Adam.
        Returns:
            obj: An optimizer.
        """
        lr = self.hparams.learning_rate
        optimizer = self.hparams.optimizer

        if optimizer == "adam":
            train_opt = keras.optimizers.Adam(lr=lr)

        return train_opt

    def _get_pred(self, logit, task):
        """Make final output as prediction score, according to different tasks.

        Args:
            logit (obj): Base prediction value.
            task (str): A task (values: regression/classification)

        Returns:
            obj: Transformed score
        """
        if task == "regression":
            pred = tf.identity(logit)
        elif task == "classification":
            pred = tf.sigmoid(logit)
        else:
            raise ValueError(
                "method must be regression or classification, but now is {0}".format(
                    task
                )
            )
        return pred

    def train(self, train_batch_data):
        """Go through the optimization step once with training data in feed_dict.

        Args:
            sess (obj): The model session object.
            feed_dict (dict): Feed values to train the model. This is a dictionary that maps graph elements to values.

        Returns:
            list: A list of values, including update operation, total loss, data loss, and merged summary.
        """
        train_input, train_label = self._get_input_label_from_iter(train_batch_data)
        rslt = self.model.train_on_batch(train_input, train_label)
        return rslt

    def eval(self, eval_batch_data):
        """Evaluate the data in feed_dict with current model.

        Args:
            sess (obj): The model session object.
            feed_dict (dict): Feed values for evaluation. This is a dictionary that maps graph elements to values.

        Returns:
            list: A list of evaluated results, including total loss value, data loss value,
                predicted scores, and ground-truth labels.
        """
        eval_input, eval_label = self._get_input_label_from_iter(eval_batch_data)
        imp_index = eval_batch_data["impression_index_batch"]

        pred_rslt = self.scorer.predict_on_batch(eval_input)

        return pred_rslt, eval_label, imp_index

    def fit(
        self,
        train_news_file,
        train_behaviors_file,
        valid_news_file,
        valid_behaviors_file,
        test_news_file=None,
        test_behaviors_file=None,
    ):
        """Fit the model with train_file. Evaluate the model on valid_file per epoch to observe the training status.
        If test_news_file is not None, evaluate it too.

        Args:
            train_file (str): training data set.
            valid_file (str): validation set.
            test_news_file (str): test set.

        Returns:
            obj: An instance of self.
        """
        
        train_losses=[]
        val_losses=[]
        val_result=[]
        for epoch in range(1, self.hparams.epochs + 1):
            step = 0
            self.hparams.current_epoch = epoch
            epoch_loss = 0
            train_start = time.time()

            tqdm_util = tqdm(
                self.train_iterator.load_data_from_file(
                    train_news_file, train_behaviors_file
                )
            )

            for batch_data_input in tqdm_util:

                step_result = self.train(batch_data_input)
                step_data_loss = step_result

                epoch_loss += step_data_loss
                step += 1
                if step % self.hparams.show_step == 0:
                    tqdm_util.set_description(
                        "step {0:d} , total_loss: {1:.4f}, data_loss: {2:.4f}".format(
                            step, epoch_loss / step, step_data_loss
                        )
                    )
                    
            train_losses.append(epoch_loss/step)
            train_end = time.time()
            train_time = train_end - train_start

            eval_start = time.time()

            train_info = ",".join(
                [
                    str(item[0]) + ":" + str(item[1])
                    for item in [("logloss loss", epoch_loss / step)]
                ]
            )

            eval_res = self.run_eval(valid_news_file, valid_behaviors_file)
            
            val_result.append(eval_res)
            
            eval_info = ", ".join(
                [
                    str(item[0]) + ":" + str(item[1])
                    for item in sorted(eval_res.items(), key=lambda x: x[0])
                ]
            )
            if test_news_file is not None:
                test_res = self.run_eval(test_news_file, test_behaviors_file)
                test_info = ", ".join(
                    [
                        str(item[0]) + ":" + str(item[1])
                        for item in sorted(test_res.items(), key=lambda x: x[0])
                    ]
                )
            eval_end = time.time()
            eval_time = eval_end - eval_start

            if test_news_file is not None:
                print(
                    "at epoch {0:d}".format(epoch)
                    + "\ntrain info: "
                    + train_info
                    + "\neval info: "
                    + eval_info
                    + "\ntest info: "
                    + test_info
                )
            else:
                print(
                    "at epoch {0:d}".format(epoch)
                    + "\ntrain info: "
                    + train_info
                    + "\neval info: "
                    + eval_info
                )
            print(
                "at epoch {0:d} , train time: {1:.1f} eval time: {2:.1f}".format(
                    epoch, train_time, eval_time
                )
            )
            
            if epoch%5==0:
                self.model.save_weights(os.path.join('data/model_newlr', "nmrs_3e-4_{}".format(epoch)))
        return train_losses, val_result

    def group_labels(self, labels, preds, group_keys):
        """Devide labels and preds into several group according to values in group keys.

        Args:
            labels (list): ground truth label list.
            preds (list): prediction score list.
            group_keys (list): group key list.

        Returns:
            all_labels: labels after group.
            all_preds: preds after group.

        """

        all_keys = list(set(group_keys))
        all_keys.sort()
        group_labels = {k: [] for k in all_keys}
        group_preds = {k: [] for k in all_keys}

        for l, p, k in zip(labels, preds, group_keys):
            group_labels[k].append(l)
            group_preds[k].append(p)

        all_labels = []
        all_preds = []
        for k in all_keys:
            all_labels.append(group_labels[k])
            all_preds.append(group_preds[k])

        return all_keys, all_labels, all_preds

    def run_eval(self, news_filename, behaviors_file):
        """Evaluate the given file and returns some evaluation metrics.

        Args:
            filename (str): A file name that will be evaluated.

        Returns:
            dict: A dictionary contains evaluation metrics.
        """

        if self.support_quick_scoring:
            _, group_labels, group_preds = self.run_fast_eval(
                news_filename, behaviors_file
            )
        else:
            _, group_labels, group_preds = self.run_slow_eval(
                news_filename, behaviors_file
            )
        res = cal_metric(group_labels, group_preds, self.hparams.metrics)

        return res

    def user(self, batch_user_input):
        user_input = self._get_user_feature_from_iter(batch_user_input)
        user_vec = self.userencoder.predict_on_batch(user_input)
        user_index = batch_user_input["impr_index_batch"]

        return user_index, user_vec

    def news(self, batch_news_input):
        news_input = self._get_news_feature_from_iter(batch_news_input)
        news_vec = self.newsencoder.predict_on_batch(news_input)
        news_index = batch_news_input["news_index_batch"]

        return news_index, news_vec

    def run_user(self, news_filename, behaviors_file):
        if not hasattr(self, "userencoder"):
            raise ValueError("model must have attribute userencoder")

        user_indexes = []
        user_vecs = []
        for batch_data_input in tqdm(
            self.test_iterator.load_user_from_file(news_filename, behaviors_file)
        ):
            user_index, user_vec = self.user(batch_data_input)
            user_indexes.extend(np.reshape(user_index, -1))
            user_vecs.extend(user_vec)

        return dict(zip(user_indexes, user_vecs))

    def run_news(self, news_filename):
        if not hasattr(self, "newsencoder"):
            raise ValueError("model must have attribute newsencoder")

        news_indexes = []
        news_vecs = []
        for batch_data_input in tqdm(
            self.test_iterator.load_news_from_file(news_filename)
        ):
            news_index, news_vec = self.news(batch_data_input)
            news_indexes.extend(np.reshape(news_index, -1))
            news_vecs.extend(news_vec)

        return dict(zip(news_indexes, news_vecs))

    def run_slow_eval(self, news_filename, behaviors_file):
        preds = []
        labels = []
        imp_indexes = []

        for batch_data_input in tqdm(
            self.test_iterator.load_data_from_file(news_filename, behaviors_file)
        ):
            step_pred, step_labels, step_imp_index = self.eval(batch_data_input)
            preds.extend(np.reshape(step_pred, -1))
            labels.extend(np.reshape(step_labels, -1))
            imp_indexes.extend(np.reshape(step_imp_index, -1))

        group_impr_indexes, group_labels, group_preds = self.group_labels(
            labels, preds, imp_indexes
        )
        return group_impr_indexes, group_labels, group_preds

    def run_fast_eval(self, news_filename, behaviors_file):
        news_vecs = self.run_news(news_filename)
        user_vecs = self.run_user(news_filename, behaviors_file)

        self.news_vecs = news_vecs
        self.user_vecs = user_vecs

        group_impr_indexes = []
        group_labels = []
        group_preds = []

        for (
            impr_index,
            news_index,
            user_index,
            label,
        ) in tqdm(self.test_iterator.load_impression_from_file(behaviors_file)):
            pred = np.dot(
                np.stack([news_vecs[i] for i in news_index], axis=0),
                user_vecs[impr_index],
            )
            group_impr_indexes.append(impr_index)
            group_labels.append(label)
            group_preds.append(pred)

        return group_impr_indexes, group_labels, group_preds




In [3]:
class NRMSModel(BaseModel):
    """NRMS model(Neural News Recommendation with Multi-Head Self-Attention)

    Chuhan Wu, Fangzhao Wu, Suyu Ge, Tao Qi, Yongfeng Huang,and Xing Xie, "Neural News
    Recommendation with Multi-Head Self-Attention" in Proceedings of the 2019 Conference 
    on Empirical Methods in Natural Language Processing and the 9th International Joint Conference 
    on Natural Language Processing (EMNLP-IJCNLP)

    Attributes:
        word2vec_embedding (numpy.array): Pretrained word embedding matrix.
        hparam (obj): Global hyper-parameters.
    """

    def __init__(
        self, hparams, iterator_creator, seed=None,
    ):
        """Initialization steps for NRMS.
        Compared with the BaseModel, NRMS need word embedding.
        After creating word embedding matrix, BaseModel's __init__ method will be called.
        
        Args:
            hparams (obj): Global hyper-parameters. Some key setttings such as head_num and head_dim are there.
            iterator_creator_train(obj): NRMS data loader class for train data.
            iterator_creator_test(obj): NRMS data loader class for test and validation data
        """
        self.word2vec_embedding = self._init_embedding(hparams.wordEmb_file)

        super().__init__(
            hparams, iterator_creator, seed=seed,
        )

    def _get_input_label_from_iter(self, batch_data):
        """ get input and labels for trainning from iterator

        Args: 
            batch data: input batch data from iterator

        Returns:
            list: input feature fed into model (clicked_title_batch & candidate_title_batch)
            array: labels
        """
        input_feat = [
            batch_data["clicked_title_batch"],
            batch_data["candidate_title_batch"],
        ]
        input_label = batch_data["labels"]
        return input_feat, input_label

    def _get_user_feature_from_iter(self, batch_data):
        """ get input of user encoder 
        Args:
            batch_data: input batch data from user iterator
        
        Returns:
            array: input user feature (clicked title batch)
        """
        return batch_data["clicked_title_batch"]

    def _get_news_feature_from_iter(self, batch_data):
        """ get input of news encoder
        Args:
            batch_data: input batch data from news iterator
        
        Returns:
            array: input news feature (candidate title batch)
        """
        return batch_data["candidate_title_batch"]

    def _build_graph(self):
        """Build NRMS model and scorer.

        Returns:
            obj: a model used to train.
            obj: a model used to evaluate and inference.
        """
        hparams = self.hparams
        model, scorer = self._build_nrms()
        return model, scorer

    def _build_userencoder(self, titleencoder):
        """The main function to create user encoder of NRMS.

        Args:
            titleencoder(obj): the news encoder of NRMS. 

        Return:
            obj: the user encoder of NRMS.
        """
        hparams = self.hparams
        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )

        click_title_presents = layers.TimeDistributed(titleencoder)(his_input_title)
        y = SelfAttention(hparams.head_num, hparams.head_dim, seed=self.seed)(
            [click_title_presents] * 3
        )
        user_present = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y)

        model = keras.Model(his_input_title, user_present, name="user_encoder")
        return model

    def _build_newsencoder(self, embedding_layer):
        """The main function to create news encoder of NRMS.

        Args:
            embedding_layer(obj): a word embedding layer.
        
        Return:
            obj: the news encoder of NRMS.
        """
        hparams = self.hparams
        sequences_input_title = keras.Input(shape=(hparams.title_size,), dtype="int32")

        embedded_sequences_title = embedding_layer(sequences_input_title)

        y = layers.Dropout(hparams.dropout)(embedded_sequences_title)
        y = SelfAttention(hparams.head_num, hparams.head_dim, seed=self.seed)([y, y, y])
        y = layers.Dropout(hparams.dropout)(y)
        pred_title = AttLayer2(hparams.attention_hidden_dim, seed=self.seed)(y)

        model = keras.Model(sequences_input_title, pred_title, name="news_encoder")
        return model

    def _build_nrms(self):
        """The main function to create NRMS's logic. The core of NRMS
        is a user encoder and a news encoder.
        
        Returns:
            obj: a model used to train.
            obj: a model used to evaluate and inference.
        """
        hparams = self.hparams

        his_input_title = keras.Input(
            shape=(hparams.his_size, hparams.title_size), dtype="int32"
        )
        pred_input_title = keras.Input(
            shape=(hparams.npratio + 1, hparams.title_size), dtype="int32"
        )
        pred_input_title_one = keras.Input(
            shape=(1, hparams.title_size,), dtype="int32"
        )
        pred_title_one_reshape = layers.Reshape((hparams.title_size,))(
            pred_input_title_one
        )

        embedding_layer = layers.Embedding(
            self.word2vec_embedding.shape[0],
            hparams.word_emb_dim,
            weights=[self.word2vec_embedding],
            trainable=True,
        )

        titleencoder = self._build_newsencoder(embedding_layer)
        self.userencoder = self._build_userencoder(titleencoder)
        self.newsencoder = titleencoder

        user_present = self.userencoder(his_input_title)
        news_present = layers.TimeDistributed(self.newsencoder)(pred_input_title)
        news_present_one = self.newsencoder(pred_title_one_reshape)

        preds = layers.Dot(axes=-1)([news_present, user_present])
        preds = layers.Activation(activation="softmax")(preds)

        pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
        pred_one = layers.Activation(activation="sigmoid")(pred_one)

        model = keras.Model([his_input_title, pred_input_title], preds)
        scorer = keras.Model([his_input_title, pred_input_title_one], pred_one)

        return model, scorer

In [4]:
epochs = 50
seed = 42
batch_size = 32

# Options: demo, small, large


In [5]:

MIND_type = 'large'

# tmpdir = TemporaryDirectory()
# data_path = tmpdir.name

#select a nontemporary folder to avoid re-download
data_path = 'data'

train_news_file = os.path.join(data_path, 'train', r'news.tsv')
train_behaviors_file = os.path.join(data_path, 'train', r'behaviors.tsv')
valid_news_file = os.path.join(data_path, 'valid', r'news.tsv')
valid_behaviors_file = os.path.join(data_path, 'valid', r'behaviors.tsv')
wordEmb_file = os.path.join(data_path, "utils", "embedding.npy")
userDict_file = os.path.join(data_path, "utils", "uid2index.pkl")
wordDict_file = os.path.join(data_path, "utils", "word_dict.pkl")
yaml_file = os.path.join(data_path, "utils", r'nrms.yaml')

mind_url, mind_train_dataset, mind_dev_dataset, mind_utils = get_mind_data_set(MIND_type)



In [6]:
hparams = prepare_hparams(yaml_file, 
                          wordEmb_file=wordEmb_file,
                          wordDict_file=wordDict_file, 
                          userDict_file=userDict_file,
                          batch_size=batch_size,
                          epochs=epochs,
                          show_step=10)
hparams.learning_rate=3e-4

print(hparams)

data_format=news,iterator_type=None,support_quick_scoring=True,wordEmb_file=data/utils/embedding.npy,wordDict_file=data/utils/word_dict.pkl,userDict_file=data/utils/uid2index.pkl,vertDict_file=None,subvertDict_file=None,title_size=30,body_size=None,word_emb_dim=300,word_size=None,user_num=None,vert_num=None,subvert_num=None,his_size=50,npratio=4,dropout=0.2,attention_hidden_dim=200,head_num=20,head_dim=20,cnn_activation=None,dense_activation=None,filter_num=200,window_size=3,vert_emb_dim=100,subvert_emb_dim=100,gru_unit=400,type=ini,user_emb_dim=50,learning_rate=0.0003,loss=cross_entropy_loss,optimizer=adam,epochs=50,batch_size=32,show_step=10,metrics=['group_auc', 'mean_mrr', 'ndcg@5;10']


In [7]:
iterator = MINDIterator

In [8]:
model = NRMSModel(hparams, iterator, seed=seed)

In [9]:
pre_trained_metric=model.run_eval(valid_news_file, valid_behaviors_file)

586it [00:02, 208.26it/s]
236it [00:05, 44.28it/s]
7538it [00:02, 3318.67it/s]


In [10]:
%%time
train_results=model.fit(train_news_file, train_behaviors_file, valid_news_file, valid_behaviors_file)

step 1080 , total_loss: 1.4681, data_loss: 1.1930: : 1086it [01:23, 12.97it/s]
586it [00:01, 305.84it/s]
236it [00:04, 50.29it/s]
7538it [00:02, 3097.64it/s]
2it [00:00, 12.22it/s]

at epoch 1
train info: logloss loss:1.467323507612801
eval info: group_auc:0.6046, mean_mrr:0.2601, ndcg@10:0.3529, ndcg@5:0.2788
at epoch 1 , train time: 83.7 eval time: 18.7


step 1080 , total_loss: 1.3596, data_loss: 1.2187: : 1086it [01:19, 13.58it/s]
586it [00:01, 297.16it/s]
236it [00:04, 50.96it/s]
7538it [00:02, 2738.69it/s]
2it [00:00, 12.37it/s]

at epoch 2
train info: logloss loss:1.3600232077566958
eval info: group_auc:0.6216, mean_mrr:0.2744, ndcg@10:0.3688, ndcg@5:0.2952
at epoch 2 , train time: 80.0 eval time: 17.9


step 1080 , total_loss: 1.3110, data_loss: 1.1483: : 1086it [01:20, 13.53it/s]
586it [00:01, 295.27it/s]
236it [00:04, 50.14it/s]
7538it [00:02, 3406.53it/s]
2it [00:00, 12.79it/s]

at epoch 3
train info: logloss loss:1.310909848461274
eval info: group_auc:0.6207, mean_mrr:0.2781, ndcg@10:0.3725, ndcg@5:0.3019
at epoch 3 , train time: 80.3 eval time: 17.5


step 1080 , total_loss: 1.2723, data_loss: 1.1987: : 1086it [01:20, 13.53it/s]
586it [00:01, 299.31it/s]
236it [00:04, 49.56it/s]
7538it [00:02, 3740.79it/s]
2it [00:00, 12.93it/s]

at epoch 4
train info: logloss loss:1.2723403669084195
eval info: group_auc:0.6257, mean_mrr:0.2791, ndcg@10:0.3748, ndcg@5:0.3054
at epoch 4 , train time: 80.2 eval time: 17.4


step 1080 , total_loss: 1.2280, data_loss: 1.1777: : 1086it [01:20, 13.54it/s]
586it [00:01, 306.34it/s]
236it [00:04, 50.09it/s]
7538it [00:02, 3107.52it/s]


at epoch 5
train info: logloss loss:1.2284527074675973
eval info: group_auc:0.6175, mean_mrr:0.2784, ndcg@10:0.3704, ndcg@5:0.3008
at epoch 5 , train time: 80.2 eval time: 17.4


step 1080 , total_loss: 1.1916, data_loss: 1.2666: : 1086it [01:20, 13.54it/s]
586it [00:01, 296.50it/s]
236it [00:04, 50.67it/s]
7538it [00:02, 3183.77it/s]
2it [00:00, 12.58it/s]

at epoch 6
train info: logloss loss:1.192296288729153
eval info: group_auc:0.6353, mean_mrr:0.2883, ndcg@10:0.3827, ndcg@5:0.3148
at epoch 6 , train time: 80.2 eval time: 17.9


step 1080 , total_loss: 1.1567, data_loss: 1.2623: : 1086it [01:20, 13.51it/s]
586it [00:01, 304.53it/s]
236it [00:04, 51.45it/s]
7538it [00:02, 2952.73it/s]
2it [00:00, 12.94it/s]

at epoch 7
train info: logloss loss:1.1566130410241817
eval info: group_auc:0.6337, mean_mrr:0.2916, ndcg@10:0.3848, ndcg@5:0.3157
at epoch 7 , train time: 80.4 eval time: 18.0


step 1080 , total_loss: 1.1144, data_loss: 1.1861: : 1086it [01:19, 13.72it/s]
586it [00:01, 310.00it/s]
236it [00:04, 53.20it/s]
7538it [00:02, 3369.48it/s]
2it [00:00, 12.78it/s]

at epoch 8
train info: logloss loss:1.11420575086144
eval info: group_auc:0.6291, mean_mrr:0.2874, ndcg@10:0.3797, ndcg@5:0.3127
at epoch 8 , train time: 79.2 eval time: 17.1


step 1080 , total_loss: 1.0748, data_loss: 1.1230: : 1086it [01:19, 13.72it/s]
586it [00:01, 297.34it/s]
236it [00:04, 50.37it/s]
7538it [00:02, 3634.17it/s]
2it [00:00, 13.01it/s]

at epoch 9
train info: logloss loss:1.0749805745279373
eval info: group_auc:0.6281, mean_mrr:0.2878, ndcg@10:0.38, ndcg@5:0.3133
at epoch 9 , train time: 79.2 eval time: 17.1


step 1080 , total_loss: 1.0340, data_loss: 0.9224: : 1086it [01:19, 13.74it/s]
586it [00:01, 306.68it/s]
236it [00:04, 50.59it/s]
7538it [00:02, 3509.74it/s]
0it [00:00, ?it/s]

at epoch 10
train info: logloss loss:1.0338402505737643
eval info: group_auc:0.6251, mean_mrr:0.2877, ndcg@10:0.3802, ndcg@5:0.3136
at epoch 10 , train time: 79.0 eval time: 17.0


step 1080 , total_loss: 0.9868, data_loss: 0.8007: : 1086it [01:19, 13.70it/s]
586it [00:01, 298.11it/s]
236it [00:04, 52.67it/s]
7538it [00:02, 3766.46it/s]
2it [00:00, 13.37it/s]

at epoch 11
train info: logloss loss:0.9872817896567216
eval info: group_auc:0.6221, mean_mrr:0.2868, ndcg@10:0.3794, ndcg@5:0.3119
at epoch 11 , train time: 79.3 eval time: 17.3


step 1080 , total_loss: 0.9471, data_loss: 0.9331: : 1086it [01:19, 13.72it/s]
586it [00:01, 294.34it/s]
236it [00:04, 50.15it/s]
7538it [00:02, 3440.34it/s]
2it [00:00, 13.30it/s]

at epoch 12
train info: logloss loss:0.9472576936305557
eval info: group_auc:0.6107, mean_mrr:0.2805, ndcg@10:0.3703, ndcg@5:0.3016
at epoch 12 , train time: 79.1 eval time: 17.6


step 1080 , total_loss: 0.8992, data_loss: 0.9839: : 1086it [01:19, 13.74it/s]
586it [00:01, 309.29it/s]
236it [00:04, 50.90it/s]
7538it [00:01, 3843.52it/s]
2it [00:00, 13.15it/s]

at epoch 13
train info: logloss loss:0.8994593631551631
eval info: group_auc:0.6125, mean_mrr:0.2835, ndcg@10:0.374, ndcg@5:0.307
at epoch 13 , train time: 79.0 eval time: 17.1


step 1080 , total_loss: 0.8574, data_loss: 0.9976: : 1086it [01:21, 13.27it/s]
586it [00:01, 396.20it/s]
236it [00:04, 48.36it/s]
7538it [00:02, 3349.36it/s]
2it [00:00, 12.14it/s]

at epoch 14
train info: logloss loss:0.8578004978089482
eval info: group_auc:0.6103, mean_mrr:0.2823, ndcg@10:0.372, ndcg@5:0.3041
at epoch 14 , train time: 81.8 eval time: 19.0


step 1080 , total_loss: 0.8139, data_loss: 0.6784: : 1086it [01:23, 12.97it/s]
586it [00:01, 297.87it/s]
236it [00:04, 48.50it/s]
7538it [00:02, 3364.73it/s]
0it [00:00, ?it/s]

at epoch 15
train info: logloss loss:0.8143713253088858
eval info: group_auc:0.6228, mean_mrr:0.2885, ndcg@10:0.3792, ndcg@5:0.3132
at epoch 15 , train time: 83.7 eval time: 19.8


step 1080 , total_loss: 0.7714, data_loss: 0.9177: : 1086it [01:22, 13.22it/s]
586it [00:01, 298.57it/s]
236it [00:04, 49.12it/s]
7538it [00:02, 3021.54it/s]
2it [00:00, 12.55it/s]

at epoch 16
train info: logloss loss:0.7718944720251565
eval info: group_auc:0.6118, mean_mrr:0.2833, ndcg@10:0.3717, ndcg@5:0.3067
at epoch 16 , train time: 82.1 eval time: 17.8


step 1080 , total_loss: 0.7334, data_loss: 0.9780: : 1086it [01:22, 13.13it/s]
586it [00:01, 300.81it/s]
236it [00:04, 49.04it/s]
7538it [00:02, 2621.10it/s]
2it [00:00, 12.55it/s]

at epoch 17
train info: logloss loss:0.7333167729072588
eval info: group_auc:0.6216, mean_mrr:0.287, ndcg@10:0.3781, ndcg@5:0.3114
at epoch 17 , train time: 82.7 eval time: 18.4


step 1080 , total_loss: 0.6958, data_loss: 0.6309: : 1086it [01:23, 13.07it/s]
586it [00:01, 302.77it/s]
236it [00:04, 49.07it/s]
7538it [00:02, 2647.20it/s]
2it [00:00, 12.68it/s]

at epoch 18
train info: logloss loss:0.6957906975271952
eval info: group_auc:0.6117, mean_mrr:0.2815, ndcg@10:0.3699, ndcg@5:0.3051
at epoch 18 , train time: 83.1 eval time: 18.4


step 1080 , total_loss: 0.6530, data_loss: 1.1214: : 1086it [01:23, 13.06it/s]
586it [00:01, 302.88it/s]
236it [00:04, 49.25it/s]
7538it [00:02, 2593.97it/s]
2it [00:00, 12.42it/s]

at epoch 19
train info: logloss loss:0.6531660534290539
eval info: group_auc:0.6109, mean_mrr:0.2811, ndcg@10:0.3695, ndcg@5:0.3058
at epoch 19 , train time: 83.2 eval time: 19.9


step 1080 , total_loss: 0.6196, data_loss: 0.8200: : 1086it [01:22, 13.20it/s]
586it [00:01, 306.76it/s]
236it [00:04, 49.46it/s]
7538it [00:02, 2562.21it/s]
0it [00:00, ?it/s]

at epoch 20
train info: logloss loss:0.6195797041662271
eval info: group_auc:0.6094, mean_mrr:0.2797, ndcg@10:0.3673, ndcg@5:0.3046
at epoch 20 , train time: 82.3 eval time: 19.2


step 1080 , total_loss: 0.5895, data_loss: 0.6522: : 1086it [01:21, 13.31it/s]
586it [00:02, 292.03it/s]
236it [00:04, 50.33it/s]
7538it [00:02, 2961.60it/s]
2it [00:00, 12.25it/s]

at epoch 21
train info: logloss loss:0.5894609831177507
eval info: group_auc:0.6059, mean_mrr:0.2798, ndcg@10:0.3668, ndcg@5:0.3024
at epoch 21 , train time: 81.6 eval time: 17.7


step 1080 , total_loss: 0.5576, data_loss: 0.7617: : 1086it [01:22, 13.20it/s]
586it [00:02, 288.13it/s]
236it [00:04, 48.73it/s]
7538it [00:02, 3431.85it/s]
2it [00:00, 12.48it/s]

at epoch 22
train info: logloss loss:0.5578092303175074
eval info: group_auc:0.6088, mean_mrr:0.2809, ndcg@10:0.3691, ndcg@5:0.3077
at epoch 22 , train time: 82.3 eval time: 19.7


step 1080 , total_loss: 0.5325, data_loss: 0.4534: : 1086it [01:22, 13.19it/s]
586it [00:01, 303.03it/s]
236it [00:04, 49.21it/s]
7538it [00:02, 2917.51it/s]
2it [00:00, 12.03it/s]

at epoch 23
train info: logloss loss:0.5322018436964046
eval info: group_auc:0.6108, mean_mrr:0.2806, ndcg@10:0.3688, ndcg@5:0.3049
at epoch 23 , train time: 82.3 eval time: 19.6


step 1080 , total_loss: 0.5079, data_loss: 0.6355: : 1086it [01:22, 13.17it/s]
586it [00:01, 298.04it/s]
236it [00:04, 49.40it/s]
7538it [00:02, 3292.35it/s]
2it [00:00, 12.60it/s]

at epoch 24
train info: logloss loss:0.5080855257164268
eval info: group_auc:0.6107, mean_mrr:0.2798, ndcg@10:0.3678, ndcg@5:0.3048
at epoch 24 , train time: 82.5 eval time: 17.5


step 1080 , total_loss: 0.4762, data_loss: 0.5636: : 1086it [01:21, 13.31it/s]
586it [00:01, 305.41it/s]
236it [00:04, 49.71it/s]
7538it [00:02, 3750.13it/s]
0it [00:00, ?it/s]

at epoch 25
train info: logloss loss:0.4758488375206459
eval info: group_auc:0.6015, mean_mrr:0.2761, ndcg@10:0.362, ndcg@5:0.2988
at epoch 25 , train time: 81.6 eval time: 17.2


step 1080 , total_loss: 0.4576, data_loss: 0.4936: : 1086it [01:22, 13.17it/s]
586it [00:01, 299.64it/s]
236it [00:04, 49.87it/s]
7538it [00:03, 2467.33it/s]
2it [00:00, 11.98it/s]

at epoch 26
train info: logloss loss:0.4574486224004796
eval info: group_auc:0.6059, mean_mrr:0.278, ndcg@10:0.3649, ndcg@5:0.3009
at epoch 26 , train time: 82.5 eval time: 20.2


step 1080 , total_loss: 0.4366, data_loss: 0.5970: : 1086it [01:22, 13.16it/s]
586it [00:01, 305.88it/s]
236it [00:04, 49.49it/s]
7538it [00:02, 2557.57it/s]
2it [00:00, 12.43it/s]

at epoch 27
train info: logloss loss:0.43698586704651954
eval info: group_auc:0.6075, mean_mrr:0.2802, ndcg@10:0.3673, ndcg@5:0.3043
at epoch 27 , train time: 82.5 eval time: 19.4


step 1080 , total_loss: 0.4164, data_loss: 0.5240: : 1086it [01:23, 13.07it/s]
586it [00:01, 307.59it/s]
236it [00:04, 48.60it/s]
7538it [00:02, 2790.05it/s]
2it [00:00, 11.88it/s]

at epoch 28
train info: logloss loss:0.4168904404709901
eval info: group_auc:0.6013, mean_mrr:0.2758, ndcg@10:0.3625, ndcg@5:0.2982
at epoch 28 , train time: 83.1 eval time: 20.1


step 1080 , total_loss: 0.3984, data_loss: 0.2585: : 1086it [01:23, 13.03it/s]
586it [00:01, 301.07it/s]
236it [00:04, 48.56it/s]
7538it [00:02, 2834.28it/s]
2it [00:00, 12.31it/s]

at epoch 29
train info: logloss loss:0.3988917582217281
eval info: group_auc:0.6007, mean_mrr:0.2755, ndcg@10:0.3622, ndcg@5:0.2986
at epoch 29 , train time: 83.3 eval time: 18.9


step 1080 , total_loss: 0.3836, data_loss: 0.2501: : 1086it [01:23, 12.99it/s]
586it [00:01, 293.74it/s]
236it [00:04, 48.65it/s]
7538it [00:02, 2883.18it/s]
0it [00:00, ?it/s]

at epoch 30
train info: logloss loss:0.3832172507190353
eval info: group_auc:0.6093, mean_mrr:0.2786, ndcg@10:0.3662, ndcg@5:0.3029
at epoch 30 , train time: 83.6 eval time: 18.1


step 1080 , total_loss: 0.3658, data_loss: 0.5025: : 1086it [01:22, 13.11it/s]
586it [00:01, 299.42it/s]
236it [00:04, 49.16it/s]
7538it [00:03, 2503.24it/s]
2it [00:00, 12.00it/s]

at epoch 31
train info: logloss loss:0.36613150560477165
eval info: group_auc:0.6041, mean_mrr:0.2754, ndcg@10:0.3619, ndcg@5:0.2982
at epoch 31 , train time: 82.9 eval time: 18.3


step 1080 , total_loss: 0.3492, data_loss: 0.4536: : 1086it [01:22, 13.13it/s]
586it [00:02, 288.78it/s]
236it [00:04, 49.13it/s]
7538it [00:03, 2391.11it/s]
2it [00:00, 12.54it/s]

at epoch 32
train info: logloss loss:0.3490085786424618
eval info: group_auc:0.6053, mean_mrr:0.2772, ndcg@10:0.3637, ndcg@5:0.3006
at epoch 32 , train time: 82.7 eval time: 18.6


step 1080 , total_loss: 0.3411, data_loss: 0.6897: : 1086it [01:22, 13.12it/s]
586it [00:01, 299.08it/s]
236it [00:04, 48.83it/s]
7538it [00:03, 2505.47it/s]
2it [00:00, 12.03it/s]

at epoch 33
train info: logloss loss:0.3412127620291864
eval info: group_auc:0.6047, mean_mrr:0.276, ndcg@10:0.3628, ndcg@5:0.2983
at epoch 33 , train time: 82.8 eval time: 20.3


step 1080 , total_loss: 0.3224, data_loss: 0.2814: : 1086it [01:22, 13.14it/s]
586it [00:02, 292.84it/s]
236it [00:04, 49.26it/s]
7538it [00:02, 2718.52it/s]
2it [00:00, 12.63it/s]

at epoch 34
train info: logloss loss:0.3229876074109837
eval info: group_auc:0.6048, mean_mrr:0.2765, ndcg@10:0.3649, ndcg@5:0.2992
at epoch 34 , train time: 82.6 eval time: 18.2


step 1080 , total_loss: 0.3205, data_loss: 0.2267: : 1086it [01:22, 13.18it/s]
586it [00:01, 305.67it/s]
236it [00:04, 49.07it/s]
7538it [00:02, 2714.20it/s]
0it [00:00, ?it/s]

at epoch 35
train info: logloss loss:0.3208690561820679
eval info: group_auc:0.6041, mean_mrr:0.2729, ndcg@10:0.3603, ndcg@5:0.295
at epoch 35 , train time: 82.4 eval time: 18.1


step 1080 , total_loss: 0.3075, data_loss: 0.4599: : 1086it [01:22, 13.13it/s]
586it [00:01, 302.24it/s]
236it [00:04, 50.51it/s]
7538it [00:02, 3165.66it/s]
2it [00:00, 12.43it/s]

at epoch 36
train info: logloss loss:0.3079007255294211
eval info: group_auc:0.6006, mean_mrr:0.2739, ndcg@10:0.3601, ndcg@5:0.2966
at epoch 36 , train time: 82.7 eval time: 17.5


step 1080 , total_loss: 0.2912, data_loss: 0.3138: : 1086it [01:21, 13.30it/s]
586it [00:01, 317.30it/s]
236it [00:04, 49.29it/s]
7538it [00:03, 2466.19it/s]
2it [00:00, 11.89it/s]

at epoch 37
train info: logloss loss:0.2913656254156628
eval info: group_auc:0.6024, mean_mrr:0.2715, ndcg@10:0.3592, ndcg@5:0.2958
at epoch 37 , train time: 81.6 eval time: 18.4


step 1080 , total_loss: 0.2865, data_loss: 0.3044: : 1086it [01:22, 13.12it/s]
586it [00:01, 302.30it/s]
236it [00:04, 49.46it/s]
7538it [00:03, 2448.29it/s]
2it [00:00, 12.78it/s]

at epoch 38
train info: logloss loss:0.28653857700910906
eval info: group_auc:0.5982, mean_mrr:0.2711, ndcg@10:0.3571, ndcg@5:0.294
at epoch 38 , train time: 82.8 eval time: 18.4


step 1080 , total_loss: 0.2802, data_loss: 0.2829: : 1086it [01:22, 13.16it/s]
586it [00:01, 304.14it/s]
236it [00:04, 50.87it/s]
7538it [00:02, 2847.30it/s]
2it [00:00, 13.02it/s]

at epoch 39
train info: logloss loss:0.280197961255908
eval info: group_auc:0.5946, mean_mrr:0.2714, ndcg@10:0.3565, ndcg@5:0.294
at epoch 39 , train time: 82.5 eval time: 17.6


step 1080 , total_loss: 0.2665, data_loss: 0.0623: : 1086it [01:21, 13.27it/s]
586it [00:02, 289.46it/s]
236it [00:04, 48.86it/s]
7538it [00:02, 2765.98it/s]
0it [00:00, ?it/s]

at epoch 40
train info: logloss loss:0.26693846728536086
eval info: group_auc:0.5999, mean_mrr:0.2728, ndcg@10:0.3599, ndcg@5:0.2947
at epoch 40 , train time: 81.9 eval time: 19.1


step 1080 , total_loss: 0.2560, data_loss: 0.2162: : 1086it [01:22, 13.17it/s]
586it [00:01, 316.46it/s]
236it [00:04, 49.50it/s]
7538it [00:03, 2354.64it/s]
2it [00:00, 11.98it/s]

at epoch 41
train info: logloss loss:0.2557736539777581
eval info: group_auc:0.5993, mean_mrr:0.2714, ndcg@10:0.358, ndcg@5:0.2944
at epoch 41 , train time: 82.5 eval time: 19.1


step 1080 , total_loss: 0.2520, data_loss: 0.2166: : 1086it [01:21, 13.37it/s]
586it [00:01, 297.15it/s]
236it [00:04, 50.05it/s]
7538it [00:02, 3493.33it/s]
2it [00:00, 12.38it/s]

at epoch 42
train info: logloss loss:0.25209687639569817
eval info: group_auc:0.6029, mean_mrr:0.2768, ndcg@10:0.3642, ndcg@5:0.301
at epoch 42 , train time: 81.3 eval time: 17.6


step 1080 , total_loss: 0.2541, data_loss: 0.1395: : 1086it [01:21, 13.36it/s]
586it [00:01, 316.64it/s]
236it [00:04, 49.76it/s]
7538it [00:02, 3378.81it/s]
2it [00:00, 12.37it/s]

at epoch 43
train info: logloss loss:0.25409988030571745
eval info: group_auc:0.5999, mean_mrr:0.2727, ndcg@10:0.3595, ndcg@5:0.296
at epoch 43 , train time: 81.3 eval time: 18.9


step 1080 , total_loss: 0.2448, data_loss: 0.3073: : 1086it [01:21, 13.36it/s]
586it [00:01, 296.28it/s]
236it [00:04, 50.73it/s]
7538it [00:02, 2914.87it/s]
2it [00:00, 12.48it/s]

at epoch 44
train info: logloss loss:0.24429078970934584
eval info: group_auc:0.5949, mean_mrr:0.2686, ndcg@10:0.3545, ndcg@5:0.291
at epoch 44 , train time: 81.3 eval time: 18.5


step 1080 , total_loss: 0.2338, data_loss: 0.3154: : 1086it [01:21, 13.40it/s]
586it [00:01, 303.85it/s]
236it [00:04, 50.18it/s]
7538it [00:02, 3162.30it/s]
0it [00:00, ?it/s]

at epoch 45
train info: logloss loss:0.23399824352890364
eval info: group_auc:0.5978, mean_mrr:0.2721, ndcg@10:0.3586, ndcg@5:0.2963
at epoch 45 , train time: 81.0 eval time: 17.8


step 1080 , total_loss: 0.2350, data_loss: 0.4026: : 1086it [01:21, 13.31it/s]
586it [00:01, 307.14it/s]
236it [00:04, 49.86it/s]
7538it [00:02, 2918.80it/s]
2it [00:00, 13.17it/s]

at epoch 46
train info: logloss loss:0.23510191723546545
eval info: group_auc:0.596, mean_mrr:0.2696, ndcg@10:0.3564, ndcg@5:0.2939
at epoch 46 , train time: 81.6 eval time: 17.8


step 1080 , total_loss: 0.2239, data_loss: 0.2544: : 1086it [01:21, 13.36it/s]
586it [00:01, 300.99it/s]
236it [00:04, 49.86it/s]
7538it [00:02, 3007.89it/s]
2it [00:00, 12.23it/s]

at epoch 47
train info: logloss loss:0.22382652589581345
eval info: group_auc:0.6021, mean_mrr:0.2724, ndcg@10:0.3596, ndcg@5:0.2984
at epoch 47 , train time: 81.3 eval time: 19.0


step 1080 , total_loss: 0.2225, data_loss: 0.1757: : 1086it [01:21, 13.38it/s]
586it [00:01, 309.51it/s]
236it [00:04, 50.04it/s]
7538it [00:02, 2963.23it/s]
2it [00:00, 13.11it/s]

at epoch 48
train info: logloss loss:0.22271163778595668
eval info: group_auc:0.6028, mean_mrr:0.2753, ndcg@10:0.3628, ndcg@5:0.3005
at epoch 48 , train time: 81.2 eval time: 17.7


step 1080 , total_loss: 0.2120, data_loss: 0.2901: : 1086it [01:21, 13.30it/s]
586it [00:01, 301.48it/s]
236it [00:04, 49.69it/s]
7538it [00:02, 3206.78it/s]
2it [00:00, 12.15it/s]

at epoch 49
train info: logloss loss:0.21246073488489095
eval info: group_auc:0.5957, mean_mrr:0.2707, ndcg@10:0.3568, ndcg@5:0.2918
at epoch 49 , train time: 81.6 eval time: 19.5


step 1080 , total_loss: 0.2130, data_loss: 0.4338: : 1086it [01:22, 13.24it/s]
586it [00:01, 308.38it/s]
236it [00:04, 49.70it/s]
7538it [00:02, 2809.82it/s]


at epoch 50
train info: logloss loss:0.2131707558209423
eval info: group_auc:0.6005, mean_mrr:0.2733, ndcg@10:0.3595, ndcg@5:0.2977
at epoch 50 , train time: 82.0 eval time: 17.9
CPU times: user 1h 47min 39s, sys: 49min 6s, total: 2h 36min 45s
Wall time: 1h 23min 23s


In [11]:
train_results

([1.467323507612801,
  1.3600232077566958,
  1.310909848461274,
  1.2723403669084195,
  1.2284527074675973,
  1.192296288729153,
  1.1566130410241817,
  1.11420575086144,
  1.0749805745279373,
  1.0338402505737643,
  0.9872817896567216,
  0.9472576936305557,
  0.8994593631551631,
  0.8578004978089482,
  0.8143713253088858,
  0.7718944720251565,
  0.7333167729072588,
  0.6957906975271952,
  0.6531660534290539,
  0.6195797041662271,
  0.5894609831177507,
  0.5578092303175074,
  0.5322018436964046,
  0.5080855257164268,
  0.4758488375206459,
  0.4574486224004796,
  0.43698586704651954,
  0.4168904404709901,
  0.3988917582217281,
  0.3832172507190353,
  0.36613150560477165,
  0.3490085786424618,
  0.3412127620291864,
  0.3229876074109837,
  0.3208690561820679,
  0.3079007255294211,
  0.2913656254156628,
  0.28653857700910906,
  0.280197961255908,
  0.26693846728536086,
  0.2557736539777581,
  0.25209687639569817,
  0.25409988030571745,
  0.24429078970934584,
  0.23399824352890364,
  0.2351

In [12]:
with open('nrms_loss_lr3e-4.txt', 'w') as filehandle:
    for listitem in train_results:
        for item in listitem:
            filehandle.write('%s\n' % item)


In [13]:
%%time
res_syn = model.run_eval(valid_news_file, valid_behaviors_file)
print(res_syn)


586it [00:01, 297.22it/s]
236it [00:04, 50.03it/s]
7538it [00:03, 2443.34it/s]


{'group_auc': 0.6005, 'mean_mrr': 0.2733, 'ndcg@5': 0.2977, 'ndcg@10': 0.3595}
CPU times: user 57.7 s, sys: 55.7 s, total: 1min 53s
Wall time: 18.5 s


In [14]:
model_path = os.path.join(data_path, "model_newlr")
os.makedirs(model_path, exist_ok=True)

model.model.save_weights(os.path.join(model_path, "nrms_ckpt_3e-4"))

In [15]:
model.scorer.save_weights('data/model_newlr/nrms_ckpt_503e-4')

In [16]:
group_impr_indexes, group_labels, group_preds = model.run_fast_eval(valid_news_file, valid_behaviors_file)
with open(os.path.join(data_path, 'prediction_nrms.txt'), 'w') as f:
    for impr_index, preds in tqdm(zip(group_impr_indexes, group_preds)):
        impr_index += 1
        pred_rank = (np.argsort(np.argsort(preds)[::-1]) + 1).tolist()
        pred_rank = '[' + ','.join([str(i) for i in pred_rank]) + ']'
        f.write(' '.join([str(impr_index), pred_rank])+ '\n')

586it [00:01, 296.08it/s]
236it [00:04, 50.60it/s]
7538it [00:02, 2670.96it/s]
7538it [00:00, 30285.70it/s]


In [17]:
f = zipfile.ZipFile(os.path.join(data_path, 'nrms_prediction.zip'), 'w', zipfile.ZIP_DEFLATED)
f.write(os.path.join(data_path, 'nrms_prediction.txt'), arcname='nrms_prediction.txt')
f.close()

FileNotFoundError: [Errno 2] No such file or directory: 'data/nrms_prediction.txt'