In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = '1'
device = 'cuda'

In [2]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pickle
import random
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from copy import deepcopy
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from pytorchtools import EarlyStopping
assert(torch.cuda.is_available())

## Data Preprocessing

In [3]:
# # merge news to one document
# news_set = set()
# news = []
# with open('/data/Recommend/MIND/MINDsmall_train/news.tsv', 'r') as f:
#     for line in f:
#         data = line.split('\t')
#         news_id = data[0]
#         if news_id not in news_set:
#             news.append(line)
#             news_set.add(news_id)
# with open('/data/Recommend/MIND/MINDsmall_dev/news.tsv') as f:
#     for line in f:
#         data = line.split('\t')
#         news_id = data[0]
#         if news_id not in news_set:
#             news.append(line)
#             news_set.add(news_id)
# # with open('/data/Recommend/MIND/MINDlarge_test/news.tsv') as f:
# #     for line in f:
# #         data = line.split('\t')
# #         news_id = data[0]
# #         if news_id not in news_set:
# #             news.append(line)
# #             news_set.add(news_id)

# # with open('/data/Recommend/MIND/small_news.tsv', 'w') as f:
# #     f.writelines(news)

# print(len(news))

In [4]:
def load_news(path):
    news_dict = {} # index -> news
    news_list = [] # index -> news
    newsid_dict = {} # newsid -> index
    word_dict = {'<PAD>': 0, '<OOV>': 1}
    cate_dict = {'<PAD>': 0, '<OOV>': 1}
    with open(path, 'r') as f:
        for line in f.readlines():
            news_id, category, subcategory, title, abstract, \
                url, title_entities, abstract_entities = line.strip().split('\t')
            title = title.lower().replace('.', '').replace(',', '').replace(';', '').replace(':', '').replace('\'', '').replace('"', '').replace('?', '').replace('!', '').replace('(', '').replace(')', '').split(' ')
            abstract = abstract.lower().replace('.', '').replace(',', '').replace(';', '').replace(':', '').replace('\'', '').replace('"', '').replace('?', '').replace('!', '').replace('(', '').replace(')', '').split(' ')
            for word in title + abstract:
                if word not in word_dict:
                    word_dict[word] = len(word_dict)
            if category not in cate_dict:
                cate_dict[category] = len(cate_dict)
            if subcategory not in cate_dict:
                cate_dict[subcategory] = len(cate_dict)
            if news_id not in newsid_dict:
                newsid_dict[news_id] = len(newsid_dict)
                news_list.append([category, subcategory, title, abstract])
    print(len(news_list))
    return news_list, newsid_dict, word_dict, cate_dict

In [5]:
max_title = 30
max_body = 100
def map_news_input(news_list, word_dict, cate_dict):
    n_news = len(news_list)
    titles = np.zeros((n_news, max_title), dtype = 'int32')
    bodys = np.zeros((n_news, max_body), dtype = 'int32')
    cates = np.zeros((n_news,1), dtype = 'int32')
    subcates = np.zeros((n_news,1), dtype = 'int32')
    for i in range(n_news):
        category, subcategory, title, abstract = news_list[i]
        titles[i, :len(title)] = [word_dict[word] for word in title[:max_title]]
        bodys[i, :len(abstract)] = [word_dict[word] for word in abstract[:max_body]]
        cates[i] = cate_dict[category]
        subcates[i] = cate_dict[subcategory]
    news_info = np.concatenate((titles, bodys, cates, subcates), axis = 1)
    print(news_info.shape)
    return news_info # index -> news_info

In [6]:
'''
news_list: original news
news_info: mapped news(word ids)
'''
news_list, newsid_dict, word_dict, cate_dict = load_news('/data/Recommend/MIND/small_news.tsv')
news_info = map_news_input(news_list, word_dict, cate_dict)

65238
(65238, 132)


In [7]:
def load_glove(word_to_ix, dim = 100):
    if dim == 100:
        path = '/data/pretrained/Glove/glove.6B.100d.txt'
    elif dim == 300:
        path = '/data/pretrained/Glove/glove.840B.300d.txt'
    word_emb = []
    word_emb = np.zeros((len(word_to_ix), dim), dtype = float)
    with open(path, 'r') as f:
        for line in f:
            data = line.strip().split(' ') # [word emb1 emb2 ... emb n]
            word = data[0]
            if word in word_to_ix:
                word_emb[word_to_ix[word]] = [float(i) for i in data[1:]]
    print(word_emb.shape)
    return torch.tensor(word_emb, dtype = torch.float)

In [9]:
word_emb = load_glove(word_dict, 300)
cate_emb = load_glove(cate_dict, 100)

(80416, 300)
(282, 100)


In [10]:
def load_train_impression(path, newsid_dict): # train&dev
    logs = []
    with open(path, 'r') as f:
        for line in f:
            imp_id, user_id, time, history, impression = line.strip().split('\t')
            if history:
                history = [newsid_dict[news_id] for news_id in history.split(' ')]
            else:
                history = []
            positive = []
            negative = []
            for item in impression.split(' '):
                news_id, num = item.split('-')
                if num == '1':
                    positive.append(newsid_dict[news_id])
                else:
                    negative.append(newsid_dict[news_id])
            logs.append([history, positive, negative]) # indexs
    return logs

In [11]:
max_history = 50
def map_user(logs): # index -> history, 用 index 代表 user_id, train&dev
    n_user = len(logs)
    user_hist = np.zeros((n_user, max_history), dtype = 'int32') # index -> history
    for i in range(n_user):
        history, positive, negative = logs[i]
        n_hist = len(history)
        if n_hist == 0:
            continue
        user_hist[i, -n_hist:] = history[-max_history:]
    return user_hist         

In [12]:
neg_ratio = 4
def neg_sample(negative):
    if len(negative) < neg_ratio:
        return random.sample(negative * (neg_ratio // len(negative) + 1), neg_ratio)
    else:
        return random.sample(negative, neg_ratio)

def get_train_input(logs): # 和 map_user 使用同一个 log
    all_pos = [] # 每个 sample 的 pos
    all_neg = []
    user_id = [] # 每个 sample 的 user，用 index 表示，和 map_user 的结果对应
    for i in range(len(logs)):
        history, positive, negative = logs[i]
        for pos in positive:
            all_pos.append(pos)
            all_neg.append(neg_sample(negative))
            user_id.append(i)
    n_imps = len(all_pos)
    imps = np.zeros((n_imps, 1 + neg_ratio), dtype = 'int32')
    for i in range(len(all_pos)):
        imps[i, 0] = all_pos[i]
        imps[i, 1:] = all_neg[i]
    user_id = np.array(user_id, dtype = 'int32')
    labels = np.zeros((n_imps, 1 + neg_ratio), dtype = 'int32')
    labels[:, 0] = 1
    print(n_imps)
    return imps, user_id, labels

def get_dev_input(logs): # 和 map_user 使用同一个 log
    imps = []
    labels = []
    user_id = np.zeros((len(logs)), dtype = 'int32') # 每个 sample 的 user index，和 map_user 的结果对应
    for i in range(len(logs)):
        history, positive, negative = logs[i]
        imps.append(np.array(positive + negative, dtype = 'int32'))
        labels.append([1] * len(positive) + [0] * len(negative))
        user_id[i] = i
    print(len(logs))
    return imps, user_id, labels

In [11]:
# # merge entity embedding to one document
# ent_set = set()
# ents = []
# with open('/data/Recommend/MIND/MINDsmall_train/entity_embedding.vec', 'r') as f:
#     for line in f:
#         ent_id = line.split('\t')[0]
#         if ent_id not in ent_set:
#             ents.append(line)
#             ent_set.add(ent_id)
# with open('/data/Recommend/MIND/MINDsmall_dev/entity_embedding.vec') as f:
#     for line in f:
#         ent_id = line.split('\t')[0]
#         if ent_id not in ent_set:
#             ents.append(line)
#             ent_set.add(ent_id)
# # with open('/data/Recommend/MIND/MINDlarge_test/entity_embedding.vec') as f:
# #     for line in f:
# #         ent_id = line.split('\t')[0]
# #         if ent_id not in ent_set:
# #             ents.append(line)
# #             ent_set.add(ent_id)

# with open('/data/Recommend/MIND/small_entity_embedding.vec', 'w') as f:
#     f.writelines(ents)

# print(len(ents))

In [13]:
def load_ent_emb(path):
    ent_emb = []
    ent_dict = {'<PAD>': 0, '<OOV>': 1}
    with open(path, 'r') as f:
        for line in f:
            data = line.strip().split('\t')
            ent_id = data[0]
            ent_dict[ent_id] = len(ent_dict)
            ent_emb.append([float(i) for i in data[1:]])
    ent_emb.insert(0, [0.] * len(ent_emb[0]))
    ent_emb.insert(0, [0.] * len(ent_emb[0]))
    ent_emb = torch.tensor(ent_emb, dtype = torch.float)
    print(ent_emb.shape)
    return ent_emb, ent_dict

In [14]:
max_ents = 5
def load_news_ent(path, ent_dict):
    n_news = len(news_list)
    news_ents = np.zeros((n_news, max_ents), dtype = 'int32')
    i = 0
    with open(path, 'r') as f:
        for line in f.readlines():
            data = line.strip().split('\t')
            ents = [ent['WikidataId'] for ent in json.loads(data[6])] + [ent['WikidataId'] for ent in json.loads(data[7])]
            news_ents[i, :len(ents)] = [ent_dict[ent] if ent in ent_dict else ent_dict['<OOV>'] for ent in ents[:max_ents]]
            i += 1
    print(len(news_ents))
    return news_ents # index -> ent_index

In [15]:
ent_emb, ent_dict = load_ent_emb('/data/Recommend/MIND/small_entity_embedding.vec')
news_ents = load_news_ent('/data/Recommend/MIND/small_news.tsv', ent_dict)

torch.Size([31453, 100])
65238


In [16]:
class TrainDataset(Dataset):
    def __init__(self, imp_datas, imp_users, imp_labels, news_info, user_clicks, batch_size, news_ents = None, news_urls = None):
        self.imp_datas = imp_datas # (n_imps, 1 + k)
        self.imp_users = imp_users
        self.imp_labels = imp_labels
        self.news = news_info
        self.user_clicks = user_clicks
        self.batch_size = batch_size
        self.news_ents = news_ents
        self.news_urls = news_urls
        
        self.n_data = imp_datas.shape[0]
        
    def __len__(self):
        return int(np.ceil(self.n_data / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = min((idx + 1) * self.batch_size, self.n_data)
        
        data_id = self.imp_datas[start: end] # (n_batch, 1 + k)
        data_news = self.news[data_id] # (n_batch, 1 + k, news_len)
        user_id = self.imp_users[start: end] # (n_batch)
        user_news_id = self.user_clicks[user_id] # (n_batch, n_hist)
        user_news = self.news[user_news_id] # (n_batch, n_hist, news_len)
        labels = self.imp_labels[start: end] # (n_batch, 1 + k)
        
        if self.news_ents is not None:
            samp_ents = self.news_ents[data_id]
            user_ents = self.news_ents[user_news_id]
            return data_news, user_news, labels, samp_ents, user_ents
        
        if self.news_urls is not None:
            samp_urls = self.news_urls[data_id]
            user_urls = self.news_urls[user_news_id]
            return data_news, user_news, labels, samp_urls, user_urls
        
        return data_news, user_news, labels
    
class DevDataset(Dataset): # data 和 label 是 list，每条数据不同长度
    def __init__(self, imp_datas, imp_users, imp_labels, news_info, user_clicks, batch_size):
        self.imp_datas = imp_datas # [imp1, imp2, ..., impn]
        self.imp_users = imp_users # (n_imps)
        self.imp_labels = imp_labels
        self.news = news_info
        self.user_clicks = user_clicks
        self.batch_size = batch_size
        
        self.n_data = len(imp_datas)
        
    def __len__(self):
        return int(np.ceil(self.n_data / self.batch_size))

    def __getitem__(self, idx):
        start = idx * self.batch_size
        end = min((idx + 1) * self.batch_size, self.n_data)
        
        data_ids = []
        data_news = [] # [(n_imp, news_len)]
        labels = [] # [(n_imp)]
        for i in range(start, end):
            data_id = self.imp_datas[i] # (n_imp)
            data_ids.append(data_id)
            # data_news.append(self.news[data_id]) # (n_imp, news_len)
            labels.append(self.imp_labels[i]) # (n_imp)
        user_id = self.imp_users[start: end] # (n_batch)
        user_news_id = self.user_clicks[user_id] # (n_batch, n_hist)
        # user_news = self.news[user_news_id] # (n_batch, n_hist, news_len)
        
        #return data_news, user_news, labels
        return data_ids, user_news_id, labels

In [17]:
n_batch = 16
train_logs = load_train_impression('/data/Recommend/MIND/MINDsmall_train/behaviors.tsv', newsid_dict)
train_user_hist = map_user(train_logs)
train_datas, train_users, train_labels = get_train_input(train_logs)
#train_dataset = TrainDataset(train_datas, train_users, train_labels, news_info, train_user_hist, n_batch)
train_dataset = TrainDataset(train_datas, train_users, train_labels, news_info, train_user_hist, n_batch, news_ents)

dev_logs = load_train_impression('/data/Recommend/MIND/MINDsmall_dev/behaviors.tsv', newsid_dict)
dev_user_hist = map_user(dev_logs)
dev_datas, dev_users, dev_labels = get_dev_input(dev_logs)
dev_dataset = DevDataset(dev_datas, dev_users, dev_labels, news_info, dev_user_hist, 64)

valid_datas, valid_users, valid_labels = get_train_input(dev_logs) # 用 train 的方法构造 dev_set
valid_dataset = TrainDataset(valid_datas, valid_users, valid_labels, news_info, dev_user_hist, n_batch)

236344
73152
111383


In [18]:
def encode_all_news(news_encoder, news_info, news_ents = None):
    n_news = len(news_info)
    news_rep = []
    n_batch = 32
    for i in range((len(news_info) + n_batch - 1) // n_batch):
        batch_news = torch.tensor(news_info[i * n_batch: (i + 1) * n_batch], dtype = torch.long, device = 'cuda')
        batch_ents = torch.tensor(news_ents[i * n_batch: (i + 1) * n_batch], dtype = torch.long, device = 'cuda')
        batch_rep = news_encoder(batch_news, batch_ents).detach().cpu().numpy()
        news_rep.append(batch_rep)
    news_rep = np.concatenate(news_rep, axis = 0)
    return news_rep # (n_news, n_title, n_emb)

def encode_all_user(user_encoder, user_ids, user_hist, news_rep):
    user_rep = []
    with torch.no_grad():
        for _, batch in enumerate(dev_dataset):
            if len(batch[0]) == 0:
                break
            user_hist_rep = torch.tensor(news_rep[batch[1]], device = 'cuda') # (n_batch, n_hist)
            user = model.user_encoder(user_hist_rep).detach().cpu().numpy() # (n_batch, emb_dim)
            user_rep.append(user)
    # user_rep = np.concatenate(user_rep, axis = 0)
    return user_rep # [user_rep, ...]

In [19]:
def dcg_score(y_true, y_score, k=10):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    gains = 2 ** y_true - 1
    discounts = np.log2(np.arange(len(y_true)) + 2)
    return np.sum(gains / discounts)

def ndcg_score(y_true, y_score, k=10):
    best = dcg_score(y_true, y_true, k)
    actual = dcg_score(y_true, y_score, k)
    return actual / best

def mrr_score(y_true, y_score):
    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order)
    rr_score = y_true / (np.arange(len(y_true)) + 1)
    return np.sum(rr_score) / np.sum(y_true)

### Model

In [20]:
class AttentionPooling(nn.Module):
    def __init__(self, emb_dim, query_dim):
        super().__init__()
        self.fc1 = nn.Linear(emb_dim, query_dim)
        self.fc2 = nn.Linear(query_dim, 1)
        
    def forward(self, x, mask = None):
        '''
        (n_batch, n_seq, emb_dim) -> (n_batch, emb_dim)
        a = q^T tanh(V * k + v)
        alpha = softmax(a)
        '''
        a = self.fc2(torch.tanh(self.fc1(x))) # (n_batch, n_seq, 1)
        if mask is not None:
            a = a.masked_fill(mask.unsqueeze(-1) == 0, -1e9)
        alpha = F.softmax(a, dim = -2) # (n_batch, n_seq, 1)
        r = torch.matmul(alpha.transpose(-2, -1), x).squeeze(-2) # (n_batch, emb_dim)
        return r

In [21]:
class TextEncoder(nn.Module):
    def __init__(self, word_embedding, word_emb_dim, 
                 filter_num, window_size, query_dim, dropout, use_relu = False
                ):
        super().__init__()
        self.use_relu = use_relu
        self.word_embedding = word_embedding
        self.cnn = nn.Conv1d(word_emb_dim, filter_num, window_size, padding = window_size // 2)
        self.drop1 = nn.Dropout(dropout)
        self.drop2 = nn.Dropout(dropout)
        self.attn = AttentionPooling(filter_num, query_dim)
        
    def forward(self, x, mask = None):
        x_emb = self.word_embedding(x) # (n_batch, n_seq, emb_dim)
        x_emb = self.drop1(x_emb)
        x_rep = self.cnn(x_emb.transpose(2, 1)).transpose(2, 1) # (n_batch, n_seq, emb_dim)
        if self.use_relu:
            x_rep = F.relu(x_rep)
        x_rep = self.drop2(x_rep)
        x_rep = self.attn(x_rep, mask) # (n_batch, emb_dim)
        return x_rep

class CateEncoder(nn.Module):
    def __init__(self, cate_embedding, cate_emb_dim, out_dim, dropout = 0.2):
        super().__init__()
        self.cate_embedding = cate_embedding
        self.fc = nn.Linear(cate_emb_dim, out_dim)
        self.drop = nn.Dropout(dropout)
        
    def forward(self, x):
        x_emb = self.cate_embedding(x) # (n_batch, emb_dim)
        x_emb = self.drop(x_emb)
        x_rep = self.fc(x_emb) # (n_batch, out_dim)
        x_rep = F.relu(x_rep)
        return x_rep

In [132]:
class ConvNewsEncoder(nn.Module):
    def __init__(self, args, word_emb, cate_emb, ent_emb):
        super().__init__()
        if args['model'] == 'NAML' or 'title_relu' not in args:
            args['title_relu'] = False # NAML 不能用 relu，因为 user 不能全正
        if 'aggr_relu' not in args:
            args['aggr_relu'] = False
        if 'ent_mode' not in args:
            args['ent_mode'] = 'attn'
        self.args = args
        news_dim, query_dim = 256, 200
        
        self.word_embedding = nn.Embedding.from_pretrained(word_emb)
        self.title_cnn = nn.Conv1d(word_emb.shape[1], news_dim, 3, padding = 1)
        self.title_attn = AttentionPooling(news_dim, query_dim)
        out_dim = news_dim
        if args['use_ent']:
            self.ent_embedding = nn.Embedding.from_pretrained(ent_emb)
            if args['ent_mode'] == 'attn':
                self.ent_transformer = MultiHeadSelfAttention(ent_emb.shape[1], 16, 16, 16)
                
            else:
                self.ent_fc1 = nn.Linear(ent_emb.shape[1], query_dim)
                self.ent_fc2 = nn.Linear(query_dim, news_dim)
            self.ent_attn = AttentionPooling(news_dim, query_dim)
            out_dim += news_dim
        if args['use_cate']:
            self.cate_embedding = nn.Embedding.from_pretrained(cate_emb)
            self.cate_fc1 = nn.Linear(cate_emb.shape[1], query_dim)
            self.cate_fc2 = nn.Linear(query_dim, news_dim)
            out_dim += news_dim * 2
        self.aggr_fc = nn.Linear(out_dim, news_dim)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, news, ents = None):
        title, body, cate = news[:, :30], news[:, 30: -2], news[:, -2:]
        # print(news.shape, ents.shape)
        
        t_rep = self.word_embedding(title) # (n_batch, n_seq, emb_dim)
        t_rep = self.dropout(t_rep)
        t_rep = self.title_cnn(t_rep.transpose(-1, -2)).transpose(-1, -2)
        if self.args['title_relu']:
            t_rep = F.relu(t_rep)
        t_rep = self.title_attn(t_rep) # (n_batch, 256)
        
        if self.args['use_ent']:
            e_rep = self.ent_embedding(ents) # (n_batch, n_ent, emb_dim)
            e_rep = self.dropout(e_rep)
            if args['ent_mode'] == 'attn':
                e_rep = self.ent_transformer(e_rep, e_rep, e_rep) # (n_batch, n_ent, 256)
            else:
                e_rep = F.relu(self.ent_fc1(e_rep))
                e_rep = self.ent_fc2(e_rep) # (n_news, n_ent, news_dim)
            e_rep = self.ent_attn(e_rep) # (n_batch, 256)
            # print(e_rep.shape, t_rep.shape)
            t_rep = torch.cat((t_rep, e_rep), dim = -1)
        if self.args['use_cate']:
            c_rep = self.cate_embedding(cate) # (n_news, 2, emb_dim)
            c_rep = self.dropout(c_rep)
            c_rep = F.relu(self.cate_fc1(c_rep))
            c_rep = self.cate_fc2(c_rep) # (n_news, 2, news_dim)
            t_rep = torch.cat((t_rep, c_rep.reshape(c_rep.shape[0], -1)), dim = -1)
            # t_rep = torch.cat((t_rep, c_rep), dim = -2)
        # r = self.attn(r) # (n_news, n_filter)
        r = self.aggr_fc(t_rep)
        if 'aggr_relu' in self.args and self.args['aggr_relu']:
            r = F.relu(r)
        return r # (n_news, n_filter)

class UserEncoder(nn.Module):
    def __init__(self, news_dim):
        super().__init__()
        self.attn = AttentionPooling(news_dim, 200)
    
    def forward(self, h): 
        u = self.attn(h)
        return u

In [105]:
class NAML(nn.Module):
    def __init__(self, args, word_emb, cate_emb, ent_emb):
        super().__init__()
        self.news_encoder = ConvNewsEncoder(args, word_emb, cate_emb, ent_emb)
        self.user_encoder = UserEncoder(256)
    
    def forward(self, hist, samp, samp_ents = None, user_ents = None):
        n_batch, n_news, n_sequence = hist.shape
        n_samp = samp.shape[1] # k + 1
        n_ents = samp_ents.shape[2]
        
        hist = hist.reshape(n_batch * n_news, n_sequence)
        if user_ents is not None:
            user_ents = user_ents.reshape(n_batch * n_news, n_ents)
        h = self.news_encoder(hist, user_ents) # (n_batch*n_news, n_filter)
        h = h.reshape(n_batch, n_news, -1)  # (n_batch, n_news, n_filter)
        u = self.user_encoder(h) # (n_batch, n_filter)
        
        samp = samp.reshape(n_batch * n_samp, n_sequence)
        if samp_ents is not None:
            samp_ents = samp_ents.reshape(n_batch * n_samp, n_ents)
        r = self.news_encoder(samp, samp_ents) # (n_batch*(k+1), n_filter)
        r = r.reshape(n_batch, n_samp, -1) # (n_batch, k + 1, n_filter)
        
        y = torch.bmm(r, u.unsqueeze(2)) # (n_batch, K + 1, 1)
        return y.squeeze(2)

In [22]:
class ScaledDotProductAttention(nn.Module):
    def __init__(self, d_k):
        super().__init__()
        self.d_k = d_k

    def forward(self, Q, K, V, attn_mask=None):
        scores = torch.matmul(Q, K.transpose(-1, -2)) / np.sqrt(self.d_k)
        scores = torch.exp(scores)
        if attn_mask is not None:
            scores = scores * attn_mask
        attn = scores / (torch.sum(scores, dim=-1, keepdim=True)  + 1e-8)
        
        context = torch.matmul(attn, V)
        return context, attn

class MultiHeadSelfAttention(nn.Module):
    def __init__(self, d_model, n_heads, d_k, d_v):
        super().__init__()
        self.d_model = d_model # 300
        self.n_heads = n_heads # 20
        self.d_k = d_k # 20
        self.d_v = d_v # 20
        
        self.W_Q = nn.Linear(d_model, d_k * n_heads) # 300, 400
        self.W_K = nn.Linear(d_model, d_k * n_heads) # 300, 400
        self.W_V = nn.Linear(d_model, d_v * n_heads) # 300, 400
        
        self._initialize_weights()
                
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight, gain=1)
                
    def forward(self, Q, K, V, attn_mask=None):
        residual, batch_size = Q, Q.size(0)
        
        q_s = self.W_Q(Q).view(batch_size, -1, self.n_heads, self.d_k).transpose(1,2)
        k_s = self.W_K(K).view(batch_size, -1, self.n_heads, self.d_k).transpose(1,2)
        v_s = self.W_V(V).view(batch_size, -1, self.n_heads, self.d_v).transpose(1,2)
        
        if attn_mask is not None:
            attn_mask = attn_mask.unsqueeze(1).expand(batch_size, max_len, max_len) 
            attn_mask = attn_mask.unsqueeze(1).repeat(1, self.n_heads, 1, 1) 
        
        context, attn = ScaledDotProductAttention(self.d_k)(q_s, k_s, v_s, attn_mask) 
        context = context.transpose(1, 2).contiguous().view(batch_size, -1, self.n_heads * self.d_v) 
        return context # (n_batch, n_seq, emb_dim)

In [36]:
class AttnNewsEncoder(nn.Module):
    def __init__(self, args, word_emb, cate_emb, ent_emb):
        super().__init__()
        self.args = args
        if 'aggr_relu' not in args:
            args['aggr_relu'] = False
        if 'ent_mode' not in args:
            args['ent_mode'] = 'attn'
        self.args = args
        news_dim, query_dim = 256, 200
        
        self.word_embedding = nn.Embedding.from_pretrained(word_emb)
        self.self_attn = MultiHeadSelfAttention(word_emb.shape[1], 16, 16, 16)
        self.addi_attn = AttentionPooling(news_dim, query_dim)
        self.dropout = nn.Dropout(0.2)
        
        out_dim = news_dim
        if args['use_ent']:
            if args['ent_emb'] == 'transe':
                self.ent_embedding = nn.Embedding.from_pretrained(ent_emb)
            if args['ent_emb'] == 'random':
                self.ent_embedding = nn.Embedding(ent_emb.shape[0], ent_emb.shape[1])
            if args['ent_emb'] == 'avg':
                self.ent_embedding = self.word_embedding
            
            if args['ent_attn'] == True:
                self.ent_transformer = MultiHeadSelfAttention(ent_emb.shape[1], 16, 16, 16)
            else:
                self.ent_fc1 = nn.Linear(ent_emb.shape[1], query_dim)
                self.ent_fc2 = nn.Linear(query_dim, news_dim)
            self.ent_attn = AttentionPooling(news_dim, query_dim)
            out_dim += news_dim
        self.aggr_fc = nn.Linear(out_dim, news_dim)
        
    
    def forward(self, news, ents = None):
        title, body, cate = news[:, :max_title], news[:, max_title: -2], news[:, -2:]
        
        t_rep = self.word_embedding(title) # (n_batch, n_seq, emb_dim)
        t_rep = self.dropout(t_rep)
        t_rep = self.self_attn(t_rep, t_rep, t_rep) # (n_batch, n_seq, 256)
        t_rep = self.addi_attn(t_rep) # (n_batch, 256)
        
        if self.args['use_ent']:
            e_rep = self.ent_embedding(ents) # (n_batch, n_ent, emb_dim)
            e_rep = self.dropout(e_rep)
            
            if self.args['ent_emb'] = 'avg':
                e_rep = torch.mean(e_rep, dim = 1)
            
            if self.args['ent_attn'] == True:
                e_rep = self.ent_transformer(e_rep, e_rep, e_rep) # (n_batch, n_ent, 256)
            else:
                e_rep = F.relu(self.ent_fc1(e_rep))
                e_rep = self.ent_fc2(e_rep) # (n_news, n_ent, news_dim)
            e_rep = self.ent_attn(e_rep) # (n_batch, 256)
            t_rep = torch.cat((t_rep, e_rep), dim = -1)
        
        r = self.aggr_fc(t_rep)
        if 'aggr_relu' in self.args and self.args['aggr_relu']:
            r = F.relu(r)
        return r # (n_news, n_filter)

class AttnUserEncoder(nn.Module):
    def __init__(self, n_head, news_dim, query_dim):
        super().__init__()
        self.self_attn = MultiHeadSelfAttention(news_dim, n_head, 16, 16)
        self.addi_attn = AttentionPooling(news_dim, query_dim)
    
    def forward(self, h): # (n_batch, n_news, 256)
        u = self.self_attn(h, h, h) # (n_batch, n_news, 256)
        u = self.addi_attn(u) # (n_batch, 256)
        return u

In [37]:
class Model(nn.Module):
    def __init__(self, args, word_emb, cate_emb, ent_emb):
        super().__init__()
        if args['model'] == 'NAML':
            self.news_encoder = ConvNewsEncoder(args, word_emb, cate_emb, ent_emb)
            self.user_encoder = UserEncoder(256)
        if args['model'] == 'NRMS':
            n_head, query_dim, news_dim = 16, 200, 256
            self.news_encoder = AttnNewsEncoder(args, word_emb, cate_emb, ent_emb)
            self.user_encoder = AttnUserEncoder(n_head, news_dim, query_dim)
    
    def forward(self, hist, samp, samp_ents = None, user_ents = None):
        n_batch, n_news, n_sequence = hist.shape
        n_samp = samp.shape[1] # k + 1
        n_ents = samp_ents.shape[2]
        
        hist = hist.reshape(n_batch * n_news, n_sequence)
        if user_ents is not None:
            user_ents = user_ents.reshape(n_batch * n_news, n_ents)
        h = self.news_encoder(hist, user_ents) # (n_batch*n_news, n_filter)
        h = h.reshape(n_batch, n_news, -1)  # (n_batch, n_news, n_filter)
        u = self.user_encoder(h) # (n_batch, n_filter)
        
        samp = samp.reshape(n_batch * n_samp, n_sequence)
        if samp_ents is not None:
            samp_ents = samp_ents.reshape(n_batch * n_samp, n_ents)
        r = self.news_encoder(samp, samp_ents) # (n_batch*(k+1), n_filter)
        r = r.reshape(n_batch, n_samp, -1) # (n_batch, k + 1, n_filter)
        
        y = torch.bmm(r, u.unsqueeze(2)) # (n_batch, K + 1, 1)
        return y.squeeze(2)

In [None]:
class EntityAttnNewsEncoder(nn.Module):
    def __init__(self, args, word_emb, cate_emb, ent_emb):
        super().__init__()
        self.args = args
        if 'aggr_relu' not in args:
            args['aggr_relu'] = False
        if 'ent_mode' not in args:
            args['ent_mode'] = 'attn'
        self.args = args
        news_dim, query_dim = 256, 200
        
        self.word_embedding = nn.Embedding.from_pretrained(word_emb)
        self.self_attn = MultiHeadSelfAttention(word_emb.shape[1], 16, 16, 16)
        self.addi_attn = AttentionPooling(news_dim, query_dim)
        self.dropout = nn.Dropout(0.2)
        
        out_dim = news_dim
        if args['use_ent']:
            if args['ent_emb'] == 'transe':
                self.ent_embedding = nn.Embedding.from_pretrained(ent_emb)
            if args['ent_emb'] == 'random':
                self.ent_embedding = nn.Embedding(ent_emb.shape[0], ent_emb.shape[1])
            if args['ent_emb'] == 'avg':
                self.ent_embedding = self.word_embedding
            
            if args['ent_attn'] == True:
                self.ent_transformer = MultiHeadSelfAttention(ent_emb.shape[1], 16, 16, 16)
            else:
                self.ent_fc1 = nn.Linear(ent_emb.shape[1], query_dim)
                self.ent_fc2 = nn.Linear(query_dim, news_dim)
            self.ent_attn = AttentionPooling(news_dim, query_dim)
            out_dim += news_dim
        self.aggr_fc = nn.Linear(out_dim, news_dim)
        
    
    def forward(self, news, ents = None):
        title, body, cate = news[:, :max_title], news[:, max_title: -2], news[:, -2:]
        
        t_rep = self.word_embedding(title) # (n_batch, n_seq, emb_dim)
        t_rep = self.dropout(t_rep)
        t_rep = self.self_attn(t_rep, t_rep, t_rep) # (n_batch, n_seq, 256)
        t_rep = self.addi_attn(t_rep) # (n_batch, 256)
        
        if self.args['use_ent']:
            e_rep = self.ent_embedding(ents) # (n_batch, n_ent, emb_dim)
            e_rep = self.dropout(e_rep)
            
            if self.args['ent_emb'] = 'avg':
                e_rep = torch.mean(e_rep, dim = 1)
            
            if self.args['ent_attn'] == True:
                e_rep = self.ent_transformer(e_rep, e_rep, e_rep) # (n_batch, n_ent, 256)
            else:
                e_rep = F.relu(self.ent_fc1(e_rep))
                e_rep = self.ent_fc2(e_rep) # (n_news, n_ent, news_dim)
            e_rep = self.ent_attn(e_rep) # (n_batch, 256)
            t_rep = torch.cat((t_rep, e_rep), dim = -1)
        
        r = self.aggr_fc(t_rep)
        if 'aggr_relu' in self.args and self.args['aggr_relu']:
            r = F.relu(r)
        return r # (n_news, n_filter)

class AttnUserEncoder(nn.Module):
    def __init__(self, n_head, news_dim, query_dim):
        super().__init__()
        self.self_attn = MultiHeadSelfAttention(news_dim, n_head, 16, 16)
        self.addi_attn = AttentionPooling(news_dim, query_dim)
    
    def forward(self, h): # (n_batch, n_news, 256)
        u = self.self_attn(h, h, h) # (n_batch, n_news, 256)
        u = self.addi_attn(u) # (n_batch, 256)
        return u

In [25]:
def train_epoch(model, train_dataset, optimizer, entrophy):
    train_losses = []
    model.train()
    for _, batch in enumerate(train_dataset):
        if batch[0].shape[0] == 0:
            break
        sample = torch.tensor(batch[0], dtype = torch.long, device = device)
        history = torch.tensor(batch[1], dtype = torch.long, device = device)
        correct = torch.argmax(torch.tensor(batch[2], dtype = torch.long, device = device), dim = 1)
        samp_ents = torch.tensor(batch[3], dtype = torch.long, device = device)
        user_ents = torch.tensor(batch[4], dtype = torch.long, device = device)
        optimizer.zero_grad()
        output = model(history, sample, samp_ents, user_ents)
        loss = entrophy(output, correct)
        train_losses.append(loss.item())
        loss.backward()
        optimizer.step()
    return np.average(train_losses)

In [26]:
def evaluate(model, dev_dataset, news_info, dev_users, dev_user_hist, news_ents = None):
    news_rep = encode_all_news(model.news_encoder, news_info, news_ents) # (65238, 400)
    user_rep = encode_all_user(model.user_encoder, dev_users, dev_user_hist, news_rep)
    
    model.eval()
    with torch.no_grad():
        auc_scores = []
        mrr_scores = []
        ndcg5_scores = []
        ndcg10_scores = []
        for i, batch in enumerate(dev_dataset):
            if len(batch[0]) == 0:
                break
            user = user_rep[i]
            for j in range(len(batch[0])):
                sample = news_rep[batch[0][j]] # (n_imp, emb_dim)
                positive = batch[2][j] # (1, n_imp)

                score = np.matmul(sample, user[j]) # (1, n_imp)
                predict = np.exp(score) / np.sum(np.exp(score))

                auc_scores.append(roc_auc_score(positive, predict))
                mrr_scores.append(mrr_score(positive, predict))
                ndcg5_scores.append(ndcg_score(positive, predict, k = 5))
                ndcg10_scores.append(ndcg_score(positive, predict, k = 10))
    return np.mean(auc_scores), np.mean(mrr_scores), np.mean(ndcg5_scores), np.mean(ndcg10_scores)

In [27]:
def train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents = None, epochs = 4):
    optimizer = optim.Adam(model.parameters(), lr = 1e-4)
    entrophy = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        begin_time = time.time()
        loss = train_epoch(model, train_dataset, optimizer, entrophy)
        auc, mrr, ndcg5, ndcg10 = evaluate(model, dev_dataset, news_info, dev_users, dev_user_hist, news_ents)
        end_time = time.time()
        print('[epoch {:d}] loss: {:.4f}, AUC: {:.4f}, MRR: {:.4f}, nDCG5:{:.4f}, nDCG10: {:.4f}, Time: {:.2f}'.format(
            epoch + 1, loss, auc, mrr, ndcg5, ndcg10, end_time - begin_time))

In [28]:
def train(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents = None, epochs = 6):
    optimizer = optim.Adam(model.parameters(), lr = 1e-4)
    entrophy = nn.CrossEntropyLoss()
    for epoch in range(epochs):
        begin_time = time.time()
        loss = train_epoch(model, train_dataset, optimizer, entrophy)
        end_time = time.time()
    auc, mrr, ndcg5, ndcg10 = evaluate(model, dev_dataset, news_info, dev_users, dev_user_hist, news_ents)
    return auc, mrr, ndcg5, ndcg10

In [167]:
def train_multi_times(args, word_emb, cate_emb, ent_emb, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents = None):
    print(args)
#     aucs = []
    for i in range(5):
        model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
        train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, args['epochs'])
        # auc, mrr, ndcg5, ndcg10 = train(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, args['epochs'])
        # print('[Test {:d}] AUC: {:.4f}, MRR: {:.4f}, nDCG5:{:.4f}, nDCG10: {:.4f}'.format(
        #     i + 1, auc, mrr, ndcg5, ndcg10))
#         aucs.append(auc)
#     print('Average AUC: {:.4f}'.format(np.average(aucs)))

In [29]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': False}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 10)

[epoch 1] loss: 1.4479, AUC: 0.6306, MRR: 0.2848, nDCG5:0.3120, nDCG10: 0.3777, Time: 478.95
[epoch 2] loss: 1.3703, AUC: 0.6320, MRR: 0.2886, nDCG5:0.3153, nDCG10: 0.3816, Time: 480.58
[epoch 3] loss: 1.3414, AUC: 0.6383, MRR: 0.2924, nDCG5:0.3187, nDCG10: 0.3854, Time: 481.62
[epoch 4] loss: 1.3241, AUC: 0.6403, MRR: 0.2980, nDCG5:0.3254, nDCG10: 0.3908, Time: 484.74
[epoch 5] loss: 1.3103, AUC: 0.6503, MRR: 0.3018, nDCG5:0.3294, nDCG10: 0.3961, Time: 482.21
[epoch 6] loss: 1.2982, AUC: 0.6410, MRR: 0.3029, nDCG5:0.3292, nDCG10: 0.3949, Time: 484.93
[epoch 7] loss: 1.2877, AUC: 0.6566, MRR: 0.3097, nDCG5:0.3409, nDCG10: 0.4046, Time: 484.04
[epoch 8] loss: 1.2786, AUC: 0.6476, MRR: 0.3003, nDCG5:0.3289, nDCG10: 0.3950, Time: 482.55
[epoch 9] loss: 1.2695, AUC: 0.6528, MRR: 0.3042, nDCG5:0.3329, nDCG10: 0.3986, Time: 480.09
[epoch 10] loss: 1.2610, AUC: 0.6513, MRR: 0.3029, nDCG5:0.3333, nDCG10: 0.3981, Time: 480.38


In [31]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_emb': 'transe'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4466, AUC: 0.6202, MRR: 0.2836, nDCG5:0.3079, nDCG10: 0.3749, Time: 566.29
[epoch 2] loss: 1.3655, AUC: 0.6376, MRR: 0.2927, nDCG5:0.3201, nDCG10: 0.3860, Time: 564.89
[epoch 3] loss: 1.3394, AUC: 0.6455, MRR: 0.2987, nDCG5:0.3278, nDCG10: 0.3921, Time: 568.10
[epoch 4] loss: 1.3205, AUC: 0.6495, MRR: 0.3011, nDCG5:0.3311, nDCG10: 0.3966, Time: 568.08
[epoch 5] loss: 1.3067, AUC: 0.6538, MRR: 0.3027, nDCG5:0.3319, nDCG10: 0.3981, Time: 567.56
[epoch 6] loss: 1.2944, AUC: 0.6428, MRR: 0.2947, nDCG5:0.3233, nDCG10: 0.3897, Time: 572.76


In [38]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_emb': 'attn'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4443, AUC: 0.6303, MRR: 0.2783, nDCG5:0.3019, nDCG10: 0.3704, Time: 581.89
[epoch 2] loss: 1.3616, AUC: 0.6356, MRR: 0.2881, nDCG5:0.3135, nDCG10: 0.3804, Time: 589.09
[epoch 3] loss: 1.3303, AUC: 0.6552, MRR: 0.3049, nDCG5:0.3360, nDCG10: 0.4000, Time: 588.36
[epoch 4] loss: 1.3084, AUC: 0.6410, MRR: 0.2954, nDCG5:0.3216, nDCG10: 0.3878, Time: 589.86
[epoch 5] loss: 1.2900, AUC: 0.6473, MRR: 0.3019, nDCG5:0.3295, nDCG10: 0.3954, Time: 590.22
[epoch 6] loss: 1.2734, AUC: 0.6504, MRR: 0.3049, nDCG5:0.3342, nDCG10: 0.3987, Time: 590.02


In [39]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_emb': 'random'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4505, AUC: 0.6241, MRR: 0.2814, nDCG5:0.3071, nDCG10: 0.3747, Time: 589.30
[epoch 2] loss: 1.3641, AUC: 0.6426, MRR: 0.2946, nDCG5:0.3218, nDCG10: 0.3882, Time: 587.75
[epoch 3] loss: 1.3324, AUC: 0.6290, MRR: 0.2913, nDCG5:0.3153, nDCG10: 0.3826, Time: 589.14
[epoch 4] loss: 1.3120, AUC: 0.6495, MRR: 0.3029, nDCG5:0.3311, nDCG10: 0.3967, Time: 587.41
[epoch 5] loss: 1.2933, AUC: 0.6464, MRR: 0.3000, nDCG5:0.3283, nDCG10: 0.3946, Time: 586.67
[epoch 6] loss: 1.2774, AUC: 0.6466, MRR: 0.3030, nDCG5:0.3332, nDCG10: 0.3970, Time: 590.34


In [None]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_emb': 'avg'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

### 测试不同的 embedding 初始化方法(NRMS)

In [186]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True, 'ent_emb': 'random'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4383, AUC: 0.6307, MRR: 0.2879, nDCG5:0.3159, nDCG10: 0.3813, Time: 564.91
[epoch 2] loss: 1.3498, AUC: 0.6422, MRR: 0.2908, nDCG5:0.3205, nDCG10: 0.3867, Time: 573.45
[epoch 3] loss: 1.3202, AUC: 0.6497, MRR: 0.3027, nDCG5:0.3317, nDCG10: 0.3974, Time: 571.45
[epoch 4] loss: 1.3012, AUC: 0.6488, MRR: 0.2948, nDCG5:0.3244, nDCG10: 0.3907, Time: 572.53
[epoch 5] loss: 1.2878, AUC: 0.6608, MRR: 0.3052, nDCG5:0.3377, nDCG10: 0.4038, Time: 573.79
[epoch 6] loss: 1.2744, AUC: 0.6622, MRR: 0.3066, nDCG5:0.3401, nDCG10: 0.4049, Time: 573.79
[epoch 1] loss: 1.4393, AUC: 0.6337, MRR: 0.2896, nDCG5:0.3184, nDCG10: 0.3840, Time: 575.59
[epoch 2] loss: 1.3492, AUC: 0.6451, MRR: 0.2995, nDCG5:0.3275, nDCG10: 0.3941, Time: 571.59
[epoch 3] loss: 1.3194, AUC: 0.6522, MRR: 0.3099, nDCG5:0.3383, nDCG10: 0.4034, Time: 572.29
[epoch 4] loss: 1.3000, AUC: 0.6576, MRR: 0.3120, nDCG5:0.3419, nDCG10: 0.4071, Time: 570.98
[epoch 5] loss: 1.2860, AUC: 0.6518, MRR: 0.3105, nDCG5:0.3404, nDCG10

In [187]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': False, 'ent_emb': 'random'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4468, AUC: 0.6300, MRR: 0.2870, nDCG5:0.3126, nDCG10: 0.3793, Time: 570.09
[epoch 2] loss: 1.3693, AUC: 0.6439, MRR: 0.2937, nDCG5:0.3236, nDCG10: 0.3900, Time: 572.46
[epoch 3] loss: 1.3407, AUC: 0.6430, MRR: 0.2966, nDCG5:0.3260, nDCG10: 0.3919, Time: 569.42
[epoch 4] loss: 1.3203, AUC: 0.6524, MRR: 0.3023, nDCG5:0.3328, nDCG10: 0.3975, Time: 570.49
[epoch 5] loss: 1.3057, AUC: 0.6602, MRR: 0.3106, nDCG5:0.3431, nDCG10: 0.4068, Time: 570.44
[epoch 6] loss: 1.2928, AUC: 0.6680, MRR: 0.3108, nDCG5:0.3452, nDCG10: 0.4086, Time: 565.02
[epoch 1] loss: 1.4494, AUC: 0.6320, MRR: 0.2927, nDCG5:0.3192, nDCG10: 0.3854, Time: 569.70
[epoch 2] loss: 1.3692, AUC: 0.6327, MRR: 0.2901, nDCG5:0.3180, nDCG10: 0.3835, Time: 571.21
[epoch 3] loss: 1.3394, AUC: 0.6456, MRR: 0.2956, nDCG5:0.3239, nDCG10: 0.3899, Time: 571.37
[epoch 4] loss: 1.3208, AUC: 0.6535, MRR: 0.3028, nDCG5:0.3336, nDCG10: 0.3987, Time: 568.18
[epoch 5] loss: 1.3053, AUC: 0.6602, MRR: 0.3060, nDCG5:0.3369, nDCG10

In [188]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': True, 'ent_emb': 'random'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4483, AUC: 0.6321, MRR: 0.2817, nDCG5:0.3088, nDCG10: 0.3753, Time: 526.27
[epoch 2] loss: 1.3518, AUC: 0.6484, MRR: 0.2952, nDCG5:0.3248, nDCG10: 0.3905, Time: 518.15
[epoch 3] loss: 1.3207, AUC: 0.6540, MRR: 0.2982, nDCG5:0.3281, nDCG10: 0.3943, Time: 518.08
[epoch 4] loss: 1.3013, AUC: 0.6619, MRR: 0.3081, nDCG5:0.3407, nDCG10: 0.4052, Time: 525.89
[epoch 5] loss: 1.2851, AUC: 0.6637, MRR: 0.3065, nDCG5:0.3400, nDCG10: 0.4042, Time: 528.42
[epoch 6] loss: 1.2735, AUC: 0.6640, MRR: 0.3072, nDCG5:0.3392, nDCG10: 0.4048, Time: 517.40
[epoch 1] loss: 1.4486, AUC: 0.6321, MRR: 0.2867, nDCG5:0.3160, nDCG10: 0.3810, Time: 518.23
[epoch 2] loss: 1.3504, AUC: 0.6572, MRR: 0.3052, nDCG5:0.3369, nDCG10: 0.4008, Time: 515.27
[epoch 3] loss: 1.3201, AUC: 0.6606, MRR: 0.3094, nDCG5:0.3391, nDCG10: 0.4045, Time: 522.13
[epoch 4] loss: 1.3015, AUC: 0.6686, MRR: 0.3169, nDCG5:0.3493, nDCG10: 0.4133, Time: 519.76
[epoch 5] loss: 1.2866, AUC: 0.6684, MRR: 0.3138, nDCG5:0.3478, nDCG10

In [189]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': False, 'ent_emb': 'random'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4469, AUC: 0.6396, MRR: 0.2987, nDCG5:0.3272, nDCG10: 0.3913, Time: 519.75
[epoch 2] loss: 1.3637, AUC: 0.6482, MRR: 0.3018, nDCG5:0.3312, nDCG10: 0.3958, Time: 518.80
[epoch 3] loss: 1.3363, AUC: 0.6477, MRR: 0.3036, nDCG5:0.3316, nDCG10: 0.3979, Time: 520.45
[epoch 4] loss: 1.3161, AUC: 0.6492, MRR: 0.3038, nDCG5:0.3316, nDCG10: 0.3972, Time: 521.69
[epoch 5] loss: 1.3033, AUC: 0.6566, MRR: 0.3098, nDCG5:0.3408, nDCG10: 0.4050, Time: 518.11
[epoch 6] loss: 1.2904, AUC: 0.6599, MRR: 0.3106, nDCG5:0.3406, nDCG10: 0.4049, Time: 519.00
[epoch 1] loss: 1.4491, AUC: 0.6320, MRR: 0.2880, nDCG5:0.3152, nDCG10: 0.3815, Time: 516.30
[epoch 2] loss: 1.3665, AUC: 0.6384, MRR: 0.2915, nDCG5:0.3212, nDCG10: 0.3875, Time: 520.31
[epoch 3] loss: 1.3387, AUC: 0.6450, MRR: 0.3009, nDCG5:0.3297, nDCG10: 0.3958, Time: 517.26
[epoch 4] loss: 1.3194, AUC: 0.6489, MRR: 0.2963, nDCG5:0.3267, nDCG10: 0.3933, Time: 519.91
[epoch 5] loss: 1.3061, AUC: 0.6543, MRR: 0.3027, nDCG5:0.3348, nDCG10

### 测试 Entity 是否有用(NRMS)

In [158]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': False}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 10)

[epoch 1] loss: 1.4462, AUC: 0.6244, MRR: 0.2781, nDCG5:0.3042, nDCG10: 0.3711, Time: 485.22
[epoch 2] loss: 1.3703, AUC: 0.6389, MRR: 0.2871, nDCG5:0.3147, nDCG10: 0.3818, Time: 484.46
[epoch 3] loss: 1.3427, AUC: 0.6469, MRR: 0.2958, nDCG5:0.3274, nDCG10: 0.3926, Time: 484.29
[epoch 4] loss: 1.3245, AUC: 0.6550, MRR: 0.3018, nDCG5:0.3347, nDCG10: 0.3988, Time: 484.62
[epoch 5] loss: 1.3099, AUC: 0.6428, MRR: 0.2925, nDCG5:0.3224, nDCG10: 0.3894, Time: 484.83
[epoch 6] loss: 1.2977, AUC: 0.6503, MRR: 0.3019, nDCG5:0.3323, nDCG10: 0.3974, Time: 483.74
[epoch 7] loss: 1.2871, AUC: 0.6455, MRR: 0.2989, nDCG5:0.3286, nDCG10: 0.3938, Time: 483.92
[epoch 8] loss: 1.2766, AUC: 0.6507, MRR: 0.3005, nDCG5:0.3303, nDCG10: 0.3958, Time: 484.97
[epoch 9] loss: 1.2687, AUC: 0.6512, MRR: 0.2982, nDCG5:0.3287, nDCG10: 0.3953, Time: 484.59
[epoch 10] loss: 1.2583, AUC: 0.6500, MRR: 0.2970, nDCG5:0.3277, nDCG10: 0.3942, Time: 484.65


In [166]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': False}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 10)

[epoch 1] loss: 1.4473, AUC: 0.6210, MRR: 0.2764, nDCG5:0.3031, nDCG10: 0.3698, Time: 485.94
[epoch 2] loss: 1.3705, AUC: 0.6196, MRR: 0.2834, nDCG5:0.3087, nDCG10: 0.3749, Time: 486.81
[epoch 3] loss: 1.3410, AUC: 0.6445, MRR: 0.2940, nDCG5:0.3233, nDCG10: 0.3896, Time: 487.56
[epoch 4] loss: 1.3232, AUC: 0.6443, MRR: 0.2998, nDCG5:0.3278, nDCG10: 0.3935, Time: 486.07
[epoch 5] loss: 1.3092, AUC: 0.6544, MRR: 0.3032, nDCG5:0.3342, nDCG10: 0.3982, Time: 486.22
[epoch 6] loss: 1.2972, AUC: 0.6581, MRR: 0.3061, nDCG5:0.3373, nDCG10: 0.4022, Time: 486.36
[epoch 7] loss: 1.2866, AUC: 0.6562, MRR: 0.3093, nDCG5:0.3404, nDCG10: 0.4047, Time: 486.24
[epoch 8] loss: 1.2764, AUC: 0.6580, MRR: 0.3114, nDCG5:0.3402, nDCG10: 0.4060, Time: 487.55
[epoch 9] loss: 1.2673, AUC: 0.6537, MRR: 0.3024, nDCG5:0.3329, nDCG10: 0.3982, Time: 486.88
[epoch 10] loss: 1.2596, AUC: 0.6610, MRR: 0.3096, nDCG5:0.3404, nDCG10: 0.4054, Time: 486.40


In [170]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': False}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4513, AUC: 0.6293, MRR: 0.2851, nDCG5:0.3109, nDCG10: 0.3767, Time: 480.09
[epoch 2] loss: 1.3717, AUC: 0.6458, MRR: 0.2973, nDCG5:0.3254, nDCG10: 0.3909, Time: 485.01
[epoch 3] loss: 1.3435, AUC: 0.6546, MRR: 0.3038, nDCG5:0.3337, nDCG10: 0.3987, Time: 484.68
[epoch 4] loss: 1.3247, AUC: 0.6538, MRR: 0.3026, nDCG5:0.3310, nDCG10: 0.3976, Time: 485.80
[epoch 5] loss: 1.3103, AUC: 0.6576, MRR: 0.3040, nDCG5:0.3357, nDCG10: 0.4002, Time: 484.26
[epoch 6] loss: 1.2990, AUC: 0.6563, MRR: 0.2997, nDCG5:0.3293, nDCG10: 0.3962, Time: 485.17


In [171]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4479, AUC: 0.6290, MRR: 0.2898, nDCG5:0.3153, nDCG10: 0.3816, Time: 569.57
[epoch 2] loss: 1.3694, AUC: 0.6255, MRR: 0.2917, nDCG5:0.3163, nDCG10: 0.3826, Time: 567.59
[epoch 3] loss: 1.3412, AUC: 0.6560, MRR: 0.3084, nDCG5:0.3414, nDCG10: 0.4042, Time: 566.85
[epoch 4] loss: 1.3226, AUC: 0.6552, MRR: 0.3036, nDCG5:0.3345, nDCG10: 0.4002, Time: 569.53
[epoch 5] loss: 1.3082, AUC: 0.6549, MRR: 0.3006, nDCG5:0.3327, nDCG10: 0.3981, Time: 568.60
[epoch 6] loss: 1.2942, AUC: 0.6432, MRR: 0.2974, nDCG5:0.3246, nDCG10: 0.3913, Time: 567.74
[epoch 1] loss: 1.4501, AUC: 0.6422, MRR: 0.2979, nDCG5:0.3296, nDCG10: 0.3927, Time: 566.12
[epoch 2] loss: 1.3709, AUC: 0.6418, MRR: 0.2978, nDCG5:0.3264, nDCG10: 0.3920, Time: 567.52
[epoch 3] loss: 1.3415, AUC: 0.6510, MRR: 0.3000, nDCG5:0.3310, nDCG10: 0.3967, Time: 567.21
[epoch 4] loss: 1.3224, AUC: 0.6540, MRR: 0.2971, nDCG5:0.3269, nDCG10: 0.3938, Time: 568.71
[epoch 5] loss: 1.3082, AUC: 0.6586, MRR: 0.3035, nDCG5:0.3335, nDCG10

In [172]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc'}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4486, AUC: 0.6269, MRR: 0.2849, nDCG5:0.3123, nDCG10: 0.3787, Time: 509.30
[epoch 2] loss: 1.3667, AUC: 0.6385, MRR: 0.2924, nDCG5:0.3214, nDCG10: 0.3871, Time: 516.36
[epoch 3] loss: 1.3372, AUC: 0.6512, MRR: 0.3034, nDCG5:0.3346, nDCG10: 0.3989, Time: 515.64
[epoch 4] loss: 1.3199, AUC: 0.6518, MRR: 0.3028, nDCG5:0.3351, nDCG10: 0.3986, Time: 517.65
[epoch 5] loss: 1.3049, AUC: 0.6571, MRR: 0.3065, nDCG5:0.3385, nDCG10: 0.4022, Time: 517.81
[epoch 6] loss: 1.2934, AUC: 0.6576, MRR: 0.3074, nDCG5:0.3406, nDCG10: 0.4050, Time: 516.86


In [173]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4433, AUC: 0.6283, MRR: 0.2862, nDCG5:0.3131, nDCG10: 0.3795, Time: 566.94
[epoch 2] loss: 1.3491, AUC: 0.6433, MRR: 0.2923, nDCG5:0.3192, nDCG10: 0.3863, Time: 566.96
[epoch 3] loss: 1.3194, AUC: 0.6612, MRR: 0.3052, nDCG5:0.3363, nDCG10: 0.4020, Time: 567.34
[epoch 4] loss: 1.3004, AUC: 0.6636, MRR: 0.3088, nDCG5:0.3410, nDCG10: 0.4058, Time: 568.53
[epoch 5] loss: 1.2865, AUC: 0.6659, MRR: 0.3082, nDCG5:0.3417, nDCG10: 0.4067, Time: 567.39
[epoch 6] loss: 1.2742, AUC: 0.6663, MRR: 0.3073, nDCG5:0.3394, nDCG10: 0.4052, Time: 567.37


In [175]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4382, AUC: 0.6307, MRR: 0.2836, nDCG5:0.3103, nDCG10: 0.3773, Time: 560.73
[epoch 2] loss: 1.3482, AUC: 0.6526, MRR: 0.2954, nDCG5:0.3248, nDCG10: 0.3918, Time: 567.96
[epoch 3] loss: 1.3192, AUC: 0.6546, MRR: 0.3027, nDCG5:0.3327, nDCG10: 0.3986, Time: 569.99
[epoch 4] loss: 1.3007, AUC: 0.6559, MRR: 0.3056, nDCG5:0.3349, nDCG10: 0.4010, Time: 568.43
[epoch 5] loss: 1.2858, AUC: 0.6691, MRR: 0.3123, nDCG5:0.3451, nDCG10: 0.4098, Time: 568.84
[epoch 6] loss: 1.2731, AUC: 0.6643, MRR: 0.3047, nDCG5:0.3366, nDCG10: 0.4027, Time: 567.93


In [176]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4404, AUC: 0.6312, MRR: 0.2802, nDCG5:0.3047, nDCG10: 0.3733, Time: 572.46
[epoch 2] loss: 1.3500, AUC: 0.6380, MRR: 0.2931, nDCG5:0.3191, nDCG10: 0.3870, Time: 568.53
[epoch 3] loss: 1.3191, AUC: 0.6503, MRR: 0.3043, nDCG5:0.3330, nDCG10: 0.3989, Time: 568.45
[epoch 4] loss: 1.2999, AUC: 0.6539, MRR: 0.3026, nDCG5:0.3330, nDCG10: 0.3984, Time: 571.66
[epoch 5] loss: 1.2854, AUC: 0.6600, MRR: 0.3100, nDCG5:0.3421, nDCG10: 0.4060, Time: 582.24
[epoch 6] loss: 1.2726, AUC: 0.6662, MRR: 0.3164, nDCG5:0.3483, nDCG10: 0.4124, Time: 583.32


In [177]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4461, AUC: 0.6327, MRR: 0.2894, nDCG5:0.3157, nDCG10: 0.3806, Time: 526.69
[epoch 2] loss: 1.3510, AUC: 0.6542, MRR: 0.3014, nDCG5:0.3303, nDCG10: 0.3962, Time: 523.70
[epoch 3] loss: 1.3196, AUC: 0.6542, MRR: 0.3040, nDCG5:0.3319, nDCG10: 0.3988, Time: 524.60
[epoch 4] loss: 1.3010, AUC: 0.6602, MRR: 0.3119, nDCG5:0.3434, nDCG10: 0.4081, Time: 530.13
[epoch 5] loss: 1.2865, AUC: 0.6686, MRR: 0.3166, nDCG5:0.3473, nDCG10: 0.4125, Time: 525.99
[epoch 6] loss: 1.2749, AUC: 0.6702, MRR: 0.3169, nDCG5:0.3499, nDCG10: 0.4135, Time: 525.86


In [178]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4525, AUC: 0.6376, MRR: 0.2898, nDCG5:0.3185, nDCG10: 0.3840, Time: 532.85
[epoch 2] loss: 1.3550, AUC: 0.6603, MRR: 0.3051, nDCG5:0.3378, nDCG10: 0.4016, Time: 519.85
[epoch 3] loss: 1.3225, AUC: 0.6603, MRR: 0.3096, nDCG5:0.3432, nDCG10: 0.4056, Time: 521.37
[epoch 4] loss: 1.3034, AUC: 0.6679, MRR: 0.3104, nDCG5:0.3430, nDCG10: 0.4076, Time: 523.43
[epoch 5] loss: 1.2879, AUC: 0.6694, MRR: 0.3111, nDCG5:0.3450, nDCG10: 0.4098, Time: 520.53
[epoch 6] loss: 1.2759, AUC: 0.6674, MRR: 0.3141, nDCG5:0.3474, nDCG10: 0.4110, Time: 521.86


In [179]:
torch.save(model.state_dict(), './model_fc_1')

In [180]:
# 15 epochs
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 10)

[epoch 1] loss: 1.2649, AUC: 0.6683, MRR: 0.3124, nDCG5:0.3449, nDCG10: 0.4102, Time: 521.29
[epoch 2] loss: 1.2547, AUC: 0.6744, MRR: 0.3138, nDCG5:0.3485, nDCG10: 0.4129, Time: 522.47
[epoch 3] loss: 1.2456, AUC: 0.6624, MRR: 0.3107, nDCG5:0.3422, nDCG10: 0.4073, Time: 528.35
[epoch 4] loss: 1.2366, AUC: 0.6672, MRR: 0.3116, nDCG5:0.3454, nDCG10: 0.4100, Time: 529.81
[epoch 5] loss: 1.2284, AUC: 0.6636, MRR: 0.3109, nDCG5:0.3441, nDCG10: 0.4087, Time: 522.38
[epoch 6] loss: 1.2193, AUC: 0.6683, MRR: 0.3132, nDCG5:0.3476, nDCG10: 0.4116, Time: 525.01
[epoch 7] loss: 1.2103, AUC: 0.6690, MRR: 0.3134, nDCG5:0.3476, nDCG10: 0.4116, Time: 523.91
[epoch 8] loss: 1.2025, AUC: 0.6627, MRR: 0.3104, nDCG5:0.3442, nDCG10: 0.4077, Time: 523.42
[epoch 9] loss: 1.1944, AUC: 0.6628, MRR: 0.3128, nDCG5:0.3445, nDCG10: 0.4087, Time: 522.24
[epoch 10] loss: 1.1860, AUC: 0.6607, MRR: 0.3091, nDCG5:0.3411, nDCG10: 0.4059, Time: 523.50


In [181]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4529, AUC: 0.6212, MRR: 0.2762, nDCG5:0.3016, nDCG10: 0.3685, Time: 521.64
[epoch 2] loss: 1.3539, AUC: 0.6389, MRR: 0.2938, nDCG5:0.3213, nDCG10: 0.3874, Time: 524.88
[epoch 3] loss: 1.3215, AUC: 0.6481, MRR: 0.2973, nDCG5:0.3251, nDCG10: 0.3930, Time: 522.15
[epoch 4] loss: 1.3020, AUC: 0.6558, MRR: 0.3024, nDCG5:0.3322, nDCG10: 0.4000, Time: 527.08
[epoch 5] loss: 1.2874, AUC: 0.6609, MRR: 0.3088, nDCG5:0.3411, nDCG10: 0.4066, Time: 522.00
[epoch 6] loss: 1.2751, AUC: 0.6602, MRR: 0.3066, nDCG5:0.3400, nDCG10: 0.4044, Time: 522.67


In [182]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'fc', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4462, AUC: 0.6424, MRR: 0.3003, nDCG5:0.3286, nDCG10: 0.3923, Time: 519.75
[epoch 2] loss: 1.3490, AUC: 0.6495, MRR: 0.3036, nDCG5:0.3329, nDCG10: 0.3973, Time: 533.19
[epoch 3] loss: 1.3191, AUC: 0.6583, MRR: 0.3118, nDCG5:0.3424, nDCG10: 0.4066, Time: 524.70
[epoch 4] loss: 1.3003, AUC: 0.6609, MRR: 0.3135, nDCG5:0.3452, nDCG10: 0.4089, Time: 523.71
[epoch 5] loss: 1.2852, AUC: 0.6637, MRR: 0.3144, nDCG5:0.3451, nDCG10: 0.4096, Time: 523.43
[epoch 6] loss: 1.2731, AUC: 0.6598, MRR: 0.3148, nDCG5:0.3452, nDCG10: 0.4089, Time: 524.72


In [183]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4413, AUC: 0.6416, MRR: 0.2915, nDCG5:0.3208, nDCG10: 0.3857, Time: 566.27
[epoch 2] loss: 1.3495, AUC: 0.6490, MRR: 0.2962, nDCG5:0.3278, nDCG10: 0.3923, Time: 573.34
[epoch 3] loss: 1.3186, AUC: 0.6588, MRR: 0.3041, nDCG5:0.3359, nDCG10: 0.4009, Time: 570.66
[epoch 4] loss: 1.2995, AUC: 0.6578, MRR: 0.3022, nDCG5:0.3327, nDCG10: 0.3988, Time: 575.35
[epoch 5] loss: 1.2858, AUC: 0.6691, MRR: 0.3087, nDCG5:0.3429, nDCG10: 0.4077, Time: 574.15
[epoch 6] loss: 1.2736, AUC: 0.6662, MRR: 0.3060, nDCG5:0.3396, nDCG10: 0.4053, Time: 577.89


In [184]:
args = {'model': 'NRMS', 'epochs': 6,
        'use_ent': True, 'ent_mode': 'attn', 'aggr_relu': True}
model = Model(args, word_emb, cate_emb, ent_emb).to('cuda')
train_and_eval(model, train_dataset, dev_dataset, news_info, dev_users, dev_user_hist, news_ents, 6)

[epoch 1] loss: 1.4420, AUC: 0.6346, MRR: 0.2888, nDCG5:0.3170, nDCG10: 0.3821, Time: 581.08
[epoch 2] loss: 1.3489, AUC: 0.6503, MRR: 0.2978, nDCG5:0.3272, nDCG10: 0.3931, Time: 573.56
[epoch 3] loss: 1.3184, AUC: 0.6537, MRR: 0.3010, nDCG5:0.3315, nDCG10: 0.3963, Time: 581.44
[epoch 4] loss: 1.2998, AUC: 0.6575, MRR: 0.3058, nDCG5:0.3354, nDCG10: 0.4017, Time: 574.55
[epoch 5] loss: 1.2850, AUC: 0.6654, MRR: 0.3060, nDCG5:0.3384, nDCG10: 0.4040, Time: 576.96
[epoch 6] loss: 1.2726, AUC: 0.6634, MRR: 0.3119, nDCG5:0.3435, nDCG10: 0.4088, Time: 575.09
