#### 三方库的导入

In [1]:
import math
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.utils import dropout_adj
from torch.nn import Parameter
import pdb
import time
from torch_geometric.nn.conv import MessagePassing
from torch_geometric.utils import remove_self_loops, add_self_loops, softmax, degree, dropout_adj
from torch_geometric.nn.inits import uniform

In [2]:
import random

def set_seed(seed=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU, the current CUDA
    device and all CUDA devices) with the same value, then switches cuDNN to
    deterministic mode with auto-tuning disabled.

    Args:
        seed: Integer seed passed to every generator.
    """
    seeders = (
        random.seed,                  # Python
        np.random.seed,               # NumPy
        torch.manual_seed,            # PyTorch CPU
        torch.cuda.manual_seed,       # PyTorch GPU (current device)
        torch.cuda.manual_seed_all,   # PyTorch GPU (all devices)
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True   # deterministic convolution algorithms
    cudnn.benchmark = False      # no algorithm auto-tuning

# Usage: fix all RNG seeds to 111 for this run.
set_seed(111)

#### 训练数据的导入

In [3]:
import numpy as np
from torch.utils.data import Dataset

'''
    自定义PyTorch数据集加载类
        继承Dataset类
        重写__len__和__getitem__方法
'''


class DataLoad(Dataset):
    """Dataset over the rows stored in ``<path>train_user_pos_neg.npy``.

    Each row is unpacked into exactly three values, which the code names
    ``user``, ``pos_item`` and ``neg_item``.

    Args:
        path: Prefix that is concatenated directly with the file name, so it
            must already end with a path separator.
    """

    def __init__(self, path):
        super().__init__()
        triples_file = path + 'train_user_pos_neg.npy'
        # NOTE(review): allow_pickle=True executes pickled objects on load;
        # only use with trusted files.
        self.data = np.load(triples_file, allow_pickle=True)

    def __len__(self):
        """Return the number of stored rows."""
        return len(self.data)

    def __getitem__(self, index):
        """Return row ``index`` as the list ``[user, pos_item, neg_item]``."""
        row = self.data[index]
        user, pos_item, neg_item = row
        return [user, pos_item, neg_item]

### 卷积层代码
#### （1）单层卷积过程

In [4]:
class Base_gcn(MessagePassing):
    """Parameter-free message-passing layer with fixed per-edge weights.

    Each message is the source-node feature multiplied by the weight of its
    edge; messages are combined with the aggregation named by ``aggr`` and
    returned without any further transformation.

    Args:
        in_channels: Stored for ``repr`` only; not used in the computation.
        out_channels: Stored for ``repr`` only; not used in the computation.
        edge_index_weight: One weight per edge, reshaped to a column in
            ``message``. Must not be ``None`` when ``forward`` is called.
        normalize: Accepted for interface compatibility; unused.
        bias: Accepted for interface compatibility; unused.
        aggr: Aggregation scheme forwarded to ``MessagePassing``.
        **kwargs: Forwarded to ``MessagePassing.__init__``.
    """

    def __init__(self, in_channels, out_channels, edge_index_weight = None, normalize=True, bias=True, aggr='add', **kwargs):
        super(Base_gcn, self).__init__(aggr=aggr, **kwargs)
        self.aggr = aggr
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.edge_index_weight = edge_index_weight

    def forward(self, x, edge_index, size=None):
        """Propagate ``x`` over ``edge_index`` using the stored edge weights.

        When ``size`` is ``None`` self-loop edges are removed first. A 1-D
        ``x`` is treated as a single feature column.
        """
        edge_attr = self.edge_index_weight
        if size is None:
            if edge_attr is not None and edge_attr.size(0) == edge_index.size(1):
                # Drop the weights of removed self-loops together with the
                # edges so weights and edges stay aligned one-to-one.
                edge_index, edge_attr = remove_self_loops(edge_index, edge_attr)
            else:
                edge_index, _ = remove_self_loops(edge_index)
        x = x.unsqueeze(-1) if x.dim() == 1 else x
        return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, edge_attr=edge_attr)

    def message(self, x_j, edge_index, size, edge_attr):
        """Scale every source-node feature row by its edge weight."""
        return torch.mul(x_j, edge_attr.view(-1, 1))

    def update(self, aggr_out):
        """Return the aggregated messages unchanged."""
        return aggr_out

    def __repr__(self):
        # Was spelled ``__repr`` (name-mangled), so it never overrode repr().
        return '{}({},{})'.format(self.__class__.__name__, self.in_channels, self.out_channels)

#### （2）多层卷积处理

In [5]:
class GCNs(torch.nn.Module):
    """Stack of weight-shared ``Base_gcn`` propagations over ID embeddings.

    The constructor loads per-edge weights from
    ``data_path + '/train_log_i.npy'``, optionally rescales them with
    ``change_base`` and concatenates the weight vector with itself
    (presumably one copy per edge direction; verify against the caller's
    ``edge_index`` layout). ``forward`` applies the same convolution
    ``num_layers`` times and averages the input embeddings with every layer
    output.

    Args:
        edge_index: Edge index tensor handed to the convolution.
        num_user: Number of rows in the user embedding table.
        num_item: Number of rows in the item embedding table.
        dim_id: Embedding width used when ``dim_latent`` is not given.
        num_layers: Number of propagation steps in ``forward``.
        log_base: Multiplier of ``e`` used as logarithm base in
            ``change_base``; ``1`` leaves the loaded weights untouched.
        dim_latent: Embedding width. Falls back to ``dim_id`` when ``None``
            or ``0`` (previously no embeddings were created in that case and
            ``forward`` raised ``AttributeError``).
        device: Device the edge weights are moved to.
        data_path: Directory containing ``train_log_i.npy``.
    """

    def __init__(self, edge_index, num_user, num_item, dim_id, num_layers, log_base, dim_latent=None, device=None, data_path='./'):
        super(GCNs, self).__init__()
        self.edge_index = edge_index
        self.num_user = num_user
        self.num_item = num_item
        self.dim_id = dim_id
        self.dim_latent = dim_latent if dim_latent else dim_id
        self.device = device
        self.data_path = data_path
        self.num_layers = num_layers
        self.log_base = log_base
        self.conv_embeds = []               # kept for compatibility; not used by this class
        self.edge_index_weight_user = None
        self.edge_index_weight_USER = None
        # NOTE(review): allow_pickle=True executes pickled objects on load;
        # only use with trusted files.
        self.edge_index_weight_item = np.load(self.data_path + '/train_log_i.npy', allow_pickle=True)
        self.change_base()
        self.edge_index_weight_item = torch.tensor(self.edge_index_weight_item, dtype=torch.float)
        self.edge_index_weight = torch.cat((self.edge_index_weight_item, self.edge_index_weight_item)).to(self.device)

        # Creation/initialisation order is kept as-is so the random stream
        # consumed here does not change.
        self.embedding_user = torch.nn.Embedding(
            num_embeddings=self.num_user, embedding_dim=self.dim_latent)
        self.embedding_item = torch.nn.Embedding(
            num_embeddings=self.num_item, embedding_dim=self.dim_latent)
        nn.init.xavier_uniform_(self.embedding_user.weight, gain=1)
        nn.init.xavier_uniform_(self.embedding_item.weight, gain=1)

        self.conv_embed = Base_gcn(self.dim_latent, self.dim_latent, aggr='mean', edge_index_weight=self.edge_index_weight)

    def change_base(self):
        """Rescale the loaded edge weights to logarithm base ``log_base * e``.

        Does nothing when ``log_base == 1``. Item weights are shifted by -1,
        divided by ``ln(log_base * e)`` and shifted back by +1; user weights
        (``edge_index_weight_user`` / ``edge_index_weight_USER``) are only
        divided. Weight sets that are ``None`` are skipped.
        """
        if self.log_base == 1:
            return
        divisor = np.log(self.log_base * math.e)
        if self.edge_index_weight_item is not None:
            shifted = self.edge_index_weight_item - 1
            self.edge_index_weight_item = shifted / divisor + 1
        if self.edge_index_weight_user is not None:
            self.edge_index_weight_user = self.edge_index_weight_user / divisor
        if self.edge_index_weight_USER is not None:
            self.edge_index_weight_USER = self.edge_index_weight_USER / divisor

    def forward(self):
        """Return ``(mean_embeddings, embedding_user, embedding_item)``.

        ``mean_embeddings`` has one row per user followed by one row per item
        and is the mean over the raw embeddings and the output of each of the
        ``num_layers`` propagation steps. The two embedding modules are
        returned as-is.
        """
        all_emb = torch.cat([self.embedding_user.weight, self.embedding_item.weight])
        embs = [all_emb]
        for _ in range(self.num_layers):
            all_emb = self.conv_embed(all_emb, self.edge_index)
            embs.append(all_emb)
        light_out = torch.mean(torch.stack(embs, dim=1), dim=1)

        return light_out, self.embedding_user, self.embedding_item


### 指标计算代码

In [6]:
def getLabel(test_data, pred_data):
    """Mark which predicted items are ground-truth items.

    Args:
        test_data: Sequence with one collection of ground-truth item ids per
            user. Ids must be hashable.
        pred_data: Indexable with one ranked sequence of predicted item ids
            per user, aligned with ``test_data``.

    Returns:
        Float ``numpy`` array in which entry ``[i, j]`` is ``1.0`` when
        ``pred_data[i][j]`` occurs in ``test_data[i]`` and ``0.0`` otherwise.
    """
    hit_rows = []
    for i, groundTrue in enumerate(test_data):
        # A set gives constant-time membership instead of rescanning the
        # ground-truth list for every ranked item.
        truth = set(groundTrue)
        hits = [item in truth for item in pred_data[i]]
        hit_rows.append(np.array(hits).astype("float"))
    return np.array(hit_rows).astype('float')

def RecallPrecision_ATk(test_data, r, k):
    """Sum recall@k and precision@k over a batch of users.

    Args:
        test_data: Sequence with one collection of ground-truth items per
            user; only the lengths are used. Users may have different
            numbers of items.
        r: 2-D hit matrix (one row per user, columns in ranked order) where
            a non-zero entry marks a hit, e.g. the output of ``getLabel``.
        k: Cut-off; only the first ``k`` columns of ``r`` are counted.

    Returns:
        ``{'recall': ..., 'precise': ...}`` holding the *sums* over the batch
        (not the means). A user without ground-truth items contributes 0 to
        recall instead of turning the whole sum into NaN.
    """
    right_pred = r[:, :k].sum(1)       # hits per user within the top k
    recall_n = np.array([len(items) for items in test_data])
    # Divide only where a ground truth exists; 0/0 would otherwise yield NaN.
    recall_per_user = np.divide(
        right_pred,
        recall_n,
        out=np.zeros(len(right_pred), dtype=float),
        where=recall_n > 0,
    )
    recall = np.sum(recall_per_user)
    precis = np.sum(right_pred) / k
    return {'recall': recall, 'precise': precis}

def NDCGatK_r(test_data,r,k):
    """Sum NDCG@k over a batch of users for binary relevance.

    Args:
        test_data: Sequence with one collection of ground-truth items per
            user; only the lengths are used to build the ideal ranking.
        r: 2-D hit matrix (one row per user, columns in ranked order).
        k: Cut-off; only the first ``k`` columns of ``r`` are scored.

    Returns:
        Sum over users of DCG@k / IDCG@k. Users whose ideal DCG is zero are
        divided by 1 instead, and any NaN result counts as 0.
    """
    assert len(r) == len(test_data)
    top_hits = r[:, :k]
    # Position discount 1 / log2(rank + 1) for ranks 1..k.
    discounts = 1./np.log2(np.arange(2, k + 2))

    # Ideal ranking: each user's first min(k, #ground-truth) slots are hits.
    ideal_hits = np.zeros((len(top_hits), k))
    for row, items in enumerate(test_data):
        ideal_hits[row, :min(k, len(items))] = 1

    idcg = np.sum(ideal_hits * discounts, axis=1)
    dcg = np.sum(top_hits * discounts, axis=1)
    idcg[idcg == 0.] = 1.
    ndcg = dcg/idcg
    ndcg[np.isnan(ndcg)] = 0.
    return np.sum(ndcg)

def test_one_batch(X, topks=(5, 10, 15, 20)):
    """Compute summed precision, recall and NDCG for one evaluation batch.

    Args:
        X: Pair ``(ranked_items, ground_truth)``. ``ranked_items`` must
            provide ``.numpy()`` (e.g. a CPU tensor) with one row of ranked
            item ids per user; ``ground_truth`` holds one collection of true
            item ids per user.
        topks: Cut-offs to evaluate. Defaults to the previously hard-coded
            ``5, 10, 15, 20`` so existing single-argument callers (including
            ``Pool.map``) behave as before.

    Returns:
        Dict with keys ``'recall'``, ``'precise'`` and ``'ndcg'``; each value
        is an array with one batch *sum* per cut-off, in ``topks`` order.
    """
    sorted_items = X[0].numpy()
    groundTrue = X[1]
    # Hit matrix: which ranked predictions are ground-truth items.
    r = getLabel(groundTrue, sorted_items)
    pre, recall, ndcg = [], [], []
    for k in topks:
        ret = RecallPrecision_ATk(groundTrue, r, k)
        pre.append(ret['precise'])
        recall.append(ret['recall'])
        ndcg.append(NDCGatK_r(groundTrue, r, k))
    return {'recall': np.array(recall),
            'precise': np.array(pre),
            'ndcg': np.array(ndcg)}

#### BoostGCN模型代码

In [7]:
import multiprocessing

class BoostGCN(torch.nn.Module):
    """BPR-trained recommender built on the ``GCNs`` propagation module.

    Users and items share one node index space: rows ``[0, num_user)`` of the
    propagated representation are users and the remaining rows are items, so
    item node ids are offset by ``num_user``.

    Args:
        edge_index: Array-like of edges, one ``(source, target)`` pair per
            row. It is transposed and concatenated with its reversed copy.
        batch_size: Stored on the instance; not used by the methods here.
        num_user: Number of user nodes.
        num_item: Number of item nodes.
        num_neg: Accepted for interface compatibility; unused here.
        dim_x: Embedding width.
        reg_weight: Coefficient of the L2 embedding regulariser in ``loss``.
        num_layers: Number of propagation steps in ``GCNs``.
        log_base: Forwarded to ``GCNs``.
        device: Device for the graph tensors.
        data_path: Forwarded to ``GCNs``.
    """

    def __init__(self, edge_index, batch_size, num_user, num_item, num_neg, dim_x, reg_weight, num_layers, log_base, device=None, data_path='./'):
        super(BoostGCN, self).__init__()
        self.batch_size = batch_size
        self.num_user = num_user
        self.num_item = num_item
        self.reg_weight = reg_weight
        self.num_layers = num_layers
        self.log_base = log_base
        self.device = device
        self.data_path = data_path
        self.dim_latent = dim_x
        # At least one worker: cpu_count() // 2 is 0 on a single-core host and
        # multiprocessing.Pool rejects 0 processes.
        self.CORES = max(1, multiprocessing.cpu_count() // 2)

        # Transpose to shape (2, num_edge) and make the memory contiguous.
        self.edge_index = torch.tensor(edge_index, dtype=torch.int64).t().contiguous().to(self.device)
        # Append the reversed edges: shape (2, num_edge * 2).
        self.edge_index = torch.cat((self.edge_index, self.edge_index[[1, 0]]), dim=1).to(self.device)

        self.gcns = GCNs(self.edge_index, num_user, num_item, dim_x, num_layers=self.num_layers, log_base=self.log_base, dim_latent=self.dim_latent, device=self.device, data_path=self.data_path)
        self.id_embedding = nn.init.xavier_normal_(torch.rand((num_user + num_item, dim_x), requires_grad=True)).to(self.device)
        # Placeholder until the first forward pass overwrites it.
        self.result_embed = nn.init.xavier_normal_(torch.rand((num_user + num_item, self.dim_latent))).to(self.device)

    def forward(self, user_nodes, pos_item_nodes, neg_item_nodes):
        """Score (user, positive) and (user, negative) pairs by dot product.

        Also stores a detached copy of the full representation in
        ``self.result_embed``; the evaluation methods read that snapshot, so
        they see the embeddings of the most recent forward pass.

        Returns:
            ``(pos_scores, neg_scores, users_emb, items_emb)`` where the last
            two are the embedding modules returned by ``self.gcns()``.
        """
        representation, users_emb, items_emb = self.gcns()
        user_rep = representation[:self.num_user]
        item_rep = representation[self.num_user:]

        result_embed = torch.cat((user_rep, item_rep), dim=0)
        # Keep only the values for evaluation; holding the graph-attached
        # tensor on the module would keep an autograd reference alive.
        self.result_embed = result_embed.detach()

        user_tensor = result_embed[user_nodes]
        pos_item_tensor = result_embed[pos_item_nodes]
        neg_item_tensor = result_embed[neg_item_nodes]
        pos_scores = torch.sum(user_tensor * pos_item_tensor, dim=1)
        neg_scores = torch.sum(user_tensor * neg_item_tensor, dim=1)
        return pos_scores, neg_scores, users_emb, items_emb

    def loss(self, data):
        """Return the BPR loss (base-2 log) plus L2 embedding regularisation.

        Args:
            data: Triple ``(user, pos_items, neg_items)`` of index tensors.
                Item indices are node ids, i.e. offset by ``num_user``.
        """
        user, pos_items, neg_items = data
        user = user.to(self.device)
        pos_items = pos_items.to(self.device)
        neg_items = neg_items.to(self.device)

        pos_scores, neg_scores, users_emb, items_emb = self.forward(user, pos_items, neg_items)
        # log2(sigmoid(x)) == logsigmoid(x) / ln(2); logsigmoid stays finite
        # when sigmoid(x) underflows to 0.
        loss_value = -torch.mean(F.logsigmoid(pos_scores - neg_scores)) / math.log(2)

        userEmb = users_emb(user)
        posEmb = items_emb(pos_items - self.num_user)
        negEmb = items_emb(neg_items - self.num_user)
        reg_loss = (1 / 2) * (userEmb.norm(2).pow(2) + posEmb.norm(2).pow(2) + negEmb.norm(2).pow(2)) / float(len(user))
        reg_loss = self.reg_weight * (reg_loss)
        return loss_value + reg_loss

    def minibatch(self, tensors, **kwargs):
        """Yield consecutive slices of at most ``batch_size`` elements.

        ``batch_size`` is read from ``kwargs`` (default 512). When
        ``tensors`` holds exactly one element that is itself a sized
        sequence, that inner sequence is sliced; otherwise ``tensors`` itself
        is sliced. A flat one-element list such as ``[7]`` therefore yields
        ``[7]`` instead of failing on ``len(7)``.
        """
        batch_size = kwargs.get('batch_size', 512)

        if len(tensors) == 1 and hasattr(tensors[0], '__len__'):
            tensor = tensors[0]
            for i in range(0, len(tensor), batch_size):
                yield tensor[i:i + batch_size]
        else:
            for i in range(0, len(tensors), batch_size):
                yield tensors[i:i + batch_size]

    def getUserPosItems(self, users):
        """Return, per user, the column indices of non-zero ``UserItemNet`` entries.

        NOTE(review): ``self.UserItemNet`` is not assigned anywhere in this
        class, so this raises ``AttributeError`` unless the caller sets it.
        """
        posItems = []
        for user in users:
            posItems.append(self.UserItemNet[user].nonzero()[1])
        return posItems

    def _evaluate_topk(self, dataset, batch_size, multicore, topks, ground_truth_of, adjust_scores):
        """Rank items for every user in ``dataset`` and average the metrics.

        Shared by ``accuracy`` and ``accuracy_full_neg``.

        Args:
            dataset: Indexable with one entry per user; ``dataset[u][1:]``
                holds the node ids of that user's candidate items.
            batch_size: Number of users scored per step.
            multicore: ``1`` computes the metrics in a process pool.
            topks: Cut-offs; only ``max(topks)`` and ``len(topks)`` are used
                here. ``test_one_batch`` is called with a single argument.
            ground_truth_of: Callable mapping a user index to the list of
                ground-truth item indices.
            adjust_scores: Callable ``(rating, candidate_items)`` that edits
                the score matrix in place before the top-k selection.

        Returns:
            Dict with ``'precise'``, ``'recall'`` and ``'ndcg'`` arrays
            averaged over all users.

        Raises:
            ValueError: If ``dataset`` is empty.
        """
        max_K = max(topks)
        results = {'precise': np.zeros(len(topks)),
                   'recall': np.zeros(len(topks)),
                   'ndcg': np.zeros(len(topks))}
        users = list(range(len(dataset)))
        if not users:
            raise ValueError("dataset is empty; nothing to evaluate")
        if batch_size > len(users) / 10:
            print(f"test_u_batch_size is too big for this dataset, try a small one {len(users) // 10}")

        rating_list = []
        groundTrue_list = []
        # Ceiling division: ``n // batch_size + 1`` over-counts by one when n
        # is an exact multiple of batch_size and tripped the assert below.
        total_batch = math.ceil(len(users) / batch_size)
        bar = tqdm(total=total_batch)
        try:
            for batch_users in self.minibatch(users, batch_size=batch_size):
                bar.update(1)
                # Item indices (node id minus num_user) of each user's candidates.
                candidate_items = [np.array(dataset[user][1:]) - self.num_user for user in batch_users]
                groundTrue = [ground_truth_of(user) for user in batch_users]
                # Build the index as int64 directly; going through a float
                # tensor loses precision for large ids.
                batch_users_gpu = torch.tensor(batch_users, dtype=torch.long, device=self.device)
                # Scores of the batch users against every item.
                rating = torch.matmul(self.result_embed[batch_users_gpu], self.result_embed[self.num_user:].t())
                adjust_scores(rating, candidate_items)
                _, rating_K = torch.topk(rating, k=max_K)
                del rating
                rating_list.append(rating_K.cpu())
                groundTrue_list.append(groundTrue)
        finally:
            bar.close()
        assert total_batch == len(rating_list)

        batches = list(zip(rating_list, groundTrue_list))
        if multicore == 1:
            # The context manager releases the workers even if map() raises.
            with multiprocessing.Pool(self.CORES) as pool:
                pre_results = pool.map(test_one_batch, batches)
        else:
            pre_results = [test_one_batch(batch) for batch in batches]

        for result in pre_results:
            for metric in ('recall', 'precise', 'ndcg'):
                results[metric] += result[metric]
        for metric in ('recall', 'precise', 'ndcg'):
            results[metric] /= float(len(users))
        return results

    def accuracy(self, dataset, num_neg, batch_size=512, multicore=0, topks=[5,10,15,20]):
        """Evaluate ranking among each user's listed candidate items.

        Every item in ``dataset[u][1:]`` gets 10240 added to its score before
        the top-k selection (presumably so candidates outrank all other
        items; verify the constant exceeds the score range). Ground truth is
        ``dataset[u][1:-num_neg]``.

        Args:
            dataset: Per-user sequences; presumably entry 0 is the user id
                and the last ``num_neg`` entries are sampled negatives
                (TODO confirm against the data files).
            num_neg: Number of trailing entries excluded from the ground truth.
            batch_size: Number of users scored per step.
            multicore: ``1`` computes the metrics in a process pool.
            topks: Cut-offs; ``max(topks)`` items are retrieved per user.

        Returns:
            Dict with ``'precise'``, ``'recall'`` and ``'ndcg'`` arrays.
        """
        def ground_truth_of(user):
            return list(np.array(dataset[user][1:-num_neg]) - self.num_user)

        def boost_candidates(rating, candidate_items):
            for row, items in enumerate(candidate_items):
                rating[row][list(items)] += 10240

        return self._evaluate_topk(dataset, batch_size, multicore, topks, ground_truth_of, boost_candidates)

    def accuracy_full_neg(self, dataset, len_datas, batch_size=512, multicore=0, topks=[5,10,15,20]):
        """Evaluate ranking after masking out every non-candidate item.

        Every item that is *not* in ``dataset[u][1:]`` has its score set to
        -10240 before the top-k selection. Ground truth is
        ``dataset[u][1:len_datas[u] + 1]``.

        Args:
            dataset: Per-user sequences; ``dataset[u][1:]`` holds candidate
                item node ids.
            len_datas: Indexable giving, per user, how many leading
                candidates are ground-truth items.
            batch_size: Number of users scored per step.
            multicore: ``1`` computes the metrics in a process pool.
            topks: Cut-offs; ``max(topks)`` items are retrieved per user.

        Returns:
            Dict with ``'precise'``, ``'recall'`` and ``'ndcg'`` arrays.
        """
        def ground_truth_of(user):
            return list(np.array(dataset[user][1:len_datas[user] + 1]) - self.num_user)

        def keep_only_candidates(rating, candidate_items):
            for row, items in enumerate(candidate_items):
                # True marks the items to suppress (everything but candidates).
                suppress = np.ones(self.num_item, dtype=bool)
                suppress[items] = False
                rating[row][torch.from_numpy(suppress).to(rating.device)] = -10240

        return self._evaluate_topk(dataset, batch_size, multicore, topks, ground_truth_of, keep_only_candidates)

#### 模型训练

In [8]:
import argparse
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader


class Net:
    """Training driver: loads the data, builds ``BoostGCN`` and runs training.

    All hyper-parameters are fixed attributes set in ``__init__``.
    """

    def __init__(self):
        # Prefer the first GPU; fall back to CPU when CUDA is unavailable.
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.data_path = '/Data/yelp2018/'                   # dataset directory
        self.learning_rate = 0.001                           # Adam learning rate
        self.weight_decay = 1e-6                             # L2 coefficient (optimizer and model regulariser)
        self.batch_size = 1024                               # training batch size
        self.num_layers = 4                                  # number of graph convolution steps
        self.log_base = 1                                    # logarithm base as a multiple of e (ablation knob)
        self.num_workers = 2                                 # DataLoader worker processes
        self.num_epoch = 1000                                # maximum number of epochs
        self.num_user = 31668                                # number of user nodes
        self.num_item = 38048                                # number of item nodes
        self.num_neg = 1500
        self.PATH_weight_save = True
        self.dim_latent = 64
        self.update_epoch_num = 0                            # epochs since the last validation improvement
        self.save_best_model = 'best_model.pth'              # checkpoint of the best validation model

        print('Loading data  ...')
        self.train_dataset = DataLoad(self.data_path)
        self.train_dataloader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

        self.edge_index = np.load(self.data_path + 'train_edge_index.npy', allow_pickle=True)
        self.val_dataset = np.load(self.data_path + 'val_data.npy', allow_pickle=True)
        print('Data has been loaded.')

        self.model = BoostGCN(self.edge_index, self.batch_size, self.num_user, self.num_item, self.num_neg, self.dim_latent, self.weight_decay, self.num_layers, self.log_base, self.device, self.data_path)
        self.model = self.model.to(self.device)

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.learning_rate, weight_decay=self.weight_decay)

    def run(self):
        """Train with early stopping on validation recall@5.

        Each epoch trains over the whole loader, evaluates on the validation
        set, prints the metrics and appends them to
        ``./val_res_BoostGCN.csv`` (the header is rewritten at the start).
        Training stops after 10 consecutive epochs without a new best
        recall@5. When ``PATH_weight_save`` is true the whole model object is
        saved to ``save_best_model`` on every improvement; the model after
        the first epoch is always saved to ``init_train_model.pth``.
        """
        max_recall = 0.

        df = pd.DataFrame(columns=['epoch', 'Loss', 'precise@5', 'recall@5', 'ndcg@5', 'precise@10', 'recall@10',
                                   'ndcg@10', 'precise@15', 'recall@15', 'ndcg@15', 'precise@20', 'recall@20', 'ndcg@20', 'train_time', 'test_time'])
        df.to_csv("./val_res_BoostGCN.csv", index=False)

        for epoch in range(self.num_epoch):
            # Early stopping: 10 epochs in a row without improvement.
            if self.update_epoch_num >= 10:
                break
            self.update_epoch_num = self.update_epoch_num + 1
            self.model.train()
            sum_loss = 0.0
            start_time = time.perf_counter()
            for batch in self.train_dataloader:
                self.optimizer.zero_grad()
                self.loss = self.model.loss(batch)

                self.loss.backward()
                self.optimizer.step()
                # Detach before accumulating so the running sum does not keep
                # autograd history from every step of the epoch.
                sum_loss += self.loss.detach()
            train_time = time.perf_counter() - start_time
            # NOTE(review): the summed batch losses are divided by batch_size,
            # not by the number of batches; kept as-is so logged values stay
            # comparable with earlier runs.
            current_loss = float(sum_loss) / self.batch_size

            self.model.eval()
            with torch.no_grad():
                start_time = time.perf_counter()
                evaluations = self.model.accuracy(self.val_dataset, self.num_neg, multicore=1, batch_size=1024)
                test_time = time.perf_counter() - start_time

            improved = evaluations['recall'][0] > max_recall

            # precise / recall / ndcg for each of the four cut-offs, in order.
            metrics = []
            for i in range(4):
                metrics.extend([evaluations['precise'][i], evaluations['recall'][i], evaluations['ndcg'][i]])

            print('{0}-th Loss:{1:.4f} precise@5:{2:.4f} recall@5:{3:.4f} ndcg@5:{4:.4f} precise@10:{5:.4f} recall@10:{6:.4f} ndcg@10:{7:.4f} precise@15:{8:.4f} recall@15:{9:.4f} ndcg@15:{10:.4f} precise@20:{11:.4f} recall@20:{12:.4f} ndcg@20:{13:.4f}'.format(epoch, current_loss, *metrics))

            # Append one row to the CSV (a DataFrame needs a 2-D list).
            row = [epoch, current_loss] + metrics + [train_time, test_time]
            pd.DataFrame([row]).to_csv('./val_res_BoostGCN.csv', mode='a', header=False, index=False)

            if improved:
                # Track the best score independently of checkpointing, so
                # early stopping also works when PATH_weight_save is False.
                self.update_epoch_num = 0
                max_recall = evaluations['recall'][0]
                if self.PATH_weight_save:
                    torch.save(self.model, self.save_best_model)
                    print('module weights saved....')

            if epoch == 0:
                torch.save(self.model, 'init_train_model.pth')

            print(f'EPOCH[{epoch+1}/{self.num_epoch}]')

In [9]:
# Build the trainer (loads the data and constructs the model), then train.
model = Net()
model.run()

Loading data  ...
Data has been loaded.


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


0-th Loss:0.1164 precise@5:0.3086 recall@5:0.1901 ndcg@5:0.3338 precise@10:0.2540 recall@10:0.3033 ndcg@10:0.3467 precise@15:0.2197 recall@15:0.3849 ndcg@15:0.3713 precise@20:0.1953 recall@20:0.4490 ndcg@20:0.3938
module weights saved....
EPOCH[1/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


1-th Loss:0.0834 precise@5:0.3165 recall@5:0.1957 ndcg@5:0.3427 precise@10:0.2602 recall@10:0.3113 ndcg@10:0.3557 precise@15:0.2249 recall@15:0.3944 ndcg@15:0.3810 precise@20:0.1995 recall@20:0.4586 ndcg@20:0.4034
module weights saved....
EPOCH[2/1000]


100%|██████████| 31/31 [00:45<00:00,  1.47s/it]


2-th Loss:0.0681 precise@5:0.3217 recall@5:0.1993 ndcg@5:0.3477 precise@10:0.2633 recall@10:0.3152 ndcg@10:0.3603 precise@15:0.2267 recall@15:0.3979 ndcg@15:0.3852 precise@20:0.2010 recall@20:0.4625 ndcg@20:0.4077
module weights saved....
EPOCH[3/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


3-th Loss:0.0556 precise@5:0.3240 recall@5:0.2008 ndcg@5:0.3512 precise@10:0.2655 recall@10:0.3179 ndcg@10:0.3640 precise@15:0.2280 recall@15:0.3999 ndcg@15:0.3883 precise@20:0.2022 recall@20:0.4652 ndcg@20:0.4111
module weights saved....
EPOCH[4/1000]


100%|██████████| 31/31 [00:43<00:00,  1.42s/it]


4-th Loss:0.0462 precise@5:0.3188 recall@5:0.1972 ndcg@5:0.3450 precise@10:0.2605 recall@10:0.3120 ndcg@10:0.3570 precise@15:0.2241 recall@15:0.3934 ndcg@15:0.3814 precise@20:0.1986 recall@20:0.4571 ndcg@20:0.4035
EPOCH[5/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


5-th Loss:0.0386 precise@5:0.3204 recall@5:0.1982 ndcg@5:0.3468 precise@10:0.2626 recall@10:0.3141 ndcg@10:0.3594 precise@15:0.2259 recall@15:0.3961 ndcg@15:0.3838 precise@20:0.2003 recall@20:0.4600 ndcg@20:0.4061
EPOCH[6/1000]


100%|██████████| 31/31 [00:44<00:00,  1.42s/it]


6-th Loss:0.0330 precise@5:0.3164 recall@5:0.1957 ndcg@5:0.3428 precise@10:0.2594 recall@10:0.3098 ndcg@10:0.3551 precise@15:0.2235 recall@15:0.3916 ndcg@15:0.3797 precise@20:0.1982 recall@20:0.4555 ndcg@20:0.4020
EPOCH[7/1000]


100%|██████████| 31/31 [00:42<00:00,  1.38s/it]


7-th Loss:0.0280 precise@5:0.3141 recall@5:0.1941 ndcg@5:0.3403 precise@10:0.2569 recall@10:0.3070 ndcg@10:0.3521 precise@15:0.2215 recall@15:0.3881 ndcg@15:0.3765 precise@20:0.1966 recall@20:0.4515 ndcg@20:0.3986
EPOCH[8/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


8-th Loss:0.0241 precise@5:0.3205 recall@5:0.1985 ndcg@5:0.3472 precise@10:0.2618 recall@10:0.3131 ndcg@10:0.3591 precise@15:0.2254 recall@15:0.3953 ndcg@15:0.3837 precise@20:0.1996 recall@20:0.4585 ndcg@20:0.4057
EPOCH[9/1000]


100%|██████████| 31/31 [00:44<00:00,  1.42s/it]


9-th Loss:0.0212 precise@5:0.3207 recall@5:0.1989 ndcg@5:0.3476 precise@10:0.2628 recall@10:0.3146 ndcg@10:0.3603 precise@15:0.2261 recall@15:0.3972 ndcg@15:0.3850 precise@20:0.2005 recall@20:0.4616 ndcg@20:0.4075
EPOCH[10/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


10-th Loss:0.0190 precise@5:0.3215 recall@5:0.1991 ndcg@5:0.3481 precise@10:0.2626 recall@10:0.3135 ndcg@10:0.3600 precise@15:0.2261 recall@15:0.3960 ndcg@15:0.3846 precise@20:0.2004 recall@20:0.4605 ndcg@20:0.4070
EPOCH[11/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


11-th Loss:0.0166 precise@5:0.3196 recall@5:0.1975 ndcg@5:0.3460 precise@10:0.2609 recall@10:0.3116 ndcg@10:0.3575 precise@15:0.2252 recall@15:0.3946 ndcg@15:0.3825 precise@20:0.1998 recall@20:0.4587 ndcg@20:0.4048
EPOCH[12/1000]


100%|██████████| 31/31 [00:42<00:00,  1.38s/it]


12-th Loss:0.0151 precise@5:0.3198 recall@5:0.1982 ndcg@5:0.3461 precise@10:0.2619 recall@10:0.3132 ndcg@10:0.3587 precise@15:0.2261 recall@15:0.3968 ndcg@15:0.3840 precise@20:0.2003 recall@20:0.4601 ndcg@20:0.4061
EPOCH[13/1000]


100%|██████████| 31/31 [00:43<00:00,  1.40s/it]


13-th Loss:0.0143 precise@5:0.3156 recall@5:0.1951 ndcg@5:0.3424 precise@10:0.2586 recall@10:0.3088 ndcg@10:0.3547 precise@15:0.2230 recall@15:0.3906 ndcg@15:0.3792 precise@20:0.1973 recall@20:0.4529 ndcg@20:0.4008
EPOCH[14/1000]


### 加载最佳模型，进行测试

In [10]:
import torchvision.models as models
import pandas as pd

data_path = '/Data/yelp2018/'
test_dataset = np.load(data_path + 'test_data.npy', allow_pickle=True)
num_neg = 1500

# Start the results file with a header row only.
df = pd.DataFrame(columns=['precise@5', 'recall@5', 'ndcg@5', 'precise@10', 'recall@10', 'ndcg@10', 'precise@15',
                           'recall@15', 'ndcg@15', 'precise@20', 'recall@20', 'ndcg@20'])
df.to_csv("./test_res_BoostGCN.csv", index=False)

# Loads the whole pickled model object, so the class definitions above must
# already be executed in this kernel.
# NOTE(review): recent PyTorch releases default torch.load to
# weights_only=True, which rejects full-model pickles; confirm the installed
# version before relying on this call.
test_model = torch.load('best_model.pth')
test_model.eval()
# Evaluate without autograd, matching the validation path used in training.
with torch.no_grad():
    evaluations = test_model.accuracy(test_dataset, num_neg, batch_size=1024)

# precise / recall / ndcg for each of the four cut-offs, in column order.
li = [evaluations[metric][i] for i in range(4) for metric in ('precise', 'recall', 'ndcg')]

print('precise@5:{0:.4f} recall@5:{1:.4f} ndcg@5:{2:.4f} precise@10:{3:.4f} recall@10:{4:.4f} ndcg@10:{5:.4f} precise@15:{6:.4f} recall@15:{7:.4f} ndcg@15:{8:.4f} precise@20:{9:.4f} recall@20:{10:.4f} ndcg@20:{11:.4f}'.format(*li))

# A DataFrame needs a 2-D list; append the single result row to the CSV.
data = pd.DataFrame([li])
data.to_csv('./test_res_BoostGCN.csv', mode='a', header=False, index=False)

100%|██████████| 31/31 [00:43<00:00,  1.40s/it]

precise@5:0.3249 recall@5:0.2015 ndcg@5:0.3523 precise@10:0.2651 recall@10:0.3181 ndcg@10:0.3643 precise@15:0.2281 recall@15:0.4011 ndcg@15:0.3893 precise@20:0.2020 recall@20:0.4657 ndcg@20:0.4119



