In [1]:
import os
import gc
import re
import sys
import json
import time
import random
import requests
import argparse
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import scipy.io as sio
from tqdm import tqdm
from collections import Counter
from collections import defaultdict
from logging import getLogger
from ast import Global

from trd import DQN, RLTraining
from utils import *
sys.path.append('/root/linghui/drs/TRD-main/daisyRec/')
import daisy
from daisy.utils.loader import RawDataReader, Preprocessor
from daisy.utils.splitter import TestSplitter, ValidationSplitter
from daisy.utils.config import init_seed, init_config, init_logger
from daisy.utils.metrics import MAP, NDCG, Recall, Precision, HR, MRR
from daisy.utils.sampler import BasicNegtiveSampler, SkipGramNegativeSampler, UniqueNegativeSampler
from daisy.utils.dataset import AEDataset, BasicDataset, CandidatesDataset, get_dataloader
from daisy.utils.utils import get_history_matrix, get_ur, build_candidates_set, ensure_dir, get_inter_matrix
from daisy.model.MFRecommender import MF
from daisy.model.FMRecommender import FM
from daisy.model.NFMRecommender import NFM
from daisy.model.NGCFRecommender import NGCF
from daisy.model.EASERecommender import EASE
from daisy.model.SLiMRecommender import SLiM
from daisy.model.VAECFRecommender import VAECF
from daisy.model.NeuMFRecommender import NeuMF
from daisy.model.PopRecommender import MostPop
from daisy.model.KNNCFRecommender import ItemKNNCF
from daisy.model.PureSVDRecommender import PureSVD
from daisy.model.Item2VecRecommender import Item2Vec
from daisy.model.LightGCNRecommender import LightGCN
from daisy.utils.metrics import calc_ranking_results

In [2]:
config = init_config()
init_seed(config['seed'], config['reproducibility'])
init_logger(config)
logger = getLogger()
logger.info(config)
config['logger'] = logger
model_config, metrics_config, tune_params_config, param_type_config = get_config()
config['model_config'] = model_config

# set path
save_path = config['save_path'] + config['version']
ensure_dir(save_path)

file_path = save_path + f'{config["dataset"]}/'
ensure_dir(file_path)

saved_data_path = file_path + 'data/'
ensure_dir(saved_data_path)

saved_result_path = file_path + f'{config["algo_name"]}/'
ensure_dir(saved_result_path)

saved_trd_path = saved_result_path + f"{config['trd_version']}/"
ensure_dir(saved_trd_path)

saved_model_path = saved_result_path + 'model/'
ensure_dir(saved_model_path)

saved_rec_path = saved_result_path + 'rec_list/'
ensure_dir(saved_rec_path)

saved_metric_path = saved_result_path + 'metric/'
ensure_dir(saved_metric_path)
logger.info('-' * 50)
logger.info(f"choose dataset: {config['dataset']}")
logger.info(f"choose algo: {config['algo_name']}")
logger.info(f"choose debias method: {config['debias_method']}")
logger.info(f"choose weight: {config['weight']}")

17 Jan 10:49 INFO - {'version': 'daisy_1/', 'trd_version': 'drs_trd', 'gpu': '0', 'seed': 2024, 'reproducibility': True, 'state': None, 'optimization_metric': 'ndcg', 'hyperopt_trail': 20, 'tune_testset': False, 'tune_pack': '{"lr": [0.001, 0.005, 0.01], "reg_1": [0.000001, 0.00001, 0.0001, 0.001, 0], "reg_2": [0.000001, 0.00001, 0.0001, 0.001, 0], "num_layers": [2, 3, 4]}', 'algo_name': 'mf', 'data_path': '/data/linghui/', 'save_path': '/data/linghui/drs/', 'res_path': None, 'dataset': 'kuairec', 'val_method': 'tsbr', 'test_method': 'tsbr', 'fold_num': 1, 'val_size': 0.1, 'test_size': 0.2, 'topk': 50, 'n_actions': 20, 'cand_num': 1000, 'sample_method': 'uniform', 'sample_ratio': 0, 'num_ng': 4, 'batch_size': 1024, 'loss_type': 'BPR', 'init_method': 'default', 'optimizer': 'default', 'early_stop': True, 'prepro': '10filter', 'level': 'u', 'positive_threshold': 1.0, 'UID_NAME': 'user', 'IID_NAME': 'item', 'INTER_NAME': 'label', 'TID_NAME': 'timestamp', 'binary_inter': True, 'metrics': [

In [3]:
ui_num = np.load(saved_data_path + 'ui_cate.npy')
config['user_num'] = ui_num[0]
config['item_num'] = ui_num[1]
# logger.info(f"user number: {config['user_num']}  item number: {config['item_num']}")
train_base_set = pd.read_csv(saved_data_path + 'train_base_set.csv', index_col=0)
train_rl_set = pd.read_csv(saved_data_path + 'train_rl_set.csv', index_col=0)
train_total_set = pd.read_csv(saved_data_path + 'train_total_set.csv', index_col=0)
train_total_samples = np.load(saved_data_path + 'train_total_samples.npy')

train_rl_ucands = np.load(saved_data_path + 'train_rl_ucands.npy',allow_pickle=True)
test_u_all = np.load(saved_data_path + 'test_u_all.npy', allow_pickle=True)
test_ucands_all = np.load(saved_data_path + 'test_ucands_all.npy',allow_pickle=True)
test_ur_all = np.load(saved_data_path + 'test_ur_all.npy', allow_pickle=True)
rl_user_his_pref = pd.read_csv(saved_data_path + 'rl_user_his_pref.csv', index_col = 0)
rl_user_his_pref = rl_user_his_pref.sort_values(by='user', ascending=True)
config['user_his'] = rl_user_his_pref
with open(saved_data_path + 'train_total_ur.json', 'r') as file:
    train_total_ur = json.load(file)
config['train_ur'] = train_total_ur

item_pop_train = pd.read_csv(saved_data_path + 'item_pop_train.csv',index_col=0)
item_pop_train_dict = dict(list(zip(item_pop_train.item, item_pop_train.train_counts)))
warm_item_list = np.load(saved_data_path + 'warm_item_list.npy')
cold_item_list = np.load(saved_data_path + 'cold_item_list.npy')
config['warm_item_list'] = warm_item_list
config['cold_item_list'] = cold_item_list
config['item_pop_train_dict'] = item_pop_train_dict

In [97]:
# rl_user_his_pref = pd.read_csv(saved_data_path + 'rl_user_his_pref.csv', index_col = 0)

In [4]:
rl_user_his_pref['user_history'] = rl_user_his_pref['user_history'].apply(ast.literal_eval)

In [5]:
rl_user_his_pref_copy = rl_user_his_pref.copy(deep=True)
rl_user_his_pref_copy

Unnamed: 0,user,user_history,hist_tgf,hist_nc
0,0,"[1304, 1283, 1099, 1321, 1378, 819, 1162, 1274...",0.070130,0.263235
1,1,"[1270, 1109, 93, 399, 1264, 1185, 1062, 1310, ...",0.080792,0.216518
2,2,"[1290, 1283, 1286, 1109, 551, 1340, 1314, 93, ...",0.128830,0.184664
3,3,"[1291, 961, 1270, 710, 1008, 215, 1314, 1289, ...",0.043137,0.240000
4,4,"[1283, 1301, 1285, 1287, 1316, 1076, 789, 1314...",0.057031,0.232877
...,...,...,...,...
1406,1406,"[1308, 1257, 1360, 1404, 1372, 1228, 1407, 143...",-0.041006,0.336066
1407,1407,"[438, 1308, 1270, 1266, 1109, 153, 1267, 1257,...",0.062157,0.249280
1408,1408,"[1284, 452, 1257, 1206, 1289, 654, 1109, 1308,...",0.140247,0.213904
1409,1409,"[1315, 1220, 1362, 1360, 1221, 1369, 716, 993,...",0.054754,0.248175


In [82]:
rl_user_his_pref = rl_user_his_pref_copy.copy(deep=True)
rl_user_his_pref

Unnamed: 0,user,user_history,hist_tgf,hist_nc
0,0,"[1304, 1283, 1099, 1321, 1378, 819, 1162, 1274...",0.070130,0.263235
1,1,"[1270, 1109, 93, 399, 1264, 1185, 1062, 1310, ...",0.080792,0.216518
2,2,"[1290, 1283, 1286, 1109, 551, 1340, 1314, 93, ...",0.128830,0.184664
3,3,"[1291, 961, 1270, 710, 1008, 215, 1314, 1289, ...",0.043137,0.240000
4,4,"[1283, 1301, 1285, 1287, 1316, 1076, 789, 1314...",0.057031,0.232877
...,...,...,...,...
1406,1406,"[1308, 1257, 1360, 1404, 1372, 1228, 1407, 143...",-0.041006,0.336066
1407,1407,"[438, 1308, 1270, 1266, 1109, 153, 1267, 1257,...",0.062157,0.249280
1408,1408,"[1284, 452, 1257, 1206, 1289, 654, 1109, 1308,...",0.140247,0.213904
1409,1409,"[1315, 1220, 1362, 1360, 1221, 1369, 716, 993,...",0.054754,0.248175


In [83]:
train_mode = 'total'
config['debias_method'] = 'backbone'
if config['algo_name'].lower() in ['lightgcn', 'ngcf']:
    config['inter_matrix'] = get_inter_matrix(train_total_set, config)
model = model_config[config['algo_name']](config)
model_pth = saved_model_path + f"{config['debias_method']}_{train_mode}_{config['epochs']}.pth"
model = torch.load(model_pth)
print(f"load model {config['algo_name']}")
if config['algo_name'] == 'mf':
    user_embed = model.embed_user.weight
    item_embed = model.embed_item.weight
elif config['algo_name'] == 'lightgcn':
    user_embed, item_embed = model.forward()
    user_embed = user_embed.weight
    item_embed = item_embed.weight

load model mf


In [117]:

# # rl_user_his_pref['user_history'] = rl_user_his_pref['user_history'].apply(lambda x: sorted(list(x)))
# rl_user_his_pref

In [84]:
history_records = rl_user_his_pref.set_index('user')['user_history'].to_dict()

In [85]:
# 初始训练：
user_pref_dict = rl_user_his_pref.set_index('user')['hist_tgf'].to_dict()
config['user_pref_dict'] = user_pref_dict



# history_state
train_base_ur_list = get_ur_l(train_base_set)
# ground_truth
train_rl_ur = get_ur(train_rl_set)
# train_user
train_rl_u = np.load(saved_data_path + 'train_rl_u.npy', allow_pickle=True)


In [10]:
# get training action space
test_dataset = CandidatesDataset(train_rl_ucands)
test_loader = get_dataloader(test_dataset, batch_size=128, shuffle=False, num_workers=0)
preds = model.rank(test_loader) # np.array (u, topk)
preds_new = remove_duplicates(preds) 
preds_action_space = np.array([x[:config['cand_num']] for x in preds_new])
pred_rl_dict =  get_model_pred(train_rl_u, preds_action_space)

In [11]:
alpha = 2
beta = 1
gama = 0.1
config['alpha'] = alpha
config['beta'] = beta
config['gama'] = gama
config['train_step'] = 120
history_state = train_base_ur_list
action_space = pred_rl_dict
ground_truth = train_rl_ur
train_users = train_rl_u

In [12]:
alpha_list = [2]
beta_list = [0.5]
gama_list = [0.1]
train_time = 1
for count_j in range(len(alpha_list)):
    alpha = alpha_list[count_j]
    beta = beta_list[count_j]
    gama = gama_list[count_j]
    print(f'train rl alpha={alpha}, beta={beta}, gama = {gama}')
    for count_i in range(train_time):
        n_actions = config['n_actions']
        dqn = DQN(user_embed, item_embed, config, n_actions)
        print("=======model initial completed========")

        start_time = time.time()
        RLTrainer = RLTraining(train_rl_u, history_state, ground_truth, action_space, config)
        dqn = RLTrainer.train(dqn)
        end_time = time.time()
        elapsed_time = end_time - start_time
        config['debias_method'] = f'fair_alpha{alpha}_beta{beta}_gama{gama}'
        saved_name = saved_trd_path + f"rl_{config['debias_method']}_step{config['train_step']}_num{count_i}.pt"
        logger.info(f":finished train agent: {elapsed_time:.6f} s")
        torch.save(dqn.state_dict(), saved_name)

train rl alpha=2, beta=0.5, gama = 0.1
=====training=====


  x0 = torch.tensor(x0, dtype=torch.long).cuda()  # 用户 ID，形状为 [batch_size, n_actions, 1]
  x1 = torch.tensor(x1, dtype=torch.long).cuda()  # 用户的历史交互项目，形状为 [batch_size, n_actions, n_states]
  x2 = torch.tensor(x2, dtype=torch.long).cuda()  # 候选动作 ID，形状为 [batch_size, n_actions, 1]
  return F.mse_loss(input, target, reduction=self.reduction)


66.31991482938113


 50%|█████     | 1/2 [01:15<01:15, 75.11s/it]

24.972497174050496


100%|██████████| 2/2 [02:12<00:00, 66.46s/it]
17 Jan 10:52 INFO - :finished train agent: 132.915580 s


In [60]:
# test

In [13]:
def get_next_state(states, actions, t_urs):
    """
    获取下一个状态
    states: 当前状态列表，形状为 [batch_size, n_states]
    actions: 动作列表，形状为 [batch_size]
    t_urs: 目标用户-项目交互历史列表，形状为 [batch_size, n_actions]
    """
    batch_size = len(states)
    next_states = []

    for i in range(batch_size):
        state = states[i]
        action = actions[i]
        t_ur = t_urs[i]

        if action in t_ur:
            state.pop(0)  # 移除最早的交互项目
            state.append(action)  # 添加当前动作
        next_states.append(state)

    return next_states

def evaluate_agent(model_dqn, ground_truth, history_state, action_space, top_k = 50, batch_size = 200):
    print("=====testing agent=====")
    preds = {}
    user_test = set()
    test_u = list(ground_truth.keys())
    num_users = len(ground_truth)
    for batch_start in tqdm(range(0, num_users, batch_size)):
        batch_end = min(batch_start + batch_size, num_users)
        batch_users = test_u[batch_start:batch_end]  # 当前批次的用户

        # 初始化每个用户的参数
        batch_ur = [history_state[user][-5:] for user in batch_users]  # 每个用户的历史状态
        batch_s = [pad_ur(ur, config['item_num']) for ur in batch_ur]  # 当前状态
        batch_recommend_items = [[] for _ in range(len(batch_users))]  # 推荐项目列表

        model_dqn.create_action_space([action_space[user] for user in batch_users])

        for t in range(top_k):
            # 批量选择动作
            batch_a = model_dqn.choose_action(batch_users, batch_s, 0)  # 返回每个用户的动作

            for i, a in enumerate(batch_a):
                batch_recommend_items[i].append(a)

            # 更新动作空间
            model_dqn.update_action_space(batch_a)

            # 获取下一个状态
            batch_s_ = get_next_state(batch_s, batch_a, [ground_truth[user] for user in batch_users])

            # 更新状态
            batch_s = batch_s_

        # 保存推荐结果
        for user, recommend_item in zip(batch_users, batch_recommend_items):
            preds[user] = recommend_item
    pred_topk = np.array(list(preds.values()))
    rec_results = create_recommendation_df(test_u, pred_topk)
    return pred_topk, rec_results

def pad_ur(ur, item_num):
    user_record = ur
    for _ in range(5 - len(ur)):
        user_record.insert(0, item_num)
    return user_record

In [None]:
config['topk_list'] =  [10, 20, 50]

train_step = 120
batch_size = 500  # 批量大小

if load:
    config['debias_method'] = f'fair_alpha{alpha}_beta{beta}_gama{gama}'
    model_dqn = DQN(user_embed, item_embed, config, n_actions, batch_size = batch_size)
    print("=======model initial completed========")
    model_name = saved_trd_path + f"rl_{config['debias_method']}_step{config['train_step']_num0}.pt"
    model_dqn.load_state_dict(torch.load(model_name, map_location=torch.device('cuda')))
    model_dqn.cuda()

# model_dqn = torch.load(saved_trd_path + f"rl_{config['debias_method']}_{train_mode}_step{train_step}_{count_i}.pth")
# model_dqn.sync_target_network()
model_dqn.eval()



In [86]:
data_last_stage = train_total_set
# result_df_all = pd.DataFrame()

In [87]:
stage = 0
test_u = test_u_all[stage]
test_ur = test_ur_all[stage]
test_ucands = test_ucands_all[stage]
test_set = pd.read_csv(saved_data_path + f'test_set_{stage}.csv')

In [88]:
start_time = time.time()
warm_item_list, cold_item_list = update_new_item(data_last_stage, config)
config['warm_item_list'] = warm_item_list
config['cold_item_list'] = cold_item_list
new_item_list = list(set(test_set[config['IID_NAME']].unique()) - set(warm_item_list))
config['topk_list'] = [10, 20, 50]
data_last_stage = pd.concat([data_last_stage, test_set])
end_time = time.time()
elapsed_time = end_time - start_time
logger.info(f":finished update warm/cold_list: {elapsed_time:.6f} s")

17 Jan 14:58 INFO - :finished update warm/cold_list: 0.079258 s


In [79]:
def generate_user_action_space(user_items_dict, user_prob_dict, new_item_list, content_flag, config):
    """
    对每个用户，根据概率从 user_items_dict 和 new_item_list 中挑选商品，生成新的 user_action_space。

    参数:
    - user_items_dict: 字典，格式为 {'user': [item1, item2, ...]}
    - user_prob_dict: 字典，格式为 {'user': value}，value 是概率值
    - new_item_list: 列表，包含新的项目
    - content_flag: 整数，如果为 1，则从 new_item_list 中按顺序挑选；如果为 0，则随机挑选
    - config: 配置字典，包含随机种子和 topk 参数

    返回:
    - user_action_space: 新的字典，格式为 {'user': [item1, item2, ...]}
    """
    seed = config.get('seed', None)
    len_action_space = config.get('topk', 10) + 50  # 默认 topk 为 10

    # 设置随机种子
    if seed is not None:
        np.random.seed(seed)

    # 将 new_item_list 转换为 numpy 数组以提高效率
    new_item_array = np.array(new_item_list)

    # 初始化新的字典
    user_action_space = {}

    # 遍历用户
    for user, items in user_items_dict.items():
        # 获取用户的概率值
        prob = user_prob_dict.get(user, 0)
        
        # 计算需要从 items 中挑选的商品数量 n_old
        n_new = int(len_action_space * prob)
        
        # 计算需要从 new_item_list 中挑选的商品数量 n_new
        n_old = len_action_space - n_new
        
        # 如果 n_old 或 n_new 为 0，跳过该用户
        # if n_old <= 0  n_new <= 0:
        #     continue
        
        # 从 items 中挑选前 n_old 个商品
        if n_old > 0:
            selected_old_items = items[:n_old]
        
        if n_new > 0:
            if content_flag == 1:
                # 按顺序挑选前 n_new 个商品
                selected_new_items = new_item_array[:n_new]
            else:
                # 随机挑选 n_new 个商品
                selected_new_items = np.random.choice(new_item_array, size=n_new, replace=False)
        
        # 将 selected_new_items 随机插入到 selected_old_items 中，同时保持 selected_new_items 的顺序
        updated_items = list(selected_old_items)
        insert_indices = np.random.choice(len(updated_items) + 1, size=n_new, replace=True)
        insert_indices.sort()  # 确保插入顺序正确
        for idx, item in zip(insert_indices, selected_new_items):
            updated_items.insert(idx, item)
        
        # 将结果添加到新的字典中
        user_action_space[user] = updated_items
    
    return user_action_space

In [89]:
test_ucands = test_ucands_all[stage]
test_dataset = CandidatesDataset(test_ucands)
test_loader = get_dataloader(test_dataset, batch_size=128, shuffle=False, num_workers=0)
preds = model.rank(test_loader) # np.array (u, topk)
preds_new = remove_duplicates(preds) 
preds_test_stage = np.array([x[:config['cand_num']] for x in preds_new])

In [90]:
user_nc = rl_user_his_pref.set_index('user')['hist_nc'].to_dict()
history_records = rl_user_his_pref.set_index('user')['user_history'].to_dict()

In [39]:
new_item_list

[2048,
 2049,
 2050,
 2051,
 2052,
 2053,
 2054,
 2055,
 2056,
 2057,
 2058,
 2059,
 2060,
 2061,
 2062,
 2063,
 2064,
 2065,
 2066,
 2067,
 2068,
 2069,
 2070,
 2071,
 2072,
 2073,
 2074,
 2075,
 2076,
 2077,
 2078,
 2079,
 2080,
 2081,
 2082,
 2083,
 2084,
 2085,
 2086,
 2087,
 2088,
 2089,
 2090,
 2091,
 2092,
 2093,
 2094,
 2095,
 2096,
 2097,
 2098,
 2099,
 2100,
 2101,
 2102,
 2103,
 2104,
 2105,
 2106,
 2107,
 2108,
 2109,
 2110,
 2111,
 2112,
 2113,
 2114,
 2115,
 2116,
 2117,
 2118,
 2119,
 2120,
 2121,
 2122,
 2123,
 2124,
 2125,
 2126,
 2127,
 2128,
 2129,
 2130,
 2131,
 2132,
 2133,
 2134,
 2135,
 2136,
 2137,
 2138,
 2139,
 2140,
 2141,
 2142,
 2143,
 2144,
 2145,
 2146,
 2147,
 2148,
 2149,
 2150,
 2151,
 2152,
 2153,
 2154,
 2155,
 2156,
 2157,
 2158,
 2159,
 2160,
 2161,
 2162,
 2163,
 2164,
 2165,
 2166,
 2167,
 2168,
 2169,
 2170,
 2171,
 2172,
 2173,
 2174,
 2175,
 2176,
 2177,
 2178,
 2179,
 2180,
 2181,
 2182,
 2183,
 2184,
 2185,
 2186,
 2187,
 2188,
 2189,
 2190,

In [91]:
pred_test_dict =  get_model_pred(test_u, preds_test_stage)
if config['algo_name'] in ['mf', 'lightgcn']:
    content_flag = 0
elif config['algo_name'] in ['ALDI']:
    content_flag = 1
pred_test_dict =  get_model_pred(test_u, preds_test_stage)
action_space_stage = generate_user_action_space(pred_test_dict, user_nc, new_item_list, content_flag, config)

In [21]:
# check

def calculate_total_new_item_ratio(user_items_dict, new_item_list, k):
    """
    计算所有用户的前 k 个值中，整体在 new_item_list 中的比例。

    参数:
    - user_items_dict: 字典，格式为 {'user': [item1, item2, ...]}
    - new_item_list: 列表，包含新的项目
    - k: 整数，表示需要检查的前 k 个值

    返回:
    - total_ratio: 所有用户的前 k 个值中，整体在 new_item_list 中的比例
    """
    # 将 new_item_list 转换为集合，提高查找效率
    new_item_set = set(new_item_list)

    # 初始化总计数器和总物品数
    total_count = 0
    total_items = 0

    # 遍历每个用户
    for user, items in user_items_dict.items():
        # 取前 k 个值
        top_k_items = items[:k]

        # 计算在 new_item_list 中的数量
        count = sum(1 for item in top_k_items if item in new_item_set)

        # 更新总计数器和总物品数
        total_count += count
        total_items += len(top_k_items)

    # 计算总比例
    print(total_items)
    print(total_count)
    # total_ratio = total_count / ((k-19) * len(user_items_dict))
    total_ratio = total_count / total_items

    return total_ratio

In [44]:
user_nc

{0: 0.263235294117647,
 1: 0.2165178571428571,
 2: 0.1846635367762128,
 3: 0.24,
 4: 0.2328767123287671,
 5: 0.2166666666666666,
 6: 0.2496099843993759,
 7: 0.2619047619047619,
 8: 0.2,
 9: 0.1922330097087378,
 10: 0.2156424581005586,
 11: 0.2348178137651821,
 12: 0.2118055555555555,
 13: 0.129737609329446,
 14: 0.2591792656587473,
 15: 0.1736111111111111,
 16: 0.3132530120481928,
 17: 0.1992337164750957,
 18: 0.3066037735849056,
 19: 0.3258655804480652,
 20: 0.2417218543046357,
 21: 0.2561797752808988,
 22: 0.2355371900826446,
 23: 0.2671480144404332,
 24: 0.174863387978142,
 25: 0.2251908396946564,
 26: 0.2787456445993031,
 27: 0.1749174917491749,
 28: 0.209375,
 29: 0.2453781512605042,
 30: 0.2048192771084337,
 31: 0.2170542635658914,
 32: 0.2063106796116504,
 33: 0.1759036144578313,
 34: 0.2243270189431704,
 35: 0.224,
 36: 0.2542113323124043,
 37: 0.2299546142208774,
 38: 0.2321981424148606,
 39: 0.2727272727272727,
 40: 0.2116104868913857,
 41: 0.2226148409893993,
 42: 0.22460937

In [22]:
k = 10
calculate_total_new_item_ratio(action_space_stage, new_item_list, k+19)

40861
12182


0.2981326937666724

In [92]:
# ground_truth, history_state, action_space

# ground_truth
stage = 0
ground_truth = test_ur
history_state = history_records

KeyError: 0

In [93]:
model.eval()
preds_topk, rec_results = evaluate_agent(dqn, test_ur, history_records, action_space_stage, config['topk'])
result_df = get_evaluation_metric(test_u, test_ur, preds_topk, rec_results, config)

=====testing agent=====


  x0 = torch.tensor(x0, dtype=torch.long).cuda()  # 用户 ID，形状为 [batch_size, n_actions, 1]
  x1 = torch.tensor(x1, dtype=torch.long).cuda()  # 用户的历史交互项目，形状为 [batch_size, n_actions, n_states]
  x2 = torch.tensor(x2, dtype=torch.long).cuda()  # 候选动作 ID，形状为 [batch_size, n_actions, 1]
100%|██████████| 8/8 [00:03<00:00,  2.04it/s]
17 Jan 14:59 INFO - Recall@10: 0.0202
17 Jan 14:59 INFO - MRR@10: 0.4174
17 Jan 14:59 INFO - NDCG@10: 0.4980
17 Jan 14:59 INFO - Hit Ratio@10: 0.8062
17 Jan 14:59 INFO - Precision@10: 0.1894


In [None]:
preds_topk

In [25]:
result_df

Unnamed: 0,NDCG@10,NDCG@20,NDCG@50,Hit Ratio@10,Hit Ratio@20,Hit Ratio@50,TGF@10,TGF@20,TGF@50,NC@10,NC@20,NC@50
0,0.521431,0.540063,0.546675,0.81973,0.933286,0.991483,0.088667,0.100176,0.179347,0.492406,0.500213,0.403918


In [67]:
np.shape(preds_topk)

(1409, 50)

In [26]:
rl_user_his_pref

Unnamed: 0,user,user_history,hist_tgf,hist_nc
0,0,"[1304, 1283, 1099, 1321, 1378, 819, 1162, 1274...",0.070130,0.263235
1,1,"[1270, 1109, 93, 399, 1264, 1185, 1062, 1310, ...",0.080792,0.216518
2,2,"[1290, 1283, 1286, 1109, 551, 1340, 1314, 93, ...",0.128830,0.184664
3,3,"[1291, 961, 1270, 710, 1008, 215, 1314, 1289, ...",0.043137,0.240000
4,4,"[1283, 1301, 1285, 1287, 1316, 1076, 789, 1314...",0.057031,0.232877
...,...,...,...,...
1406,1406,"[1308, 1257, 1360, 1404, 1372, 1228, 1407, 143...",-0.041006,0.336066
1407,1407,"[438, 1308, 1270, 1266, 1109, 153, 1267, 1257,...",0.062157,0.249280
1408,1408,"[1284, 452, 1257, 1206, 1289, 654, 1109, 1308,...",0.140247,0.213904
1409,1409,"[1315, 1220, 1362, 1360, 1221, 1369, 716, 993,...",0.054754,0.248175


In [68]:
def update_hist_df(rec_results, rl_user_his_pref, test_ur, config):
    """
    更新用户历史偏好 DataFrame。

    参数:
    - rec_results: 推荐结果数据
    - rl_user_his_pref: 用户历史偏好 DataFrame
    - test_ur: 测试集的用户-物品交互数据
    - config: 配置字典

    返回:
    - rl_user_his_pref: 更新后的用户历史偏好 DataFrame
    """
    # 复制推荐结果并添加用户交互数据
    temp_rec_result = rec_results.copy()
    temp_rec_result = add_user_inter(temp_rec_result, test_ur)
    
    # 生成用户推荐记录
    user_rec_records = generate_user_records(temp_rec_result)
    
    # 更新用户历史偏好
    rl_user_his_pref = update_user_history(rl_user_his_pref, user_rec_records)
    
    # 计算用户的 TGF + NC
    
    return rl_user_his_pref

def update_user_history(df1, df2):
    """
    更新用户历史记录。

    参数:
    - df1: 包含用户历史记录的 DataFrame
    - df2: 包含用户新增交互记录的 DataFrame

    返回:
    - df1: 更新后的 DataFrame
    """
    # 确保 user_history 是列表类型
    df1['user_history'] = df1['user_history'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
    
    # 遍历 df2 更新 user_history
    for user, positive_inter in df2[['user', 'positive_inter']].itertuples(index=False):
        if positive_inter:  # 如果 positive_inter 不为空
            # 找到 df1 中对应的 user，并更新 user_history
            df1.loc[df1['user'] == user, 'user_history'] = df1.loc[df1['user'] == user, 'user_history'].apply(
                lambda x: x + [item for item in positive_inter if item not in x]
            )
    
    return df1

In [113]:
def get_stage_user_tgf(df, col, config, nc_weight = 0.9):
    
    # 将 warm_item_list 和 cold_item_list 转换为 NumPy 数组
    warm_item_list = np.array(config['warm_item_list'])
    cold_item_list = np.array(config['cold_item_list'])

    # 预先计算权重
    warm_weight, cold_weight = get_weight(warm_item_list, cold_item_list)

    # 应用函数生成 exp_list
    df['exp_list'] = df[col].apply(lambda x: set_values_by_id(x, config['item_num']))

    # 计算用户的 TGF 值
    df['hist_tgf'] = df['exp_list'].apply(
        lambda x: get_user_hist_tgf(x, warm_item_list, cold_item_list, warm_weight, cold_weight)
    )

    # 将 cold_item_list 转换为集合
    cold_item_set = set(cold_item_list)
    

    # 计算用户的 NC（Novelty Coverage）值
     
    
    new_stage_nc = df[col].apply(lambda x: calculate_nc(x, cold_item_set))

    # 更新 hist_nc 列
    df['hist_nc'] = nc_weight * df['hist_nc'] + (1 - nc_weight) * new_stage_nc

    return df


def calculate_nc(input_set, target_set):
    """
    统计给定集合中有多少比例的值出现在目标集合中

    参数:
    input_set: 输入的集合
    target_set: 目标集合

    返回:
    proportion: 出现在目标集合中的比例
    """
    if not input_set:
        return 0.0
    count = len(set(input_set) & target_set)  # 计算交集的大小
    return count / len(input_set)

def get_user_hist_tgf(exp_list, warm_item_list, cold_item_list, warm_weight, cold_weight):
    """
    计算用户的分类权重差异（tgf）
    """
    if np.sum(exp_list) == 0:
        return 0

    # 归一化经验值
    exp_list = exp_list / np.sum(exp_list)

    # 提取暖项目和冷项目的经验值
    warm_exp_list = exp_list[warm_item_list]
    cold_exp_list = exp_list[cold_item_list]

    # 计算暖项目和冷项目的部分
    warm_part = np.sum(warm_exp_list * warm_weight) / len(warm_item_list)
    cold_part = np.sum(cold_exp_list * cold_weight) / len(cold_item_list)

    # 计算分类的权重差异
    user_tgf = warm_part - cold_part
    return user_tgf

In [36]:
rl_user_his_pref['user_last_hist'] = rl_user_his_pref['user_history'].apply(lambda x: x[-50:] if len(x) >= 50 else x)
rl_user_his_pref

Unnamed: 0,user,user_history,hist_tgf,hist_nc,user_last_hist
0,0,"[1304, 1283, 1099, 1321, 1378, 819, 1162, 1274...",0.070130,0.263235,"[789, 1561, 1712, 1700, 1695, 1376, 1757, 1616..."
1,1,"[1270, 1109, 93, 399, 1264, 1185, 1062, 1310, ...",0.080792,0.216518,"[1735, 1556, 1569, 1738, 1549, 1447, 1489, 145..."
2,2,"[1290, 1283, 1286, 1109, 551, 1340, 1314, 93, ...",0.128830,0.184664,"[1461, 721, 1395, 1641, 1826, 1745, 152, 1484,..."
3,3,"[1291, 961, 1270, 710, 1008, 215, 1314, 1289, ...",0.043137,0.240000,"[1603, 1590, 1563, 1636, 1621, 1461, 1313, 134..."
4,4,"[1283, 1301, 1285, 1287, 1316, 1076, 789, 1314...",0.057031,0.232877,"[1697, 908, 46, 1734, 1704, 1748, 1676, 1628, ..."
...,...,...,...,...,...
1406,1406,"[1308, 1257, 1360, 1404, 1372, 1228, 1407, 143...",-0.041006,0.336066,"[1698, 1709, 1774, 1400, 1738, 1798, 1770, 172..."
1407,1407,"[438, 1308, 1270, 1266, 1109, 153, 1267, 1257,...",0.062157,0.249280,"[1716, 1590, 1537, 1341, 1831, 1747, 1525, 178..."
1408,1408,"[1284, 452, 1257, 1206, 1289, 654, 1109, 1308,...",0.140247,0.213904,"[1765, 1411, 1756, 1768, 1312, 1675, 1052, 174..."
1409,1409,"[1315, 1220, 1362, 1360, 1221, 1369, 716, 993,...",0.054754,0.248175,"[1801, 1783, 1619, 1839, 1852, 1700, 1855, 170..."


In [117]:
rl_user_his_pref = rl_user_his_pref_copy.copy(deep=True)
rl_user_his_pref

Unnamed: 0,user,user_history,hist_tgf,hist_nc
0,0,"[1304, 1283, 1099, 1321, 1378, 819, 1162, 1274...",0.070130,0.263235
1,1,"[1270, 1109, 93, 399, 1264, 1185, 1062, 1310, ...",0.080792,0.216518
2,2,"[1290, 1283, 1286, 1109, 551, 1340, 1314, 93, ...",0.128830,0.184664
3,3,"[1291, 961, 1270, 710, 1008, 215, 1314, 1289, ...",0.043137,0.240000
4,4,"[1283, 1301, 1285, 1287, 1316, 1076, 789, 1314...",0.057031,0.232877
...,...,...,...,...
1406,1406,"[1308, 1257, 1360, 1404, 1372, 1228, 1407, 143...",-0.041006,0.336066
1407,1407,"[438, 1308, 1270, 1266, 1109, 153, 1267, 1257,...",0.062157,0.249280
1408,1408,"[1284, 452, 1257, 1206, 1289, 654, 1109, 1308,...",0.140247,0.213904
1409,1409,"[1315, 1220, 1362, 1360, 1221, 1369, 716, 993,...",0.054754,0.248175


In [82]:
rl_user_his_pref.iloc[0,4]

[1715,
 1717,
 1720,
 1725,
 1728,
 1733,
 1734,
 1735,
 1754,
 1756,
 1757,
 1758,
 1761,
 1764,
 1768,
 1772,
 1774,
 1776,
 1788,
 1790,
 1791,
 1793,
 1794,
 1798,
 1809,
 1811,
 1815,
 1820,
 1828,
 1831,
 1833,
 1834,
 1836,
 1839,
 1842,
 1845,
 1853,
 1857,
 1859,
 1863,
 1897,
 1902,
 1907,
 1908,
 1912,
 1916,
 1924,
 1959,
 1965,
 1977]

In [118]:
start_time = time.time()
rl_user_his_pref = update_hist_df(rec_results, rl_user_his_pref, test_ur, config)
rl_user_his_pref['user_history'] = rl_user_his_pref['user_history'].apply(lambda x: x[-50:] if len(x) >= 50 else x)
rl_user_his_pref = get_stage_user_tgf(rl_user_his_pref, 'user_history', config)
config['user_his'] = rl_user_his_pref
end_time = time.time()
elapsed_time = end_time - start_time
logger.info(f":finished update user_hist: {elapsed_time:.6f} s")

17 Jan 15:33 INFO - :finished update user_hist: 4.750700 s


In [119]:
rl_user_his_pref

Unnamed: 0,user,user_history,hist_tgf,hist_nc,exp_list
0,0,"[1561, 1712, 1700, 1695, 1376, 1757, 1616, 183...",0.229764,0.236912,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1,"[1569, 1738, 1549, 1447, 1489, 1454, 1602, 176...",0.200351,0.194866,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ..."
2,2,"[1826, 1745, 152, 1484, 184, 535, 633, 619, 18...",0.336479,0.168197,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,3,"[1590, 1563, 1636, 1621, 1461, 1313, 1341, 176...",0.154998,0.218000,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,4,"[1697, 908, 46, 1734, 1704, 1748, 1676, 1628, ...",0.186566,0.209589,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...
1406,1406,"[1774, 1400, 1738, 1798, 1770, 1720, 1551, 156...",0.187933,0.304459,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1407,1407,"[1789, 1791, 746, 1573, 1754, 1557, 1924, 1912...",0.125705,0.228352,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1408,1408,"[1411, 1756, 1768, 1312, 1675, 1052, 1749, 984...",0.208945,0.194513,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1409,1409,"[1801, 1783, 1619, 1839, 1852, 1700, 1855, 170...",0.221995,0.223358,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [41]:
logger.info(f'begin get fine-tune data test stage {stage}')
start_time = time.time()
temp_rec_result = add_user_inter(rec_results, test_ur)
temp_rec_result.to_csv(saved_rec_path + f'rec_test_stage{stage}.csv') 
user_rec_records = generate_user_records(temp_rec_result)
fine_tune_data = negative_sampling(user_rec_records)
if config['save'] > 0:
    np.save(saved_data_path + f'fine_tune_data_test_{stage}', fine_tune_data)
end_time = time.time()
elapsed_time = end_time - start_time
logger.info(f":finished get fine tune data: {elapsed_time:.6f} s")

17 Jan 11:03 INFO - begin get fine-tune data test stage 0
17 Jan 11:03 INFO - :finished get fine tune data: 2.359390 s


In [None]:
# fine-tune model

In [None]:
history_records = rl_user_his_pref.set_index('user')['user_history'].to_dict()

In [58]:
pred_test_dict =  get_model_pred(test_u, preds_topk)
user_nc = rl_user_his_pref.set_index('user')['hist_nc'].to_dict()
action_space_stage = generate_user_action_space(pred_test_dict, user_nc, new_item_list, content_flag, config)

In [65]:
config['train_step'] = config['topk'] - 20
history_state = rl_user_his_pref.set_index('user')['user_history'].to_dict()
action_space = action_space_stage
ground_truth = test_ur
train_users = test_u

In [59]:
action_space_stage[721]

[1516,
 2045,
 1401,
 2217,
 2159,
 128,
 2232,
 276,
 2174,
 987,
 251,
 1012,
 702,
 13,
 1461,
 419,
 71,
 2339,
 560,
 153,
 1571,
 927,
 1481,
 916,
 1620,
 2282,
 33,
 1773,
 133,
 2083,
 516,
 551,
 855,
 626,
 37,
 2035,
 1081,
 723,
 2242,
 1029,
 1337,
 2333,
 2043,
 25,
 360,
 1718,
 2021,
 263,
 2123,
 140]

In [50]:
if 721 in test_u:
    print('yes')

yes


In [53]:
pred_test_dict[721]

array([1516, 2045, 1401, 2217, 2159,  128, 2232,  276, 2174,  987,  251,
       1012,  702,   13, 1461,  419,   71, 2339,  560,  153, 1571,  927,
       1481,  916, 1620, 2282,   33, 1773,  133, 2083,  516,  551,  855,
        626,   37, 2035, 1081,  723, 2242, 1029, 1337, 2333, 2043,   25,
        360, 1718, 2021,  263, 2123,  140])

In [66]:
n_actions = config['n_actions']
# dqn = DQN(user_embed, item_embed, config, n_actions)
# print("=======model initial completed========")

start_time = time.time()
RLTrainer = RLTraining(train_users, history_state, ground_truth, action_space, config)
dqn = RLTrainer.train(dqn)
end_time = time.time()
elapsed_time = end_time - start_time
config['debias_method'] = f'fair_alpha{alpha}_beta{beta}_gama{gama}'
saved_name = saved_trd_path + f"rl_{config['debias_method']}_step{config['train_step']}_stage{stage}.pt"
logger.info(f":finished train agent: {elapsed_time:.6f} s")
# torch.save(dqn.state_dict(), saved_name)

=====training=====


  0%|          | 0/2 [00:00<?, ?it/s]

4.129797786843539


 50%|█████     | 1/2 [00:30<00:30, 30.28s/it]

1.529046937836069


100%|██████████| 2/2 [00:49<00:00, 24.95s/it]
17 Jan 11:27 INFO - :finished train agent: 49.907655 s


NameError: name 'test_u_cands' is not defined