In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
#load datasets
from lib.datautils import LoadData, DataProcessor
users, movies, ratings = LoadData()
data_processor = DataProcessor(users, movies, ratings)
recently_watched_movies = data_processor.get_recent_movies()
print(movies.shape)
print(users.shape)

(1682, 19)
(943, 4)


In [9]:
from lib.DoubleDQNAgent import ReplayMemory, TransformerNetV2, DoubleDQN
from lib.recenv import RecommendationEnvironment
from torch.utils.tensorboard import SummaryWriter

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#超参数定义
user_size = 943+1   
movie_size = 1682+1 
embed_size = 32  # 嵌入尺寸
num_heads = 4  # Transformer头数
num_layers = 1 # Transformer层数
state_size = 6  # 状态尺寸(1个用户ID + 5个电影ID)
action_size = movie_size  # 动作空间尺寸(可推荐电影数量)
gamma = 0.99  # 折扣因子
epsilon = 0.05  # ε-greedy策略的ε值
learning_rate = 1e-4  # 学习率
capacity = 1000  # 回放缓冲区的大小
batch_size = 512 # 批量大小
num_episodes = 500  # 训练回合数

# 初始化环境
env = RecommendationEnvironment(device, users, movies, recently_watched_movies)
# 初始化子网络
policy_net = TransformerNetV2(user_size, movie_size, embed_size, num_heads, num_layers, action_size).to(device)
target_net = TransformerNetV2(user_size, movie_size, embed_size, num_heads, num_layers, action_size).to(device)
optimizer = optim.Adam(policy_net.parameters(), lr=learning_rate)
memory = ReplayMemory(capacity)
agent = DoubleDQN(policy_net, target_net, memory, gamma, epsilon, optimizer, action_size, device)
# 初始化tensorboard
writer = SummaryWriter(log_dir='./log/DDQN')

for i in range(800):
    total_reward = 0
    Rating = 0
    Repeat_rate = 0
    for t in range(942):
        state = env.prepared_states[:,t].to(device)
        for j in range(5):
            action = agent.select_action(state).to(device)  # 选择动作
            next_state, reward, reward_info = env.step(action, state)  # 执行动作并获取下一个状态和奖励,以及reward_info(rating,repeatRate)
            total_reward += reward
            Rating += reward_info["rating"]
            Repeat_rate += reward_info["repeatRate"]
            next_state = torch.tensor(next_state, dtype=torch.int64).to(device)
            reward = torch.tensor([reward], dtype=torch.float32).to(device)
            memory.push(state, action, next_state, reward)
            state = next_state
            agent.optimize_model(batch_size, capacity)
#     942*4=4710
    avg_rating = Rating/4710
    avg_Repeat_rate = Repeat_rate/4710
    writer.add_scalar('Rating/Train', avg_rating, i+1)
    writer.add_scalar('Repeat_rate/Train', avg_Repeat_rate, i+1)
    if (i+1) % 100 == 0:
    # 保存模型
        policy_PATH = './checkpoint/DDQN_policy_net_%d.pth' % (i+1)
        target_PATH = './checkpoint/DDQN_target_net_%d.pth' % (i+1)
        torch.save(policy_net.state_dict(), policy_PATH)
        torch.save(target_net.state_dict(), target_PATH)
    print(f"Episode {i+1}, Total reward: {total_reward}")
    writer.add_scalar('Reward/eposide', total_reward, i+1)
#     target_net.load_state_dict(policy_net.state_dict())
    
print("Training completed")

  next_state = torch.tensor(next_state, dtype=torch.int64).to(device)


Episode 1, Total reward: 7221.473042596746
Episode 2, Total reward: 7711.334664124372
Episode 3, Total reward: 8194.199964436228
Episode 4, Total reward: 8307.200190612464
Episode 5, Total reward: 8624.997044944163
Episode 6, Total reward: 9361.946031475998
Episode 7, Total reward: 8600.841391346199
Episode 8, Total reward: 9433.020125058867
Episode 9, Total reward: 9494.603072124679
Episode 10, Total reward: 9777.97728496772
Episode 11, Total reward: 9583.385768970958
Episode 12, Total reward: 10190.10992896349
Episode 13, Total reward: 10648.299552425111
Episode 14, Total reward: 10700.425434079465
Episode 15, Total reward: 11003.756964566692
Episode 16, Total reward: 11562.75241792817
Episode 17, Total reward: 12065.923146202707
Episode 18, Total reward: 12274.990519225736
Episode 19, Total reward: 13194.602593941392
Episode 20, Total reward: 13305.778005038952
Episode 21, Total reward: 14468.541146190322
Episode 22, Total reward: 14771.987682064497
Episode 23, Total reward: 15924.8

Episode 184, Total reward: 25626.041475376045
Episode 185, Total reward: 25863.38994517885
Episode 186, Total reward: 26092.280724845823
Episode 187, Total reward: 25647.849348785378
Episode 188, Total reward: 26187.74131255439
Episode 189, Total reward: 26235.96405983712
Episode 190, Total reward: 26002.314215618946
Episode 191, Total reward: 25771.255532443913
Episode 192, Total reward: 26155.587851779328
Episode 193, Total reward: 25834.449415262727
Episode 194, Total reward: 25982.052504217118
Episode 195, Total reward: 26154.436148840363
Episode 196, Total reward: 26050.09554679893
Episode 197, Total reward: 25837.846298191405
Episode 198, Total reward: 26057.049516854124
Episode 199, Total reward: 26259.010558295187
Episode 200, Total reward: 26452.183743086967
Episode 201, Total reward: 26719.51201607848
Episode 202, Total reward: 26271.503920921554
Episode 203, Total reward: 26298.008216484744
Episode 204, Total reward: 26026.135033495495
Episode 205, Total reward: 26025.367001

Episode 364, Total reward: 28008.14547874165
Episode 365, Total reward: 28483.79288224076
Episode 366, Total reward: 28588.72996464135
Episode 367, Total reward: 28483.665031325123
Episode 368, Total reward: 28372.508437411434
Episode 369, Total reward: 28574.236688208806
Episode 370, Total reward: 28647.26384823677
Episode 371, Total reward: 28852.648352980574
Episode 372, Total reward: 28409.647490352556
Episode 373, Total reward: 28607.079404823475
Episode 374, Total reward: 28368.808530333485
Episode 375, Total reward: 28688.19863101693
Episode 376, Total reward: 28444.739268532318
Episode 377, Total reward: 28753.09969185453
Episode 378, Total reward: 28033.373037349982
Episode 379, Total reward: 28549.19600569678
Episode 380, Total reward: 28800.684844367133
Episode 381, Total reward: 28738.8651091156
Episode 382, Total reward: 29065.114064944722
Episode 383, Total reward: 28233.210671802855
Episode 384, Total reward: 28683.235173012545
Episode 385, Total reward: 28737.6229597932

Episode 544, Total reward: 28830.24319557053
Episode 545, Total reward: 28976.640914282587
Episode 546, Total reward: 28606.010481578338
Episode 547, Total reward: 29383.746214250685
Episode 548, Total reward: 28903.144916947345
Episode 549, Total reward: 29157.619640878496
Episode 550, Total reward: 28962.49367519557
Episode 551, Total reward: 29218.741969737
Episode 552, Total reward: 29199.993458340534
Episode 553, Total reward: 28839.248776600227
Episode 554, Total reward: 29196.463864599515
Episode 555, Total reward: 29152.826700365757
Episode 556, Total reward: 29118.69118835344
Episode 557, Total reward: 28738.045192070425
Episode 558, Total reward: 28900.391554937167
Episode 559, Total reward: 28991.694320524988
Episode 560, Total reward: 29084.33198827065
Episode 561, Total reward: 29123.405491412155
Episode 562, Total reward: 29535.16085096237
Episode 563, Total reward: 28980.955720417547
Episode 564, Total reward: 29335.997977903426
Episode 565, Total reward: 29406.856110288

Episode 724, Total reward: 30224.657464476993
Episode 725, Total reward: 29880.93635015199
Episode 726, Total reward: 30073.289248241737
Episode 727, Total reward: 30016.29035762189
Episode 728, Total reward: 30044.330525259757
Episode 729, Total reward: 30010.00284298186
Episode 730, Total reward: 30213.384379752202
Episode 731, Total reward: 29702.729144948393
Episode 732, Total reward: 29869.882116855362
Episode 733, Total reward: 30132.816580655257
Episode 734, Total reward: 30182.036335141234
Episode 735, Total reward: 30352.595922534656
Episode 736, Total reward: 30420.616370238662
Episode 737, Total reward: 30492.300693325687
Episode 738, Total reward: 30289.66421133615
Episode 739, Total reward: 30317.76107714479
Episode 740, Total reward: 30561.517212588937
Episode 741, Total reward: 30355.072811237696
Episode 742, Total reward: 30614.19154077057
Episode 743, Total reward: 30794.49218529852
Episode 744, Total reward: 30850.816965260597
Episode 745, Total reward: 30727.91016532