In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix

In [40]:
def rwr_recommendation(user_preference_matrix, restart_prob, num_steps, num_recommendations):
    num_users, num_items = user_preference_matrix.shape

    # 构建转移矩阵
    user_preference_matrix = user_preference_matrix.toarray()
    full_matrix = np.zeros((num_users + num_items, num_users + num_items), dtype = 'float32')
    full_matrix[:num_users, num_users:] = user_preference_matrix
    full_matrix[num_users:, :num_users] = user_preference_matrix.T
    transition_matrix = (1 - restart_prob) * full_matrix + restart_prob * np.eye(num_users + num_items)
    
    print("Transition Matrix:\n", transition_matrix)
    
    # 初始化游走概率分布矩阵
    walk_prob = np.zeros((num_users + num_items, num_users))
    for i in range(num_users):
        walk_prob[i, i] = 1

    print("\nInitial Walk Probabilities:\n", walk_prob)
    
    # 执行随机游走
    for step in range(num_steps):
        walk_prob = transition_matrix.dot(walk_prob)
        print(f"\nWalk Probabilities after step {step + 1}:\n", walk_prob)
    
    # 获取推荐列表
    recommendations = []
    for user_index in range(num_users):
        item_probabilities = walk_prob[num_users:, user_index].flatten()
        item_indices = np.argsort(item_probabilities)[::-1]
        recommendations.append(item_indices[:num_recommendations])
    
    # 输出前几个用户的推荐结果
    for user_id, recs in enumerate(recommendations):
        print(f"\nUser {user_id + 1} recommendations: {recs}")
    
    return tuple(recommendations)

In [44]:
url = './ml-latest-small/ratings.csv'

ml_data = pd.read_csv(url)

ml_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [45]:
ml_data.shape

(100836, 4)

In [46]:
ml_data = ml_data.drop(['timestamp'], axis = 1)
ml_data.shape[0]

100836

In [47]:
user_ids = ml_data['userId'].unique()
movie_ids = ml_data['movieId'].unique()
user_map = {id: idx for idx, id in enumerate(user_ids)}
movie_map = {id: idx for idx, id in enumerate(movie_ids)}

In [54]:
# 创建用户-电影评分矩阵
num_users = len(user_ids)
num_movies = len(movie_ids)
ratings_matrix = np.zeros((num_users, num_movies))

In [55]:
for row in ml_data.itertuples():
    user_idx = user_map[row.userId]
    movie_idx = movie_map[row.movieId]
    ratings_matrix[user_idx, movie_idx] = row.rating

In [56]:
ratings_matrix = csr_matrix(ratings_matrix)

print(ratings_matrix)

  (0, 0)	4.0
  (0, 1)	4.0
  (0, 2)	4.0
  (0, 3)	5.0
  (0, 4)	5.0
  (0, 5)	3.0
  (0, 6)	5.0
  (0, 7)	4.0
  (0, 8)	5.0
  (0, 9)	5.0
  (0, 10)	5.0
  (0, 11)	5.0
  (0, 12)	3.0
  (0, 13)	5.0
  (0, 14)	4.0
  (0, 15)	5.0
  (0, 16)	3.0
  (0, 17)	3.0
  (0, 18)	5.0
  (0, 19)	4.0
  (0, 20)	4.0
  (0, 21)	5.0
  (0, 22)	4.0
  (0, 23)	3.0
  (0, 24)	4.0
  :	:
  (609, 9699)	3.5
  (609, 9700)	3.5
  (609, 9701)	4.0
  (609, 9702)	1.5
  (609, 9703)	5.0
  (609, 9704)	3.0
  (609, 9705)	3.0
  (609, 9706)	4.0
  (609, 9707)	4.0
  (609, 9708)	3.5
  (609, 9709)	3.0
  (609, 9710)	4.0
  (609, 9711)	0.5
  (609, 9712)	4.0
  (609, 9713)	4.0
  (609, 9714)	3.0
  (609, 9715)	3.5
  (609, 9716)	3.5
  (609, 9717)	3.5
  (609, 9718)	3.5
  (609, 9719)	2.5
  (609, 9720)	4.5
  (609, 9721)	3.0
  (609, 9722)	3.5
  (609, 9723)	3.5


In [57]:
restart_prob = 0.15
num_steps = 10
num_recommendations = 10

In [58]:
true_recommendations = rwr_recommendation(ratings_matrix, restart_prob, num_steps, num_recommendations)

Transition Matrix:
 [[0.15 0.   0.   ... 0.   0.   0.  ]
 [0.   0.15 0.   ... 0.   0.   0.  ]
 [0.   0.   0.15 ... 0.   0.   0.  ]
 ...
 [0.   0.   0.   ... 0.15 0.   0.  ]
 [0.   0.   0.   ... 0.   0.15 0.  ]
 [0.   0.   0.   ... 0.   0.   0.15]]

Initial Walk Probabilities:
 [[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

Walk Probabilities after step 1:
 [[0.15       0.         0.         ... 0.         0.         0.        ]
 [0.         0.15       0.         ... 0.         0.         0.        ]
 [0.         0.         0.15       ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.         2.55000019]
 [0.         0.         0.         ... 0.         0.         2.97500014]
 [0.         0.         0.         ... 0.         0.         2.97500014]]

Walk Probabilities after step 2:
 [[3.30257008e+03 2.89000008e+01 5.81612513e+01 ... 1

In [100]:
count_origin = 0
reverse_movie_map = {v: k for k, v in movie_map.items()}
for user_id, recs in enumerate(true_recommendations[:300]):
    recommended_movies = [reverse_movie_map[movie_idx] for movie_idx in recs]
    if(movie_ids[target_item] in recommended_movies):
        count_origin += 1
    print(f"User {user_ids[user_id]} recommendations: {recommended_movies}")

User 1 recommendations: [296, 356, 260, 2571, 1196, 318, 593, 1210, 1198, 2959, 480, 589, 2858, 2028, 50, 858, 110, 608, 1270, 47]
User 2 recommendations: [318, 2571, 356, 2959, 296, 58559, 79132, 4993, 7153, 593, 260, 5952, 858, 3578, 1196, 6874, 50, 48516, 4226, 1198]
User 3 recommendations: [260, 1196, 2571, 1210, 356, 593, 296, 1198, 1214, 2028, 589, 1200, 480, 318, 608, 1270, 2858, 2959, 110, 1240]
User 4 recommendations: [296, 260, 356, 593, 2571, 318, 1196, 2858, 1198, 608, 2959, 858, 1210, 50, 1136, 2762, 480, 589, 1197, 527]
User 5 recommendations: [296, 356, 318, 593, 110, 480, 589, 457, 527, 150, 50, 260, 2571, 47, 1, 588, 364, 590, 592, 1196]
User 6 recommendations: [356, 318, 296, 593, 110, 480, 457, 589, 150, 47, 364, 588, 260, 380, 527, 592, 590, 1, 377, 50]
User 7 recommendations: [356, 2571, 318, 296, 260, 1196, 593, 1210, 2959, 4993, 1198, 589, 480, 7153, 5952, 2028, 1270, 858, 2858, 1]
User 8 recommendations: [356, 318, 296, 593, 110, 480, 457, 589, 527, 150, 47, 50,

In [102]:
# 刚才已经进行了正常的推荐，接下来进行投毒

In [93]:
def poisoning_attack(user_item_matrix, target_item, num_fake_users, max_ratings_per_fake_user, restart_prob, num_steps, num_recommendations):
    num_users, num_items = user_item_matrix.shape

    # 初始化伪造用户评分矩阵
    fake_user_ratings = np.zeros((num_fake_users, num_items))

    # 初始评分随机分配，设置对目标物品的偏好
    for i in range(num_fake_users):
        for j in range(6000):
            rated_items = np.random.choice(num_items, max_ratings_per_fake_user, replace=False)
            fake_user_ratings[i, rated_items] = np.random.randint(1, max_ratings_per_fake_user)
        fake_user_ratings[i, target_item] = 5  # 给予目标物品最高评分

    # 合并伪造用户评分与原始评分矩阵
    extended_user_item_matrix = np.vstack([user_item_matrix.toarray(), fake_user_ratings])
    extended_user_item_matrix = csr_matrix(extended_user_item_matrix)
    print(extended_user_item_matrix)
    # 训练包含伪造用户的推荐系统
    fake_recommendation = rwr_recommendation(extended_user_item_matrix, restart_prob, num_steps, num_recommendations)
    
    return fake_recommendation

In [94]:
num_fake_users = 300
max_ratings_per_fake_user = 2
target_item = 114
print(movie_ids[114])

1927


In [96]:
fake_recommendation = poisoning_attack(ratings_matrix, target_item, num_fake_users, max_ratings_per_fake_user, restart_prob, num_steps, num_recommendations)

  (0, 0)	4.0
  (0, 1)	4.0
  (0, 2)	4.0
  (0, 3)	5.0
  (0, 4)	5.0
  (0, 5)	3.0
  (0, 6)	5.0
  (0, 7)	4.0
  (0, 8)	5.0
  (0, 9)	5.0
  (0, 10)	5.0
  (0, 11)	5.0
  (0, 12)	3.0
  (0, 13)	5.0
  (0, 14)	4.0
  (0, 15)	5.0
  (0, 16)	3.0
  (0, 17)	3.0
  (0, 18)	5.0
  (0, 19)	4.0
  (0, 20)	4.0
  (0, 21)	5.0
  (0, 22)	4.0
  (0, 23)	3.0
  (0, 24)	4.0
  :	:
  (909, 9693)	1.0
  (909, 9694)	1.0
  (909, 9695)	1.0
  (909, 9696)	1.0
  (909, 9697)	1.0
  (909, 9699)	1.0
  (909, 9700)	1.0
  (909, 9701)	1.0
  (909, 9704)	1.0
  (909, 9705)	1.0
  (909, 9706)	1.0
  (909, 9707)	1.0
  (909, 9708)	1.0
  (909, 9709)	1.0
  (909, 9711)	1.0
  (909, 9712)	1.0
  (909, 9713)	1.0
  (909, 9714)	1.0
  (909, 9715)	1.0
  (909, 9716)	1.0
  (909, 9717)	1.0
  (909, 9718)	1.0
  (909, 9719)	1.0
  (909, 9720)	1.0
  (909, 9723)	1.0
Transition Matrix:
 [[0.15 0.   0.   ... 0.   0.   0.  ]
 [0.   0.15 0.   ... 0.   0.   0.  ]
 [0.   0.   0.15 ... 0.   0.   0.  ]
 ...
 [0.   0.   0.   ... 0.15 0.   0.  ]
 [0.   0.   0.   ... 0.   0.15 

In [99]:
count_poisoned = 0
reverse_movie_map = {v: k for k, v in movie_map.items()}
for user_id, recs in enumerate(fake_recommendation[:300]):
    recommended_movies = [reverse_movie_map[movie_idx] for movie_idx in recs]
    if movie_ids[target_item] in recommended_movies:
        count_poisoned += 1
    print(f"User {user_ids[user_id]} recommendations: {recommended_movies}")

User 1 recommendations: [1927, 356, 296, 260, 2571, 1196, 318, 593, 1210, 1198, 2959, 480, 589, 50, 2858, 2028, 858, 110, 1270, 608]
User 2 recommendations: [318, 1927, 356, 2571, 2959, 296, 58559, 79132, 593, 4993, 7153, 260, 5952, 858, 3578, 1196, 6874, 50, 4226, 1198]
User 3 recommendations: [1927, 1196, 260, 1198, 593, 1210, 356, 2571, 296, 1214, 2028, 589, 1200, 480, 1270, 1240, 50, 318, 608, 110]
User 4 recommendations: [1927, 296, 260, 356, 593, 318, 2571, 1196, 1198, 2858, 608, 2959, 858, 1210, 50, 1197, 1136, 2762, 480, 1]
User 5 recommendations: [296, 356, 318, 593, 110, 480, 589, 457, 527, 150, 50, 260, 1, 47, 2571, 588, 364, 1927, 590, 592]
User 6 recommendations: [1927, 356, 318, 296, 593, 110, 480, 457, 589, 150, 588, 364, 47, 1, 260, 592, 380, 527, 590, 50]
User 7 recommendations: [356, 2571, 318, 296, 260, 1196, 593, 1927, 1210, 2959, 4993, 1198, 480, 589, 7153, 5952, 2028, 1270, 1, 858]
User 8 recommendations: [356, 318, 296, 593, 110, 480, 457, 589, 150, 527, 47, 50, 

In [102]:
percentage_poisoned = (count_poisoned / 300) * 100

print(f"Original target item {target_item} recommendation count: {count_origin}")
print(f"Poisoned target item {target_item} recommendation count: {count_poisoned}")

print(f"占据用户总数的 {percentage_poisoned:.2f}%")


Original target item 114 recommendation count: 0
Poisoned target item 114 recommendation count: 245
占据用户总数的 81.67%
