In [1]:
# 数据处理
import pandas as pd
import numpy as np
import json

from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix


# 自然语言处理
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 推荐系统构建
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from scipy.sparse import csr_matrix

# 数据可视化（可选）
import matplotlib.pyplot as plt
import seaborn as sns




In [2]:

# Load the CSV tables
games_df = pd.read_csv('../data/games.csv')
users_df = pd.read_csv('../data/users.csv')
recommendations_df = pd.read_csv('../data/recommendations.csv')

# Load the metadata file, which is parsed as one JSON document per line.
# The encoding is pinned to UTF-8 so decoding does not depend on the
# platform's locale default, and blank lines (e.g. a trailing newline at
# the end of the file) are skipped instead of crashing json.loads.
with open('../data/games_metadata.json', 'r', encoding='utf-8') as f:
    games_metadata = [json.loads(line) for line in f if line.strip()]

# Flatten the parsed JSON records into a DataFrame
games_metadata_df = pd.json_normalize(games_metadata)


In [3]:
# Inner-join the games table with its metadata on the shared app_id key
merged_games_df = games_df.merge(games_metadata_df, how='inner', on='app_id')

# Preview the first rows of the joined frame
print(merged_games_df.head())


   app_id                              title date_release   win    mac  linux  \
0   13500  Prince of Persia: Warrior Within™   2008-11-21  True  False  False   
1   22364            BRINK: Agents of Change   2011-08-03  True  False  False   
2  113020       Monaco: What's Yours Is Mine   2013-04-24  True   True   True   
3  226560                 Escape Dead Island   2014-11-18  True  False  False   
4  249050            Dungeon of the ENDLESS™   2014-10-27  True   True  False   

          rating  positive_ratio  user_reviews  price_final  price_original  \
0  Very Positive              84          2199         9.99            9.99   
1       Positive              85            21         2.99            2.99   
2  Very Positive              92          3722        14.99           14.99   
3          Mixed              61           873        14.99           14.99   
4  Very Positive              88          8784        11.99           11.99   

   discount  steam_deck               

In [4]:
# Build the user-game interaction matrix: recommended -> 1, not recommended -> 0.
#
# The matrix is assembled directly in sparse form. Pivoting into a dense
# users x games DataFrame first allocates one cell per (user, game) pair,
# which does not fit in memory once there are many users and games.

# Average duplicate (user, game) rows, mirroring pivot_table's default
# aggregation, and drop pairs whose value is missing.
interactions = (
    recommendations_df
    .groupby(['user_id', 'app_id'], as_index=False)['is_recommended']
    .mean()
    .dropna(subset=['is_recommended'])
)

# Map ids to contiguous row/column positions; sort=True keeps the labels in
# ascending order, matching the ordering a pivot would produce.
user_codes, user_ids = pd.factorize(interactions['user_id'], sort=True)
game_codes, app_ids = pd.factorize(interactions['app_id'], sort=True)

# Sparse matrix used by the downstream computations. Pairs that never occur
# are implicit zeros, so "no interaction" and "not recommended" both read as 0
# (the same encoding that filling missing values with 0 produces).
user_game_sparse_matrix = csr_matrix(
    (
        interactions['is_recommended'].to_numpy(dtype=np.float64),
        (user_codes, game_codes),
    ),
    shape=(len(user_ids), len(app_ids)),
)
# Drop explicitly stored zeros so only non-zero interactions take up space.
user_game_sparse_matrix.eliminate_zeros()

# Labelled, sparse-backed view of the same matrix; it keeps the
# user_id -> row and app_id -> column lookups (.index / .columns) available.
user_game_matrix = pd.DataFrame.sparse.from_spmatrix(
    user_game_sparse_matrix,
    index=pd.Index(user_ids, name='user_id'),
    columns=pd.Index(app_ids, name='app_id'),
)

# Show the matrix shape: (number of users, number of games)
print(user_game_matrix.shape)




In [None]:
# Factorize the sparse interaction matrix with truncated SVD.
# n_components is the number of dimensions kept after the reduction.
svd = TruncatedSVD(random_state=42, n_components=50)
user_game_matrix_svd = svd.fit_transform(user_game_sparse_matrix)

# Report the shape of the reduced matrix
print(user_game_matrix_svd.shape)


In [None]:
# User-based recommender built on the SVD factors
def recommend_games_for_user(user_id, n_recommendations=10, exclude_known=True):
    """Recommend games for one user from the truncated-SVD factorization.

    The user's row of ``user_game_matrix_svd`` is compared, by cosine
    similarity, with the per-game factor vectors of the fitted ``svd``
    model; the best-scoring games are looked up in ``merged_games_df``.

    Parameters
    ----------
    user_id
        Label of the user in ``user_game_matrix.index``.
    n_recommendations : int, default 10
        Maximum number of games to return.
    exclude_known : bool, default True
        When true, games with a non-zero entry in the user's row of
        ``user_game_sparse_matrix`` are never suggested. Because that matrix
        stores 0 for both "not recommended" and "no interaction", only games
        the user has a non-zero value for can be detected this way.

    Returns
    -------
    pandas.DataFrame or list
        Rows of ``merged_games_df`` (columns ``app_id``, ``title``,
        ``price_final``, ``rating``) ordered from best to worst match.
        Fewer than ``n_recommendations`` rows come back when a ranked game is
        missing from ``merged_games_df``. An empty list is returned, after
        printing a message, when ``user_id`` is unknown.
    """
    # Make sure the user exists
    if user_id not in user_game_matrix.index:
        print(f"用户 {user_id} 不存在！")
        return []

    # Row position of the user and their vector in the reduced space
    user_idx = user_game_matrix.index.get_loc(user_id)
    user_vector = user_game_matrix_svd[user_idx]

    # svd.components_ has one column per game (n_components x n_games), so its
    # transpose holds one vector per game in the same space as user_vector.
    # The transposed *user* matrix must not be used here: it is
    # (n_components x n_users) and does not describe games at all.
    game_vectors = svd.components_.T
    game_similarities = cosine_similarity([user_vector], game_vectors).flatten()

    if exclude_known:
        # Column positions of the non-zero entries in this user's row
        known_idx = user_game_sparse_matrix[user_idx].nonzero()[1]
        game_similarities[known_idx] = -np.inf

    # Best-scoring game columns first; masked (-inf) entries are discarded
    ranked_idx = np.argsort(-game_similarities)[:n_recommendations]
    ranked_idx = ranked_idx[np.isfinite(game_similarities[ranked_idx])]
    ranked_app_ids = user_game_matrix.columns[ranked_idx]

    # isin() returns rows in catalogue order, so restore the ranking order
    rank_of = {app_id: rank for rank, app_id in enumerate(ranked_app_ids)}
    games = merged_games_df[merged_games_df['app_id'].isin(ranked_app_ids)]
    order = np.argsort(games['app_id'].map(rank_of).to_numpy(), kind='stable')
    return games.iloc[order][['app_id', 'title', 'price_final', 'rating']]

# # 测试推荐系统
# user_id = 123456  # 替换为实际的用户ID
# recommendations = recommend_games_for_user(user_id, n_recommendations=10)
# print(recommendations)
