## 基于内容的推荐

首先将描述游戏内容的各文本列（genre、platform、publisher、rating）拼接成一个字符串特征

In [1]:
import pandas as pd

# Load the dataset used for content-based recommendations.
df = pd.read_csv("content_based_data.csv")

# Columns whose text is concatenated into a single per-game feature string.
feature_columns = ['genre', 'platform', 'publisher', 'rating']

# Replace missing values with an empty string *before* casting to str.
# Casting NaN directly yields the literal text "nan", which the TF-IDF step
# would treat as a real token shared by every row with a missing value and
# would therefore inflate the similarity between otherwise unrelated games.
text_parts = [df[column].fillna('').astype(str) for column in feature_columns]

# Join the parts with single spaces, in the order listed above.
combined = text_parts[0]
for part in text_parts[1:]:
    combined = combined + ' ' + part

# Lower-case so that matching of feature tokens is case-insensitive.
df['combined_features'] = combined.str.lower()
print('done')

done


对特征进行向量化

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Turn each game's combined feature string into a TF-IDF weighted vector,
# ignoring scikit-learn's built-in English stop words.
# NOTE(review): the default token pattern only keeps tokens of two or more
# word characters, so single-character values (e.g. a one-letter rating)
# would be dropped — confirm this is intended for this dataset.
corpus = df['combined_features']
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(corpus)
print('done')

done


计算余弦相似度矩阵

In [5]:
from sklearn.metrics.pairwise import linear_kernel

# TfidfVectorizer L2-normalises every row by default, so the plain dot
# product computed by linear_kernel is the cosine similarity here.
# Passing only X compares the matrix with itself (Y defaults to X), giving
# one row and one column per game.
cosine_sim = linear_kernel(tfidf_matrix)
print('done')

done


创建推荐函数并进行尝试：

In [8]:
# Map each game name to its row label in df.
# Series.drop_duplicates() compares *values* (here the row labels, which are
# unique for a frame read without index_col), so it never removed repeated
# names. De-duplicate on the index (the names) instead, keeping the first
# row for every name so that a lookup by name yields a single label.
indices = pd.Series(df.index, index=df['name'])
indices = indices[~indices.index.duplicated(keep='first')]

def recommend_games(title, num_recommendations=5):
    """Return the names of the games most similar to ``title``.

    The lookup is case-insensitive. Similarity is read from the module-level
    ``cosine_sim`` matrix, whose rows and columns are expected to follow the
    row order of the module-level ``df``.

    Args:
        title: Name of the game to find recommendations for.
        num_recommendations: Maximum number of names to return. Values of
            zero or below yield an empty list.

    Returns:
        A list of game names ordered from most to least similar. The queried
        game itself is never included. If ``title`` is not present in
        ``df['name']`` a message is printed and an empty list is returned.
    """
    wanted = title.lower()

    # Lower-case the name column once and reuse it for the lookup.
    names_lower = df['name'].str.lower()
    is_match = names_lower.eq(wanted).fillna(False).to_numpy(dtype=bool)

    if not is_match.any():
        print("游戏名称未找到，请检查拼写")
        return []

    # Use the *position* of the first match rather than its index label:
    # cosine_sim rows and .iloc are both positional, so this stays correct
    # even when df does not carry a default 0..n-1 index.
    pos = int(is_match.argmax())

    # Drop the queried game explicitly. Slicing off the first sorted entry is
    # not reliable: games with identical features tie at the top score, and
    # the stable sort would then place a different game first.
    scored = [
        (row, score)
        for row, score in enumerate(cosine_sim[pos])
        if row != pos
    ]

    # Highest similarity first; ties keep their original row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative request cannot turn into a from-the-end slice.
    top = scored[:max(num_recommendations, 0)]

    return df['name'].iloc[[row for row, _ in top]].tolist()

# Smoke test: request five games similar to the title "Sports".
# NOTE(review): the output recorded below shows this title was not found;
# "Sports" may be a genre value rather than a game name — confirm and swap
# in a title that exists in the dataset.
recommendations = recommend_games("Sports", num_recommendations=5)
print("推荐游戏：")
for rank, game_name in enumerate(recommendations, start=1):
    print(f"{rank}. {game_name}")



游戏名称未找到，请检查拼写
推荐游戏：
