In [4]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import euclidean

# Load the ratings file: tab-separated with no header row, so column names are supplied here.
rating_columns = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=rating_columns)

# User x item matrix of ratings; NaN marks an item the user has not rated.
utility_matrix = ratings.pivot(index='user_id', columns='item_id', values='rating')

# Subtract each user's own mean rating, then fill unrated items with 0 so they
# add nothing to the dot products used by the cosine similarity.
user_means = utility_matrix.mean(axis=1)
centered_matrix = utility_matrix.sub(user_means, axis=0)
filled_matrix = centered_matrix.fillna(0)

# Pairwise user-user cosine similarity, labelled by user_id on both axes.
similarities = cosine_similarity(filled_matrix)
user_ids = utility_matrix.index
similarity_df = pd.DataFrame(similarities, index=user_ids, columns=user_ids)

# Rank every user except user 1 by similarity to user 1 and keep the ten closest.
similarity_to_user_1 = similarity_df.loc[1].drop(1)
top_10_similar_users = similarity_to_user_1.sort_values(ascending=False).iloc[:10]
print("Top 10 similar users to user 1:")
print(top_10_similar_users)

# Ratings of item 508 by those neighbours; neighbours who did not rate it are dropped.
item_508_ratings = utility_matrix[508].loc[top_10_similar_users.index].dropna()

# The estimate is the plain (unweighted) mean of the remaining neighbour ratings.
expected_rating = item_508_ratings.mean()
print(f"\nExpected rating for user 1 on item 508: {expected_rating:.2f}")

Top 10 similar users to user 1:
user_id
773    0.204792
868    0.202321
592    0.196592
880    0.195801
429    0.190661
276    0.187476
916    0.186358
222    0.182415
457    0.182253
8      0.180891
Name: 1, dtype: float64

Expected rating for user 1 on item 508: 4.20


Top 10 similar users to user 1:

user_id

773    0.204792
868    0.202321
592    0.196592
880    0.195801
429    0.190661
276    0.187476
916    0.186358
222    0.182415
457    0.182253
8      0.180891

Expected rating for user 1 on item 508: 4.20

In [5]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Users who rated movie 95. predict_rating no longer reads this, but it stays
# defined at module level in case other cells use it.
users_rated_95 = centered_matrix[95].dropna().index


def predict_rating(target_user_id, item_id=95):
    """Predict a user's rating for an item from the users who rated that item.

    Every other user who rated ``item_id`` is compared with the target user
    using cosine similarity of their mean-centered ratings, restricted to the
    items both of them rated. The prediction is the similarity-weighted sum of
    those users' raw ratings for ``item_id`` divided by the sum of the absolute
    similarities.

    Parameters
    ----------
    target_user_id
        Row label in ``centered_matrix`` of the user to predict for.
    item_id
        Column label of the item to predict. Defaults to 95, the item this
        function was previously hard-wired to.

    Returns
    -------
    float
        The predicted rating, or ``np.nan`` when no usable neighbour exists
        (nobody shares a rated item with the target, or all similarities are 0).

    Raises
    ------
    KeyError
        If ``target_user_id`` or ``item_id`` is not a label in the matrices.
    """
    target_vector = centered_matrix.loc[target_user_id]
    # Loop-invariant: which items the target user has rated.
    target_rated = target_vector.notna()
    # Look the raters up per call so the function works for any item, not just 95.
    raters = centered_matrix[item_id].dropna().index

    weights = []
    neighbour_ratings = []
    for other_user_id in raters:
        if other_user_id == target_user_id:
            continue

        other_vector = centered_matrix.loc[other_user_id]
        common_items = target_rated & other_vector.notna()
        if not common_items.any():
            continue  # no co-rated items, so similarity is undefined

        sim = cosine_similarity(
            [target_vector[common_items].values],
            [other_vector[common_items].values],
        )[0][0]
        weights.append(sim)
        neighbour_ratings.append(utility_matrix.loc[other_user_id, item_id])

    weights = np.array(weights)
    neighbour_ratings = np.array(neighbour_ratings)
    total_weight = np.sum(np.abs(weights))
    if total_weight == 0:
        return np.nan  # cannot estimate
    # NOTE(review): similarities can be negative and are applied to raw (uncentered)
    # ratings here; confirm this is the intended formula rather than
    # target mean + weighted average of mean-centered ratings.
    return np.dot(weights, neighbour_ratings) / total_weight

# Predict movie 95 for both candidate users and recommend it to whichever one
# has the higher predicted rating.
pred_200 = predict_rating(200)
pred_15 = predict_rating(15)

print(f"Predicted rating for User 200 on Movie 95: {pred_200:.4f}")
print(f"Predicted rating for User 15  on Movie 95: {pred_15:.4f}")
if np.isnan(pred_200) or np.isnan(pred_15):
    # predict_rating returns NaN when it cannot estimate. NaN makes both
    # comparisons below False, which would otherwise be reported as "equal".
    print("Recommend Movie 95 to: Undetermined (a rating could not be estimated)")
elif pred_200 > pred_15:
    print("Recommend Movie 95 to: User 200")
elif pred_15 > pred_200:
    print("Recommend Movie 95 to: User 15")
else:
    print("Recommend Movie 95 to: Either user (ratings equal)")

Predicted rating for User 200 on Movie 95: 3.8573
Predicted rating for User 15  on Movie 95: 2.9594
Recommend Movie 95 to: User 200


Predicted rating for User 200 on Movie 95: 3.8573
Predicted rating for User 15  on Movie 95: 2.9594
Recommend Movie 95 to: User 200