In [5]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import os

data_path = 'E:/ml-100k/u.data'  # Your file path
try:
    ratings = pd.read_csv(data_path, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'], encoding='utf-8')
except FileNotFoundError:
    print(f"Error: File {data_path} not found. Ensure the MovieLens 100k dataset is downloaded and extracted to the correct path.")
    raise

utility_matrix = ratings.pivot(index='user_id', columns='item_id', values='rating')
user_means = utility_matrix.mean(axis=1)
centralized_matrix = utility_matrix.sub(user_means, axis=0).fillna(0)

cosine_sim = cosine_similarity(centralized_matrix)
cosine_sim_df = pd.DataFrame(cosine_sim, index=utility_matrix.index, columns=utility_matrix.index)
user_1_sim = cosine_sim_df.loc[1].sort_values(ascending=False)[1:11]  # Exclude user 1
top_10_users = user_1_sim.index
print("Top 10 users most similar to user 1:", top_10_users.tolist())

similar_users_ratings = utility_matrix.loc[top_10_users, 508].dropna()
if similar_users_ratings.empty:
    print("No similar users have rated item 508. Prediction cannot be made.")
else:
    weights = user_1_sim.loc[similar_users_ratings.index]
    weighted_sum = np.sum(similar_users_ratings * weights)
    weight_sum = np.sum(weights)
    predicted_rating = weighted_sum / weight_sum if weight_sum != 0 else 0
    user_1_mean = user_means.loc[1]
    final_predicted_rating = predicted_rating + user_1_mean
    print(f"Predicted rating for user 1 on item 508: {final_predicted_rating:.2f}")

Top 10 users most similar to user 1: [773, 868, 592, 880, 429, 276, 916, 222, 457, 8]
Predicted rating for user 1 on item 508: 7.82
