In [None]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

# Load the MovieLens 100k ratings file ('u.data').
# The file is tab-separated and is read without a header row, so the column
# names are supplied explicitly.
columns = ['user_id', 'item_id', 'rating', 'timestamp']

data = pd.read_csv('/kaggle/input/recommender/u.data', sep='\t', names=columns)

# Build the labelled user-item table: one row per user_id, one column per item_id.
# pivot_table aggregates with the mean by default, so a (user, item) pair that
# appears more than once is averaged. Missing ratings are filled with 0 (other
# imputation methods, e.g. the mean, are possible), so downstream code must
# treat 0 as "not rated" rather than as a real score.
# Built as a single chain (no inplace mutation) so the cell is safe to re-run.
user_item_df = (
    data
    .pivot_table(index='user_id', columns='item_id', values='rating')
    .fillna(0)
)

# Plain numpy array for scikit-learn. Row i / column j correspond to
# user_item_df.index[i] / user_item_df.columns[j], which keep the id labels
# that the bare array no longer carries.
user_item_matrix = user_item_df.to_numpy()

print("User-Item Matrix shape:", user_item_matrix.shape)


In [None]:
# Compute cosine similarity between every pair of users (rows of the matrix)
user_similarity = cosine_similarity(user_item_matrix)

# Display user similarity matrix
print("User Similarity Matrix:\n", user_similarity)

# Fit a brute-force KNN model with cosine distance to find nearest users
knn = NearestNeighbors(metric='cosine', algorithm='brute')
knn.fit(user_item_matrix)

# Example: predict the rating of one user for one item.
# Indices are positions in user_item_matrix, not raw ids; this assumes ids are
# contiguous and start at 1 so that position = id - 1 -- TODO confirm for
# datasets other than MovieLens 100k.
user_idx = 0  # Index for user 1 (users are 0-indexed here)
item_idx = 49  # Index for item 50
N_NEIGHBORS = 5  # number of *other* users to base the prediction on

# Ratings of all users for the chosen item (0 = not rated, see the fill step upstream)
item_ratings = user_item_matrix[:, item_idx]

# The query user is part of the fitted data and is returned as its own
# neighbour, so ask for one extra neighbour (capped at the number of users).
n_query = min(N_NEIGHBORS + 1, user_item_matrix.shape[0])
distances, indices = knn.kneighbors(user_item_matrix[[user_idx]], n_neighbors=n_query)

# Drop the query user by index rather than by position: with tied distances
# (identical rating vectors) the user is not guaranteed to come back first.
neighbor_indices = indices[0]
similar_users = neighbor_indices[neighbor_indices != user_idx][:N_NEIGHBORS]

# Average only real ratings. Including the 0 placeholders of neighbours who
# never rated the item would drag the prediction towards 0.
neighbor_ratings = item_ratings[similar_users]
rated_by_neighbors = neighbor_ratings[neighbor_ratings > 0]
rated_by_anyone = item_ratings[item_ratings > 0]

if rated_by_neighbors.size > 0:
    predicted_rating = rated_by_neighbors.mean()
elif rated_by_anyone.size > 0:
    # No neighbour rated the item: fall back to the item's overall mean rating.
    predicted_rating = rated_by_anyone.mean()
else:
    # Nobody rated the item at all: there is nothing to base a prediction on.
    predicted_rating = np.nan

print(f"Predicted rating for User {user_idx+1} on Item {item_idx+1}: {predicted_rating}")
