In [118]:
import pandas as pd
from sklearn.metrics import mean_squared_error

from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix
#a simple function that returns true if the movie is in the dataset or false if there isn't
def is_movie_id_in_dataset(id, dataset):
    """
    Check if a movie ID is present in the dataset.
    Returns,True if the user ID is in the dataset, False otherwise.
    """
    return id in dataset['movieId'].unique()

# Assuming 'ratings.csv' has columns: userId, movieId, rating
ratings_df = pd.read_csv("ratings.csv")

# Drop timestamp column
ratings_df = ratings_df.drop("timestamp", axis=1)

# Create a user-item matrix (pivot table)
user_movie_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Convert the user-item matrix to a sparse matrix for efficiency
user_movie_matrix_sparse = csr_matrix(user_movie_matrix.values)

# Build a Nearest Neighbors model
knn_model = NearestNeighbors( algorithm='kd_tree', n_neighbors=7)
knn_model.fit(user_movie_matrix_sparse)

# Choose a user for whom you want to make recommendations (replace 'user_id' with the desired user's ID)
user_id = 295

user_index = user_movie_matrix.index.get_loc(user_id)
actual_ratings = ratings_df[ratings_df['userId'] == user_id].set_index('movieId')['rating']

# Assuming you have obtained the predicted ratings using the k-nearest neighbors model
predicted_ratings = user_movie_matrix.loc[user_id]

# Keep only the overlapping movies for which we have both actual and predicted ratings
common_movies = actual_ratings.index.intersection(predicted_ratings.index)
actual_ratings = actual_ratings.loc[common_movies]
predicted_ratings = predicted_ratings.loc[common_movies]

# Evaluate Mean Squared Error
mse = mean_squared_error(actual_ratings, predicted_ratings)

print(f"Mean Squared Error: {mse}")
# Get the k-nearest neighbors for the chosen user
distances, indices = knn_model.kneighbors(user_movie_matrix_sparse[user_index], n_neighbors=7 + 1)

# Exclude the user itself from the recommendations
neighbor_indices = indices.flatten()[1:]

# Print the indices of the nearest neighbors
print("Nearest Neighbors Indices:", neighbor_indices)
movie_d = pd.read_csv('movies.csv')
#some times we get Movies Id that are not in the database, because the data base has empty cells
five_moves_fill = 0
movies_R = []

for j in range(7):
    if(is_movie_id_in_dataset(neighbor_indices[j], movie_d)):
        movies_R.append(neighbor_indices[j])
        five_moves_fill += 1
        if(five_moves_fill == 5):
            break
movie_d[movie_d['movieId'].isin(movies_R)]      
# You can use these indices to recommend movies based on what the neighbors have liked


Mean Squared Error: 0.0
Nearest Neighbors Indices: [432 441 337 548 430 244 117]




Unnamed: 0,movieId,title,genres
295,337,What's Eating Gilbert Grape (1993),Drama
374,430,Calendar Girl (1993),Comedy|Drama
376,432,City Slickers II: The Legend of Curly's Gold (...,Adventure|Comedy|Western
384,441,Dazed and Confused (1993),Comedy
480,548,Terminal Velocity (1994),Action|Mystery|Thriller


In [116]:
#THE implimentation with the category best movie as prefernce

import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.neighbors import NearestNeighbors
from scipy.sparse import csr_matrix

# Load ratings data
ratings_df = pd.read_csv("ratings.csv")
ratings_df = ratings_df.drop("timestamp", axis=1)

# Create a user-item matrix (pivot table)
user_movie_matrix = ratings_df.pivot(index='userId', columns='movieId', values='rating').fillna(0)
user_movie_matrix_sparse = csr_matrix(user_movie_matrix.values)

# Build a Nearest Neighbors model
knn_model = NearestNeighbors(algorithm='kd_tree', n_neighbors=7)
knn_model.fit(user_movie_matrix_sparse)

# Choose a user for whom you want to make recommendations
user_id = 295

# Get the index of the chosen user
user_index = user_movie_matrix.index.get_loc(user_id)

# Get the k-nearest neighbors for the chosen user
distances, indices = knn_model.kneighbors(user_movie_matrix_sparse, n_neighbors=7 + 1)
neighbor_indices = indices.flatten()[1:]

# Load movies data
movies_df = pd.read_csv('movies.csv')

# Extract one-hot encoded genres
one_hot_genres = movies_df['genres'].str.get_dummies('|')

# Combine the one-hot encoded genres with the movies DataFrame
movies_df = pd.concat([movies_df, one_hot_genres], axis=1)

# Filter movies that have genres similar to the user's highly-rated movies
similar_genres_movies = movies_df[movies_df['movieId'].isin(user_movie_matrix.columns)]

# Calculate the average rating for each movie
similar_genres_movies['avg_rating'] = user_movie_matrix.loc[:, similar_genres_movies['movieId']].mean(axis=0).values

# Exclude movies that have already been seen by the user
seen_movies = ratings_df[ratings_df['userId'] == user_id]['movieId'].tolist()
similar_genres_movies = similar_genres_movies[~similar_genres_movies['movieId'].isin(seen_movies)]

# Sort the movies based on average rating (you may use other ranking criteria)
similar_genres_movies = similar_genres_movies.sort_values(by='avg_rating', ascending=False)

# Take the top 5 recommended movies
top_5_movies = similar_genres_movies.head(5)

# Print the top 5 recommended movies
print("Top 5 Recommended Movies:")
print(top_5_movies[['movieId', 'title', 'genres']])




Top 5 Recommended Movies:
      movieId                                      title  \
314       356                        Forrest Gump (1994)   
1939     2571                         Matrix, The (1999)   
510       593           Silence of the Lambs, The (1991)   
224       260  Star Wars: Episode IV - A New Hope (1977)   
97        110                          Braveheart (1995)   

                        genres  
314   Comedy|Drama|Romance|War  
1939    Action|Sci-Fi|Thriller  
510      Crime|Horror|Thriller  
224    Action|Adventure|Sci-Fi  
97            Action|Drama|War  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  similar_genres_movies['avg_rating'] = user_movie_matrix.loc[:, similar_genres_movies['movieId']].mean(axis=0).values
