In [1]:
import pandas as pd
from scipy.sparse import csr_matrix
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load user ratings
ratings = pd.read_csv('ratings.csv')
# Load movie titles
movies = pd.read_csv('movies.csv')

In [3]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,17,4.0,944249077
1,1,25,1.0,944250228
2,1,29,2.0,943230976
3,1,30,5.0,944249077
4,1,32,5.0,943228858


In [4]:
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
Movie = pd.merge(ratings, movies, on='movieId')
print(Movie.head())

# Filter to include only the top 500 movies by the number of ratings
topmovie = Movie['title'].value_counts().nlargest(500).index
filterrating = Movie[Movie['title'].isin(topmovie)]
print(topmovie)
print(filterrating)

aggrerating = filterrating.groupby(['userId', 'title']).agg({'rating': 'mean'}).reset_index()
# Perform the pivot operation on the reduced dataset
moviematrix = aggrerating.pivot(index='userId', columns='title', values='rating').fillna(0)
print(moviematrix.head())


   userId  movieId  rating   timestamp                         title  \
0       1       17     4.0   944249077  Sense and Sensibility (1995)   
1       3       17     5.0  1084485217  Sense and Sensibility (1995)   
2      15       17     4.5  1289858271  Sense and Sensibility (1995)   
3      28       17     4.0   961513829  Sense and Sensibility (1995)   
4      29       17     4.0   845056111  Sense and Sensibility (1995)   

          genres  
0  Drama|Romance  
1  Drama|Romance  
2  Drama|Romance  
3  Drama|Romance  
4  Drama|Romance  
Index(['Shawshank Redemption, The (1994)', 'Forrest Gump (1994)',
       'Pulp Fiction (1994)', 'Matrix, The (1999)',
       'Silence of the Lambs, The (1991)',
       'Star Wars: Episode IV - A New Hope (1977)', 'Fight Club (1999)',
       'Jurassic Park (1993)', 'Schindler's List (1993)',
       'Lord of the Rings: The Fellowship of the Ring, The (2001)',
       ...
       'Maltese Falcon, The (1941)', 'Fantasia (1940)', 'Troy (2004)',
       'Des

In [6]:
# Convert movie matrix to a sparse matrix format
moviesparse = csr_matrix(moviematrix.values)

In [7]:

# Fit the NearestNeighbors model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=20, n_jobs=-1)
model_knn.fit(moviesparse)


NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=20)

In [10]:
# Example: Find top 10 similar users for a specific user
user = int(input("Enter user no: "))
useri = user-1
distances, position = model_knn.kneighbors(moviesparse[useri], n_neighbors=10)

print(f"Top 10 similar users for User {user}:\n", position + 1)

Enter user no: 4
Top 10 similar users for User 4:
 [[     4  88787 175882 159574  44652  97587  33350 124903 152042  87048]]


In [12]:
def movierecommendation(userid, moviesparse, position, movies, top_n=5):
    useri = userid - 1
    similaruser = position.flatten()
    
    # Aggregate ratings from similar users
    similarusers = moviesparse[similaruser].toarray().mean(axis=0)
    
    # Sort and filter movies not yet rated by the user
    userrating = moviesparse[useri].toarray().flatten()
    unratemovie = np.where(userrating == 0)[0]
    recommendmovieposi = np.argsort(similarusers[unratemovie])[-top_n:]
    
    recommendedmovies = movies.iloc[unratemovie[recommendmovieposi]]
    return recommendedmovies[['title']]

# Get movie recommendations for the example user
recommendations = movierecommendation(user, moviesparse, position, movies)
print(f"Recommended movies for User {user}:\n", recommendations)

Recommended movies for User 4:
                                    title
385  Faster Pussycat! Kill! Kill! (1965)
487      Manhattan Murder Mystery (1993)
372                         Speed (1994)
24              Leaving Las Vegas (1995)
162         Devil in a Blue Dress (1995)
