In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
# Read only the columns the rest of the notebook uses, with 32-bit numeric
# dtypes instead of pandas' 64-bit defaults.
movies_df = pd.read_csv(
    'movies.csv',
    dtype={'movieId': 'int32', 'title': 'str'},
    usecols=['movieId', 'title'],
)
ratings_df = pd.read_csv(
    'ratings.csv',
    dtype={'userId': 'int32', 'movieId': 'int32', 'rating': 'float32'},
    usecols=['userId', 'movieId', 'rating'],
)


In [3]:
# Attach each rating's movie title (inner join on the shared movieId column).
combined_df = ratings_df.merge(movies_df, how='inner', on='movieId')

In [4]:
# One row per title with the number of (non-null) ratings it received.
movie_rating_count = (
    combined_df
    .groupby('title')['rating']
    .count()
    .reset_index(name='totalRatingCount')
)

In [5]:
# Broadcast the per-title rating count back onto every individual rating row.
combined_df_with_count = combined_df.merge(
    movie_rating_count,
    how='left',
    on='title',
)


In [6]:
# Minimum number of ratings a title needs for its rows to be kept.
popularity_threshold = 50
popular_movies = combined_df_with_count.loc[
    lambda frame: frame['totalRatingCount'] >= popularity_threshold
]


In [7]:
# Title x user table of ratings; pivot_table averages duplicate (title, user)
# pairs by default, and missing ratings are replaced with 0.
movie_features_df = (
    popular_movies
    .pivot_table(values='rating', index='title', columns='userId')
    .fillna(0)
)

In [8]:
# Compressed sparse row copy of the zero-filled ratings table, used to fit the model.
movie_features_df_matrix = csr_matrix(movie_features_df.to_numpy())


In [9]:
# Brute-force nearest-neighbour search over the movie rows using cosine distance.
model_knn = NearestNeighbors(
    algorithm='brute',
    metric='cosine',
)
model_knn.fit(movie_features_df_matrix)


In [10]:
def recommend_similar_movies(movie_title, num_recommendations=5):
    """Print the movies whose rating vectors are closest to ``movie_title``.

    Looks the title up in the module-level ``movie_features_df``, asks the
    module-level ``model_knn`` for its nearest neighbours and prints them as a
    ranked list together with their distances.

    Parameters
    ----------
    movie_title : str
        A label from ``movie_features_df.index``.
    num_recommendations : int, default 5
        Maximum number of movies to print. Fewer are printed when the feature
        table does not contain that many other movies.

    Returns
    -------
    None
        The recommendations are written to stdout only.

    Raises
    ------
    KeyError
        If ``movie_title`` is not a row of ``movie_features_df``.
    """
    if movie_title not in movie_features_df.index:
        raise KeyError(
            f"{movie_title!r} is not in the movie feature table; it may be "
            "misspelled or may have been removed by the popularity filter."
        )

    # Find the index of the queried movie
    query_index = movie_features_df.index.get_loc(movie_title)

    # One extra neighbour is requested because the query movie itself is
    # normally among the results. Clamp to the number of movies in the table
    # so a large request does not ask the model for more rows than exist.
    n_neighbors = min(num_recommendations + 1, len(movie_features_df))
    query_vector = movie_features_df.iloc[query_index, :].values.reshape(1, -1)
    distances, indices = model_knn.kneighbors(query_vector, n_neighbors=n_neighbors)
    distances = distances.flatten()
    indices = indices.flatten()

    # Remove the query movie by index rather than by position: when another
    # movie has an identical rating vector the query is not guaranteed to be
    # the first result, and skipping position 0 would drop a real neighbour
    # while recommending the movie to itself.
    is_other_movie = indices != query_index
    distances = distances[is_other_movie][:num_recommendations]
    indices = indices[is_other_movie][:num_recommendations]

    # Print recommendations
    print(f"Recommendations for {movie_title}:\n")
    for rank, (neighbor_index, distance) in enumerate(zip(indices, distances), start=1):
        print(f"{rank}: {movie_features_df.index[neighbor_index]}, with distance of {distance}")


In [11]:
# Example usage
query_movie_title = "Toy Story (1995)"
recommend_similar_movies(query_movie_title)

Recommendations for Toy Story (1995):

1: Toy Story 2 (1999), with distance of 0.427398681640625
2: Jurassic Park (1993), with distance of 0.4343631863594055
3: Independence Day (a.k.a. ID4) (1996), with distance of 0.435738205909729
4: Star Wars: Episode IV - A New Hope (1977), with distance of 0.4426117539405823
5: Forrest Gump (1994), with distance of 0.45290398597717285
