In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [3]:
def _preview(label, frame):
    """Print ``label`` on its own line, then the first rows of ``frame`` and a blank line."""
    print(label)
    print(frame.head(), "\n")


# Load the movie catalogue and the per-user ratings from the working directory.
movies = pd.read_csv("movies.csv")
ratings = pd.read_csv("ratings.csv")

_preview("Movies Dataset:", movies)
_preview("Ratings Dataset:", ratings)


# One row per userId, one column per movieId. Cells with no rating are filled
# with 0, so the similarity step below sees "not rated" as a rating of zero.
user_movie_matrix = pd.pivot_table(
    ratings, values="rating", index="userId", columns="movieId"
).fillna(0)

_preview("User-Movie Matrix:", user_movie_matrix)


# Transpose so every row is one movie's vector of user ratings; the cosine
# similarity is then computed movie-against-movie rather than user-against-user.
movie_similarity = cosine_similarity(user_movie_matrix.transpose())
movie_similarity_df = pd.DataFrame(
    movie_similarity,
    index=user_movie_matrix.columns,
    columns=user_movie_matrix.columns,
)

_preview("Movie Similarity Matrix:", movie_similarity_df)


def recommend_movies(movie_id, n=5, similarity_df=None, movies_df=None):
    """Return up to ``n`` movies most similar to ``movie_id``, best match first.

    Parameters
    ----------
    movie_id
        Label of the query movie; must be a column of the similarity matrix.
    n : int, default 5
        Maximum number of recommendations. Values below 1 yield an empty frame.
    similarity_df : pandas.DataFrame, optional
        Movie-by-movie similarity matrix labelled by movie id on both axes.
        Defaults to the notebook-level ``movie_similarity_df``.
    movies_df : pandas.DataFrame, optional
        Catalogue containing ``movieId``, ``title`` and ``genres`` columns.
        Defaults to the notebook-level ``movies``.

    Returns
    -------
    pandas.DataFrame or list
        The ``movieId``, ``title`` and ``genres`` columns of the matching
        catalogue rows, ordered from most to least similar. Similar movies
        that are absent from the catalogue are omitted. If ``movie_id`` is not
        a column of the similarity matrix, a message is printed and an empty
        list is returned.
    """
    if similarity_df is None:
        similarity_df = movie_similarity_df
    if movies_df is None:
        movies_df = movies

    if movie_id not in similarity_df:
        print("Movie ID not found!")
        return []

    # Remove the query movie by label instead of assuming it sorts first: any
    # other movie that ties with it at the top score could otherwise take the
    # first slot, and the query movie would be recommended to itself.
    similar_scores = (
        similarity_df[movie_id]
        .drop(labels=movie_id, errors="ignore")
        .sort_values(ascending=False, kind="mergesort")  # stable: ties keep matrix order
    )
    # Clamp n so a negative value cannot turn into a "all but the last k" slice.
    top_movies = similar_scores.head(max(n, 0)).index

    recommended = movies_df[movies_df["movieId"].isin(top_movies)][["movieId", "title", "genres"]]

    # The isin() filter yields rows in catalogue order; re-order them by their
    # position in the similarity ranking so the best match comes first.
    rank = top_movies.get_indexer(recommended["movieId"])
    return recommended.iloc[rank.argsort(kind="mergesort")]


movie_to_search = 1

# Look the title up defensively: taking ``.values[0]`` of an empty selection
# raises IndexError when the id is absent from the movies table, which would
# abort the cell before recommend_movies can report "Movie ID not found!".
title_matches = movies.loc[movies["movieId"] == movie_to_search, "title"]
if title_matches.empty:
    search_label = f"movieId {movie_to_search}"
else:
    search_label = title_matches.iloc[0]

print(f"🔍 Recommendations for '{search_label}'\n")
print(recommend_movies(movie_to_search, n=5))

Movies Dataset:
   movieId          title                      genres
0        1     The Matrix               Action|Sci-Fi
1        2      John Wick             Action|Thriller
2        3      Toy Story     Animation|Comedy|Family
3        4   Finding Nemo  Animation|Adventure|Family
4        5  The Lion King      Animation|Drama|Family 

Ratings Dataset:
   userId  movieId  rating
0       1        1       5
1       1        2       4
2       1        3       3
3       2        2       5
4       2        3       4 

User-Movie Matrix:
movieId   1    2    3    4    5    6    7    8    9    10
userId                                                   
1        5.0  4.0  3.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
2        0.0  5.0  4.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
3        4.0  0.0  2.0  5.0  0.0  0.0  0.0  0.0  0.0  0.0
4        0.0  0.0  0.0  0.0  5.0  4.0  0.0  0.0  0.0  0.0
5        0.0  0.0  0.0  0.0  0.0  0.0  5.0  3.0  0.0  0.0 

Movie Similarity Matrix:
movieId        1         