In [4]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [5]:
# Load the movie table and the ratings table from CSV files given by relative
# paths, i.e. resolved against the notebook's current working directory.
movies = pd.read_csv("movies.csv")
# NOTE(review): `ratings` is not referenced by any cell visible here — confirm
# it is used further down, otherwise this load can be dropped.
ratings = pd.read_csv("ratings.csv")

In [6]:
# Use each movie's genre string as its text feature for TF-IDF.
# Missing genres are replaced with an empty string: TfidfVectorizer cannot
# process NaN documents, so a single movie without genres would otherwise
# make the vectorizer cell fail.
movies['features'] = movies['genres'].fillna('')

In [7]:
# Build the vectorizer (English stop words are dropped), then learn the
# vocabulary from the feature text and encode every movie as one TF-IDF row.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=movies['features'])

In [8]:
# Pairwise cosine similarity between every pair of movie rows; with a single
# argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

In [9]:
def recommend_movie(title, cosine_sim=cosine_sim, top_n=5):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``movies['title']``. When several rows share
        the title, the first one is used.
    cosine_sim : array-like, optional
        Pairwise similarity matrix whose row/column ``i`` corresponds to the
        ``i``-th row of ``movies``. The default is captured when this cell
        runs, so re-run the cell after recomputing the global ``cosine_sim``.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    list of str or str
        Up to ``top_n`` titles ordered from most to least similar (the query
        movie itself is never included), or the message
        ``"Movie not found in dataset."`` when ``title`` is absent.
    """
    # Work with row positions rather than index labels: the similarity matrix
    # and ``.iloc`` below are both positional, so this stays correct even if
    # ``movies`` does not carry a default 0..n-1 index.
    matches = (movies['title'] == title).to_numpy().nonzero()[0]
    if len(matches) == 0:
        return "Movie not found in dataset."
    idx = int(matches[0])

    # Drop the query movie explicitly. Slicing off the first sorted entry is
    # not enough: any earlier row with identical features also scores 1.0 and,
    # because the sort is stable, would be ranked ahead of the query, so the
    # query itself could end up in its own recommendations.
    sim_scores = [
        (pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx
    ]
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # max(..., 0) keeps a negative top_n from turning into a "all but last n" slice.
    movie_indices = [pos for pos, _ in sim_scores[:max(top_n, 0)]]
    return movies['title'].iloc[movie_indices].tolist()

In [10]:
# Demo: look up one title and print its recommendations. The header is printed
# before the lookup runs, so it still appears if the call below raises.
print("Movies similar to Inception:")
print(recommend_movie("Inception"))

Movies similar to Inception:
['The Matrix', 'Avengers: Endgame', 'Heat', 'Toy Story', 'Jumanji']
