In [1]:
import ast

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the movie metadata and keep only the columns used below.
df = pd.read_csv('/kaggle/input/tmdb-movie-metadata/tmdb_5000_movies.csv')

features = ['id', 'title', 'genres', 'keywords', 'popularity']

# Drop incomplete rows, then renumber the index 0..n-1 so that index labels
# coincide with row positions in `tfidf_matrix` / `cosine_sim` built below.
df = df[features].dropna().reset_index(drop=True)


def _join_names(raw):
    """Turn a serialized list of ``{'name': ...}`` records into one string.

    ``raw`` is parsed with ``ast.literal_eval`` (literals only) instead of
    ``eval`` so that text read from the CSV can never execute code.
    Presumably each cell is a JSON-style list of dicts -- confirm against
    the dataset if parsing fails.
    """
    return ' '.join(item['name'] for item in ast.literal_eval(raw))


# Replace the serialized lists with plain space-separated names.
df['genres'] = df['genres'].apply(_join_names)
df['keywords'] = df['keywords'].apply(_join_names)

# Vectorize the combined genre + keyword text of every movie.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['genres'] + ' ' + df['keywords'])

# Pairwise similarity between all movies; row i corresponds to df.iloc[i].
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def recommend_movies(title, cosine_sim=cosine_sim, df=df, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Exact title to look up in ``df['title']``. If several rows share the
        title, the first one is used.
    cosine_sim : 2-D array-like
        Similarity matrix indexed by row position; ``cosine_sim[i][j]`` is
        read as the similarity between ``df.iloc[i]`` and ``df.iloc[j]``.
    df : pandas.DataFrame
        Frame with a ``'title'`` column, in the same row order as
        ``cosine_sim``.
    top_n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.Series
        Titles of up to ``top_n`` other movies, most similar first.

    Raises
    ------
    IndexError
        If no row of ``df`` has the given title.
    """
    # Find the movie by row *position*. `cosine_sim` and `.iloc` are both
    # positional, so using an index label here would select the wrong row
    # whenever df's index is not exactly 0..n-1 (e.g. after dropna()).
    positions = (df['title'] == title).to_numpy().nonzero()[0]
    if positions.size == 0:
        raise IndexError(f"No movie titled {title!r} found in the dataset")
    idx = int(positions[0])

    # Rank every *other* movie by similarity. The queried movie is excluded
    # explicitly rather than by slicing off the first entry: with ties or
    # duplicate feature vectors it is not guaranteed to sort first.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )

    movie_indices = [pos for pos, _ in ranked[:top_n]]

    return df['title'].iloc[movie_indices]


In [2]:
# Demo: look up recommendations for one title and show them under a header.
movie_title = 'Gravity'
recommendations = recommend_movies(movie_title)
# Single print call; sep="\n" puts the header and the Series on separate lines.
print(f"Recommended movies for '{movie_title}':", recommendations, sep="\n")

Recommended movies for 'Gravity':
1473         The Astronaut's Wife
1951                   Space Dogs
373               Mission to Mars
1650               Wing Commander
4332               Silent Running
4108    In the Shadow of the Moon
1531                    Moonraker
1354                 Space Chimps
2966        2001: A Space Odyssey
3158                        Alien
Name: title, dtype: object
