In [13]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the movie table; later code relies on the 'Film', 'Genre',
# 'Audience score %' and 'Profitability' columns being present.
data = pd.read_csv('movies.csv')

# Build one space-separated text "document" per film from its genre and the
# string forms of its audience score and profitability.
# NOTE(review): the numeric columns are stringified, so TF-IDF sees them as
# opaque tokens rather than magnitudes (and a missing number presumably
# becomes the literal text 'nan') -- confirm this is the intended notion of
# similarity.
genre_text = data['Genre'].fillna('')  # missing genre -> empty string
audience_text = data['Audience score %'].astype(str)
profitability_text = data['Profitability'].astype(str)
data['Features'] = genre_text + ' ' + audience_text + ' ' + profitability_text

# Turn each film's feature text into a TF-IDF vector (English stop words
# are dropped).
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['Features'])

# Pairwise cosine similarity of every film against every other film; with a
# single argument the matrix is compared against itself.
cosine_sim = cosine_similarity(tfidf_matrix)

# Recommendation function
def recommend_movies(movie_title, cosine_sim=cosine_sim, data=data, top_n=5):
    """Return the titles of the films most similar to *movie_title*.

    Args:
        movie_title: Title to look up in ``data['Film']``. An exact match is
            tried first; if there is none, the lookup is retried ignoring
            letter case and surrounding whitespace.
        cosine_sim: Square similarity matrix. Row/column *positions* must
            line up with the row positions of ``data``.
        data: DataFrame with a ``Film`` column.
        top_n: Maximum number of recommendations to return (default 5).

    Returns:
        A ``Series`` with up to ``top_n`` film titles, ordered from most to
        least similar, or the string ``"Movie not found in the dataset!"``
        when no title matches.
    """
    titles = data['Film']

    # Work with row *positions*, not index labels: ``cosine_sim`` and
    # ``.iloc`` are both positional, so a label taken from ``.index`` is only
    # correct when ``data`` happens to have the default 0..n-1 index.
    positions = (
        (titles == movie_title).to_numpy(dtype=bool, na_value=False).nonzero()[0]
    )

    if positions.size == 0 and isinstance(movie_title, str):
        # Be forgiving with typed input: ignore case and stray whitespace.
        wanted = movie_title.strip().casefold()
        normalized = titles.astype(str).str.strip().str.casefold()
        # ``notna`` keeps a missing title from matching the text "nan".
        loose = (normalized == wanted) & titles.notna()
        positions = loose.to_numpy(dtype=bool, na_value=False).nonzero()[0]

    if positions.size == 0:
        return "Movie not found in the dataset!"

    idx = int(positions[0])
    scores = cosine_sim[idx]

    # Exclude the query film explicitly instead of assuming it sorts first:
    # duplicate rows (similarity 1.0) or an all-zero feature vector can put
    # another film ahead of it, which would make the film recommend itself.
    # ``sorted`` is stable, so ties keep their original dataset order.
    ranked = sorted(
        (pos for pos in range(len(scores)) if pos != idx),
        key=lambda pos: scores[pos],
        reverse=True,
    )

    return titles.iloc[ranked[:max(top_n, 0)]]

# Ask the user which film the recommendations should be based on
movie_title = input("Enter the name of the movie to get recommendations: ")
recommended_movies = recommend_movies(movie_title)

# recommend_movies returns a plain string when the lookup fails and a
# Series of titles otherwise, so branch on the result type.
if not isinstance(recommended_movies, str):
    print(f"Movies similar to '{movie_title}':")
    # index=False prints only the titles, without the row labels
    print(recommended_movies.to_string(index=False))
else:
    print(recommended_movies)


Enter the name of the movie to get recommendations:  The Proposal


Movies similar to 'The Proposal':
            Penelope
  Our Family Wedding
        When in Rome
Sex and the City Two
  Sex and the City 2
