In [1]:
import pandas as pd
import numpy as np

!pip install scikit-learn

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity



In [12]:
def load_dataset(path="IMDB Top 250 Movies.csv"):
    """Load the movie table and keep only the columns used for recommendations.

    Args:
        path: Location of the CSV file (anything ``pandas.read_csv`` accepts).
            Defaults to "IMDB Top 250 Movies.csv", resolved relative to the
            current working directory.

    Returns:
        A DataFrame with the columns ``movie``, ``genre`` and ``description``;
        ``movie`` and ``description`` are the CSV's ``name`` and ``tagline``
        columns renamed.

    Raises:
        KeyError: If the CSV lacks any of the ``name``, ``genre`` or
            ``tagline`` columns.
    """
    df = pd.read_csv(path)
    # Select first, then rename, so unrelated CSV columns never reach callers.
    return df[['name', 'genre', 'tagline']].rename(columns={'name': 'movie', 'tagline': 'description'})

In [22]:
def recommend_movies(user_input, dataset, top_n=5):
    """Recommend the movies whose genre/description text best matches a query.

    Each movie's genre and description are joined into a single document. The
    documents and the query are TF-IDF vectorized together (English stop words
    removed) and the movies are ranked by cosine similarity to the query.

    Args:
        user_input: Free-text query describing what the user wants to watch.
        dataset: DataFrame with ``movie``, ``genre`` and ``description``
            columns. It is not modified.
        top_n: Maximum number of recommendations to return. A value <= 0
            yields an empty result.

    Returns:
        A DataFrame with the columns ``movie`` and ``genre``, best match
        first, holding at most ``top_n`` rows. Rows are returned even when
        their similarity to the query is zero.
    """
    # Nothing to rank: hand back an empty frame with the usual columns rather
    # than running the vectorizer on a corpus that holds only the query.
    # A non-positive top_n is caught here too, because a "[-0:]" slice would
    # otherwise select every row instead of none.
    if top_n <= 0 or dataset.empty:
        return dataset.iloc[:0][['movie', 'genre']]

    # Build the per-movie text in local variables so the caller's DataFrame
    # does not gain a helper column as a side effect. Missing genres or
    # descriptions become empty strings, since a NaN entry is not a valid
    # document for the vectorizer.
    genre_text = dataset['genre'].fillna("").astype(str)
    description_text = dataset['description'].fillna("").astype(str)
    documents = (genre_text + " " + description_text).tolist()

    # The query goes in as the last document so it shares the vocabulary and
    # IDF weights with the movie texts.
    documents.append(user_input)

    # Convert text into TF-IDF vectors
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Similarity of the query (last row) to every movie (all other rows);
    # the result has a single row, so take it as a flat score vector.
    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])[0]

    # Highest scores first, truncated to top_n. Reversing the ascending
    # argsort and slicing from the front keeps the same order as taking the
    # last top_n entries and reversing them, without the "-0" pitfall.
    top_indices = similarity_scores.argsort()[::-1][:top_n]

    return dataset.iloc[top_indices][['movie', 'genre']]

In [23]:
# Demo: run one sample query and list the ranked recommendations
if __name__ == "__main__":
    dataset = load_dataset()
    user_query = "I love scary sci-fi movies"
    recommendations = recommend_movies(user_query, dataset)

    # Number the hits from 1 and show each title with its genre
    ranked_pairs = zip(recommendations['movie'], recommendations['genre'])
    for position, (title, genre_label) in enumerate(ranked_pairs, start=1):
        print(f"{position}. {title} ({genre_label})")

1. Aliens (Action,Adventure,Sci-Fi)
2. Terminator 2: Judgment Day (Action,Sci-Fi)
3. Logan (Action,Drama,Sci-Fi)
4. The Matrix (Action,Sci-Fi)
5. The Thing (Horror,Mystery,Sci-Fi)
