In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from ast import literal_eval

class MovieRecommender:
    """Content-based movie recommender built on TF-IDF text features.

    The text columns listed in ``TEXT_COLUMNS`` are concatenated into a single
    ``combined_features`` string per row, vectorised with TF-IDF, and compared
    pairwise with cosine similarity. ``self.cosine_sim[i][j]`` is the
    similarity between the rows at *positions* ``i`` and ``j`` of ``self.df``.
    """

    # Location the notebook originally always read from. Kept as a fallback so
    # existing callers that pass a non-existent path keep working.
    DEFAULT_DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/movies.csv'

    # Text columns merged into the `combined_features` column.
    TEXT_COLUMNS = ('genre', 'cast_name', 'director_name', 'writer_name', 'storyline')

    # Columns returned by the "top rated" queries. 'imbd_rating' is the
    # spelling this file uses for the rating column, so it is kept verbatim.
    RATING_COLUMN = 'imbd_rating'
    SUMMARY_COLUMNS = ['title', 'year', 'genre', 'imbd_rating']

    def __init__(self, data_path):
        """Load the movie dataset and build the similarity matrix.

        Parameters:
        data_path (str): Path of the movies CSV file. If no file exists at
            this path, ``DEFAULT_DATA_PATH`` is read instead.
        """
        try:
            self.df = pd.read_csv(data_path)
        except FileNotFoundError:
            # Previously `data_path` was ignored entirely; fall back to the
            # old hard-coded location rather than breaking those callers.
            self.df = pd.read_csv(self.DEFAULT_DATA_PATH)
        self.prepare_data()
        self.create_content_matrix()

    def prepare_data(self):
        """Clean the text columns and build the `combined_features` column."""
        columns = list(self.TEXT_COLUMNS)

        # Missing values become empty strings, and everything is cast to str
        # so that the concatenation below cannot fail on a non-text column.
        for column in columns:
            self.df[column] = self.df[column].fillna('').astype(str)

        # Space-separated concatenation, in the order given by TEXT_COLUMNS.
        combined = self.df[columns[0]]
        for column in columns[1:]:
            combined = combined + ' ' + self.df[column]
        self.df['combined_features'] = combined

    def create_content_matrix(self):
        """Create the TF-IDF matrix and the pairwise cosine-similarity matrix."""
        self.tfidf = TfidfVectorizer(
            stop_words='english',
            max_features=5000
        )
        self.tfidf_matrix = self.tfidf.fit_transform(self.df['combined_features'])
        # One row and one column per movie in self.df, in positional order.
        self.cosine_sim = cosine_similarity(self.tfidf_matrix, self.tfidf_matrix)

    def get_recommendations(self, movie_title, n_recommendations=5):
        """
        Get movie recommendations based on similarity to the input movie.

        Parameters:
        movie_title (str): Title of the movie to base recommendations on
            (matched case-insensitively; the first matching row is used)
        n_recommendations (int): Number of recommendations to return;
            values below 1 yield an empty list

        Returns:
        list: One dict per recommended movie with the keys 'title', 'year',
            'genre' and 'similarity_score' (a percentage rounded to 2
            decimals), most similar first. The input movie itself is never
            included.
        str: An error message if the movie is not found or another error
            occurs. Callers must check for this before iterating.
        """
        try:
            # Work with row *positions*: cosine_sim and iloc are positional,
            # whereas the DataFrame index labels need not be 0..n-1.
            is_match = (self.df['title'].str.lower() == movie_title.lower())
            positions = np.flatnonzero(is_match.fillna(False).to_numpy(dtype=bool))
            if positions.size == 0:
                return f"Movie '{movie_title}' not found in the database."
            movie_pos = int(positions[0])

            scores = np.asarray(self.cosine_sim[movie_pos], dtype=float)

            # Stable descending sort: ties stay in ascending row order.
            ranked = np.argsort(-scores, kind='stable')

            # Drop the input movie by position. Skipping "the first result"
            # is wrong when another row ties with it at the top score.
            ranked = ranked[ranked != movie_pos][:max(n_recommendations, 0)]

            recommendations = []
            for pos in ranked:
                row = self.df.iloc[pos]
                recommendations.append({
                    'title': row['title'],
                    'year': row['year'],
                    'genre': row['genre'],
                    'similarity_score': round(float(scores[pos]) * 100, 2)
                })

            return recommendations

        except Exception as e:
            # Existing contract: failures are reported as a message string.
            return f"An error occurred: {str(e)}"

    def _top_rated(self, column, pattern, count):
        """Return the `count` highest-rated rows whose `column` contains `pattern`."""
        # The match is case-insensitive and missing values never match. Note
        # that `pattern` is interpreted as a regular expression by pandas.
        matching = self.df[self.df[column].str.contains(pattern, case=False, na=False)]
        return matching.nlargest(count, self.RATING_COLUMN)[self.SUMMARY_COLUMNS]

    def get_movies_by_genre(self, genre, n_movies=5):
        """Get top-rated movies by genre.

        Parameters:
        genre (str): Text to look for in the 'genre' column
        n_movies (int): Maximum number of movies to return

        Returns:
        DataFrame: 'title', 'year', 'genre' and 'imbd_rating' of the matching
            movies, highest rated first
        """
        return self._top_rated('genre', genre, n_movies)

    def get_director_recommendations(self, director_name, n_recommendations=5):
        """Get top-rated movies by a specific director.

        Parameters:
        director_name (str): Text to look for in the 'director_name' column
        n_recommendations (int): Maximum number of movies to return

        Returns:
        DataFrame: 'title', 'year', 'genre' and 'imbd_rating' of the matching
            movies, highest rated first
        """
        return self._top_rated('director_name', director_name, n_recommendations)

# Example usage: build the recommender, then print title-based and
# genre-based recommendations.
if __name__ == "__main__":
    # Initialize the recommender
    recommender = MovieRecommender('movies.csv')

    # Get recommendations for a movie
    movie_title = "The Godfather"
    recommendations = recommender.get_recommendations(movie_title)

    if isinstance(recommendations, str):
        # get_recommendations reports failures (e.g. unknown title) as a
        # plain message string; iterating over it would crash on rec['title'].
        print(f"\n{recommendations}")
    else:
        print(f"\nRecommendations based on '{movie_title}':")
        for rec in recommendations:
            print(f"- {rec['title']} ({rec['year']}) - Genre: {rec['genre']} - Similarity: {rec['similarity_score']}%")

    # Get recommendations by genre
    genre = "Drama"
    genre_recommendations = recommender.get_movies_by_genre(genre)

    print(f"\nTop {len(genre_recommendations)} {genre} movies:")
    for _, movie in genre_recommendations.iterrows():
        print(f"- {movie['title']} ({movie['year']}) - Rating: {movie['imbd_rating']}")


Recommendations based on 'The Godfather':
- The Godfather Part II (1974) - Genre: Crime,Drama - Similarity: 53.43%
- Apocalypse Now (1979) - Genre: Drama,Mystery,War - Similarity: 37.95%
- Goodfellas (1990) - Genre: Biography,Crime,Drama - Similarity: 14.72%
- Casino (1995) - Genre: Crime,Drama - Similarity: 12.63%
- Raging Bull (1980) - Genre: Biography,Drama,Sport - Similarity: 12.06%

Top 5 Drama movies:
- The Shawshank Redemption (1994) - Rating: 9.3
- The Godfather (1972) - Rating: 9.2
- The Dark Knight (2008) - Rating: 9.0
- The Godfather Part II (1974) - Rating: 9.0
- 12 Angry Men (1957) - Rating: 9.0
