In [1]:
!pip install pandas numpy neo4j



In [4]:
import pandas as pd
import logging
from neo4j import GraphDatabase

# Module-scoped logger used for all status and error messages in this cell
logger = logging.getLogger(__name__)
# Show INFO-level (and more severe) records through the root logger
logging.basicConfig(level=logging.INFO)

class MovieRecommendationSystem:
    """
    Build a Neo4j movie graph from a CSV dataset and query it for recommendations.

    The graph holds ``Movie``, ``Director`` and ``Actor`` nodes connected by
    ``DIRECTED_BY`` and ``STARRING`` relationships.
    """

    def __init__(self, uri, username, password, csv_path):
        """
        Initialize the movie recommendation system

        Loads and cleans the dataset first, then opens the Neo4j driver and
        checks that the server is actually reachable.

        Args:
            uri (str): Neo4j database URI
            username (str): Neo4j username
            password (str): Neo4j password
            csv_path (str): Path to the Bollywood movie dataset

        Raises:
            Exception: Re-raised if the dataset cannot be loaded, the driver
                cannot be created, or the server cannot be reached.
        """
        # Database connection parameters
        self._uri = uri
        self._username = username
        self._password = password

        # Load and preprocess data
        self.data = self.load_and_preprocess_data(csv_path)

        # Create Neo4j driver
        self._driver = None
        try:
            self._driver = GraphDatabase.driver(uri, auth=(username, password))
            # driver() is lazy: without an explicit check a wrong URI or
            # password would only surface on the first query.
            self._driver.verify_connectivity()
            logger.info("Neo4j connection established successfully")
        except Exception as e:
            logger.error(f"Failed to establish Neo4j connection: {e}")
            # Do not leak a half-initialized driver when the check fails.
            if self._driver is not None:
                self._driver.close()
                self._driver = None
            raise

    @staticmethod
    def _clean_dataframe(data):
        """
        Clean a raw movie DataFrame (pure transformation, no I/O or logging)

        Args:
            data (pandas.DataFrame): Raw dataset with the columns
                'movie_name', 'director', 'actors', 'runtime',
                'no_of_votes' and 'year_of_release'

        Returns:
            pandas.DataFrame: New DataFrame with normalized text columns,
                integer runtime/votes/year, and one row per movie title
        """
        # A movie without a title cannot be keyed in the graph, and the
        # original pipeline already required a release year.
        data = data.dropna(subset=['year_of_release', 'movie_name'])
        data['actors'] = data['actors'].fillna('Unknown')

        # Text standardization
        for column in ('movie_name', 'director', 'actors'):
            data[column] = data[column].str.strip().str.title()

        # De-duplicate AFTER normalization: "3 idiots" and " 3 Idiots " are
        # the same movie and would otherwise collide on the unique title.
        data = data.drop_duplicates(subset=['movie_name']).copy()

        # Clean runtime ("170 min" -> 170). astype(str) keeps this working
        # when pandas already parsed the column as numeric.
        runtime_text = data['runtime'].astype(str).str.replace(' min', '', regex=False)
        data['runtime'] = pd.to_numeric(runtime_text, errors='coerce')
        data = data.dropna(subset=['runtime'])
        data['runtime'] = data['runtime'].astype(int)

        # Clean votes ("1,234" -> 1234)
        votes_text = data['no_of_votes'].astype(str).str.replace(',', '', regex=False)
        data['no_of_votes'] = pd.to_numeric(votes_text, errors='coerce')
        data = data.dropna(subset=['no_of_votes'])
        data['no_of_votes'] = data['no_of_votes'].astype(int)

        # Ensure 'year_of_release' is an integer; unparseable years become 0
        data['year_of_release'] = (
            pd.to_numeric(data['year_of_release'], errors='coerce').fillna(0).astype(int)
        )
        return data

    def load_and_preprocess_data(self, csv_path):
        """
        Load and preprocess the movie dataset

        Args:
            csv_path (str): Path to the CSV file

        Returns:
            pandas.DataFrame: Cleaned and processed movie dataset

        Raises:
            Exception: Re-raised (after logging) if the file cannot be read
                or an expected column is missing.
        """
        try:
            data = self._clean_dataframe(pd.read_csv(csv_path))
        except Exception as e:
            logger.error(f"Error in data preprocessing: {e}")
            raise

        logger.info(f"Data preprocessed. Total movies: {len(data)}")
        return data

    def create_graph_database(self):
        """
        Create graph database with movie, actor, and director nodes

        Recreates the uniqueness constraints, upserts every row of
        ``self.data`` and logs the resulting node counts.

        Raises:
            Exception: Re-raised (after logging) on any database error.
        """
        def create_constraints(tx):
            # Updated constraint syntax for Neo4j
            tx.run("CREATE CONSTRAINT movie_title_unique FOR (m:Movie) REQUIRE m.title IS UNIQUE")
            tx.run("CREATE CONSTRAINT director_name_unique FOR (d:Director) REQUIRE d.name IS UNIQUE")
            tx.run("CREATE CONSTRAINT actor_name_unique FOR (a:Actor) REQUIRE a.name IS UNIQUE")

        def insert_movies(tx):
            for _, row in self.data.iterrows():
                title = row['movie_name']

                # MERGE on the unique key only and SET the rest. Merging on
                # the full property map would try to create a second node
                # (and hit the unique constraint) whenever a stored movie's
                # rating, votes, etc. differ from the CSV.
                tx.run("""
                    MERGE (m:Movie {title: $title})
                    SET m.imdb_id = $imdb_id,
                        m.year = toInteger($year),
                        m.runtime = $runtime,
                        m.rating = $rating,
                        m.votes = $votes
                """,
                title=title,
                imdb_id=row['imdb-id'],
                year=row['year_of_release'],
                runtime=row['runtime'],
                rating=row['IMDB_rating'],
                votes=row['no_of_votes'])

                # Create Director node and relationship (skipped when the
                # dataset has no director for this movie)
                director = row['director']
                if pd.notna(director) and director:
                    tx.run("""
                        MERGE (d:Director {name: $director})
                        WITH d
                        MATCH (m:Movie {title: $title})
                        MERGE (m)-[:DIRECTED_BY]->(d)
                    """, director=director, title=title)

                # Create Actor nodes and relationships
                for actor in row['actors'].split(','):
                    actor = actor.strip()
                    if not actor:
                        # A trailing or doubled comma yields an empty name
                        continue
                    tx.run("""
                        MERGE (a:Actor {name: $actor})
                        WITH a
                        MATCH (m:Movie {title: $title})
                        MERGE (m)-[:STARRING]->(a)
                    """, actor=actor, title=title)

        # Execute graph creation in a session
        with self._driver.session() as session:
            try:
                # First, drop existing constraints (if any)
                session.run("DROP CONSTRAINT movie_title_unique IF EXISTS")
                session.run("DROP CONSTRAINT director_name_unique IF EXISTS")
                session.run("DROP CONSTRAINT actor_name_unique IF EXISTS")

                # Create new constraints and insert data
                session.execute_write(create_constraints)
                session.execute_write(insert_movies)

                # Verify node creation (counts every node with that label
                # currently in the database)
                for label, noun in (("Movie", "movies"),
                                    ("Director", "directors"),
                                    ("Actor", "actors")):
                    result = session.run(f"MATCH (n:{label}) RETURN COUNT(n) AS node_count")
                    node_count = result.single()['node_count']
                    logger.info(f"Total {noun} inserted: {node_count}")

            except Exception as e:
                logger.error(f"Error creating graph database: {e}")
                raise

    def recommend_movies(self, movie_title, max_recommendations=3):
        """
        Recommend movies based on multiple strategies

        Strategies: same director, shared actor, and shared actor released
        within five years of the reference movie. Results are also printed.

        Args:
            movie_title (str): Title of the reference movie
            max_recommendations (int): Maximum number of recommendations per strategy

        Returns:
            dict: Strategy name ('by_director', 'by_actor', 'similar_movies')
                mapped to a list of {'title', 'rating'} dicts, or None if a
                database error occurred (the error is logged).
        """
        director_query = """
                MATCH (m:Movie {title: $title})-[:DIRECTED_BY]->(d:Director)
                WITH d
                MATCH (recommended:Movie)-[:DIRECTED_BY]->(d)
                WHERE recommended.title <> $title
                RETURN DISTINCT recommended.title AS movie_title,
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        actor_query = """
                MATCH (m:Movie {title: $title})-[:STARRING]->(a:Actor)
                WITH a
                MATCH (recommended:Movie)-[:STARRING]->(a)
                WHERE recommended.title <> $title
                RETURN DISTINCT recommended.title AS movie_title,
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        similar_query = """
                MATCH (m:Movie {title: $title})-[:STARRING]->(a:Actor)
                WITH a, m
                MATCH (recommended:Movie)-[:STARRING]->(a)
                WHERE recommended.title <> $title
                  AND abs(toInteger(recommended.year) - toInteger(m.year)) <= 5
                RETURN DISTINCT recommended.title AS movie_title,
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        def make_reader(cypher):
            # Build a transaction function that runs one recommendation query
            def read(tx):
                result = tx.run(cypher, title=movie_title, limit=max_recommendations)
                return [{'title': record['movie_title'], 'rating': record['rating']}
                        for record in result]
            return read

        with self._driver.session() as session:
            try:
                recommendations = {
                    'by_director': session.execute_read(make_reader(director_query)),
                    'by_actor': session.execute_read(make_reader(actor_query)),
                    'similar_movies': session.execute_read(make_reader(similar_query))
                }

                # Print recommendations
                print(f"\nRecommendations for '{movie_title}':")
                for strategy, movies in recommendations.items():
                    print(f"\nRecommended by {strategy.replace('_', ' ').title()}:")
                    for idx, movie in enumerate(movies, 1):
                        print(f"{idx}. {movie['title']} - Rating: {movie['rating']}")

                return recommendations
            except Exception as e:
                logger.error(f"Error in recommending movies: {e}")
                return None

    def close(self):
        """Close the Neo4j driver connection"""
        if self._driver:
            self._driver.close()
            logger.info("Neo4j connection closed")


# Example usage:
import os
from getpass import getpass

# Credentials are read from the environment (with an interactive prompt as a
# fallback for the password) so that secrets never live in the notebook.
# NOTE: any password that was previously committed in this cell should be
# treated as compromised and rotated.
uri = os.environ.get("NEO4J_URI", "neo4j+s://27328dbe.databases.neo4j.io")
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
csv_path = "F:/M 20.02.2023/downloads/archive (5)/bollywood_data_set.csv"

recommender = MovieRecommendationSystem(uri, username, password, csv_path)

try:
    # Create the database with movie, actor, and director nodes
    recommender.create_graph_database()

    # Recommend movies for a given movie
    movie_title = "3 Idiots"  # Example movie title
    recommender.recommend_movies(movie_title)
finally:
    # Close the connection even if graph creation or querying fails
    recommender.close()

INFO:__main__:Data preprocessed. Total movies: 5302
INFO:__main__:Neo4j connection established successfully
INFO:__main__:Total movies inserted: 5302
INFO:__main__:Total directors inserted: 2339
INFO:__main__:Total actors inserted: 10056
INFO:__main__:Neo4j connection closed



Recommendations for '3 Idiots':

Recommended by By Director:
1. Pk - Rating: 8.1
2. Munna Bhai M.B.B.S. - Rating: 8.1
3. Lage Raho Munna Bhai - Rating: 8.0

Recommended by By Actor:
1. Pk - Rating: 8.1
2. Rang De Basanti - Rating: 8.1
3. Mumbai Meri Jaan - Rating: 7.7

Recommended by Similar Movies:
1. Pk - Rating: 8.1
2. Rang De Basanti - Rating: 8.1
3. Mumbai Meri Jaan - Rating: 7.7


In [7]:
from neo4j import GraphDatabase
import logging

# Module-scoped logger used for all status and error messages in this cell
logger = logging.getLogger(__name__)
# Show INFO-level (and more severe) records through the root logger
logging.basicConfig(level=logging.INFO)

class MovieRecommendationSystem:
    """
    Query-only recommender over a Neo4j graph of Movie, Director and Actor nodes.
    """

    def __init__(self, uri, username, password):
        """
        Store the connection settings and open a Neo4j driver

        Args:
            uri (str): Neo4j database URI
            username (str): Neo4j username
            password (str): Neo4j password

        Raises:
            Exception: Re-raised (after logging) if the driver cannot be created.
        """
        # Keep the connection parameters on the instance
        self._uri, self._username, self._password = uri, username, password

        # Open the driver; any failure is logged and propagated
        credentials = (username, password)
        try:
            self._driver = GraphDatabase.driver(uri, auth=credentials)
            logger.info("Neo4j connection established successfully")
        except Exception as e:
            logger.error(f"Failed to establish Neo4j connection: {e}")
            raise

    def recommend_movies(self, movie_title, max_recommendations=3):
        """
        Look up recommendations for a movie using three strategies

        The strategies are: movies by the same director, movies sharing an
        actor, and movies sharing an actor whose year is within five of the
        reference movie's. The results are printed as well as returned.

        Args:
            movie_title (str): Title of the reference movie
            max_recommendations (int): Row limit applied to each strategy's query

        Returns:
            dict: Strategy name mapped to a list of {'title', 'rating'} dicts;
                an empty dict if any error occurs (the error is logged).
        """
        director_query = """
                MATCH (m:Movie {title: $title})-[:DIRECTED_BY]->(d:Director)
                WITH d
                MATCH (recommended:Movie)-[:DIRECTED_BY]->(d)
                WHERE recommended.title <> $title
                RETURN DISTINCT recommended.title AS movie_title, 
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        actor_query = """
                MATCH (m:Movie {title: $title})-[:STARRING]->(a:Actor)
                WITH a
                MATCH (recommended:Movie)-[:STARRING]->(a)
                WHERE recommended.title <> $title
                RETURN DISTINCT recommended.title AS movie_title, 
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        similar_query = """
                MATCH (m:Movie {title: $title})-[:STARRING]->(a:Actor)
                WITH a, m
                MATCH (recommended:Movie)-[:STARRING]->(a)
                WHERE recommended.title <> $title 
                  AND abs(recommended.year - m.year) <= 5
                RETURN DISTINCT recommended.title AS movie_title, 
                       recommended.rating AS rating
                ORDER BY rating DESC
                LIMIT $limit
            """

        def make_reader(cypher):
            # Wrap one Cypher query in a transaction function for execute_read
            def read(tx):
                rows = []
                for record in tx.run(cypher, title=movie_title, limit=max_recommendations):
                    rows.append({'title': record['movie_title'], 'rating': record['rating']})
                return rows
            return read

        with self._driver.session() as session:
            try:
                # Queries run in this order: director, actor, similar
                by_director = session.execute_read(make_reader(director_query))
                by_actor = session.execute_read(make_reader(actor_query))
                similar = session.execute_read(make_reader(similar_query))
                recommendations = {
                    'by_director': by_director,
                    'by_actor': by_actor,
                    'similar_movies': similar,
                }

                # Report each strategy under its own heading
                print(f"\nRecommendations for '{movie_title}':")
                for strategy, movies in recommendations.items():
                    print(f"\n{strategy.replace('_', ' ').title()}:")
                    if not movies:
                        print("No recommendations found")
                        continue
                    for movie in movies:
                        print(f"- {movie['title']} (Rating: {movie['rating']})")

                return recommendations
            except Exception as e:
                logger.error(f"Error recommending movies for {movie_title}: {e}")
                return {}

    def close(self):
        """Shut down the Neo4j driver if one is open"""
        if not self._driver:
            return
        self._driver.close()
        logger.info("Neo4j connection closed")

# Initialize the recommendation system with your Neo4j credentials
import os
from getpass import getpass

# Credentials are read from the environment (with an interactive prompt as a
# fallback for the password) so that secrets never live in the notebook.
# NOTE: any password that was previously committed in this cell should be
# treated as compromised and rotated.
URI = os.environ.get("NEO4J_URI", "neo4j+s://27328dbe.databases.neo4j.io")
USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
PASSWORD = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

# Instantiate the MovieRecommendationSystem
recommender = MovieRecommendationSystem(URI, USERNAME, PASSWORD)

try:
    # Get recommendations for a given movie
    recommender.recommend_movies('Dangal')
finally:
    # Close the connection even if the query raises
    recommender.close()


INFO:__main__:Neo4j connection established successfully
INFO:__main__:Neo4j connection closed



Recommendations for 'Dangal':

By Director:
- Chhichhore (Rating: 8.2)
- Bhoothnath Returns (Rating: 6.6)
- Machaan (Rating: 5.4)

By Actor:
- Badhaai Ho (Rating: 8.0)
- Ludo (Rating: 7.6)
- Photograph (Rating: 6.8)

Similar Movies:
- Badhaai Ho (Rating: 8.0)
- Ludo (Rating: 7.6)
- Photograph (Rating: 6.8)
