In [2]:
import pandas as pd
import heapq
from collections import defaultdict

class Movie:
    def __init__(self, movie_id, name, genres, starring, year, rating, time):
        self.movie_id = movie_id
        self.name = name
        self.genres = set(genres.split(', '))
        self.starring = set(starring.split(', '))
        self.year = year
        self.rating = rating

        if 'h' in time:
            self.time = int(time.split(' ')[0].split('h')[0])
        elif 'm' in time:
            self.time = int(time.split(' ')[0].split('m')[0])
        else:
            self.time = None

class User:
    def __init__(self, user_id):
        self.user_id = user_id
        self.watched_movies = []

    def add_watched_movie(self, movie_id):
        self.watched_movies.append(movie_id)

class NetflixCatalog:
    def __init__(self, data):
        self.movies = {}
        for index, row in data.iterrows():
            movie_id = int(row['id'])
            genres_str = str(row['genres'])
            time_str = str(row['time'])
            movie = Movie(movie_id, row['name'], genres_str, row['starring'],
                         int(row['year']), row['rating'], time_str)
            self.movies[movie_id] = movie

        self.users = {}

        self.genre_index = defaultdict(set)
        self.actor_index = defaultdict(set)
        self.name_index = defaultdict(set)

        for movie_id, movie in self.movies.items():
            for genre in movie.genres:
                self.genre_index[genre.lower()].add(movie_id)
            for actor in movie.starring:
                self.actor_index[actor.lower()].add(movie_id)
            self.name_index[movie.name.lower()].add(movie_id)


    # Problem 1: Finding the Most Popular Genres
    def find_most_popular_genres(self, num_genres):
        genre_counts = defaultdict(int)
        for movie in self.movies.values():
            for genre in movie.genres:
                if genre in genre_counts:
                    genre_counts[genre] += 1
                else:
                    genre_counts[genre] = 1

        top_genres = heapq.nsmallest(num_genres, genre_counts, key=genre_counts.get)
        return top_genres

    # Problem 2: Finding Similar Movies/Shows (Modified to use genres and actors)
    def find_similar_movies(self, movie_id):
        target_movie = self.movies.get(movie_id)
        if target_movie:
            similar_movies = []
            for movie in self.movies.values():
                if set(movie.genres) & set(target_movie.genres):
                    similar_movies.append(movie.name)
            for movie in self.movies.values():
                if set(movie.starring) & set(target_movie.starring):
                    similar_movies.append(movie.name)
            similar_movies = list(set(similar_movies))
            return similar_movies
        else:
            return None


    # Problem 3: Finding Movie/Show by Runtimes
    def find_movies_by_runtime(self, min_runtime, max_runtime):
        matching_movies = []
        for movie in self.movies.values():
            if movie.time is not None:
                if min_runtime <= movie.time <= max_runtime:
                    matching_movies.append(movie.name)
        return matching_movies

    # Problem 4: Searching for Movies/Shows by Keyword
    def search_by_keyword(self, keyword):
        matching_movies = []
        for movie in self.movies.values():
            if keyword.lower() in movie.name.lower():
                matching_movies.append(movie.name)
            if keyword.lower() in ', '.join(movie.genres).lower():
                matching_movies.append(movie.name)
            if keyword.lower() in ', '.join(movie.starring).lower():
                matching_movies.append(movie.name)
        return matching_movies

    def add_user(self, user_id):
        user = User(user_id)
        self.users[user_id] = user
        return user

    def add_watched_movie(self, user_id, movie_id):
        user = self.users.get(user_id)
        if user:
            user.add_watched_movie(movie_id)
            return True
        else:
            return False


def main():
    data = pd.read_csv('netflix_full.csv')
    netflix = NetflixCatalog(data)

    netflix.add_user(1)
    netflix.add_user(2)

    netflix.add_watched_movie(1, 81416533)
    netflix.add_watched_movie(1, 80991406)
    netflix.add_watched_movie(2, 70208104)
    netflix.add_watched_movie(2, 70297761)

    while True:
        print("\nNetflix Catalog Search")
        print("Choose an option:")
        print("1. Find Most Popular Genres")
        print("2. Find Similar Movies/Shows")
        print("3. Find Movies/Shows by Runtime")
        print("4. Search by Keyword")
        print("5. Exit")

        choice = input("Enter your choice: ")

        if choice == '1':
            num_genres = int(input("How many top genres do you want to see? "))
            top_genres = netflix.find_most_popular_genres(num_genres)
            print(f"\nTop {num_genres} Genres:")
            for genre in top_genres:
                print(genre)

        elif choice == '2':
            movie_id = int(input("Enter movie ID: "))
            similar_movies = netflix.find_similar_movies(movie_id)
            if similar_movies:
                print(f"\nSimilar movies/shows:")
                for movie in similar_movies:
                    print(movie)
            else:
                print(f"Movie with ID {movie_id} not found.")


        elif choice == '3':
            min_runtime = int(input("Enter minimum runtime (in minutes): "))
            max_runtime = int(input("Enter maximum runtime (in minutes): "))
            matching_movies = netflix.find_movies_by_runtime(min_runtime, max_runtime)
            print(f"\nMovies/Shows with runtime between {min_runtime} and {max_runtime} minutes:")
            for movie in matching_movies:
                print(movie)

        elif choice == '4':
            keyword = input("Enter keyword: ")
            matching_movies = netflix.search_by_keyword(keyword)
            print(f"\nMovies/Shows containing '{keyword}':")
            for movie in matching_movies:
                print(movie)

        elif choice == '5':
            print("Exiting the program.")
            break

        else:
            print("Invalid choice. Please try again.")

if __name__ == '__main__':
    main()


Netflix Catalog Search
Choose an option:
1. Find Most Popular Genres
2. Find Similar Movies/Shows
3. Find Movies/Shows by Runtime
4. Search by Keyword
5. Exit
Enter your choice: 1
How many top genres do you want to see? 10

Top 10 Genres:
Drama Movies ,Action & Adventure Movies ,Spy Movies
Military Movies ,Drama Movies ,Martial Arts Movies ,Movies Based on Books ,Period Pieces ,Action & Adventure Movies 
Military Movies ,Drama Movies ,Period Pieces ,Action & Adventure Movies ,Social Issue Dramas 
Middle Eastern Movies ,Military Movies ,Drama Movies ,Movies Based on Real Life ,Action & Adventure Movies
Military Movies ,Drama Movies ,Movies Based on Books ,Movies Based on Real Life ,Action & Adventure Movies 
Military Movies ,Period Pieces ,Action & Adventure Movies 
Military Movies ,Action & Adventure Movies
Sci-Fi Movies ,Military Movies ,Action & Adventure Movies 
Military Movies ,Drama Movies ,Period Pieces ,Action & Adventure Movies ,Social Issue Dramas
Military Movies ,Drama Movie