In [1]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display

In [5]:
# Load dataset
def load_dataset(file_path, **read_csv_kwargs):
    """Read the movie dataset from a CSV file into a DataFrame.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        **read_csv_kwargs: Optional keyword arguments forwarded unchanged to
            ``pandas.read_csv``, e.g. ``sep=";"`` or ``on_bad_lines="skip"``
            for files containing rows with an inconsistent field count.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        pandas.errors.ParserError: If the file cannot be tokenized with the
            given options (for example a row has more fields than the header).
    """
    return pd.read_csv(file_path, **read_csv_kwargs)

In [3]:
# Explore dataset
def explore_dataset(df):
    """Show a quick overview of a DataFrame: preview, schema and statistics.

    Args:
        df: The DataFrame to inspect.

    Returns:
        None. Everything is written to the notebook output.
    """
    print("Dataset Overview:")
    display(df.head())

    print("\nDataset Information:")
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would append a stray "None" line to the output.
    df.info()

    print("\nDataset Statistics:")
    # Rendered with display() like the preview above for a consistent table.
    display(df.describe())


In [4]:
# Filter movies by genre, release year, or director
def filter_movies(df, genre=None, year=None, director=None):
    """Return the rows of ``df`` that satisfy every supplied filter.

    Args:
        df: DataFrame with 'Genre', 'Release Year' and 'Director' columns.
        genre: Text that must appear in 'Genre' (case-insensitive). Skipped
            when empty or None.
        year: Value that 'Release Year' must equal. Skipped when falsy.
        director: Text that must appear in 'Director' (case-insensitive).
            Skipped when empty or None.

    Returns:
        The filtered DataFrame; ``df`` itself when no filter is active.
    """
    filtered_df = df
    if genre:
        # regex=False: the text is matched literally, so input such as "(" or
        # "C++" no longer raises re.error and "." no longer matches anything.
        genre_hit = filtered_df['Genre'].str.contains(genre, case=False, na=False, regex=False)
        filtered_df = filtered_df[genre_hit]
    if year:
        filtered_df = filtered_df[filtered_df['Release Year'] == year]
    if director:
        director_hit = filtered_df['Director'].str.contains(director, case=False, na=False, regex=False)
        filtered_df = filtered_df[director_hit]
    return filtered_df

In [6]:
# Search movies whose title or genre contains the query
def search_movies(df, query):
    """Return the rows whose 'Title' or 'Genre' contains ``query``.

    Matching is case-insensitive and literal; missing values never match.

    Args:
        df: DataFrame with 'Title' and 'Genre' columns.
        query: Text to look for.

    Returns:
        A DataFrame with the matching rows.
    """
    # regex=False keeps user text such as "C++" or "(" from being parsed as a
    # regular expression, which would raise re.error or match unintended rows.
    in_title = df['Title'].str.contains(query, case=False, na=False, regex=False)
    in_genre = df['Genre'].str.contains(query, case=False, na=False, regex=False)
    return df[in_title | in_genre]

In [7]:
# Add user ratings and reviews
def add_user_feedback(df, movie_id, user_rating, user_review):
    """Record one user's rating and review for a movie.

    The vote count of the matching movie is incremented, its rating becomes
    the running average including ``user_rating``, and ``user_review`` is
    appended (space-separated) to its 'User Reviews' text.

    Args:
        df: DataFrame with 'Movie ID', 'Rating' and 'Number of Votes' columns.
            It is modified in place. A 'User Reviews' column is created when
            absent.
        movie_id: Identifier looked up in 'Movie ID'.
        user_rating: Numeric rating to fold into the average.
        user_review: Review text to append.

    Returns:
        The same ``df`` object. When ``movie_id`` is unknown a message is
        printed and ``df`` is returned unchanged.
    """
    matches = df['Movie ID'] == movie_id
    if not matches.any():
        print("Movie ID not found.")
        return df

    # Prepare the columns before touching any value, so a missing or wrongly
    # typed column cannot leave the row half-updated.
    if 'User Reviews' not in df.columns:
        df['User Reviews'] = None
    elif pd.api.types.is_numeric_dtype(df['User Reviews']):
        # A numeric (e.g. all-NaN) column cannot hold review text.
        df['User Reviews'] = df['User Reviews'].astype(object)
    if pd.api.types.is_integer_dtype(df['Rating']):
        # The running average is generally fractional.
        df['Rating'] = df['Rating'].astype(float)

    df.loc[matches, 'Number of Votes'] += 1
    previous_rating = df.loc[matches, 'Rating'].iloc[0]
    vote_count = df.loc[matches, 'Number of Votes'].iloc[0]
    # vote_count already includes the new vote, hence (vote_count - 1) old votes.
    df.loc[matches, 'Rating'] = (previous_rating * (vote_count - 1) + user_rating) / vote_count

    earlier_reviews = df.loc[matches, 'User Reviews'].fillna('').astype(str)
    # lstrip() drops the separator when there was no earlier review.
    df.loc[matches, 'User Reviews'] = (earlier_reviews + f" {user_review}").str.lstrip()
    return df


In [8]:
# Generate top 10 lists
def generate_top_10(df, by, value=None):
    """Return the ten highest-rated movies, optionally restricted by a filter.

    Args:
        df: DataFrame with a 'Rating' column plus the columns read by
            ``filter_movies``.
        by: 'Genre' to filter on genre; any other value filters on release
            year.
        value: Genre text or release year. When empty or None no filter is
            applied.

    Returns:
        A DataFrame of at most ten rows sorted by 'Rating', highest first.
    """
    if not value:
        candidates = df
    elif by == 'Genre':
        candidates = filter_movies(df, genre=value)
    else:
        # The year usually arrives as text from input(); compared as a string
        # it would match no row of a numeric 'Release Year' column.
        # NOTE(review): assumes 'Release Year' holds integers — confirm.
        try:
            release_year = int(value)
        except (TypeError, ValueError):
            # Not a whole number: pass it through unchanged, as before.
            release_year = value
        candidates = filter_movies(df, year=release_year)
    return candidates.sort_values(by='Rating', ascending=False).head(10)

In [9]:
# Visualize data
def visualize_data(df):
    """Draw two charts: a rating histogram and the ten most frequent genres.

    Args:
        df: DataFrame with 'Rating' and 'Genre' columns.

    Returns:
        None. Both figures are shown via ``plt.show()``.
    """
    # Histogram of the 'Rating' column.
    _, rating_ax = plt.subplots(figsize=(10, 6))
    df['Rating'].hist(ax=rating_ax, bins=20, color='skyblue', edgecolor='black')
    rating_ax.set_title('Rating Distribution')
    rating_ax.set_xlabel('Rating')
    rating_ax.set_ylabel('Count')
    plt.show()

    # Bar chart of the ten most common distinct 'Genre' values.
    top_genres = df['Genre'].value_counts().head(10)
    _, genre_ax = plt.subplots(figsize=(10, 6))
    top_genres.plot(kind='bar', color='lightgreen', ax=genre_ax)
    genre_ax.set_title('Top 10 Movie Genres')
    genre_ax.set_xlabel('Genre')
    genre_ax.set_ylabel('Count')
    plt.show()


In [None]:
# Interactive menu
def interactive_menu():
    """Print the main menu of the movie recommendation system."""
    menu_lines = (
        "\nMovie Recommendation System",
        "1. Load and explore dataset",
        "2. Filter movies by genre, release year, or director",
        "3. Search movies by title or keywords",
        "4. Add ratings and reviews",
        "5. Generate Top 10 lists",
        "6. Visualize data",
        "7. Exit",
    )
    for line in menu_lines:
        print(line)


In [15]:
if __name__ == "__main__":
    # Mount Google Drive (Colab only) so the dataset path below resolves.
    from google.colab import drive
    drive.mount('/content/drive')

    # Specify the path to the dataset in your Google Drive
    file_path = '/content/drive/My Drive/7_Movie_Recommendation_System.csv'  # Update this with the correct file path

    # Load dataset.
    # NOTE: pd.read_csv raises ParserError when a row has more fields than the
    # header (for example an unquoted comma inside a value); fix the file first.
    movies_df = load_dataset(file_path)

    # Menu loop: invalid input is reported and the menu is shown again instead
    # of ending the session with an unhandled ValueError.
    while True:
        interactive_menu()
        choice = input("Enter your choice: ").strip()

        if choice == '1':
            explore_dataset(movies_df)
        elif choice == '2':
            genre = input("Enter genre (or press Enter to skip): ")
            year = input("Enter release year (or press Enter to skip): ").strip()
            director = input("Enter director (or press Enter to skip): ")
            try:
                year = int(year) if year else None
            except ValueError:
                print("Release year must be a whole number.")
                continue
            filtered = filter_movies(movies_df, genre, year, director)
            display(filtered)
        elif choice == '3':
            query = input("Enter title or keyword to search: ")
            results = search_movies(movies_df, query)
            display(results)
        elif choice == '4':
            try:
                movie_id = int(input("Enter Movie ID: "))
                user_rating = float(input("Enter your rating (0-5): "))
            except ValueError:
                print("Movie ID must be a whole number and the rating must be a number.")
                continue
            # Enforce the range promised by the prompt; this also rejects NaN.
            if not 0 <= user_rating <= 5:
                print("Rating must be between 0 and 5.")
                continue
            user_review = input("Enter your review: ")
            movies_df = add_user_feedback(movies_df, movie_id, user_rating, user_review)
        elif choice == '5':
            by = input("Generate Top 10 by (Genre/Year): ").strip().capitalize()
            if by not in ('Genre', 'Year'):
                print("Please choose either Genre or Year.")
                continue
            value = input("Enter value (or press Enter to skip): ").strip()
            if by == 'Year' and value:
                # The year filter compares against a numeric column, so the
                # typed text has to be converted first.
                try:
                    value = int(value)
                except ValueError:
                    print("Release year must be a whole number.")
                    continue
            top_10 = generate_top_10(movies_df, by, value)
            display(top_10)
        elif choice == '6':
            visualize_data(movies_df)
        elif choice == '7':
            print("Exiting program. Goodbye!")
            break
        else:
            print("Invalid choice. Please try again.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


ParserError: Error tokenizing data. C error: Expected 8 fields in line 33, saw 9
