In [2]:
import pandas as pd
from google.colab import drive
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics.pairwise import cosine_similarity
!pip install scikit-surprise
from surprise import Reader, Dataset, SVD, accuracy
from surprise.model_selection import train_test_split
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import requests
from PIL import Image
from io import BytesIO

# Mount Google Drive
drive.mount('/content/drive')

# Load the files
movies_metadata = pd.read_csv('/content/drive/MyDrive/Data Science Project/movies_metadata.csv')
ratings_data = pd.read_csv('/content/drive/MyDrive/Data Science Project/ratings_small.csv')

# Preprocess movie data
movie_data = movies_metadata
movie_data['keywords'] = movie_data['keywords'].fillna('[]')
movie_data['Genres'] = movie_data['Genres'].fillna('[]').apply(literal_eval)

# Create lists for genres, languages, and release years
movie_genre_list = ['-- Choose a Sub-Filter --']
movie_language_list = ['-- Choose a Sub-Filter --']
movie_year_list = ['-- Choose a Sub-Filter --']

for genre_list in movie_data['Genres']:
    for genre in genre_list:
        if genre not in movie_genre_list:
            movie_genre_list.append(genre)

for language in movie_data['Original Language']:
    if language == 'nan' or type(language) != str or len(language) != 2:
        continue
    if language not in movie_language_list:
        movie_language_list.append(language)

for year in movie_data['Release Year']:
    if year == 'nan' or year < 1000:
        continue
    try:
        if year not in movie_year_list:
            movie_year_list.append(int(year))
    except:
        continue

sorted_year_list = movie_year_list[1:]
sorted_year_list.sort()
movie_year_list = [movie_year_list[0]] + sorted_year_list

Collecting scikit-surprise
  Downloading scikit-surprise-1.1.3.tar.gz (771 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m772.0/772.0 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3162671 sha256=188aa58bcc38e9aed92c23c3b41a624d4c50694611538473adfe4baad4e5edb4
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.3
Mounted at /content/drive


  movies_metadata = pd.read_csv('/content/drive/MyDrive/Data Science Project/movies_metadata.csv')


In [3]:
# Function to perform collaborative filtering
def collaborative_filtering(trainset):
    cf_model = SVD()
    cf_model.fit(trainset)
    return cf_model

# Function to get collaborative filtering recommendations
def get_cf_recommendations(userId, model, movie_data, ratings_data, n=10):
    user_movies = ratings_data[ratings_data['userId'] == userId]['movieId']
    user_unrated_movies = movie_data[~movie_data['movieId'].isin(user_movies)]['movieId']

    predictions = [model.predict(userId, movieId) for movieId in user_unrated_movies]
    top_recommendations = sorted(predictions, key=lambda x: x.est, reverse=True)[:n]

    recommended_movie_titles = [movie_data[movie_data['movieId'] == pred.iid]['title'].values[0] for pred in top_recommendations]

    return recommended_movie_titles

# Function to preprocess and compute content-based recommendations
def content_based_recommendations(movie_data, movie_genre_list, movie_language_list):
    movie_data_filtered = movie_data[['title', 'keywords', 'Genres', 'Original Language', 'Release Year', 'id']]

    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(movie_data_filtered['keywords'])
    cosine_sim_keywords = linear_kernel(tfidf_matrix, tfidf_matrix)

    indices = pd.Series(movie_data_filtered.index, index=movie_data_filtered['title']).drop_duplicates()

    return movie_data_filtered, cosine_sim_keywords, indices

# Function to get content-based recommendations based on user-selected filters
def get_content_recommendations(title, movie_data_filtered, cosine_sim_keywords, indices, recommendations):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim_keywords[idx]))

    try:
        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    except:
        return recommendations

    sim_scores = sim_scores[1:101]
    movie_indices = [i[0] for i in sim_scores]

    similar_movies = movie_data_filtered.iloc[movie_indices]

    for index, row in similar_movies.iterrows():
        if row['title'] not in recommendations['title'].values:
            new_entry = {}
            for column in movie_data_filtered.columns:
                new_entry[column] = row[column]

            recommendations = pd.concat([recommendations, pd.DataFrame([new_entry], columns=recommendations.columns)], ignore_index=True, sort=False)

    return recommendations

# Function to apply user-selected filters to movie data
def apply_filters(selected_filters, recommendations, movie_data):
    for filter_name, filter_value in selected_filters:
        recommendations[filter_name] = movie_data[filter_name]

    return recommendations

# Function to remove unwanted recommendations based on selected filters
def remove_unwanted_recommendations(idx_list, recommendations, selected_filters):
    remove_idx = []

    for filter_name, filter_value in selected_filters:
        if filter_name == 'Genres':
            for i in idx_list:
                try:
                    if filter_value not in recommendations['Genres'][i]:
                        remove_idx.append(i)
                        recommendations.drop(i, inplace=True)
                except:
                    continue

            for i in remove_idx:
                idx_list.remove(i)
            remove_idx.clear()

        else:
            for i in idx_list:
                if filter_value != recommendations[filter_name][i]:
                    remove_idx.append(i)
                    recommendations.drop(i, inplace=True)

            for i in remove_idx:
                idx_list.remove(i)
            remove_idx.clear()

    return recommendations

# Function to display recommendations with interactive buttons and movie posters
def display_recommendations(recommendations):
    # Clear previous output
    clear_output(wait=True)

    # Display the number of recommendations
    num_recommendations = len(recommendations)
    message_widget = widgets.HTML(value=f"<h2>{num_recommendations} Recommendations</h2>", layout=widgets.Layout(margin='10px'))
    display(message_widget)

    # Create a dictionary to store additional information
    movie_info_dict = {}

    # Function to display additional information when a button is clicked
    def show_info(b):
        clear_output(wait=True)
        movie_info = movie_info_dict[b.description]

        # Custom HTML/CSS styling
        html_styled = f"""
        <div style="background-color: #f5f5f5; padding: 10px; border: 1px solid #ddd; text-align: center;">
            <h3 style="color: #333;">{b.description}</h3>
            <p>{movie_info}</p>
        </div>
        """
        display(HTML(html_styled))
        display(back_button)

    # Create buttons for each movie title with posters
    buttons = []
    row_buttons = []
    for i, (title, movie_id) in enumerate(zip(recommendations['title'], recommendations['id'])):

        # Function to create a button with a movie title and poster
        def create_movie_button(title, movie_id):
            button_layout = widgets.Layout(width='auto', margin='10px')

            # Fetch and display the movie poster
            api_key = '' # Use your api_key here
            poster_url = get_movie_poster_url(api_key, movie_id)
            image_widget = widgets.Image(value=requests.get(poster_url).content, format='png', layout=widgets.Layout(width='150px', height='225px'))

            # Combine the title and poster into a larger button
            button = widgets.Button(description=title, layout=button_layout)
            button_widget = widgets.VBox([image_widget, widgets.Label()], layout=widgets.Layout(align_items='center'))
            button_widget.children = [button_widget.children[0], button]

            # Assign a click event to each button
            movie_info_dict[title] = f"Genres: {recommendations.loc[recommendations['title'] == title, 'Genres'].values[0]}<br>" \
                                      f"Original Language: {recommendations.loc[recommendations['title'] == title, 'Original Language'].values[0]}<br>" \
                                      f"Release Year: {recommendations.loc[recommendations['title'] == title, 'Release Year'].values[0]}"

            button.on_click(show_info)

            return button_widget

        button = create_movie_button(title, movie_id)
        row_buttons.append(button)

        # Display buttons in rows of 5
        if (i + 1) % 5 == 0 or i == len(recommendations) - 1:
            buttons.append(widgets.HBox(row_buttons, layout=widgets.Layout(margin='10px')))
            row_buttons = []

    # Create a Back button
    def go_back(b):
        clear_output(wait=True)
        display_recommendations(recommendations)

    global back_button
    back_button = widgets.Button(description='Back', layout=widgets.Layout(width='auto', margin='10px'))
    back_button.on_click(go_back)

    # Display buttons in rows with custom styling
    display(widgets.VBox(buttons, layout=widgets.Layout(display='flex', flex_flow='column wrap')))

# Function to get the movie poster URL
def get_movie_poster_url(api_key, movie_id):

    # Get movie details
    movie_url = f'https://api.themoviedb.org/3/movie/{movie_id}?api_key={api_key}'
    movie_response = requests.get(movie_url)
    movie_data = movie_response.json()

    # Extract poster path
    try:
      poster_path = movie_data['poster_path']
    except:
      return 'https://www.prokerala.com/movies/assets/img/no-poster-available.webp'

    # Get configuration for image base URL
    config_url = f'https://api.themoviedb.org/3/configuration?api_key={api_key}'
    config_response = requests.get(config_url)
    config_data = config_response.json()
    base_url = config_data['images']['base_url']

    # Construct image URL
    full_url = f'{base_url}w185{poster_path}'

    return full_url

# Declaring selected_filters as a global variable
selected_filters = []

# Declaring recommendations as a global variable
recommendations = pd.DataFrame()

# Function to showcase an interactive interface
def interactive_interface():

    # Input box for User ID
    user_id_input = widgets.IntText(value=1, description='User ID:')
    display(user_id_input)

    def create_proceed_button():
        # Button to trigger collaborative filtering
        proceed_button = widgets.Button(description='Proceed', layout=widgets.Layout(width='auto', visibility='hidden'))
        return proceed_button

    # Output area for collaborative filtering recommendations
    cf_output = widgets.Output()
    display(cf_output)

    def create_main_filter_dropdown():
        # Dropdown for main filters
        main_filter_dropdown = widgets.Dropdown(options=['-- Choose a filter --', 'Genres', 'Original Language', 'Release Year'],
                                                description='Main Filter:', layout={'visibility': 'hidden'})
        return main_filter_dropdown

    def create_sub_filter_dropdown():
        # Dropdown for sub-filters
        sub_filter_dropdown = widgets.Dropdown(description='Sub-Filter:', layout={'visibility': 'hidden'})
        return sub_filter_dropdown

    def create_apply_button():
        # Button to apply content-based filtering
        apply_button = widgets.Button(description='Apply', layout=widgets.Layout(width='auto', visibility='hidden'))
        return apply_button

    def create_choice_dropdown():
        # Display a dropdown to ask if you wish to choose more filters
        choice_dropdown = widgets.Dropdown(options=['-- Choose a choice --', 'Yes', 'No'], description='Do you wish to choose more Filters?:', layout={'visibility': 'hidden'})
        return choice_dropdown

    def create_select_button():
        # Create a Select Button to select our choice
        select_button = widgets.Button(description='Select', layout=widgets.Layout(width='auto', visibility='hidden'))
        return select_button

    # Output area for content-based filtering recommendations
    content_output = widgets.Output()
    display(content_output)

    # Display recommendations
    recommendations_output = widgets.Output()
    display(recommendations_output)

    def on_recommendations_display():

        recommendations_output.clear_output()
        with recommendations_output:

            userId = user_id_input.value
            # Collaborative Filtering
            reader = Reader(rating_scale=(0.5, 5))
            data_cf = Dataset.load_from_df(ratings_data[['userId', 'movieId', 'rating']], reader)
            trainset, _ = train_test_split(data_cf, test_size=0.25, random_state=42)
            cf_model = collaborative_filtering(trainset)
            cf_recommendations = get_cf_recommendations(userId, cf_model, movies_metadata, ratings_data)
            print("Collaborative Filtering completed...")

            # Content-Based Filtering
            movie_data_filtered, cosine_sim_keywords, indices = content_based_recommendations(movies_metadata, movie_genre_list, movie_language_list)

            global recommendations
            for column in movie_data_filtered.columns:
                if column == 'keywords': continue
                recommendations[column] = pd.Series(dtype=object)

            # Get content-based recommendations based on collaborative filtering results
            for title in cf_recommendations:
                recommendations = get_content_recommendations(title, movie_data_filtered, cosine_sim_keywords, indices, recommendations)

            print("Content-Based Recommendations completed...")

            def choose_filters():

                proceed_button = create_proceed_button()
                main_filter_dropdown = create_main_filter_dropdown()
                sub_filter_dropdown = create_sub_filter_dropdown()
                apply_button = create_apply_button()
                choice_dropdown = create_choice_dropdown()
                select_button = create_select_button()
                display(proceed_button)
                proceed_button.layout.visibility = 'visible'

                def on_proceed_button_click(b):
                    proceed_button.layout.display = 'none'

                    display(main_filter_dropdown)
                    main_filter_dropdown.layout.visibility = 'visible'
                    display(sub_filter_dropdown)
                    return

                def on_main_filter_change(change):

                    if main_filter_dropdown.layout.visibility == 'hidden':
                        main_filter_dropdown.layout.visibility == 'visible'

                    # Disable the Sub-Filters and do not show them for '-- Choose a filter --'
                    if change['new'] == '-- Choose a filter --':
                        sub_filter_dropdown.layout.visibility = 'hidden'
                        return

                    sub_filter_dropdown.options = []  # Clear sub-filter dropdown
                    if change['new'] == 'Genres':
                        if sub_filter_dropdown.layout.visibility == 'hidden':
                            sub_filter_dropdown.layout.visibility = 'visible'
                        sub_filter_dropdown.options = movie_genre_list
                    elif change['new'] == 'Original Language':
                        if sub_filter_dropdown.layout.visibility == 'hidden':
                            sub_filter_dropdown.layout.visibility = 'visible'
                        sub_filter_dropdown.options = movie_language_list
                    elif change['new'] == 'Release Year':
                        if sub_filter_dropdown.layout.visibility == 'hidden':
                            sub_filter_dropdown.layout.visibility = 'visible'
                        sub_filter_dropdown.options = movie_year_list

                def on_sub_filter_change(change):

                    if sub_filter_dropdown.layout.visibility == 'hidden':
                        sub_filter_dropdown.layout.visibility = 'visible'

                    # Disable the "Choose" button when Sub-Filters is set to '-- Choose a Sub-Filter --'
                    if change['new'] == '-- Choose a Sub-Filter --':
                        apply_button.layout.visibility = 'hidden'
                    else:
                        display(apply_button)
                        apply_button.layout.visibility = 'visible'

                def on_apply_button_click(b):
                    global selected_filters
                    apply_button.layout.display = 'none'

                    # Hide the Main and Sub-Filter dropdowns
                    main_filter_dropdown.layout.display = 'none'
                    sub_filter_dropdown.layout.display = 'none'
                    selected_filters.append((main_filter_dropdown.value, sub_filter_dropdown.value))

                    display(choice_dropdown)
                    choice_dropdown.layout.visibility = 'visible'
                    display(select_button)

                def on_choice_change(change):

                    if choice_dropdown.layout.visibility == 'hidden':
                        choice_dropdown.layout.visibility = 'visible'

                    # Disable the "Select" button when choice is set to '-- Choose a choice --'
                    if change['new'] == '-- Choose a choice --':
                        select_button.layout.visibility = 'hidden'
                    else:
                        select_button.layout.visibility = 'visible'

                def on_select_button_click(b):
                    select_button.layout.display = 'none'

                    # Hide the Choice Dropdown
                    choice_dropdown.layout.display = 'none'

                    if choice_dropdown.value == 'Yes':
                        choose_filters()
                    else:
                        # Apply filters and get contend-based recommendations
                        global recommendations
                        recommendations = apply_filters(selected_filters, recommendations, movie_data)
                        idx_list = [i for i in range(len(recommendations['title']))]
                        recommendations = remove_unwanted_recommendations(idx_list, recommendations, selected_filters)

                        # Display final recommendations
                        display_recommendations(recommendations)

                proceed_button.on_click(on_proceed_button_click)
                main_filter_dropdown.observe(on_main_filter_change, names='value')
                sub_filter_dropdown.observe(on_sub_filter_change, names='value')
                apply_button.on_click(on_apply_button_click)
                choice_dropdown.observe(on_choice_change, names='value')
                select_button.on_click(on_select_button_click)

            choose_filters()

    # Set up event handling
    on_recommendations_display()

# Run the interactive interface
interactive_interface()

HTML(value='<h2>25 Recommendations</h2>', layout=Layout(margin='10px'))

VBox(children=(HBox(children=(VBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x0…