# In [7]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the input tables from the current working directory.
# ratings.csv: the code below reads its "userId", "movieId" and "rating" columns.
ratings = pd.read_csv('ratings.csv')

# movies.csv: the code below reads its "movieId", "title" and "genres" columns.
# low_memory=False makes pandas parse the file in one pass rather than in
# internal chunks, trading memory for consistent per-column dtype inference.
movies = pd.read_csv("movies.csv", low_memory=False)

# Fit a TF-IDF model on the movie titles using unigrams and bigrams
# (ngram_range=(1,2)). `tfidf` is the resulting title matrix that search()
# compares query vectors against.
vectorizer = TfidfVectorizer(ngram_range=(1,2))
tfidf = vectorizer.fit_transform(movies['title'])

def search(title, top_n=5):
    """
    Find the movies whose titles are most similar to a given title.

    The title is projected into the fitted TF-IDF space and compared with every
    movie title by cosine similarity.

    Args:
        title (str): The input movie title.
        top_n (int): Maximum number of matches to return. Defaults to 5.

    Returns:
        pandas DataFrame: Rows of `movies` for the best matches, ordered from
        most to least similar (row 0 is the best match). Contains fewer than
        `top_n` rows when fewer movies exist, and is empty when `top_n` <= 0.
    """
    query_vec = vectorizer.transform([title])  # TF-IDF vector of the query title
    similarity = cosine_similarity(query_vec, tfidf).flatten()  # one score per movie

    # Clamp so argpartition never receives an out-of-bounds kth.
    k = min(top_n, similarity.size)
    if k <= 0:
        return movies.iloc[[]]

    # argpartition only guarantees that the last k positions hold the k largest
    # scores, in no particular order, so sort that small slice explicitly.
    top = np.argpartition(similarity, -k)[-k:]
    top = top[np.argsort(-similarity[top], kind="stable")]
    return movies.iloc[top]

def find_similar_movies(movie_id, ratings_df=None, movies_df=None):
    """
    Recommend movies for a given movie ID using collaborative filtering.

    Users who rated `movie_id` above 4 are treated as "similar users". Each
    movie that more than 10% of them also rated above 4 is scored by how much
    more popular it is among similar users than among all users who rated
    any of those candidate movies above 4.

    Args:
        movie_id (int): The ID of the movie.
        ratings_df (pandas DataFrame, optional): Ratings table with "userId",
            "movieId" and "rating" columns. Defaults to the module-level
            `ratings`.
        movies_df (pandas DataFrame, optional): Movie metadata with "movieId",
            "title" and "genres" columns. Defaults to the module-level
            `movies`.

    Returns:
        pandas DataFrame: Up to 10 rows with columns "score", "title" and
        "genres", ordered by descending score. Empty when no user rated
        `movie_id` above 4.
    """
    if ratings_df is None:
        ratings_df = ratings
    if movies_df is None:
        movies_df = movies

    # Only ratings above 4 count as a "like" anywhere in this function.
    liked = ratings_df[ratings_df["rating"] > 4]

    # Users who liked the given movie.
    similar_users = liked.loc[liked["movieId"] == movie_id, "userId"].unique()
    if len(similar_users) == 0:
        # Nobody liked this movie, so there is nothing to base a recommendation on.
        return pd.DataFrame(columns=["score", "title", "genres"])

    # Share of similar users who liked each movie; keep those above 10%.
    similar_share = (
        liked.loc[liked["userId"].isin(similar_users), "movieId"].value_counts()
        / len(similar_users)
    )
    similar_share = similar_share[similar_share > .1]

    # Share of all users (who liked any candidate movie) that liked each candidate.
    candidates = liked[liked["movieId"].isin(similar_share.index)]
    overall_share = (
        candidates["movieId"].value_counts() / len(candidates["userId"].unique())
    )

    # Score = popularity among similar users relative to popularity overall.
    scores = pd.concat([similar_share, overall_share], axis=1)
    scores.columns = ["similar", "all"]
    scores["score"] = scores["similar"] / scores["all"]
    scores = scores.sort_values("score", ascending=False)

    # value_counts() only names its index "movieId" on pandas >= 2.0, so name it
    # explicitly and merge on a real column rather than an implicit index level.
    top = scores.head(10).rename_axis("movieId").reset_index()
    return top.merge(movies_df, on="movieId")[["score", "title", "genres"]]


# Text box the user types a movie title into, pre-filled with an example query.
movie_name_input = widgets.Text(
    value="Toy Story",
    description="Movie:",
    disabled=False,  # the Text trait is `disabled`; `disable` is not a recognized option
)

# Output area that on_type() renders the recommendation table into.
recommendation_list = widgets.Output()

# Observer callback: refreshes the recommendations when the text box changes
def on_type(data):
    """
    Re-render the recommendation table for the newly typed title.

    Args:
        data (dict): Change notification; the new text is read from data["new"].
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed_title = data["new"]
        # Titles of 3 characters or fewer are ignored; the output stays cleared.
        if len(typed_title) <= 3:
            return
        # Use the first search hit as the seed movie for recommendations.
        first_hit = search(typed_title).iloc[0]
        display(find_similar_movies(first_hit["movieId"]))


# Call on_type whenever the text box's "value" trait changes.
movie_name_input.observe(on_type, names="value")

# Render the text box and the recommendation output area.
display(movie_name_input, recommendation_list)



# Cell output:
# Text(value='Toy Story', description='Movie:')
#
# Output()