In [53]:
import pandas as pd

# Load the movie table from movies.csv (path is relative to the working directory).
# Later cells read its "title", "movieId" and "genres" columns.
movies = pd.read_csv("movies.csv")

In [54]:
#movies

In [55]:
import re

def clean_title(title):
    """Strip punctuation and other symbols from a movie title.

    Args:
        title: The raw title string.

    Returns:
        ``title`` with every character that is not an ASCII letter, a digit
        or a space removed. Letter case and spacing are left as they are.
    """
    # A negated character class needs square brackets and the full 0-9 digit
    # range; the earlier brace-delimited pattern never matched anything, so
    # titles came back unchanged.
    return re.sub(r"[^a-zA-Z0-9 ]", "", title)

In [56]:
# Add a "clean_title" column by running clean_title over every raw title;
# this column is what the TF-IDF vectorizer is fitted on further down.
movies["clean_title"] = movies["title"].apply(clean_title)

In [57]:
#movies

In [58]:
from sklearn.feature_extraction.text import TfidfVectorizer

# ngram_range=(1, 2) makes the vectorizer use single words and two-word sequences as features.
vectorizer = TfidfVectorizer(ngram_range=(1,2))

# Fit on the cleaned titles and keep the resulting TF-IDF matrix (one row per movie);
# search() compares query vectors against it.
tfidf = vectorizer.fit_transform(movies["clean_title"])

In [59]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

def search(title, n_results=5):
    """Find the movies whose cleaned titles best match ``title``.

    The query is cleaned with ``clean_title``, vectorized with the
    module-level ``vectorizer`` and compared by cosine similarity against the
    module-level ``tfidf`` matrix.

    Args:
        title: Free-text title to look up.
        n_results: Maximum number of rows to return (default 5).

    Returns:
        Rows of the module-level ``movies`` frame, ordered from most to least
        similar, so ``.iloc[0]`` is the best match. Fewer than ``n_results``
        rows are returned when fewer movies are available.
    """
    title = clean_title(title)
    query_vec = vectorizer.transform([title])
    similarity = cosine_similarity(query_vec, tfidf).flatten()

    # argpartition raises when asked for more elements than exist.
    n_results = min(n_results, len(similarity))
    if n_results <= 0:
        return movies.iloc[[]]

    # argpartition only guarantees WHICH indices are the top-k, not their
    # order, so the candidates are sorted explicitly by descending similarity.
    top = np.argpartition(similarity, -n_results)[-n_results:]
    ranked = top[np.argsort(similarity[top])[::-1]]
    return movies.iloc[ranked]

In [82]:
import ipywidgets as widgets
from IPython.display import display

# movie_input = widgets.Text(
#     value="Toy Story",
#     description="Movie Title:",
#     disabled=False
# )
# movie_list = widgets.Output()

# def on_type(data):
#     with movie_list:
#         movie_list.clear_output()
#         title = data["new"]
#         if len(title) > 3:
#             display(search(title))

# movie_input.observe(on_type, names='value')

#display(movie_input, movie_list)

In [83]:
# Hard-coded movieId used as the worked example in the exploratory cells below.
movie_id = 89745
# Row(s) of `movies` with that id; not referenced again in the visible cells.
movie = movies[movies["movieId"] == movie_id]

In [84]:
# Load the ratings table from ratings.csv (path is relative to the working directory).
# Later cells read its "userId", "movieId" and "rating" columns.
ratings = pd.read_csv("ratings.csv")

In [85]:
#ratings.dtypes

In [86]:
# Distinct users who gave the example movie a rating of 4 or higher.
similar_users = ratings.loc[
    ratings["movieId"].eq(movie_id) & ratings["rating"].ge(4),
    "userId",
].unique()

In [87]:
# movieId of every rating strictly above 4 given by those users (one entry per rating row).
similar_user_recs = ratings.loc[
    ratings["userId"].isin(similar_users) & ratings["rating"].gt(4),
    "movieId",
]

In [88]:
# Turn the per-movie counts into a share of the similar users.
# This cell rebinds similar_user_recs, so it must be run exactly once after the cell above.
similar_user_recs = similar_user_recs.value_counts().div(len(similar_users))

# Keep only movies whose share is above 10%.
similar_user_recs = similar_user_recs.loc[similar_user_recs.gt(.1)]

In [89]:
#similar_user_recs

In [90]:
#similar_user_recs.value_counts()

In [91]:
# Every rating strictly above 4, from any user, for one of the candidate movies.
all_users = ratings.loc[
    ratings["movieId"].isin(similar_user_recs.index) & ratings["rating"].gt(4)
]

In [92]:
# Share of the distinct users in all_users who rated each candidate movie above 4.
all_users_recs = all_users["movieId"].value_counts().div(all_users["userId"].unique().size)

In [93]:
#all_users_recs

In [94]:
# Put the two share series side by side, aligned on movieId, and label the columns.
rec_percentages = pd.concat(
    [similar_user_recs, all_users_recs],
    axis="columns",
).set_axis(["similar", "all"], axis="columns")

In [95]:
#rec_percentages

In [96]:
# Score = share among similar users relative to the share among all users.
rec_percentages["score"] = rec_percentages["similar"].div(rec_percentages["all"])

In [97]:
# Highest score first.
rec_percentages = rec_percentages.sort_values(by="score", ascending=False)

In [98]:
#rec_percentages

In [99]:
#rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")

In [110]:
def find_similar_movies(movie_id, min_share=0.1, top_n=10):
    """Recommend movies favoured by the users who liked ``movie_id``.

    Reads the module-level ``ratings`` and ``movies`` frames.

    Args:
        movie_id: ``movieId`` of the movie to base recommendations on.
        min_share: A candidate is kept only if more than this fraction of the
            similar users rated it above 4 (default 0.1, i.e. 10%).
        top_n: Maximum number of recommendations to return (default 10).

    Returns:
        A DataFrame with columns ``score``, ``title`` and ``genres``, highest
        score first. ``score`` is the share of similar users who rated the
        movie above 4 divided by the corresponding share among all users who
        rated any candidate above 4, rounded to the nearest integer.
    """
    # "Similar" users are those who rated the target movie 4 or higher.
    liked_target = (ratings["movieId"] == movie_id) & (ratings["rating"] >= 4)
    similar_users = ratings.loc[liked_target, "userId"].unique()

    # NOTE(review): candidates require a rating strictly above 4, while the
    # filter above uses >= 4 -- confirm the asymmetry is intentional.
    liked_by_similar = ratings["userId"].isin(similar_users) & (ratings["rating"] > 4)
    similar_user_recs = ratings.loc[liked_by_similar, "movieId"]

    # Share of similar users who liked each movie; drop the uncommon ones.
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # How common the same candidates are among everyone who liked any of them.
    liked_candidates = ratings["movieId"].isin(similar_user_recs.index) & (ratings["rating"] > 4)
    all_users = ratings[liked_candidates]
    all_users_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())

    rec_percentages = pd.concat([similar_user_recs, all_users_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]

    # A high ratio means the movie is liked far more by similar users than in general.
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

    # Sort on the exact ratio first; rounding is only for presentation.
    rec_percentages = rec_percentages.sort_values("score", ascending=False)
    rec_percentages["score"] = rec_percentages["score"].round(0).astype(int)

    top = rec_percentages.head(top_n)
    return top.merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]

In [111]:
# Text box the user types a movie title into; on_type is attached to it below.
movie_name_input = widgets.Text(
    value="",
    description="Please Enter A Movie Title:",
    disabled=False,
    # The label is longer than the default description width and would be
    # clipped; "initial" lets the label take the width it needs.
    style={"description_width": "initial"},
)

# Output area that on_type clears and refills with the recommendation table.
recommendation_list = widgets.Output()

def on_type(data):
    """Refresh the recommendation table whenever the text box value changes.

    ``data`` is the change dictionary passed by the widget observer; its
    ``"new"`` entry holds the current text. The output area is always cleared;
    recommendations are shown only for input longer than three characters.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        typed = data["new"]
        if len(typed) <= 3:
            # Too short to search on; leave the output area empty.
            return
        # The first row returned by search() is taken as the intended movie.
        matched_id = search(typed).iloc[0]["movieId"]
        display(find_similar_movies(matched_id))

# Call on_type every time the text box's value changes.
movie_name_input.observe(on_type, names="value")

# Heading, input box and results stacked vertically and centred.
ui = widgets.VBox(
    children=[
        widgets.HTML("<h2>Movie Recommender</h2>"),
        movie_name_input,
        recommendation_list,
    ],
    layout=widgets.Layout(align_items="center"),
)

display(ui)

VBox(children=(HTML(value='<h2>Movie Recommender</h2>'), Text(value='', description='Please Enter A Movie Titl…