# Simple Movie Recommender



Movie recommendation systems are widely used in today's digital platforms to help users discover new movies based on their preferences and past viewing behavior. In this project, I'll be creating a movie recommendation system that suggests top movies to users based on different approaches.

- `Popularity-Based Recommendations`: Recommending movies based on their overall popularity or average ratings.
- `Content-Based Recommendations`: Recommending movies similar to a given movie based on their attributes, such as genres...
- `User-Based Collaborative Filtering`: Recommending movies to a user based on the preferences of similar users or user-item interactions.
- `Item-Based Collaborative Filtering`: Recommending movies to a user based on the preferences of similar items or item-user interactions.

In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import ipywidgets as widgets
from IPython.display import display, Markdown
import re
from sklearn.feature_extraction.text import TfidfVectorizer

#### Data Reading

In [2]:
movies = pd.read_csv('Datasets/movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [3]:
ratings = pd.read_csv('Datasets/ratings.csv')
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [4]:
movies.shape

(10329, 3)

In [5]:
ratings.shape

(105339, 4)

In [6]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10329 entries, 0 to 10328
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  10329 non-null  int64 
 1   title    10329 non-null  object
 2   genres   10329 non-null  object
dtypes: int64(1), object(2)
memory usage: 242.2+ KB


In [7]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105339 entries, 0 to 105338
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype  
---  ------     --------------   -----  
 0   userId     105339 non-null  int64  
 1   movieId    105339 non-null  int64  
 2   rating     105339 non-null  float64
 3   timestamp  105339 non-null  int64  
dtypes: float64(1), int64(3)
memory usage: 3.2 MB


In [8]:
# split the genres
movies['genres'] = movies['genres'].str.split('|')
movies['genres']

0        [Adventure, Animation, Children, Comedy, Fantasy]
1                           [Adventure, Children, Fantasy]
2                                        [Comedy, Romance]
3                                 [Comedy, Drama, Romance]
4                                                 [Comedy]
                               ...                        
10324                        [Animation, Children, Comedy]
10325                                             [Comedy]
10326                                             [Comedy]
10327                                              [Drama]
10328                                 [(no genres listed)]
Name: genres, Length: 10329, dtype: object

In [9]:
#remove spaces
movies['genres'] =  movies['genres'].apply(lambda x:[i.replace(" ", "") for i in x])
movies

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]"
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]"
2,3,Grumpier Old Men (1995),"[Comedy, Romance]"
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]"
4,5,Father of the Bride Part II (1995),[Comedy]
...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),"[Animation, Children, Comedy]"
10325,146878,Le Grand Restaurant (1966),[Comedy]
10326,148238,A Very Murray Christmas (2015),[Comedy]
10327,148626,The Big Short (2015),[Drama]


In [10]:
#function to find the unique genres
def get_unique_genres(x):
    unique_genres = []
    for i in x:
        for j in i:
            if j not in unique_genres:
                unique_genres.append(j)
    return unique_genres

In [11]:
unique_genres = get_unique_genres(movies['genres'])
unique_genres

['Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Fantasy',
 'Romance',
 'Drama',
 'Action',
 'Crime',
 'Thriller',
 'Horror',
 'Mystery',
 'Sci-Fi',
 'IMAX',
 'War',
 'Musical',
 'Documentary',
 'Western',
 'Film-Noir',
 '(nogenreslisted)']

In [12]:
# function to generate new columns for genres
def genre_cols(genre, genre_m):
    for gen in genre_m:
        if gen == genre:
            return 1
    return 0

In [13]:
genre_cols_vector = np.vectorize(genre_cols)
genre_cols_vector

<numpy.vectorize at 0x13466fa10>

In [14]:
# calling the vectorized function genre_cols_vector to add genre columns
for genre in tqdm(unique_genres):
    movies[genre] = genre_cols_vector(genre, movies['genres'])

 40%|████      | 8/20 [00:00<00:00, 79.39it/s]

100%|██████████| 20/20 [00:00<00:00, 79.71it/s]


In [15]:
movies

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,(nogenreslisted)
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),"[Animation, Children, Comedy]",0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10325,146878,Le Grand Restaurant (1966),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10326,148238,A Very Murray Christmas (2015),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
10327,148626,The Big Short (2015),[Drama],0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [16]:
#check nulls
movies.isna().sum()

movieId             0
title               0
genres              0
Adventure           0
Animation           0
Children            0
Comedy              0
Fantasy             0
Romance             0
Drama               0
Action              0
Crime               0
Thriller            0
Horror              0
Mystery             0
Sci-Fi              0
IMAX                0
War                 0
Musical             0
Documentary         0
Western             0
Film-Noir           0
(nogenreslisted)    0
dtype: int64

In [17]:
movies.head(10)

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,(nogenreslisted)
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,6,Heat (1995),"[Action, Crime, Thriller]",0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,7,Sabrina (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
7,8,Tom and Huck (1995),"[Adventure, Children]",1,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,9,Sudden Death (1995),[Action],0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,10,GoldenEye (1995),"[Action, Adventure, Thriller]",1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [18]:
#dropping (nogenreslisted) because it is not needed
movies.drop(['(nogenreslisted)'],axis=1,inplace=True)
movies.head()

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Thriller,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,0
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Popularity-Based Recommendations

In [19]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [20]:
# creating a groupby of ratings average and count by users for each movie 
rating_avg_count = ratings.groupby('movieId').agg({'rating': ['mean', 'count']})
rating_avg_count

Unnamed: 0_level_0,rating,rating
Unnamed: 0_level_1,mean,count
movieId,Unnamed: 1_level_2,Unnamed: 2_level_2
1,3.907328,232
2,3.353261,92
3,3.189655,58
4,2.818182,11
5,3.250000,62
...,...,...
146684,4.000000,1
146878,2.500000,1
148238,3.000000,1
148626,4.333333,3


In [21]:
#creating a new dataframe popularity by merging rating_avg_count and movies
popularity = pd.merge(left=movies, right=rating_avg_count, left_on='movieId', right_on=rating_avg_count.index)
popularity

  popularity = pd.merge(left=movies, right=rating_avg_count, left_on='movieId', right_on=rating_avg_count.index)


Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,"(rating, mean)","(rating, count)"
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,3.907328,232
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,3.353261,92
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,3.189655,58
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,2.818182,11
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,3.250000,62
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10320,146684,Cosmic Scrat-tastrophe (2015),"[Animation, Children, Comedy]",0,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,4.000000,1
10321,146878,Le Grand Restaurant (1966),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,2.500000,1
10322,148238,A Very Murray Christmas (2015),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,3.000000,1
10323,148626,The Big Short (2015),[Drama],0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,4.333333,3


In [22]:
popularity.head()

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,"(rating, mean)","(rating, count)"
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,3.907328,232
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,3.353261,92
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,3.189655,58
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,2.818182,11
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,3.25,62


In [23]:
popularity.rename(columns={'title': 'Movie Title', ('rating', 'mean'): 'Average Movie Rating', ('rating', 'count'): 'Number of Reviews'}, inplace=True)
popularity.head()

Unnamed: 0,movieId,Movie Title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Average Movie Rating,Number of Reviews
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,3.907328,232
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,3.353261,92
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,3.189655,58
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,2.818182,11
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,3.25,62


In [24]:
# popping (nogenreslisted) because it is not needed 
unique_genres.pop()
unique_genres

['Adventure',
 'Animation',
 'Children',
 'Comedy',
 'Fantasy',
 'Romance',
 'Drama',
 'Action',
 'Crime',
 'Thriller',
 'Horror',
 'Mystery',
 'Sci-Fi',
 'IMAX',
 'War',
 'Musical',
 'Documentary',
 'Western',
 'Film-Noir']

In [25]:
# function to generate recommendations based on the popularity of the movie
def popularity_recommender(genre, threshold, nums):
    result = popularity[popularity[genre] == 1]  # filter movies with the given genre
    result = result[result['Number of Reviews'] > threshold]  # filter movies with a minimum number of reviews
    result = result.sort_values(by='Average Movie Rating', ascending=False)[:nums]  # sort movies by average rating and select top n
    return result

In [26]:
# Function to handle button click for popularity-based recommendations
def handle_popularity(event):
    with  recommendation_list:
        recommendation_list.clear_output()
        genre = genre_dropdown.value
        threshold = review_threshold_input.value
        nums = recommendation_count_input.value
        recommendations = popularity_recommender(genre, threshold, nums)[['Movie Title', 'genres', 'Average Movie Rating', 'Number of Reviews']]  # select relevant columns
        display(recommendations) # Display the recommendations

In [27]:
!jupyter nbextension enable --py widgetsnbextension

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


In [28]:
# Widgets for popularity-based recommendations
genre_dropdown = widgets.Dropdown(
    options=unique_genres,
    description='Select genre:'
)

genre_dropdown.style.description_width = '200px'
genre_dropdown.layout.width = '400px'

review_threshold_input = widgets.IntText(
    description='Minimum reviews threshold:'
)

review_threshold_input.style.description_width = '200px'
review_threshold_input.layout.width = '400px'

recommendation_count_input = widgets.IntText(
    description='Number of recommendations:'
)

recommendation_count_input.style.description_width = '200px'
recommendation_count_input.layout.width = '400px'

recommendation_list =  widgets.Output() # Output widget to display recommendations

popularity_button = widgets.Button(description='Generate Recommendations')

popularity_button.layout.width = '200px'

popularity_button.on_click(handle_popularity)
popularity_widgets = widgets.VBox([genre_dropdown, review_threshold_input, recommendation_count_input, popularity_button])

display(Markdown("## Popularity-Based Recommendations"), popularity_widgets,  recommendation_list) # Display the widgets and the output widget


## Popularity-Based Recommendations

VBox(children=(Dropdown(description='Select genre:', layout=Layout(width='400px'), options=('Adventure', 'Anim…

Output()

### Content-based recommendations

In [29]:
# function to clean the title of the movie
def clean_title(title):
    return re.sub("[^a-zA-Z0-9 ]", "", title)

In [30]:
# apply  the clean_title function to the movie titles
movies["Clean Title"] = movies['title'].apply(clean_title)
movies.head()

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Toy Story 1995
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,Jumanji 1995
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,Father of the Bride Part II 1995


In [31]:
# tfidf vectorizer for the movie titles to generate a matrix of TF-IDF values
vectorizer = TfidfVectorizer(ngram_range=(1,2)) #ngram_range -> allows the vectorizer to recognise not only single words but also two consecutive words(anagrams)

tfidf = vectorizer.fit_transform(movies['Clean Title'])
tfidf

<10329x34566 sparse matrix of type '<class 'numpy.float64'>'
	with 73826 stored elements in Compressed Sparse Row format>

#### Search engine for the movie titles

In [32]:
# function to search for similar titles based on input using cosine similarity between the TF-IDF matrix and the input title
def search_title(title):
    title = clean_title(title) # clean the input title
    query_vec = vectorizer.transform([title]) # transform the input title into a TF-IDF vector
    similarity = cosine_similarity(query_vec,tfidf).flatten() #  calculate the cosine similarity between the input title and all movie titles
    indices = similarity.argsort()[::-1][:5] # sort the similarity scores in descending order and select the top 5 indices
    results = movies.iloc[indices] # get the movie titles corresponding to the top 5 indices
    return results #[['Clean Title', 'genres']]

In [33]:
movie_input = widgets.Text(
            value="Toy Story",
            description="Movie Title:",
            disabled=False)

movie_list = widgets.Output()

def on_type(data):
    with movie_list:
        movie_list.clear_output()
        title = data["new"]
        if len(title) > 5:
            display(search_title(title)[['Clean Title', 'genres']])

movie_input.observe(on_type, names='value')

display(Markdown("## Search Engine"), movie_input,movie_list)

## Search Engine

Text(value='Toy Story', description='Movie Title:')

Output()

In [34]:
result = search_title("The Toy Story")
result

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
8599,78499,Toy Story 3 (2010),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,0,1,0,0,0,0,0,Toy Story 3 2010
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Toy Story 1995
2496,3114,Toy Story 2 (1999),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Toy Story 2 1999
3838,4929,"Toy, The (1982)",[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,Toy The 1982
4403,5843,Toy Soldiers (1991),"[Action, Drama]",0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,Toy Soldiers 1991


In [35]:
result.index[0]

8599

In [34]:
# find cosine similarity of the movie's genres
movies_similarity = cosine_similarity(movies.drop(['title','movieId','genres','Clean Title'],axis=1))
movies_similarity

array([[1.        , 0.77459667, 0.31622777, ..., 0.4472136 , 0.        ,
        0.        ],
       [0.77459667, 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.31622777, 0.        , 1.        , ..., 0.70710678, 0.        ,
        0.        ],
       ...,
       [0.4472136 , 0.        , 0.70710678, ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [37]:
index = movies.index[movies['title'] == 'Grumpier Old Men (1995)'][0]
index

2

In [38]:
index = movies.iloc[8599]
index

movieId                                                    78499
title                                         Toy Story 3 (2010)
genres         [Adventure, Animation, Children, Comedy, Fanta...
Adventure                                                      1
Animation                                                      1
Children                                                       1
Comedy                                                         1
Fantasy                                                        1
Romance                                                        0
Drama                                                          0
Action                                                         0
Crime                                                          0
Thriller                                                       0
Horror                                                         0
Mystery                                                        0
Sci-Fi                   

In [39]:
# determining 10 most similar movies indices to Grumpier Old Men (1995) using cosine similarity of movie's genres
similar_movies_indices = movies_similarity[8599].argsort()[::-1][:10]
similar_movies_indices

array([8599, 7091, 8606, 6521, 9037, 7382, 6414, 9524, 5884, 2496])

In [40]:
similar_movies_scores = movies_similarity[8599][similar_movies_indices]
similar_movies_scores

array([1.        , 1.        , 1.        , 0.9258201 , 0.91287093,
       0.91287093, 0.91287093, 0.91287093, 0.91287093, 0.91287093])

In [41]:
# getting the movie titles from the indices
similar_movies = movies.iloc[similar_movies_indices]
similar_movies

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
8599,78499,Toy Story 3 (2010),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,0,1,0,0,0,0,0,Toy Story 3 2010
7091,47124,"Ant Bully, The (2006)","[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,0,1,0,0,0,0,0,Ant Bully The 2006
8606,78637,Shrek Forever After (a.k.a. Shrek: The Final C...,"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,0,1,0,0,0,0,0,Shrek Forever After aka Shrek The Final Chapte...
6521,32031,Robots (2005),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,1,1,0,0,0,0,0,Robots 2005
9037,87876,Cars 2 (2011),"[Adventure, Animation, Children, Comedy, IMAX]",1,1,1,1,0,0,0,...,0,0,0,1,0,0,0,0,0,Cars 2 2011
7382,53121,Shrek the Third (2007),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Shrek the Third 2007
6414,30793,Charlie and the Chocolate Factory (2005),"[Adventure, Children, Comedy, Fantasy, IMAX]",1,0,1,1,1,0,0,...,0,0,0,1,0,0,0,0,0,Charlie and the Chocolate Factory 2005
9524,98243,Rise of the Guardians (2012),"[Adventure, Animation, Children, Fantasy, IMAX]",1,1,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,Rise of the Guardians 2012
5884,8965,"Polar Express, The (2004)","[Adventure, Animation, Children, Fantasy, IMAX]",1,1,1,0,1,0,0,...,0,0,0,1,0,0,0,0,0,Polar Express The 2004
2496,3114,Toy Story 2 (1999),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Toy Story 2 1999


In [42]:
similar_movies["score"] = similar_movies_scores
similar_movies

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  similar_movies["score"] = similar_movies_scores


Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title,score
8599,78499,Toy Story 3 (2010),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,1,0,0,0,0,0,Toy Story 3 2010,1.0
7091,47124,"Ant Bully, The (2006)","[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,1,0,0,0,0,0,Ant Bully The 2006,1.0
8606,78637,Shrek Forever After (a.k.a. Shrek: The Final C...,"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,0,1,0,0,0,0,0,Shrek Forever After aka Shrek The Final Chapte...,1.0
6521,32031,Robots (2005),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,1,0,0,...,0,1,1,0,0,0,0,0,Robots 2005,0.92582
9037,87876,Cars 2 (2011),"[Adventure, Animation, Children, Comedy, IMAX]",1,1,1,1,0,0,0,...,0,0,1,0,0,0,0,0,Cars 2 2011,0.912871
7382,53121,Shrek the Third (2007),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,Shrek the Third 2007,0.912871
6414,30793,Charlie and the Chocolate Factory (2005),"[Adventure, Children, Comedy, Fantasy, IMAX]",1,0,1,1,1,0,0,...,0,0,1,0,0,0,0,0,Charlie and the Chocolate Factory 2005,0.912871
9524,98243,Rise of the Guardians (2012),"[Adventure, Animation, Children, Fantasy, IMAX]",1,1,1,0,1,0,0,...,0,0,1,0,0,0,0,0,Rise of the Guardians 2012,0.912871
5884,8965,"Polar Express, The (2004)","[Adventure, Animation, Children, Fantasy, IMAX]",1,1,1,0,1,0,0,...,0,0,1,0,0,0,0,0,Polar Express The 2004,0.912871
2496,3114,Toy Story 2 (1999),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,Toy Story 2 1999,0.912871


In [43]:
movies.head()

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
0,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,Toy Story 1995
1,2,Jumanji (1995),"[Adventure, Children, Fantasy]",1,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,Jumanji 1995
2,3,Grumpier Old Men (1995),"[Comedy, Romance]",0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,Grumpier Old Men 1995
3,4,Waiting to Exhale (1995),"[Comedy, Drama, Romance]",0,0,0,1,0,1,1,...,0,0,0,0,0,0,0,0,0,Waiting to Exhale 1995
4,5,Father of the Bride Part II (1995),[Comedy],0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,Father of the Bride Part II 1995


In [35]:
# function to generate recommendations based on the similarity of the movie's genres
def content_recommender(movie_id, nums):
    similar_movies_indices = movies_similarity[movie_id].argsort()[::-1][:nums] # nums most similar movies indices to the input movie
    similar_movies = movies.loc[similar_movies_indices] # getting the movie titles from the indices
    similar_movies["score"] = movies_similarity[movie_id][similar_movies_indices] # getting the movie scores from the indices
    return similar_movies[['Clean Title', 'genres', 'score']]

In [36]:
content_recommender(7296, 5)

Unnamed: 0,Clean Title,genres,score
1460,Godzilla 1998,"[Action, Sci-Fi, Thriller]",1.0
7018,XMen The Last Stand 2006,"[Action, Sci-Fi, Thriller]",1.0
590,Solo 1996,"[Action, Sci-Fi, Thriller]",1.0
61,Lawnmower Man 2 Beyond Cyberspace 1996,"[Action, Sci-Fi, Thriller]",1.0
4824,Eve of Destruction 1991,"[Action, Sci-Fi, Thriller]",1.0


In [37]:
# Function to handle button click for content-based recommendations
def handle_content(event):
    with  recommendation_list:
        recommendation_list.clear_output()
        title = movie_title_input.value
        nums = recommendation_count_input.value
        results = search_title(title)
        movie_id = results.index[0] # get the movie id of the first result
        
        recommendations = content_recommender(movie_id, nums) # get the recommendations for the movie
        display(recommendations) # Display the recommendations

In [38]:
# Widgets for content-based recommendations
movie_title_input = widgets.Text(
    description='Enter movie title:'
)

recommendation_list  =  widgets.Output() # Output widget to display recommendations

movie_title_input.style.description_width = '200px'
movie_title_input.layout.width = '400px'

recommendation_count_input = widgets.IntText(
    description='Number of recommendations:'
)

recommendation_count_input.style.description_width = '200px'
recommendation_count_input.layout.width = '400px'

content_button = widgets.Button(description='Generate Recommendations')
content_button.layout.width = '200px'
content_button.on_click(handle_content)
content_widgets = widgets.VBox([movie_title_input, recommendation_count_input, content_button])

display(Markdown("## Content-Based Recommendations"), content_widgets,  recommendation_list) # Display the widgets and the output widget

## Content-Based Recommendations

VBox(children=(Text(value='', description='Enter movie title:', layout=Layout(width='400px'), style=TextStyle(…

Output()

### User-based Collaborative Filtering

In [313]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [444]:
ratings.shape

(105339, 4)

In [445]:
# pivot table to create a matrix of user ratings
movie_mat = ratings.pivot_table(index='userId', columns='movieId', values='rating')
movie_mat

movieId,1,2,3,4,5,6,7,8,9,10,...,144482,144656,144976,146344,146656,146684,146878,148238,148626,149532
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,5.0,,2.0,,3.0,,,,,,...,,,,,,,,,,
3,,,,,3.0,,3.0,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664,,,,,,,,,,,...,,,,,,,,,,
665,,,,,,,,,,,...,,,,,,,,,,
666,,,,,,,,,,,...,,,,,,,,,,
667,,,,,,,,,,,...,,,,,,,,,,


In [446]:
movie_mat.index

Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
            ...
            659, 660, 661, 662, 663, 664, 665, 666, 667, 668],
           dtype='int64', name='userId', length=668)

In [447]:
# fill na with 0
movie_mat.fillna(0, inplace=True)

In [448]:
# find cosine similarity of the users using the matrix of user ratings
user_similarity = cosine_similarity(movie_mat)
user_similarity

array([[1.        , 0.10111327, 0.21004361, ..., 0.2386603 , 0.27821676,
        0.15347851],
       [0.10111327, 1.        , 0.11555911, ..., 0.05142323, 0.03590744,
        0.06481608],
       [0.21004361, 0.11555911, 1.        , ..., 0.08094014, 0.15894346,
        0.10964798],
       ...,
       [0.2386603 , 0.05142323, 0.08094014, ..., 1.        , 0.12325229,
        0.15836814],
       [0.27821676, 0.03590744, 0.15894346, ..., 0.12325229, 1.        ,
        0.11095479],
       [0.15347851, 0.06481608, 0.10964798, ..., 0.15836814, 0.11095479,
        1.        ]])

In [449]:
similar_user = user_similarity[100].argsort()[::-1][1:50+1]
similar_user

array([194, 361,   6,  56, 573, 609, 475, 436, 556, 347, 269, 503,   0,
       589, 492, 564, 414, 157,  78, 261, 386, 590, 252, 585, 260, 568,
       293, 327, 498, 240, 357, 402, 437, 527, 277, 285, 627, 566, 404,
       440, 411, 401, 289, 118, 168, 448, 163, 223, 536,  68])

In [450]:
len(similar_user)

50

In [451]:
similar_user_ratings = movie_mat.iloc[similar_user, :]
similar_user_ratings

movieId,1,2,3,4,5,6,7,8,9,10,...,144482,144656,144976,146344,146656,146684,146878,148238,148626,149532
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
195,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
362,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
57,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
574,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
610,0.0,0.0,0.0,0.0,0.0,3.5,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
476,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
437,2.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
557,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
348,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [452]:
check = popularity.index[popularity['Number of Reviews'] > 100].tolist()

In [453]:
movie_mat.iloc[similar_user, check]

movieId,1,6,10,21,32,34,47,50,95,110,...,5445,5952,6377,6539,6874,7153,8961,33794,58559,79132
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
195,4.0,0.0,4.5,0.0,4.5,0.0,3.5,4.0,0.0,4.5,...,4.5,4.0,4.0,0.0,4.0,4.0,3.0,0.0,4.5,0.0
362,0.0,0.0,0.0,0.0,4.0,0.0,4.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,3.5,0.0,3.5,3.0,0.0
7,0.0,0.0,4.0,0.0,5.0,0.0,5.0,0.0,0.0,0.0,...,0.0,3.0,0.0,2.5,2.5,3.0,0.0,3.5,5.0,2.0
57,0.0,0.0,0.0,0.0,0.0,0.0,4.5,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,4.5
574,3.0,0.0,0.0,0.0,3.5,0.0,4.0,3.5,0.0,3.5,...,4.0,3.5,0.0,4.0,4.5,3.0,4.0,3.5,4.0,4.0
610,0.0,3.5,0.0,0.0,0.0,0.0,4.5,4.5,0.0,4.0,...,0.0,4.5,0.0,0.0,0.0,4.5,0.0,4.0,5.0,5.0
476,0.0,0.0,0.0,0.0,0.0,0.0,4.5,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.5,0.0
437,2.5,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0
557,0.0,0.0,0.0,0.0,3.5,0.0,3.5,4.0,0.0,0.0,...,0.0,4.0,0.0,0.0,1.5,4.0,0.0,4.0,4.0,0.0
348,0.0,0.0,0.0,0.0,3.5,0.0,3.0,3.5,0.0,3.5,...,4.0,4.0,3.0,3.5,4.0,4.0,4.0,4.5,4.5,3.5


In [454]:
weighted_avg_rating = similar_user_ratings.mean(axis=0)
weighted_avg_rating

movieId
1         2.03
2         0.61
3         0.00
4         0.00
5         0.06
          ... 
146684    0.00
146878    0.00
148238    0.00
148626    0.00
149532    0.00
Length: 10325, dtype: float64

In [455]:
weighted_avg_rating.sort_values(ascending=False)[:5]

movieId
2571    4.11
296     4.07
2959    3.87
318     3.78
356     3.64
dtype: float64

In [456]:
top_n_indices = weighted_avg_rating.sort_values(ascending=False)[:5].index.tolist()

In [457]:
average_mov_rating = similar_user_ratings.mean(axis=0)
average_mov_rating[top_n_indices].tolist()

[4.11, 4.07, 3.87, 3.78, 3.64]

In [458]:
movies.loc[top_n_indices]

Unnamed: 0,movieId,title,genres,Adventure,Animation,Children,Comedy,Fantasy,Romance,Drama,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
2571,3217,"Star Is Born, A (1937)",[Drama],0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,Star Is Born A 1937
296,336,"Walking Dead, The (1995)","[Drama, War]",0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,Walking Dead The 1995
2959,3742,Battleship Potemkin (1925),"[Drama, War]",0,0,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,Battleship Potemkin 1925
318,358,Higher Learning (1995),[Drama],0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,Higher Learning 1995
356,407,In the Mouth of Madness (1995),"[Horror, Thriller]",0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,In the Mouth of Madness 1995


In [459]:
# function to generate recommendations based on the similarity of the users calculted by their movie ratings
def user_collaborative_recommender(user_index,top_n):
    similar_user = user_similarity[user_index].argsort()[::-1][1:200+1] # get the indices of the top 200 similar users
    min_num_reviews = popularity.index[popularity['Number of Reviews'] > 100].tolist() # get the indices of movies with more than 100 reviews
    similar_user_ratings = movie_mat.iloc[similar_user, min_num_reviews] # get the ratings of the top 200 similar users for the movies with more than 100 reviews
    average_mov_rating = similar_user_ratings.mean(axis=0) # calculate the average rating of the movies by the 200 similar users
    top_n_indices = average_mov_rating.sort_values(ascending=False)[:top_n].index.tolist()  # get the indices of the top n movies with the highest average rating
    recommendations =  movies[['Clean Title', 'genres']].loc[top_n_indices]  # get the titles and genres of the top n movies with the highest average rating
    recommendations['Avg Rating'] = average_mov_rating[top_n_indices].tolist()  # add the average rating of the movies to the recommendations
    return recommendations

In [460]:
def  handle_user_collaborative(event):
    with  user_collaborative_list:
        user_collaborative_list.clear_output()
        user_index = int(user_index_input.value)
        top_n = int(top_n_input.value)
        recommendations = user_collaborative_recommender(user_index, top_n)
        display(recommendations)

user_collaborative_list = widgets.Output()    
        
user_index_input = widgets.IntText(
    description='User Index:'
)

user_index_input.style.description_width = '200px'
user_index_input.layout.width = '400px'

top_n_input = widgets.IntText(
    description='Number of Recommendations:'
)

top_n_input.style.description_width = '200px'
top_n_input.layout.width = '400px'

collaborative_button = widgets.Button(description='Generate Recommendations')
collaborative_button.layout.width = '200px'
collaborative_button.on_click(handle_user_collaborative)

user_collaborative_widgets = widgets.VBox([user_index_input, top_n_input, collaborative_button])

display(Markdown('## User-Based Collaborative Recommendations'), user_collaborative_widgets, user_collaborative_list)
#display( user_collaborative_widgets,  user_collaborative_list) # Display the widgets and the output widget

## User-Based Collaborative Recommendations

VBox(children=(IntText(value=0, description='User Index:', layout=Layout(width='400px'), style=DescriptionStyl…

Output()

### Item Based Collaborative Filtering

Going for item based collaborative filtering as it is a better approach as compared to user based collaborative filtering when it comes putting the input as we need to put in the user id in case of user-based whereas for item based collaborative filtering we can put in the movie title in the search engine and get our recommendations.

In [39]:
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [40]:
movie_id = 1 #toy story

In [41]:
# find similar users who watched the input movie and rated it higher than 4
similar_users_new = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()
similar_users_new

array([  2,   8,  17,  30,  38,  71,  72,  88,  96, 108, 109, 116, 122,
       147, 151, 156, 158, 165, 171, 187, 198, 213, 224, 278, 282, 286,
       289, 299, 303, 328, 335, 339, 347, 350, 387, 393, 399, 405, 409,
       432, 439, 440, 454, 455, 460, 462, 471, 484, 511, 531, 552, 555,
       560, 561, 571, 572, 575, 580, 589, 597, 627, 632, 637, 648, 650,
       662])

In [42]:
# find the other movies that the simialr users have watched and rated it higher than 4
similar_users_recs = ratings[(ratings["userId"].isin(similar_users_new)) & (ratings["rating"] > 4)]["movieId"]
similar_users_recs

113         1
117        17
120        36
123        62
130       608
         ... 
99024    2747
99027    2804
99028    2819
99029    2918
99033    3168
Name: movieId, Length: 5263, dtype: int64

In [43]:
similar_users_recs.value_counts()

1        66
260      31
318      30
1210     29
1198     28
         ..
27727     1
46970     1
50011     1
6586      1
2067      1
Name: movieId, Length: 1861, dtype: int64

In [44]:
similar_users_recs = similar_users_recs.value_counts() / len(similar_users_new)  #percentage of similar users that watched the movies

similar_users_recs = similar_users_recs[similar_users_recs > 0.10]  #getting the movies that were watched by more than 10% of the similar_users

similar_users_recs  #these are the movies that users similar to the user that watched the input movie has watched

1       1.000000
260     0.469697
318     0.454545
1210    0.439394
1198    0.424242
          ...   
1954    0.106061
17      0.106061
2542    0.106061
1259    0.106061
2174    0.106061
Name: movieId, Length: 166, dtype: float64

In [45]:
# in order to recommend movies to the user, we need to also find how much all the users have rated the movies that similar users have watched
all_users = ratings[(ratings["movieId"].isin(similar_users_recs.index)) & (ratings["rating"] > 4)]
all_users

Unnamed: 0,userId,movieId,rating,timestamp
12,1,260,4.5,1217895864
23,1,527,4.5,1217896341
27,1,593,5.0,1217895932
35,1,858,5.0,1217896428
36,1,912,5.0,1217897623
...,...,...,...,...
100354,668,1617,4.5,1134431064
100671,668,2396,4.5,1137826734
101315,668,3996,5.0,1215019197
103470,668,48516,5.0,1173424620


In [46]:
# find the percentage of all users that have watched the same movies as similar users and rated it higher than 4
all_users_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique())
all_users_recs

318     0.315705
296     0.266026
2571    0.245192
356     0.240385
527     0.224359
          ...   
2174    0.025641
1285    0.024038
2355    0.022436
736     0.020833
3033    0.017628
Name: movieId, Length: 166, dtype: float64

In [47]:
# concat the percenatges of users of similar users and all users that have watched the movies
rec_percentages = pd.concat([similar_users_recs, all_users_recs], axis=1)
rec_percentages.columns = ["similar", "all"]
rec_percentages

Unnamed: 0,similar,all
1,1.000000,0.105769
260,0.469697,0.216346
318,0.454545,0.315705
1210,0.439394,0.145833
1198,0.424242,0.165064
...,...,...
1954,0.106061,0.035256
17,0.106061,0.051282
2542,0.106061,0.044872
1259,0.106061,0.060897


we want movies that have a big difference between similar and all -> we don't want movies that are generally liked by everyone i.e `all` but we need movies that are specifically liked by `similar users` more..

In [48]:
# finding the ratio of similar and all percentages
rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]
rec_percentages

Unnamed: 0,similar,all,score
1,1.000000,0.105769,9.454545
260,0.469697,0.216346,2.171044
318,0.454545,0.315705,1.439778
1210,0.439394,0.145833,3.012987
1198,0.424242,0.165064,2.570168
...,...,...,...
1954,0.106061,0.035256,3.008264
17,0.106061,0.051282,2.068182
2542,0.106061,0.044872,2.363636
1259,0.106061,0.060897,1.741627


In [49]:
# sort teh ratio(score) in descending order
rec_percentages = rec_percentages.sort_values("score", ascending=False)
rec_percentages

Unnamed: 0,similar,all,score
1,1.000000,0.105769,9.454545
2355,0.136364,0.022436,6.077922
3033,0.106061,0.017628,6.016529
1223,0.181818,0.030449,5.971292
3114,0.303030,0.051282,5.909091
...,...,...,...
296,0.378788,0.266026,1.423877
6874,0.106061,0.075321,1.408124
4226,0.136364,0.097756,1.394933
2959,0.242424,0.179487,1.350649


In [50]:
rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")

Unnamed: 0,similar,all,score,movieId,title,genres,Adventure,Animation,Children,Comedy,...,Horror,Mystery,Sci-Fi,IMAX,War,Musical,Documentary,Western,Film-Noir,Clean Title
0,1.0,0.105769,9.454545,1,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,...,0,0,0,0,0,0,0,0,0,Toy Story 1995
1866,0.136364,0.022436,6.077922,2355,"Bug's Life, A (1998)","[Adventure, Animation, Children, Comedy]",1,1,1,1,...,0,0,0,0,0,0,0,0,0,Bugs Life A 1998
2423,0.106061,0.017628,6.016529,3033,Spaceballs (1987),"[Comedy, Sci-Fi]",0,0,0,1,...,0,0,1,0,0,0,0,0,0,Spaceballs 1987
984,0.181818,0.030449,5.971292,1223,"Grand Day Out with Wallace and Gromit, A (1989)","[Adventure, Animation, Children, Comedy, Sci-Fi]",1,1,1,1,...,0,0,1,0,0,0,0,0,0,Grand Day Out with Wallace and Gromit A 1989
2496,0.30303,0.051282,5.909091,3114,Toy Story 2 (1999),"[Adventure, Animation, Children, Comedy, Fantasy]",1,1,1,1,...,0,0,0,0,0,0,0,0,0,Toy Story 2 1999
1225,0.166667,0.030449,5.473684,1517,Austin Powers: International Man of Mystery (1...,"[Action, Adventure, Comedy]",1,0,0,1,...,0,0,0,0,0,0,0,0,0,Austin Powers International Man of Mystery 1997
626,0.227273,0.041667,5.454545,745,Wallace & Gromit: A Close Shave (1995),"[Animation, Children, Comedy]",0,1,1,1,...,0,0,0,0,0,0,0,0,0,Wallace Gromit A Close Shave 1995
621,0.106061,0.020833,5.090909,736,Twister (1996),"[Action, Adventure, Romance, Thriller]",1,0,0,0,...,0,0,0,0,0,0,0,0,0,Twister 1996
8599,0.151515,0.030449,4.976077,78499,Toy Story 3 (2010),"[Adventure, Animation, Children, Comedy, Fanta...",1,1,1,1,...,0,0,0,1,0,0,0,0,0,Toy Story 3 2010
1143,0.121212,0.025641,4.727273,1407,Scream (1996),"[Comedy, Horror, Mystery, Thriller]",0,0,0,1,...,1,1,0,0,0,0,0,0,0,Scream 1996


In [54]:
def item_collaborative_recommender(movie_id, nums):
    similar_users_new = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()  # find similar users who watched the input movie and rated it higher than 4
    similar_users_recs = ratings[(ratings["userId"].isin(similar_users_new)) & (ratings["rating"] > 4)]["movieId"]  # find the other movies that the simialr users have watched and rated it higher than 4
    
    similar_users_recs = similar_users_recs.value_counts() / len(similar_users_new)  #percentage of similar users that watched the movies
    similar_users_recs = similar_users_recs[similar_users_recs > 0.10] #getting the movies that were watched by more than 10% of the similar_users
    
    all_users = ratings[(ratings["movieId"].isin(similar_users_recs.index)) & (ratings["rating"] > 4)] # find the percentage of all users that have watched the same movies as similar users and rated it higher than 4
    all_users_recs = all_users["movieId"].value_counts() / len(all_users["userId"].unique()) # find the percentage of all users that have watched the same movies as similar users and rated it higher than 4
    
    rec_percentages = pd.concat([similar_users_recs, all_users_recs], axis=1)  # concat the percenatges of users of similar users and all users that have watched the movies
    rec_percentages.columns = ["similar", "all"]  # rename the columns
    
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]  # finding the ratio of similar and all percentages
    
    rec_percentages = rec_percentages.sort_values("score", ascending=False)  # sort the ratio(score) in descending order
    
    return rec_percentages.head(nums).merge(movies, left_index=True, right_on="movieId")[["title","genres","score"]]

In [55]:
item_collaborative_recommender(1, 10)

Unnamed: 0,title,genres,score
0,Toy Story (1995),"[Adventure, Animation, Children, Comedy, Fantasy]",9.454545
1866,"Bug's Life, A (1998)","[Adventure, Animation, Children, Comedy]",6.077922
2423,Spaceballs (1987),"[Comedy, Sci-Fi]",6.016529
984,"Grand Day Out with Wallace and Gromit, A (1989)","[Adventure, Animation, Children, Comedy, Sci-Fi]",5.971292
2496,Toy Story 2 (1999),"[Adventure, Animation, Children, Comedy, Fantasy]",5.909091
1225,Austin Powers: International Man of Mystery (1...,"[Action, Adventure, Comedy]",5.473684
626,Wallace & Gromit: A Close Shave (1995),"[Animation, Children, Comedy]",5.454545
621,Twister (1996),"[Action, Adventure, Romance, Thriller]",5.090909
8599,Toy Story 3 (2010),"[Adventure, Animation, Children, Comedy, Fanta...",4.976077
1143,Scream (1996),"[Comedy, Horror, Mystery, Thriller]",4.727273


In [53]:
def handle_item_collaborative(event):
    with recommendation_list:
        recommendation_list.clear_output()
        title = movie_input.value
        results = search_title(title)
        movie_id = results.iloc[0]["movieId"]
        nums = recommendation_num_input.value
        display(item_collaborative_recommender(movie_id, nums))

movie_input = widgets.Text(
    description='Movie Title:',
    disabled=False
)

recommendation_list  = widgets.Output()

movie_input.style.description_width = '200px'
movie_input.layout.width = '400px'

recommendation_num_input = widgets.IntText(
    description='Number of Recommendations:'
)

recommendation_num_input.style.description_width = '200px'
recommendation_num_input.layout.width = '400px'

item_collaborative_button = widgets.Button(description='Generate Recommendations')
item_collaborative_button.layout.width = '200px'
item_collaborative_button.on_click(handle_item_collaborative)

item_collaborative_widgets = widgets.VBox([movie_input, recommendation_num_input, item_collaborative_button])

display(Markdown('## Item-Based Collaborative Recommendations'), item_collaborative_widgets, recommendation_list)

## Item-Based Collaborative Recommendations

VBox(children=(Text(value='', description='Movie Title:', layout=Layout(width='400px'), style=TextStyle(descri…

Output()