## **Popularity-based Recommendations**

**This model recommends movies at the genre level based on their popularity.**


It takes the following input from the user and then provides suitable movie recommendations:
- Genre
- Minimum threshold reviews
- Number of recommendations

In [1]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors
from flask import Flask, request, render_template
from sklearn.metrics.pairwise import cosine_similarity
import re

In [2]:
#loading the movies dataset
movies = pd.read_csv("movie_prep.csv")
movies.head()

Unnamed: 0.1,Unnamed: 0,movieId,title,genres
0,0,1,Toy Story (1995),adventure animation children comedy fantasy
1,1,2,Jumanji (1995),adventure children fantasy
2,2,3,Grumpier Old Men (1995),comedy romance
3,3,4,Waiting to Exhale (1995),comedy drama romance
4,4,5,Father of the Bride Part II (1995),comedy


In [3]:
#loading the ratings dataset
ratings = pd.read_csv("ratings.csv")
ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [4]:
#merging the movies and ratings datasets according to the movieid column
movie_ratings = pd.merge(movies, ratings, on='movieId')
movie_ratings.head()

Unnamed: 0.1,Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,0,1,Toy Story (1995),adventure animation children comedy fantasy,2,5.0,859046895
1,0,1,Toy Story (1995),adventure animation children comedy fantasy,5,4.0,1303501039
2,0,1,Toy Story (1995),adventure animation children comedy fantasy,8,5.0,858610933
3,0,1,Toy Story (1995),adventure animation children comedy fantasy,11,4.0,850815810
4,0,1,Toy Story (1995),adventure animation children comedy fantasy,14,4.0,851766286


In [5]:
#for widgets
from difflib import get_close_matches
import ipywidgets as widgets

#defining widgets for genre, minimum rating threshold, and number of recommendations
genre_widget = widgets.Text(
    placeholder='Enter the genre',
    description='Genre:'
)

threshold_widget = widgets.FloatSlider(
    value=100,
    min=10,
    max=500,
    step=10,
    description='Min. Rating Threshold:',
    readout_format='.0f',
)

n_recommendations_widget = widgets.IntSlider(
    value=5,
    min=1,
    max=20,
    step=1,
    description='Num. Recommendations:',
    readout_format='.0f',
)

In [6]:
def get_recommendations():
    # Get the current value of the widgets
    genre_input = genre_widget.value
    threshold = threshold_widget.value
    n_recommendations = n_recommendations_widget.value

    # Find the genre with the highest number of matches with the user input
    genres = movies['genres'].str.split('|').explode().unique()
    genre_matches = pd.Series([len(set(genre_input.split()) & set(g.split())) for g in genres], index=genres)
    matched_genre = genre_matches.idxmax()

    from difflib import get_close_matches

    # Select the word from the matched genre that matches the most with the user input
    matched_words = matched_genre.split()
    matched_word = get_close_matches(genre_input, matched_words)[0]

    print(matched_word)
    print('\n')

    # Get the movies in the matched genre with at least the minimum number of reviews
    matched_movies = movie_ratings.loc[movie_ratings['genres'].str.contains(matched_word, regex=False)]
    matched_movies = matched_movies.groupby(['movieId', 'title']).filter(lambda x: len(x) >= threshold)

    # Calculate the average rating for each movie in the matched genre
    avg_ratings = matched_movies.groupby(['movieId', 'title']).agg({'rating': 'mean'})
    avg_ratings.head()

    # Sort the movies by popularity (i.e., number of ratings) and average rating
    popularity = matched_movies.groupby(['movieId', 'title']).size().reset_index(name='num_ratings')
    popularity = pd.merge(popularity, avg_ratings, on=['movieId', 'title'])
    popularity = popularity.sort_values(by=['num_ratings', 'rating'], ascending=False)
    
    # Get the top N recommendations
    recommendations = popularity.head(n_recommendations)

    # Print the top N recommendations
    print(recommendations[['title', 'num_ratings', 'rating']])

In [7]:
button = widgets.Button(description='Get Recommendations')
output = widgets.Output()

def on_button_click(_):
    with output:
        output.clear_output()
        get_recommendations()

button.on_click(on_button_click)
display(genre_widget, threshold_widget, n_recommendations_widget, button, output)

Text(value='', description='Genre:', placeholder='Enter the genre')

FloatSlider(value=100.0, description='Min. Rating Threshold:', max=500.0, min=10.0, readout_format='.0f', step…

IntSlider(value=5, description='Num. Recommendations:', max=20, min=1, readout_format='.0f')

Button(description='Get Recommendations', style=ButtonStyle())

Output()

## **Content-based Recommendations** 

**This model recommends the top movies based on similar genre.**

It takes the following input from the user and then provides suitable movie recommendations:

- Movie title (to which the similar movies you are looking for) 

In [8]:
#importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors
from flask import Flask, request, render_template
from sklearn.metrics.pairwise import cosine_similarity
import re

In [9]:
#loading the movies dataset
movies = pd.read_csv('movies.csv')
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [10]:
#data pre-processing
def clear(data):
    result=[]
    result.append(re.sub("[^a-z1-9]","",data))
    return result

def remove_spaces(row):
    x=[]
    for i in row:
        i=i.replace(' ','')
        i=i.lower()
        x.append(i)
    return(x)

def remove_divi(row):
    x=[]
    for i in row:
        i=i.replace('|',' ')
        i=i.lower()
        x.append(i)
    return(x)

def clear2(data):
    result=[]
    result.append(re.sub("[^a-zA-Z1-9]","",data))
    return result

In [11]:
movies.rename(columns={'movieId':'index'},inplace=True)
movies['original_title'] = movies.loc[:, 'title']
movies

Unnamed: 0,index,title,genres,original_title
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,Toy Story (1995)
1,2,Jumanji (1995),Adventure|Children|Fantasy,Jumanji (1995)
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men (1995)
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,Waiting to Exhale (1995)
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II (1995)
...,...,...,...,...
10324,146684,Cosmic Scrat-tastrophe (2015),Animation|Children|Comedy,Cosmic Scrat-tastrophe (2015)
10325,146878,Le Grand Restaurant (1966),Comedy,Le Grand Restaurant (1966)
10326,148238,A Very Murray Christmas (2015),Comedy,A Very Murray Christmas (2015)
10327,148626,The Big Short (2015),Drama,The Big Short (2015)


In [12]:
movies['title'] = movies['title'].apply(clear2).str.join("")
movies['title'] = movies['title'].str.lower()
movies['genres'] = movies['genres'].str.lower()
movies['genres'] = movies['genres'].apply(remove_divi).str.join('')

movies.head()

Unnamed: 0,index,title,genres,original_title
0,1,toystory1995,adventure animation children comedy fantasy,Toy Story (1995)
1,2,jumanji1995,adventure children fantasy,Jumanji (1995)
2,3,grumpieroldmen1995,comedy romance,Grumpier Old Men (1995)
3,4,waitingtoexhale1995,comedy drama romance,Waiting to Exhale (1995)
4,5,fatherofthebridepartii1995,comedy,Father of the Bride Part II (1995)


In [13]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import NearestNeighbors
from flask import Flask, request, render_template
from sklearn.metrics.pairwise import cosine_similarity
import re

In [14]:
def get_title(index):
	return movies[movies.index == index]["original_title"].values[0]

def get_index(title):
	return movies[movies.title == title]["index"].values[0]

In [15]:
data = movies

In [16]:
cv = CountVectorizer()
count_matrix = cv.fit_transform(movies['genres'])
    # create similarity score matrix
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(count_matrix)

In [24]:
def recommend(choice):
    
    choice = re.sub("[^a-zA-Z1-9]", "", choice).lower() 

    # If movie name matches exactly with the name of a movie in the dataset
    # then this block will be executed and return the movie list.
    
    if choice in data['title'].values:
        choice_index = data[data['title'] == choice].index.values[0]
        distances, indices = model.kneighbors(
            count_matrix[choice_index], n_neighbors=5)
        movie_list = [] 
        for i in indices.flatten():
            movie_list.append(data[data.index == i]
                             ['original_title'].values[0].title())
        return movie_list

    # If no movie name matches exactly with the movies in the dataset then, this 
    # block of code finds the movie name which highly matches with movie name entered 
    # by the user and and returns the movie list.

    elif (data['title'].str.contains(choice).any() == True):

        # getting list of similar movie names as choice.
        similar_names = list(str(s) for s in data['title'] if choice in str(s))
        # sorting to get the most matched movie name.
        similar_names.sort()
        # taking the first movie from the sorted similar movie name.
        new_choice = similar_names[0]
        print(new_choice)
        # getting index of the choice from the dataset
        choice_index = data[data['title'] == new_choice].index.values[0]
        # getting distances and indices of 13 mostly related movies with the choice.
        distances, indices = model.kneighbors(
            count_matrix[choice_index], n_neighbors=9)
        # creating movie list and cover images list
        movie_list = []
        photos = []
        for i in indices.flatten():
            movie_list.append(data[data.index == i]
                             ['original_title'].values[0].title())
        return movie_list

    # If no name matches then this else statement will be executed.
    else:
        return "opps! movie not found in our database", "Try again with another"

In [26]:
recommend("toy story")

toystory1995


['Asterix And The Vikings (Astérix Et Les Vikings) (2006)',
 "Emperor'S New Groove, The (2000)",
 'Wild, The (2006)',
 'Toy Story (1995)',
 'Monsters, Inc. (2001)',
 'Tale Of Despereaux, The (2008)',
 'Antz (1998)',
 'Boxtrolls, The (2014)',
 'Adventures Of Rocky And Bullwinkle, The (2000)']

## **Collaborative-based Recommendations**

**This model recommends the top N movies based on K similar users for a target user.**

It takes the following input from the user and then provides suitable movie recommendations:

- UserID
- Similar users threshold
- Number of recommendations

In [27]:
import pandas as pd
import io
import numpy as np
from scipy.spatial.distance import cosine

In [28]:
#loading the movie ratings dataset
data =  pd.read_csv("ratings.csv")
data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,16,4.0,1217897793
1,1,24,1.5,1217895807
2,1,32,4.0,1217896246
3,1,47,4.0,1217896556
4,1,50,4.0,1217896523


In [29]:
#creating a pivot table with users as rows and movies as columns
ratings = data.pivot_table(index=['userId'], columns=['movieId'], values='rating')
ratings.head()

movieId,1,2,3,4,5,6,7,8,9,10,...,144482,144656,144976,146344,146656,146684,146878,148238,148626,149532
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,5.0,,2.0,,3.0,,,,,,...,,,,,,,,,,
3,,,,,3.0,,3.0,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,4.0,,,,,,,,,,...,,,,,,,,,,


In [30]:
#defining a function to calculate the similarity between two users based on their ratings
def user_similarity(user1, user2):
    common_items = ratings.loc[user1].dropna().index.intersection(ratings.loc[user2].dropna().index)
    if len(common_items) == 0:
        return 0
    else:
        return 1 - cosine(ratings.loc[user1, common_items], ratings.loc[user2, common_items])

In [31]:
movies = pd.read_csv("movies.csv")
movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [32]:
def recommend_movies(user, k, n):
    similarities = [(user_similarity(user, other_user), other_user) for other_user in ratings.index if other_user != user]
    similarities.sort(reverse=True)
    similarities = similarities[:k]
    sim_users = [other_user for similarity, other_user in similarities]
    recommendations = ratings.loc[sim_users].mean().sort_values(ascending=False)[:n]
    recommended_movies = []
    for iid in recommendations.index:
        movie_id = iid
        movie_title = movies.loc[movies['movieId'] == iid, 'title'].iloc[0]
        movie_rating = round(recommendations.loc[iid], 2)
        recommended_movies.append((movie_id, movie_title, movie_rating))
    return recommended_movies

In [33]:
#testing the recommender system for user 1 with k=100 and N=5
target_user=2
k=100
numberofrec=5
recommendations= recommend_movies(target_user, k, numberofrec)

i=1

for movie in recommendations:
    print(i)
    print("Movie Title:", movie[1])
    print("Movie Rating:", movie[2])
    print()
    i+=1

1
Movie Title: Air (2015)
Movie Rating: 5.0

2
Movie Title: Frailty (2001)
Movie Rating: 5.0

3
Movie Title: Notorious (1946)
Movie Rating: 5.0

4
Movie Title: All About Eve (1950)
Movie Rating: 5.0

5
Movie Title: Little Princess, The (1939)
Movie Rating: 5.0

