In [1]:
# Instructions for how to use this notebook

# The first time you launch this notebook you can simply click on the "Cell" option at the top and click "Run All"
# You will be prompted for the inputs for the recommender system
# After you are all done providing the inputs, please wait approximately 20 seconds for the code to compile
# The initial compliation takes time to compute the feature vectors, but only needs to do that computation once
# Once all the cells have been run, you can scroll to the buttom to see your results

# If you would like to try another input simply run the cell labeled  "# User input section again"
# Once that is done you can scroll to the bottom and run the cell labeled "# Output section"
# You will see the results are much faster now

In [2]:
# User input section

print("Please enter a Netflix title")
input_title = input()
print("Optional Filter - You can enter a genre. Press enter to skip")
input_genre = input()
print("Optional Filter - You can enter a cast member name. This should be one word. Press enter to skip")
input_cast = input()
# print("Optional - If you would like to see a dataframe containing details of your recommended titles type anything. Press enter to skip")
# show_recommended_df = input()

Please enter a Netflix title
jaws
Optional Filter - You can enter a genre. Press enter to skip

Optional Filter - You can enter a cast member name. This should be one word. Press enter to skip



In [3]:
import pandas as pd
import numpy as np
import os
import ast
import re


prep_df = pd.read_csv("prep_movie_list.csv", sep=',', encoding='latin-1')

In [4]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

CV= CountVectorizer(max_features=5000, stop_words="english")
vector=CV.fit_transform(prep_df['listtags']).toarray()

similarity=cosine_similarity(vector)

     


In [5]:
#Recommender function
reco_list = []
def Recommend_Movies(movie):
    global reco_list
    try:
        movie_index=prep_df[prep_df['title']==movie].index[0]
        distance=similarity[movie_index]
        movies_list=sorted(list(enumerate(distance)), reverse=True, key=lambda x : x[1])[1:11]
        
        if len(movies_list) > 0:
            for i in movies_list:
                
                t=prep_df.iloc[i[0]].title
                if t == movie or t in reco_list:
                    pass
                    
                else:
                    reco_list.append(t)
                   
        else:
            print('Movie '+ movie +' does not have any matching recommendation. Please try another one.')
    except:
        print('Movie '+ movie +' not present in Netflix. Please try another one.')


In [8]:
# Output Section


'''
parameter 1:
    more than one word: movie title
'|' is the delimiter
parameter 2:
    single word: one genre of the movie
'|' is the delimiter
parameter 3:
    count of records to show > default 10
'''
errorKey = 0
inp_param = f"{input_title.upper()}|{input_genre.lower()}|{input_cast.title()}"
param1 = str(inp_param.split('|',1)[0])
param2 = inp_param.split('|',1)[1].split('|',1)[0]
param3 = inp_param.rsplit('|',1)[1]


reco_list = []
Recommend_Movies(param1)

    
recolistdf = pd.DataFrame(reco_list)
recolistdf = recolistdf.rename(columns={0: 'title'})


recolistdf = recolistdf.merge(prep_df[['title','cast','director','imdb_votes','keywords','genre','imdb_score','popularity_score','description','tags']], on  = "title",how="left")
recolistdf = recolistdf.drop_duplicates(subset = ['title'],keep = 'last').reset_index(drop = True)

recolistdf[['imdb_score']] = recolistdf[['imdb_score']].fillna(value=0)
recolistdf[['imdb_votes']] = recolistdf[['imdb_votes']].fillna(value=0)
recolistdf[['popularity_score']] = recolistdf[['popularity_score']].fillna(value=0)


recolistdf['imdb_score'] = recolistdf['imdb_score'].astype(float)
recolistdf['popularity_score'] = recolistdf['popularity_score'].astype(float)
recolistdf['composite_score'] = recolistdf['imdb_score']*0.6+recolistdf['popularity_score']*0.4

recolistdf = recolistdf.sort_values(by=['composite_score','title'], ascending=False)

if len(param2) > 0:
    recolistdf = recolistdf.loc[recolistdf['genre'].str.contains(param2.lower())]
if len(param3) > 0:
    recolistdf = recolistdf.loc[recolistdf['cast'].str.lower().str.contains(param3.lower())]



    
print("Here are your recommended titles based on your search criteria\n")
netflix_titles = pd.read_csv("10k_titles.csv")
for i, movie in enumerate(recolistdf['title']):
    print(f"{i+1}. {movie.lower().title()}")
    if netflix_titles.loc[netflix_titles['title'] == movie.lower().title()]['overview'].empty:
        print("Title overview unavailable at this time.")
    else:
        print(re.sub("[^A-Za-z ]","",str(netflix_titles.loc[netflix_titles['title'] == movie.lower().title()]['overview'].values)))
    print('\n')

resultdf = recolistdf[['title', 'cast', 'genre', 'composite_score']].copy()
resultdf

Here are your recommended titles based on your search criteria

1. The Conjuring
Paranormal investigators Ed and Lorraine Warren work to help a family terrorized by a dark presence in their farmhouse Forced to confront a powerful entity the Warrens find themselves caught in the most terrifying case of their lives


2. Prey
When danger threatens her camp the fierce and highly skilled Comanche warrior Naru sets out to protect her people But the prey she stalks turns out to be a highly evolved alien predator with a technically advanced arsenal An American family on holiday in Africa becomes lost in a game reserve and stalked by vicious killer lions


3. The Green Inferno
A group of student activists travel from New York City to the Amazon to save the rainforest  However once they arrive in this vast green landscape they soon discover that they are not alone and that no good deed goes unpunished


4. Jaws 2
Police chief Brody must protect the citizens of Amity after a second monstrous shar

Unnamed: 0,title,cast,genre,composite_score
3,THE CONJURING,"['Vera', 'Farmiga,', 'Patrick', 'Wilson,', 'Li...","['', 'thriller', 'horror', 'thriller', 'horror...",73.172
4,PREY,"['David', 'Kross,', 'Hanno', 'Koffler,', 'Mari...","['', 'drama', 'thriller', 'thriller', 'horror'...",33.1756
1,THE GREEN INFERNO,"['Ignacia', 'Allamand,', 'Daryl', 'Sabara,', '...","['', 'horror', 'thriller', 'adventure', 'thril...",17.61
0,JAWS 2,"['Roy', 'Scheider,', 'Lorraine', 'Gary,', 'Mur...","['', 'thriller', 'horror', '', 'thriller', 'ho...",6.7168
2,JAWS: THE REVENGE,"['Lorraine', 'Gary,', 'Lance', 'Guest,', 'Mari...","['', 'thriller', 'adventure', '']",0.0
