# 1. Introduction 

The goal of this project is to develop a content-based recommendation engine for movies and TV shows on Netflix. I will compare two different methods:

1. Using *cast, director, country, rating and genres* as features.
2. Using the words in the movie/TV show *descriptions* as features.

# 2. Imports

In [1]:
import re
import ssl
import nltk
import threading
import numpy as np
import pandas as pd

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

try:
    _create_unverified_https_context = ssl._create_unverified_context
except AttributeError:
    pass
else:
    ssl._create_default_https_context = _create_unverified_https_context

nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Error loading stopwords: <urlopen error [Errno 8] nodename
[nltk_data]     nor servname provided, or not known>
[nltk_data] Error loading punkt: <urlopen error [Errno 8] nodename nor
[nltk_data]     servname provided, or not known>


False

# 3. Loading data

In [2]:
data = pd.read_csv('./netflix_titles.csv')
data.head()


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...


In [3]:
data.groupby('type').count()

Unnamed: 0_level_0,show_id,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Movie,5377,5377,5214,4951,5147,5377,5377,5372,5377,5377,5377
TV Show,2410,2410,184,2118,2133,2400,2410,2408,2410,2410,2410


In [4]:
data = data.dropna(subset=['cast', 'country', 'rating'])

# 4. Developing Recommendation Engine using *cast, director, country, rating and genres*

In [5]:
movies = data[data['type'] == 'Movie'].reset_index()
movies = movies.drop(['index', 'show_id', 'type', 'date_added', 'release_year', 'duration', 'description'], axis=1)
movies.head()

Unnamed: 0,title,director,cast,country,rating,listed_in
0,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,TV-MA,"Dramas, International Movies"
1,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,R,"Horror Movies, International Movies"
2,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,PG-13,"Action & Adventure, Independent Movies, Sci-Fi..."
3,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,PG-13,Dramas
4,122,Yasir Al Yasiri,"Amina Khalil, Ahmed Dawood, Tarek Lotfy, Ahmed...",Egypt,TV-MA,"Horror Movies, International Movies"


In [6]:
tv = data[data['type'] == 'TV Show'].reset_index()
tv = tv.drop(['index', 'show_id', 'type', 'date_added', 'release_year', 'duration', 'description'], axis=1)
tv.head()

Unnamed: 0,title,director,cast,country,rating,listed_in
0,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,TV-MA,"International TV Shows, TV Dramas, TV Sci-Fi &..."
1,46,Serdar Akar,"Erdal Beşikçioğlu, Yasemin Allen, Melis Birkan...",Turkey,TV-MA,"International TV Shows, TV Dramas, TV Mysteries"
2,1983,,"Robert Więckiewicz, Maciej Musiał, Michalina O...","Poland, United States",TV-MA,"Crime TV Shows, International TV Shows, TV Dramas"
3,​SAINT SEIYA: Knights of the Zodiac,,"Bryson Baugus, Emily Neves, Blake Shepard, Pat...",Japan,TV-14,"Anime Series, International TV Shows"
4,#blackAF,,"Kenya Barris, Rashida Jones, Iman Benson, Genn...",United States,TV-MA,TV Comedies


In [7]:
actors = []

for i in movies['cast']:
    actor = re.split(r', \s*', i)
    actors.append(actor)
    
flat_list = []
for sublist in actors:
    for item in sublist:
        flat_list.append(item)
        
actors_list = sorted(set(flat_list))

binary_actors = [[0] * 0 for i in range(len(set(flat_list)))]

for i in movies['cast']:
    k = 0
    for j in actors_list:
        if j in i:
            binary_actors[k].append(1.0)
        else:
            binary_actors[k].append(0.0)
        k+=1
        
binary_actors = pd.DataFrame(binary_actors).transpose()
        
directors = []

for i in movies['director']:
    if pd.notna(i):
        director = re.split(r', \s*', i)
        directors.append(director)
    
flat_list2 = []

for sublist in directors:
    for item in sublist:
        flat_list2.append(item)
        
directors_list = sorted(set(flat_list2))

binary_directors = [[0] * 0 for i in range(len(set(flat_list2)))]

for i in movies['director']:
    k = 0
    for j in directors_list:
        if pd.isna(i):
            binary_directors[k].append(0.0)
        elif j in i:
            binary_directors[k].append(1.0)
        else:
            binary_directors[k].append(0.0)
        k+=1
        
binary_directors = pd.DataFrame(binary_directors).transpose()
        
countries = []

for i in movies['country']:
    country = re.split(r', \s*', i)
    countries.append(country)
    
flat_list3 = []

for sublist in countries:
    for item in sublist:
        flat_list3.append(item)
        
countries_list = sorted(set(flat_list3))

binary_countries = [[0] * 0 for i in range(len(set(flat_list3)))]

for i in movies['country']:
    k = 0
    for j in countries_list:
        if j in i:
            binary_countries[k].append(1.0)
        else:
            binary_countries[k].append(0.0)
        k+=1
        
binary_countries = pd.DataFrame(binary_countries).transpose()

genres = []

for i in movies['listed_in']:
    genre = re.split(r', \s*', i)
    genres.append(genre)
    
flat_list4 = []

for sublist in genres:
    for item in sublist:
        flat_list4.append(item)
        
genres_list = sorted(set(flat_list4))

binary_genres = [[0] * 0 for i in range(len(set(flat_list4)))]

for i in movies['listed_in']:
    k = 0
    for j in genres_list:
        if j in i:
            binary_genres[k].append(1.0)
        else:
            binary_genres[k].append(0.0)
        k+=1
        
binary_genres = pd.DataFrame(binary_genres).transpose()

ratings = []

for i in movies['rating']:
    ratings.append(i)

ratings_list = sorted(set(ratings))

binary_ratings = [[0] * 0 for i in range(len(set(ratings_list)))]

for i in movies['rating']:
    k = 0
    for j in ratings_list:
        if j in i:
            binary_ratings[k].append(1.0)
        else:
            binary_ratings[k].append(0.0)
        k+=1
        
binary_ratings = pd.DataFrame(binary_ratings).transpose()

In [8]:
binary = pd.concat([binary_actors, binary_directors, binary_countries, binary_genres], axis=1,ignore_index=True)
binary

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26570,26571,26572,26573,26574,26575,26576,26577,26578,26579
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4756,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4757,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4758,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
4759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
actors2 = []

for i in tv['cast']:
    actor2 = re.split(r', \s*', i)
    actors2.append(actor2)
    
flat_list5 = []

for sublist in actors2:
    for item in sublist:
        flat_list5.append(item)
        
actors_list2 = sorted(set(flat_list5))

binary_actors2 = [[0] * 0 for i in range(len(set(flat_list5)))]

for i in tv['cast']:
    k = 0
    for j in actors_list2:
        if j in i:
            binary_actors2[k].append(1.0)
        else:
            binary_actors2[k].append(0.0)
        k+=1
        
binary_actors2 = pd.DataFrame(binary_actors2).transpose()

countries2 = []

for i in tv['country']:
    country2 = re.split(r', \s*', i)
    countries2.append(country2)
    
flat_list6 = []

for sublist in countries2:
    for item in sublist:
        flat_list6.append(item)
        
countries_list2 = sorted(set(flat_list6))

binary_countries2 = [[0] * 0 for i in range(len(set(flat_list6)))]

for i in tv['country']:
    k = 0
    for j in countries_list2:
        if j in i:
            binary_countries2[k].append(1.0)
        else:
            binary_countries2[k].append(0.0)
        k+=1
        
binary_countries2 = pd.DataFrame(binary_countries2).transpose()

genres2 = []

for i in tv['listed_in']:
    genre2 = re.split(r', \s*', i)
    genres2.append(genre2)
    
flat_list7 = []
for sublist in genres2:
    for item in sublist:
        flat_list7.append(item)
        
genres_list2 = sorted(set(flat_list7))

binary_genres2 = [[0] * 0 for i in range(len(set(flat_list7)))]

for i in tv['listed_in']:
    k = 0
    for j in genres_list2:
        if j in i:
            binary_genres2[k].append(1.0)
        else:
            binary_genres2[k].append(0.0)
        k+=1
        
binary_genres2 = pd.DataFrame(binary_genres2).transpose()

ratings2 = []

for i in tv['rating']:
    ratings2.append(i)

ratings_list2 = sorted(set(ratings2))

binary_ratings2 = [[0] * 0 for i in range(len(set(ratings_list2)))]

for i in tv['rating']:
    k = 0
    for j in ratings_list2:
        if j in i:
            binary_ratings2[k].append(1.0)
        else:
            binary_ratings2[k].append(0.0)
        k+=1
        
binary_ratings2 = pd.DataFrame(binary_ratings2).transpose()

In [10]:
binary2 = pd.concat([binary_actors2, binary_countries2, binary_genres2], axis=1, ignore_index=True)
binary2

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12741,12742,12743,12744,12745,12746,12747,12748,12749,12750
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1886,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
1888,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1889,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


In [11]:
def recommender(current_search):
    cs_list = []
    binary_list = []
    if current_search in movies['title'].values:
        idx = movies[movies['title'] == current_search].index.item()
        for i in binary.iloc[idx]:
            binary_list.append(i)
        point1 = np.array(binary_list).reshape(1, -1)
        point1 = [val for sublist in point1 for val in sublist]
        for j in range(len(movies)):
            binary_list2 = []
            for k in binary.iloc[j]:
                binary_list2.append(k)
            point2 = np.array(binary_list2).reshape(1, -1)
            point2 = [val for sublist in point2 for val in sublist]
            dot_product = np.dot(point1, point2)
            norm_1 = np.linalg.norm(point1)
            norm_2 = np.linalg.norm(point2)
            cos_sim = dot_product / (norm_1 * norm_2)
            cs_list.append(cos_sim)
        movies_copy = movies.copy()
        movies_copy['cos_sim'] = cs_list
        results = movies_copy.sort_values('cos_sim', ascending=False)
        results = results[results['title'] != current_search]
        top_results = results.head(5)
        # top_results = pd.concat([results, results2])
        # top_results = top_results.sort_values('cos_sim', ascending=False)
        #top_results = top_results.head(5)
        return(top_results)
    elif current_search in tv['title'].values:
        idx = tv[tv['title'] == current_search].index.item()
        for i in binary2.iloc[idx]:
            binary_list.append(i)
        point1 = np.array(binary_list).reshape(1, -1)
        point1 = [val for sublist in point1 for val in sublist]
        for j in range(len(tv)):
            binary_list2 = []
            for k in binary2.iloc[j]:
                binary_list2.append(k)
            point2 = np.array(binary_list2).reshape(1, -1)
            point2 = [val for sublist in point2 for val in sublist]
            dot_product = np.dot(point1, point2)
            norm_1 = np.linalg.norm(point1)
            norm_2 = np.linalg.norm(point2)
            cos_sim = dot_product / (norm_1 * norm_2)
            cs_list.append(cos_sim)
        tv_copy = tv.copy()
        tv_copy2 = tv.copy()    
        tv_copy['cos_sim'] = cs_list
        tv_copy2['cos_sim'] = cs_list    #
        results = tv_copy.sort_values('cos_sim', ascending=False)
        results = results[results['title'] != current_search]  
        top_results = results.head(5)
        # top_results = pd.concat([results, results2])
        # top_results = top_results.sort_values('cos_sim', ascending=False)
        # top_results = top_results.head(5)
        return(top_results)
    else:
        return("Title not in dataset. Please check spelling.")

## Movie Recommender Handler

In [12]:
# Seun's updates start here.

# import threading
# # import thread

# list_results = []

# class threader(threading.Thread):
#   def __init__(self, title):
#     threading.Thread.__init__(self)
#     self.title = title

#   def run(self):
#     print("Starting recommender for " + self.title + "\n")
#     result = recommender(self.title)
#     list_results.append(result)

# def top_movie_recommender(*args):
#   # results = recommender(current)
#   list_threads = []
  
#   for arg in args:
#     current_recommender = threader(arg)
#     # results = thread.
#     list_threads.append(current_recommender)
#     current_recommender.start()

#   for thread in list_threads:
#     thread.join()

#   for thread in list_results:
#     results = pd.concat([results, thread])
  
#   results = results.sort_values('cos_sim', ascending=False)
#   results = results.head(5)
#   return(results)

# Nicholas's updates start here:
def top_movie_recommender(current, *args):
  results = recommender(current)
  for arg in args:
    temp_results = recommender(arg)
    results = pd.concat([results, temp_results])

  results = results.sort_values('cos_sim', ascending=False)
  results = results.head(5)
  return(results)


## TV Show Recommender Handler

In [13]:
def top_tv_recommender(current, *args):
  results = recommender(current)
  for arg in args:
    temp_results = recommender(arg)
    results = pd.concat([results, temp_results])

  results = results.sort_values('cos_sim', ascending=False)
  results = results.head(5)
  return(results)


## 4.1. Recommending Movies

In [14]:
top_movie_recommender('The Conjuring', 'Wild Child', 'Creep', 'Desolation')

Starting recommender for The Conjuring
Starting recommender for Wild Child


Starting recommender for Creep

Starting recommender for Desolation



KeyboardInterrupt: 

In [None]:
# recommender("Child's Play")

In [None]:
top_movie_recommender('Charlie and the Chocolate Factory', "Dr. Seuss' The Cat in the Hat")

In [None]:
# recommender('Wild Child')

In [None]:
# recommender("Dr. Seuss' The Cat in the Hat")

In [None]:
# recommender('Hook')

In [None]:
# recommender('Holidate')

## 4.2. Recommending TV shows

In [None]:
top_tv_recommender('After Life', 'Friends', 'Gilmore Girls')

In [None]:
# recommender('Anne with an E')

In [None]:
# recommender('Gilmore Girls')

In [None]:
# recommender('Friends')

In [None]:
# recommender('Love on the Spectrum')

In [None]:
recommender('13 Reasons Why', 'Stranger Things')

In [None]:
# recommender('Derry Girls')

In [None]:
# recommender('Breaking Bad')

In [None]:
# recommender('Stranger Things')

# 5. Developing Recommendation Engine using *Movie/TV show descriptions*

In [None]:
movies_des = data[data['type'] == 'Movie'].reset_index()
movies_des = movies_des[['title', 'description']]
movies_des.head()

In [None]:
tv_des = data[data['type'] == 'TV Show'].reset_index()
tv_des = tv_des[['title', 'description']]
tv_des.head()

In [None]:
filtered_movies = []
movies_words = []

for text in movies_des['description']:
    text_tokens = word_tokenize(text)
    tokens_without_sw = [word.lower() for word in text_tokens if not word in stopwords.words()]
    movies_words.append(tokens_without_sw)
    filtered = (" ").join(tokens_without_sw)
    filtered_movies.append(filtered)

movies_words = [val for sublist in movies_words for val in sublist]
movies_words = sorted(set(movies_words))
movies_des['description_filtered'] = filtered_movies
movies_des.head()

In [None]:
filtered_tv = []
tv_words = []

for text in tv_des['description']:
    text_tokens = word_tokenize(text)
    tokens_without_sw = [word.lower() for word in text_tokens if not word in stopwords.words()]
    tv_words.append(tokens_without_sw)
    filtered = (" ").join(tokens_without_sw)
    filtered_tv.append(filtered)

tv_words = [val for sublist in tv_words for val in sublist]
tv_words = sorted(set(tv_words))
tv_des['description_filtered'] = filtered_tv
tv_des.head()

In [None]:
movie_word_binary = [[0] * 0 for i in range(len(set(movies_words)))]

for des in movies_des['description_filtered']:
    k = 0
    for word in movies_words:
        if word in des:
            movie_word_binary[k].append(1.0)
        else:
            movie_word_binary[k].append(0.0)
        k+=1
        
movie_word_binary = pd.DataFrame(movie_word_binary).transpose()

In [None]:
tv_word_binary = [[0] * 0 for i in range(len(set(tv_words)))]

for des in tv_des['description_filtered']:
    k = 0
    for word in tv_words:
        if word in des:
            tv_word_binary[k].append(1.0)
        else:
            tv_word_binary[k].append(0.0)
        k+=1
        
tv_word_binary = pd.DataFrame(tv_word_binary).transpose()

In [None]:
def recommender2(search):
    cs_list = []
    binary_list = []
    if search in movies_des['title'].values:
        idx = movies_des[movies_des['title'] == search].index.item()
        for i in movie_word_binary.iloc[idx]:
            binary_list.append(i)
        point1 = np.array(binary_list).reshape(1, -1)
        point1 = [val for sublist in point1 for val in sublist]    
        for j in range(len(movies_des)):
            binary_list2 = []
            for k in movie_word_binary.iloc[j]:
                binary_list2.append(k)
            point2 = np.array(binary_list2).reshape(1, -1)
            point2 = [val for sublist in point2 for val in sublist]
            dot_product = np.dot(point1, point2)
            norm_1 = np.linalg.norm(point1)
            norm_2 = np.linalg.norm(point2)
            cos_sim = dot_product / (norm_1 * norm_2)
            cs_list.append(cos_sim)
        movies_copy = movies_des.copy()
        movies_copy['cos_sim'] = cs_list
        results = movies_copy.sort_values('cos_sim', ascending=False)
        results = results[results['title'] != search]    
        top_results = results.head(5)
        return(top_results)
    elif search in tv_des['title'].values:
        idx = tv_des[tv_des['title'] == search].index.item()
        for i in tv_word_binary.iloc[idx]:
            binary_list.append(i)
        point1 = np.array(binary_list).reshape(1, -1)
        point1 = [val for sublist in point1 for val in sublist]
        for j in range(len(tv)):
            binary_list2 = []
            for k in tv_word_binary.iloc[j]:
                binary_list2.append(k)
            point2 = np.array(binary_list2).reshape(1, -1)
            point2 = [val for sublist in point2 for val in sublist]
            dot_product = np.dot(point1, point2)
            norm_1 = np.linalg.norm(point1)
            norm_2 = np.linalg.norm(point2)
            cos_sim = dot_product / (norm_1 * norm_2)
            cs_list.append(cos_sim)
        tv_copy = tv_des.copy()
        tv_copy['cos_sim'] = cs_list
        results = tv_copy.sort_values('cos_sim', ascending=False)
        results = results[results['title'] != search]    
        top_results = results.head(5)
        return(top_results)
    else:
        return("Title not in dataset. Please check spelling.")

## 5.1. Recommending Movies

In [None]:
pd.options.display.max_colwidth = 300
recommender2('The Conjuring')

In [None]:
recommender2("Child's Play")

In [None]:
recommender2('Charlie and the Chocolate Factory')

In [None]:
recommender2('Wild Child')

In [None]:
recommender2("Dr. Seuss' The Cat in the Hat")

In [None]:
recommender2('Hook')

In [None]:
recommender2('Holidate')

## 5.2. Recommending TV shows

In [None]:
recommender2('After Life')

In [None]:
recommender2('Anne with an E')

In [None]:
recommender2('Gilmore Girls')

In [None]:
recommender2('Friends')

In [None]:
recommender2('Love on the Spectrum')

In [None]:
recommender2('13 Reasons Why')

In [None]:
recommender2('Derry Girls')

In [None]:
recommender2('Breaking Bad')

In [None]:
recommender2('Stranger Things')

# 6. Conclusion

Taking the cast, director, country, rating and genres as features rather than the descriptions was definitely the better option. Some of the recommendations by descriptions are good such as the 'Tinker Bell and the Legend of the NeverBeast' recommendation for 'Hook' and the 'Extracurricular' recommendation for '13 Reasons Why', but most of them are from completely different genres with very little in common besides a few key words.