# Content based filtering

Metadata based recommender. Draws descriptive information from metadata of movie and finds similar movies based on this metadata. Does NOT factor ratings into recommendation (that is done by the ensemble recommender).

In [180]:
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from ast import literal_eval
import joblib
from sklearn.metrics.pairwise import linear_kernel

### Helper functions

In [109]:
def tryconvert(value):
    try:
        return int(value)
    except:
        return -1

In [111]:
def find_director(crew_list):
    for member in crew_list:
        if member['job'] == 'Director':
            return re.sub(r"\s+", "", member['name'])
    return ''

In [140]:
def add_director(movie):
    length = len(movie['description'])//50
    for i in range(length):
        movie['description'] = movie['description'] + ' ' + movie['director']
    return movie

In [141]:
links_small = pd.read_csv('../links_small.csv')['tmdbId']
links_small = links_small[links_small.notnull()].astype(int)
directors = pd.read_csv('../credits.csv')
directors = directors[directors['id'].isin(links_small)][['crew', 'id']]
directors['director'] = directors.apply(lambda x: find_director(literal_eval(x['crew'])), axis = 1)
directors = directors[['id', 'director']]
metadata_small = pd.read_csv('../movies_metadata.csv')
metadata_small['id'] = metadata_small['id'].apply(lambda x: tryconvert(x))
metadata_small = metadata_small[metadata_small['id'].isin(links_small)].join(directors.set_index('id'), on = 'id')
metadata_small.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,director
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,373554033.0,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,JohnLasseter
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,262797249.0,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,JoeJohnston
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,0.0,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,HowardDeutch
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,81452156.0,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,ForestWhitaker
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,76578911.0,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,CharlesShyer


In [142]:
metadata_small['tagline'] = metadata_small['tagline'].fillna('')
metadata_small['overview'] = metadata_small['overview'].fillna('')
metadata_small['description'] = metadata_small['overview'].apply(lambda x: x.lower()) + metadata_small['tagline'].apply(lambda x: x.lower())
metadata_small = metadata_small.apply(lambda x: add_director(x), axis = 1)
metadata_small.head()

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,director,description
0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,JohnLasseter,"led by woody, andy's toys live happily in his ..."
1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,JoeJohnston,when siblings judy and peter discover an encha...
2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,...,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,HowardDeutch,a family wedding reignites the ancient feud be...
3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",...,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,ForestWhitaker,"cheated on, mistreated and stepped on, the wom..."
4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,...,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,CharlesShyer,just when george banks has recovered from his ...


In [143]:
vectorizer = TfidfVectorizer(stop_words = 'english')
description_matrix = vectorizer.fit_transform(metadata_small['description'])
description_matrix.shape

(9133, 34337)

In [144]:
similarity_matrix = linear_kernel(description_matrix, description_matrix)
metadata_small = metadata_small.reset_index()
indices = pd.Series(metadata_small.index, index = metadata_small['title'])
titles = metadata_small['title']

In [145]:
def get_recommendations(movie_name):
    index = indices[movie_name]
    scores = list(enumerate(similarity_matrix[index]))
    scores = sorted(scores, key = lambda x: x[1], reverse = True)[1:101]
    movie_indices = [i[0] for i in scores]
    recommendations = titles.iloc[movie_indices]
    return recommendations

In [146]:
get_recommendations('The Godfather').head(20)

976            The Godfather: Part II
4472               One from the Heart
3588    Tucker: The Man and His Dream
636                              Jack
963                    Apocalypse Now
1330                    The Rainmaker
2982                 The Conversation
3284                 Gardens of Stone
1586          The Godfather: Part III
1675                    The Outsiders
3677                  The Cotton Club
1976            Peggy Sue Got Married
5809                      Rumble Fish
1082                          Dracula
4203               Johnny Dangerously
3514                             Made
8408                       The Family
7779                    Henry's Crime
7609                          Machete
8674              The Look of Silence
Name: title, dtype: object

In [147]:
get_recommendations('The Prestige').head(20)

6915                        The Dark Knight
3365                                Memento
7951                  The Dark Knight Rises
7572                              Inception
6158                          Batman Begins
4111                               Insomnia
8529                           Interstellar
2069                              Following
7055                              Magicians
6090                      The Sword of Doom
8114                                 Broken
6516                           The Covenant
8768                               Blackhat
7120    The Curious Case of Benjamin Button
4898                                 Damage
2595                    Eye of the Beholder
7674                          The Romantics
3683                           Original Sin
8952                         7 Days in Hell
2285                             Nightmares
Name: title, dtype: object

In [120]:
pd.DataFrame(similarity_matrix).to_csv('../SECOND-similarity_matrix.csv')

In [148]:
metadata_small['description'][0]

"led by woody, andy's toys live happily in his room until andy's birthday brings buzz lightyear onto the scene. afraid of losing his place in andy's heart, woody plots against buzz. but when circumstances separate buzz and woody from their owner, the duo eventually learns to put aside their differences. JohnLasseter JohnLasseter JohnLasseter JohnLasseter JohnLasseter JohnLasseter"

## Ensemble recommender

In [228]:
cfilt_pref_matrix = joblib.load('../THIRD-modified_preferences')
movieId_map = joblib.load('../THIRD-movieId_map')
userId_map = joblib.load('../THIRD-userId_map')
inv_userId_map = joblib.load('../THIRD-inv_userId_map')
inv_movieId_map = joblib.load('../THIRD-inv_movieId_map')
links_small2 = pd.read_csv('../links_small.csv')
links_small2['id'] = links_small2['tmdbId'].apply(lambda x: int(x) if not np.isnan(x) else -1)
links_small2 = links_small2[['movieId', 'id']].set_index('movieId')
links_small2.head()

Unnamed: 0_level_0,id
movieId,Unnamed: 1_level_1
1,862
2,8844
3,15602
4,31357
5,11862


In [241]:
def sigmoid(x):
    return 1/(1 + np.e ** -x)

In [260]:
def collab_recommendations(userId):
    user_pref = cfilt_pref_matrix[inv_userId_map[userId], :].reshape(cfilt_pref_matrix.shape[1])
    user_pref = list(enumerate(user_pref))
    user_pref = sorted(user_pref, key = lambda x: x[1], reverse = True)[:50]
    preferences = []
    for i in user_pref:
        Id = inv_movieId_map[i[0]]
        Id = links_small2.loc[Id].values[0]
        title = metadata_small[metadata_small['id'] == Id]['title'].values[0]
        preferences.append((title, sigmoid(i[1])))
    return preferences

In [122]:
top_charts = pd.read_csv('../FIRST-top_charts.csv')
top_charts.head()

Unnamed: 0,title,vote_count,genres,popularity,id,vote_average,Weighted_Rating
0,The Shawshank Redemption,8358.0,"['Drama', 'Crime']",51.645403,278,8.5,8.357746
1,The Godfather,6024.0,"['Drama', 'Crime']",41.109264,238,8.5,8.306334
2,The Dark Knight,12269.0,"['Drama', 'Action', 'Crime', 'Thriller']",123.167259,155,8.3,8.208376
3,Fight Club,9678.0,['Drama'],63.869599,550,8.3,8.184899
4,Pulp Fiction,8670.0,"['Thriller', 'Crime']",140.950236,680,8.3,8.172155


In [169]:
genre_chart = top_charts.apply(lambda x: pd.Series(x['genres']), axis = 1).stack().reset_index(level = 1, drop = True)
genre_chart.name = 'genre'
genre_chart = top_charts.drop('genres', axis = 1).join(genre_chart).sort_values('Weighted_Rating', ascending = False)

In [170]:
def generate_genre_chart(genre):
    genre_top_movies = genre_chart[genre_chart['genre'] == genre]
    return genre_top_movies

In [123]:
links_small = pd.read_csv('../links_small.csv')
ratings_small = pd.read_csv('../ratings_small.csv')

In [124]:
links_small['tmdbId'] = links_small.apply(lambda x: int(x['tmdbId']) if not np.isnan(x['tmdbId']) else -1, axis = 1)
links_small = links_small.drop('imdbId', axis = 1)
links_small.head()

Unnamed: 0,movieId,tmdbId
0,1,862
1,2,8844
2,3,15602
3,4,31357
4,5,11862


In [125]:
ratings_small = ratings_small.join(links_small.set_index('movieId'), on = 'movieId').drop(['movieId', 'timestamp', 'userId'], axis = 1)
ratings_small['rating'] = ratings_small.apply(lambda x: np.log(x['rating']), axis = 1)
ratings_small.head()

Unnamed: 0,rating,tmdbId
0,0.916291,9909
1,1.098612,11360
2,1.098612,819
3,0.693147,1103
4,1.386294,11216


In [126]:
ratings_small = ratings_small.groupby('tmdbId')['rating'].mean()[1:]
ratings_small.head()

tmdbId
2     1.504077
5     1.108252
6     1.141087
11    1.405972
12    1.285808
Name: rating, dtype: float64

In [149]:
top_charts_ranks = top_charts.index + 1
top_charts['rank'] = top_charts_ranks
top_charts.head()

Unnamed: 0,title,vote_count,genres,popularity,id,vote_average,Weighted_Rating,rank
0,The Shawshank Redemption,8358.0,"['Drama', 'Crime']",51.645403,278,8.5,8.357746,1
1,The Godfather,6024.0,"['Drama', 'Crime']",41.109264,238,8.5,8.306334,2
2,The Dark Knight,12269.0,"['Drama', 'Action', 'Crime', 'Thriller']",123.167259,155,8.3,8.208376,3
3,Fight Club,9678.0,['Drama'],63.869599,550,8.3,8.184899,4
4,Pulp Fiction,8670.0,"['Thriller', 'Crime']",140.950236,680,8.3,8.172155,5


In [277]:
def hybrid_recommendation(movie, rank_chart):
    content_recommendations = get_recommendations(movie)
    model_ratings = pd.Series(np.zeros(content_recommendations.shape[0]), index = content_recommendations)
    content_recommendations = content_recommendations.reset_index()
    for index, recommendation in content_recommendations.iterrows():
        rank = index + 1
        rank_score = (100 - rank)/99
        Id = recommendation['index']
        chart_score = 0
        if Id in rank_chart['id'].values:
            chart_rank = rank_chart[rank_chart['id'] == Id]['rank']
            chart_score = (rank_chart.shape[0] - chart_rank)/(rank_chart.shape[0] - 1)
        rating_score = 0
        if Id in ratings_small.index:
            max_rating = np.max(ratings_small.values)
            rating = ratings_small[Id]
            rating_score = rating/max_rating
        total_score = 10*(8*rank_score + chart_score + rating_score)/10
        model_ratings[recommendation[1]] = total_score
    model_ratings = model_ratings.sort_values(ascending = False)
    return model_ratings

In [291]:
def hybrid_recommendation_(movie, rank_chart, userId):
    content_recommendations = get_recommendations(movie)
    cfilt_recommendations = collab_recommendations(userId)
    cfilt_movies = [i[0] for i in cfilt_recommendations]
    movie_list = list(set(content_recommendations) | set(cfilt_movies))
    model_ratings = pd.Series(np.zeros(len(movie_list)), index = movie_list)
    content_recommendations = content_recommendations.reset_index()
    for index, recommendation in content_recommendations.iterrows():
        rank = index + 1
        rank_score = (100 - rank)/99
        Id = recommendation['index']
        chart_score = 0
        if Id in rank_chart['id'].values:
            chart_rank = rank_chart[rank_chart['id'] == Id]['rank']
            chart_score = (rank_chart.shape[0] - chart_rank)/(rank_chart.shape[0] - 1)
        rating_score = 0
        if Id in ratings_small.index:
            max_rating = np.max(ratings_small.values)
            rating = ratings_small[Id]
            rating_score = rating/max_rating
        total_score = 10*(8*rank_score + chart_score + rating_score)/10
        model_ratings[recommendation[1]] = total_score
    for movie_score_pair in cfilt_recommendations:
        movie = movie_score_pair[0]
        score = movie_score_pair[1]
        model_ratings[movie] = 10*(2*model_ratings[movie]/10 + score)/3
    model_ratings = model_ratings.sort_values(ascending = False)
    return model_ratings

In [284]:
hybrid_recommendation('The Dark Knight', top_charts)[:31]

title
The Prestige                               8.351518
Batman                                     8.128939
Following                                  8.128356
The Dark Knight Rises                      8.000000
Batman Forever                             7.944808
Batman Begins                              7.919192
Memento                                    7.838384
Inception                                  7.757576
Batman Returns                             7.703404
Insomnia                                   7.595960
Interstellar                               7.515152
Batman: The Dark Knight Returns, Part 2    7.353535
Batman: Under the Red Hood                 7.191919
Batman: The Dark Knight Returns, Part 1    6.949495
JFK                                        6.868687
Beginning of the End                       6.787879
Batman: Mask of the Phantasm               6.707071
Tormented                                  6.626263
Despicable Me 2                            6.591732
The Wr

In [292]:
hybrid_recommendation_('The Dark Knight', top_charts, 2)[:31]

The Prestige                               8.351518
Batman                                     8.128939
Following                                  8.128356
The Dark Knight Rises                      8.000000
Batman Forever                             7.944808
Batman Begins                              7.919192
Memento                                    7.838384
Inception                                  7.757576
Batman Returns                             7.703404
Insomnia                                   7.595960
Interstellar                               7.515152
Batman: The Dark Knight Returns, Part 2    7.353535
Batman: Under the Red Hood                 7.191919
Batman: The Dark Knight Returns, Part 1    6.949495
JFK                                        6.868687
Beginning of the End                       6.787879
Batman: Mask of the Phantasm               6.707071
Tormented                                  6.626263
Despicable Me 2                            6.591732
The Wrong Ma

In [289]:
def final_recommender_(movie_list, rank_chart, userId):
    final_list = {}
    for movie in movie_list:
        recommendation_list = hybrid_recommendation_(movie, rank_chart, userId)
        for recommendation, score in recommendation_list.iteritems():
            if recommendation in final_list:
                final_list[recommendation] += score
            else:
                final_list.update({recommendation: score})
    final_list = sorted(final_list.items(), key = lambda x: x[1], reverse = True)
    return final_list[:31]

In [167]:
def final_recommender(movie_list, rank_chart):
    final_list = {}
    for movie in movie_list:
        recommendation_list = hybrid_recommendation(movie, rank_chart)
        for recommendation, score in recommendation_list.iteritems():
            if recommendation in final_list:
                final_list[recommendation] += score
            else:
                final_list.update({recommendation: score})
    final_list = sorted(final_list.items(), key = lambda x: x[1], reverse = True)
    return final_list[:31]

In [295]:
final_recommender(['Interstellar', 'Inception', 'The Godfather', 'Batman Begins', 'Star Wars', 'Return of the Jedi'], top_charts)

[('The Prestige', 25.13536218868002),
 ('Following', 24.38506774759147),
 ('The Dark Knight', 24.0),
 ('Memento', 23.67676767676768),
 ('The Dark Knight Rises', 23.595959595959595),
 ('Insomnia', 22.86868686868687),
 ('Star Wars: The Force Awakens', 21.575757575757578),
 ('Star Wars: Episode III - Revenge of the Sith', 15.676767676767676),
 ('Inception', 15.515151515151516),
 ('Star Wars: Episode I - The Phantom Menace', 15.434343434343434),
 ('The Empire Strikes Back', 15.434343434343434),
 ('Batman Begins', 15.11111111111111),
 ('Interstellar', 15.030303030303031),
 ('Star Wars: Episode II - Attack of the Clones', 14.383838383838384),
 ('Batman', 13.672020342164512),
 ('Shanghai Noon', 12.857090623764083),
 ('Johnny Dangerously', 12.30383071968108),
 ("Henry's Crime", 11.878787878787879),
 ('Throne of Blood', 11.878787878787879),
 ('Arthur', 11.717171717171716),
 ('Project S', 11.513093657170156),
 ('Quest for Camelot', 11.393939393939394),
 ('Made', 11.313131313131311),
 ('Double Je

In [296]:
final_recommender_(['Interstellar', 'Inception', 'The Godfather', 'Batman Begins', 'Star Wars', 'Return of the Jedi'], top_charts, 2)

[('The Empire Strikes Back', 30.253083176384884),
 ('Star Wars: Episode I - The Phantom Menace', 29.763777969545647),
 ('Return of the Jedi', 25.47776152385463),
 ('The Prestige', 25.13536218868002),
 ('Star Wars', 24.80378700557484),
 ('Following', 24.38506774759147),
 ('The Princess Bride', 24.3476638177891),
 ('The Dark Knight', 24.0),
 ('Memento', 23.67676767676768),
 ('The Dark Knight Rises', 23.595959595959595),
 ('The Matrix', 23.138432790466684),
 ('Insomnia', 22.86868686868687),
 ('Star Wars: The Force Awakens', 21.575757575757578),
 ('Back to the Future', 19.875195257099453),
 ('The Terminator', 19.850703015119727),
 ('Men in Black', 19.843480245770753),
 ('Full Metal Jacket', 19.840526687642928),
 ('Raiders of the Lost Ark', 19.794102787395055),
 ('Terminator 2: Judgment Day', 19.77231291004994),
 ('E.T. the Extra-Terrestrial', 19.763144534459208),
 ('The Fifth Element', 19.73524774048415),
 ('Aliens', 19.735098731984362),
 ('Alien', 19.731153521504755),
 ('Blade Runner', 19

In [189]:
metadata_small.head()

Unnamed: 0,index,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,...,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count,director,description
0,0,False,"{'id': 10194, 'name': 'Toy Story Collection', ...",30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",http://toystory.disney.com/toy-story,862,tt0114709,en,Toy Story,...,81.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Toy Story,False,7.7,5415.0,JohnLasseter,"led by woody, andy's toys live happily in his ..."
1,1,False,,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",,8844,tt0113497,en,Jumanji,...,104.0,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Released,Roll the dice and unleash the excitement!,Jumanji,False,6.9,2413.0,JoeJohnston,when siblings judy and peter discover an encha...
2,2,False,"{'id': 119050, 'name': 'Grumpy Old Men Collect...",0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",,15602,tt0113228,en,Grumpier Old Men,...,101.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Still Yelling. Still Fighting. Still Ready for...,Grumpier Old Men,False,6.5,92.0,HowardDeutch,a family wedding reignites the ancient feud be...
3,3,False,,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",,31357,tt0114885,en,Waiting to Exhale,...,127.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Friends are the people who let you be yourself...,Waiting to Exhale,False,6.1,34.0,ForestWhitaker,"cheated on, mistreated and stepped on, the wom..."
4,4,False,"{'id': 96871, 'name': 'Father of the Bride Col...",0,"[{'id': 35, 'name': 'Comedy'}]",,11862,tt0113041,en,Father of the Bride Part II,...,106.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Just When His World Is Back To Normal... He's ...,Father of the Bride Part II,False,5.7,173.0,CharlesShyer,just when george banks has recovered from his ...


In [190]:
links_small.head()

0      862
1     8844
2    15602
3    31357
4    11862
Name: tmdbId, dtype: int64

In [232]:
Id = inv_movieId_map[99]
print(Id)
Id = links_small2.loc[Id].values[0]
print(Id)
title = metadata_small[metadata_small['id'] == Id]['title'].values[0]
print(title)

318
278
The Shawshank Redemption


In [267]:
collab_recommendations(2)

[('The Empire Strikes Back', 0.9981760443411298),
 ('Return of the Jedi', 0.9975807344574391),
 ('Star Wars', 0.9959593769619964),
 ('Back to the Future', 0.9937597628549726),
 ('The Terminator', 0.9925351507559865),
 ('Men in Black', 0.9921740122885376),
 ('Raiders of the Lost Ark', 0.9897051393697528),
 ('Terminator 2: Judgment Day', 0.988615645502497),
 ('E.T. the Extra-Terrestrial', 0.9881572267229604),
 ('Aliens', 0.9867549365992181),
 ('Alien', 0.9865576760752378),
 ('Blade Runner', 0.9833695669554472),
 ('The Princess Bride', 0.9811333283291802),
 ('Indiana Jones and the Last Crusade', 0.9799685938569136),
 ('Star Wars: Episode I - The Phantom Menace', 0.973710783999168),
 ('Monty Python and the Holy Grail', 0.9706908401022886),
 ('Total Recall', 0.9694662888912652),
 ('Die Hard', 0.9667312511637914),
 ('Star Trek IV: The Voyage Home', 0.9634709892387515),
 ('Star Trek II: The Wrath of Khan', 0.9633120096171544),
 ('The Blues Brothers', 0.9617938058478954),
 ('Jurassic Park', 0.

### To-do: improve genre based recommendation and include collaborative filtering