In [86]:
import os

import numpy as np 
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns

from nltk.stem.snowball import SnowballStemmer
from ast import literal_eval
from sklearn.feature_extraction.text import CountVectorizer
import faiss

import pickle

# Lift every pandas display limit so frames are rendered in full
# (all rows, all columns, unlimited width and cell width).
for display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(display_option, None)

In [87]:
# Load the prepared movie table and print its schema / null counts.
movies_csv_path = os.path.join('data', 'prepared_movies.csv')
df_movies = pd.read_csv(movies_csv_path)
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16960 entries, 0 to 16959
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            16960 non-null  int64  
 1   cast          16960 non-null  object 
 2   directors     16960 non-null  object 
 3   writers       16960 non-null  object 
 4   keywords      16960 non-null  object 
 5   release_date  16960 non-null  object 
 6   title         16960 non-null  object 
 7   overview      16960 non-null  object 
 8   genres        16960 non-null  object 
 9   popularity    16960 non-null  float64
 10  vote_count    16960 non-null  int64  
 11  vote_average  16960 non-null  float64
 12  poster_path   16891 non-null  object 
dtypes: float64(2), int64(2), object(9)
memory usage: 1.7+ MB


1. Select Specific Columns Only

In [88]:
# Keep only the columns used to build the metadata soups.
# The copy is taken AFTER the column selection so that df_keywords is an
# independent frame: later column assignments on it neither touch df_movies
# nor raise SettingWithCopyWarning, and unused columns are not copied.
df_keywords = df_movies[['id', 'title', 'genres', 'cast', 'directors', 'writers', 'keywords']].copy()
df_keywords.head()

Unnamed: 0,id,title,genres,cast,directors,writers,keywords
0,862,Toy Story,"['Animation', 'Comedy', 'Family']","['Tom Hanks', 'Tim Allen', 'Don Rickles']",['John Lasseter'],"['Joel Cohen', 'Andrew Stanton', 'Joss Whedon', 'Alec Sokolow']","['jealousy', 'toy', 'boy', 'friendship', 'friends', 'rivalry', 'boy next door', 'new toy', 'toy comes to life']"
1,8844,Jumanji,"['Adventure', 'Fantasy', 'Family']","['Robin Williams', 'Jonathan Hyde', 'Kirsten Dunst']",['Joe Johnston'],"['Chris van Allsburg', 'Greg Taylor', 'Jim Strain', 'Jonathan Hensleigh']","['board game', 'disappearance', ""based on children's book"", 'new home', 'recluse', 'giant insect']"
2,15602,Grumpier Old Men,"['Romance', 'Comedy']","['Walter Matthau', 'Jack Lemmon', 'Ann-Margret']",['Howard Deutch'],['Mark Steven Johnson'],"['fishing', 'best friend', 'duringcreditsstinger', 'old men']"
3,31357,Waiting to Exhale,"['Comedy', 'Drama', 'Romance']","['Whitney Houston', 'Angela Bassett', 'Loretta Devine']",['Forest Whitaker'],"['Ronald Bass', 'Terry McMillan']","['based on novel', 'interracial relationship', 'single mother', 'divorce', 'chick flick']"
4,11862,Father of the Bride Part II,['Comedy'],"['Steve Martin', 'Diane Keaton', 'Martin Short']",['Charles Shyer'],"['Albert Hackett', 'Nancy Meyers']","['baby', 'midlife crisis', 'confidence', 'aging', 'daughter', 'mother daughter relationship', 'pregnancy', 'contraception', 'gynecologist']"


2. Processing Keywords Column

LowerCase + Stemming 

In [89]:
stemmer = SnowballStemmer('english')

def stem_keywords(keywords_list):
    """Lowercase and stem every word of every keyword phrase.

    Each string in `keywords_list` is split on whitespace, each word is
    lowercased and passed through the module-level `stemmer`, and the stemmed
    words are joined back with single spaces. Non-string entries are skipped.

    Returns a new list of stemmed phrases in the original order.
    """
    return [
        " ".join(stemmer.stem(token.lower()) for token in phrase.split())
        for phrase in keywords_list
        if isinstance(phrase, str)
    ]

# The CSV stores each keyword list as its string repr; parse it back into a
# real list (missing values become an empty list).
parsed_keywords = df_keywords['keywords'].fillna('[]').apply(literal_eval)
df_keywords['keywords'] = parsed_keywords

# Stemmed / lowercased version of every keyword phrase
df_keywords['keywords_normalized'] = parsed_keywords.apply(stem_keywords)

# Preview the result
df_keywords.head()

Unnamed: 0,id,title,genres,cast,directors,writers,keywords,keywords_normalized
0,862,Toy Story,"['Animation', 'Comedy', 'Family']","['Tom Hanks', 'Tim Allen', 'Don Rickles']",['John Lasseter'],"['Joel Cohen', 'Andrew Stanton', 'Joss Whedon', 'Alec Sokolow']","[jealousy, toy, boy, friendship, friends, rivalry, boy next door, new toy, toy comes to life]","[jealousi, toy, boy, friendship, friend, rivalri, boy next door, new toy, toy come to life]"
1,8844,Jumanji,"['Adventure', 'Fantasy', 'Family']","['Robin Williams', 'Jonathan Hyde', 'Kirsten Dunst']",['Joe Johnston'],"['Chris van Allsburg', 'Greg Taylor', 'Jim Strain', 'Jonathan Hensleigh']","[board game, disappearance, based on children's book, new home, recluse, giant insect]","[board game, disappear, base on children book, new home, reclus, giant insect]"
2,15602,Grumpier Old Men,"['Romance', 'Comedy']","['Walter Matthau', 'Jack Lemmon', 'Ann-Margret']",['Howard Deutch'],['Mark Steven Johnson'],"[fishing, best friend, duringcreditsstinger, old men]","[fish, best friend, duringcreditssting, old men]"
3,31357,Waiting to Exhale,"['Comedy', 'Drama', 'Romance']","['Whitney Houston', 'Angela Bassett', 'Loretta Devine']",['Forest Whitaker'],"['Ronald Bass', 'Terry McMillan']","[based on novel, interracial relationship, single mother, divorce, chick flick]","[base on novel, interraci relationship, singl mother, divorc, chick flick]"
4,11862,Father of the Bride Part II,['Comedy'],"['Steve Martin', 'Diane Keaton', 'Martin Short']",['Charles Shyer'],"['Albert Hackett', 'Nancy Meyers']","[baby, midlife crisis, confidence, aging, daughter, mother daughter relationship, pregnancy, contraception, gynecologist]","[babi, midlif crisi, confid, age, daughter, mother daughter relationship, pregnanc, contracept, gynecologist]"


Removing Spaces between Words

In [90]:
def clean_keyword_spaces(keywords_list):
    """Collapse each keyword phrase into a single token.

    Every space character is removed from each keyword, then any remaining
    leading/trailing whitespace is stripped. Returns a new list in the same
    order as `keywords_list`.
    """
    compacted = []
    for keyword in keywords_list:
        compacted.append(keyword.replace(" ", "").strip())
    return compacted

# Turn every multi-word keyword into one token so it is vectorized as a unit.
df_keywords['keywords_normalized'] = (
    df_keywords['keywords_normalized'].apply(clean_keyword_spaces)
)
df_keywords.head()

Unnamed: 0,id,title,genres,cast,directors,writers,keywords,keywords_normalized
0,862,Toy Story,"['Animation', 'Comedy', 'Family']","['Tom Hanks', 'Tim Allen', 'Don Rickles']",['John Lasseter'],"['Joel Cohen', 'Andrew Stanton', 'Joss Whedon', 'Alec Sokolow']","[jealousy, toy, boy, friendship, friends, rivalry, boy next door, new toy, toy comes to life]","[jealousi, toy, boy, friendship, friend, rivalri, boynextdoor, newtoy, toycometolife]"
1,8844,Jumanji,"['Adventure', 'Fantasy', 'Family']","['Robin Williams', 'Jonathan Hyde', 'Kirsten Dunst']",['Joe Johnston'],"['Chris van Allsburg', 'Greg Taylor', 'Jim Strain', 'Jonathan Hensleigh']","[board game, disappearance, based on children's book, new home, recluse, giant insect]","[boardgame, disappear, baseonchildrenbook, newhome, reclus, giantinsect]"
2,15602,Grumpier Old Men,"['Romance', 'Comedy']","['Walter Matthau', 'Jack Lemmon', 'Ann-Margret']",['Howard Deutch'],['Mark Steven Johnson'],"[fishing, best friend, duringcreditsstinger, old men]","[fish, bestfriend, duringcreditssting, oldmen]"
3,31357,Waiting to Exhale,"['Comedy', 'Drama', 'Romance']","['Whitney Houston', 'Angela Bassett', 'Loretta Devine']",['Forest Whitaker'],"['Ronald Bass', 'Terry McMillan']","[based on novel, interracial relationship, single mother, divorce, chick flick]","[baseonnovel, interracirelationship, singlmother, divorc, chickflick]"
4,11862,Father of the Bride Part II,['Comedy'],"['Steve Martin', 'Diane Keaton', 'Martin Short']",['Charles Shyer'],"['Albert Hackett', 'Nancy Meyers']","[baby, midlife crisis, confidence, aging, daughter, mother daughter relationship, pregnancy, contraception, gynecologist]","[babi, midlifcrisi, confid, age, daughter, motherdaughterrelationship, pregnanc, contracept, gynecologist]"


Remove Keywords that only appear once

In [91]:
# Distinct normalized keywords across all movies, before any filtering
unique_keywords = {
    kw
    for keywords_list in df_keywords['keywords_normalized']
    for kw in keywords_list
}
print(f"Number of unique keywords before filtering: {len(unique_keywords)}")

# Occurrence count per keyword; only keywords seen more than once are kept
s = df_keywords['keywords_normalized'].explode().value_counts()
s = s.loc[s.gt(1)]

def filter_keywords_by_freq(keywords_list):
    """Return the keywords from `keywords_list` that are labels of `s`.

    `s` is the module-level Series of keyword counts restricted to keywords
    occurring more than once; `kw in s` tests membership in its index.
    """
    kept = []
    for kw in keywords_list:
        if kw in s:
            kept.append(kw)
    return kept

# Overwrite 'keywords' with the normalized, frequency-filtered lists and
# drop the helper column.
df_keywords['keywords'] = df_keywords['keywords_normalized'].apply(filter_keywords_by_freq)
df_keywords = df_keywords.drop(columns=['keywords_normalized'])

# Distinct keywords that survived the filter
unique_keywords = {
    kw
    for keywords_list in df_keywords['keywords']
    for kw in keywords_list
}
print(f"Number of unique keywords after filtering: {len(unique_keywords)}")

Number of unique keywords before filtering: 13147
Number of unique keywords after filtering: 6806


3. Processing Genres, Cast, Directors and Writers

Lowercase + Removing Spaces between Words

In [92]:
def clean_title(name):
    """Return the title `name` in lowercase; spaces are kept as-is."""
    lowered = str.lower(name)
    return lowered

In [93]:
def clean_name(name):
    """Return `name` with all space characters removed, in lowercase."""
    without_spaces = name.replace(" ", "")
    return without_spaces.lower()

In [94]:
from ast import literal_eval

# Ensure the column is properly converted to lists
def safely_convert_to_list(value):
    """Convert a cell value into a list, always returning a list.

    Parameters
    ----------
    value : Any
        Usually the string repr of a Python list (as stored in the CSV) or
        an already-parsed list.

    Returns
    -------
    list
        * `value` itself when it already is a list;
        * the parsed list when `value` is a string holding a list literal;
        * a list built from a parsed tuple/set;
        * an empty list when parsing fails or the (parsed) value is anything
          else (scalar, plain string, dict, NaN, ...). Callers iterate over
          the result, so returning such values would either raise or split a
          string into single characters.
    """
    if isinstance(value, str):
        try:
            # Convert string representation of a list to an actual list
            value = literal_eval(value)
        except (ValueError, SyntaxError):
            # Return an empty list if conversion fails
            return []

    if isinstance(value, list):
        return value
    if isinstance(value, (tuple, set, frozenset)):
        return list(value)
    # Anything else is not list-like for our purposes
    return []

# Columns whose cells hold stringified lists; parse each one into real lists
# (missing values are treated as an empty list literal first).
columns_to_normalize = ['genres', 'cast', 'directors', 'writers']
for col in columns_to_normalize:
    raw_column = df_keywords[col].fillna('[]')
    df_keywords[col] = raw_column.apply(safely_convert_to_list)

# Define a function to normalize the lists
def normalize_list_values(value_list):
    """Lowercase each string in `value_list` and remove its spaces.

    Non-string entries are dropped. After the spaces are removed, any
    remaining leading/trailing whitespace is stripped. Returns a new list.
    """
    cleaned = []
    for item in value_list:
        if not isinstance(item, str):
            continue
        cleaned.append(item.lower().replace(" ", "").strip())
    return cleaned

# Normalize every name/genre in the list columns to a single lowercase token
for col in columns_to_normalize:
    parsed_column = df_keywords[col]
    df_keywords[col] = parsed_column.apply(normalize_list_values)

# Preview the normalized frame
df_keywords.head()

Unnamed: 0,id,title,genres,cast,directors,writers,keywords
0,862,Toy Story,"[animation, comedy, family]","[tomhanks, timallen, donrickles]",[johnlasseter],"[joelcohen, andrewstanton, josswhedon, alecsokolow]","[jealousi, toy, boy, friendship, friend, rivalri, boynextdoor, toycometolife]"
1,8844,Jumanji,"[adventure, fantasy, family]","[robinwilliams, jonathanhyde, kirstendunst]",[joejohnston],"[chrisvanallsburg, gregtaylor, jimstrain, jonathanhensleigh]","[boardgame, disappear, baseonchildrenbook, newhome, reclus, giantinsect]"
2,15602,Grumpier Old Men,"[romance, comedy]","[waltermatthau, jacklemmon, ann-margret]",[howarddeutch],[markstevenjohnson],"[fish, bestfriend, duringcreditssting, oldmen]"
3,31357,Waiting to Exhale,"[comedy, drama, romance]","[whitneyhouston, angelabassett, lorettadevine]",[forestwhitaker],"[ronaldbass, terrymcmillan]","[baseonnovel, interracirelationship, singlmother, divorc, chickflick]"
4,11862,Father of the Bride Part II,[comedy],"[stevemartin, dianekeaton, martinshort]",[charlesshyer],"[alberthackett, nancymeyers]","[babi, midlifcrisi, confid, age, daughter, motherdaughterrelationship, pregnanc, gynecologist]"


4. Metadata Soup

All Metadata (Directors, Genres, Actors, Writers, Keywords)

In [95]:
def create_metadata_soup(row):
    """Build the "all metadata" soup string for one movie row.

    Tokens are emitted in the order directors, genres, cast, writers,
    keywords. Directors, genres and cast are repeated twice (list * 2) so
    they count double in a bag-of-words vectorization; writers and keywords
    appear once. Every value goes through `clean_name`.

    NOTE: this name is redefined by later cells of the notebook; each
    definition shadows the previous one.
    """
    weighted_fields = (
        ('directors', 2),
        ('genres', 2),
        ('cast', 2),
        ('writers', 1),
        ('keywords', 1),
    )
    tokens = []
    for field, weight in weighted_fields:
        tokens.extend([clean_name(value) for value in row[field]] * weight)
    return ' '.join(tokens)

# One soup string per movie (row-wise apply)
df_keywords['metadata_soup_all'] = df_keywords.apply(create_metadata_soup, axis=1)

df_keywords.loc[:, ['id', 'title', 'metadata_soup_all']].head()


Unnamed: 0,id,title,metadata_soup_all
0,862,Toy Story,johnlasseter johnlasseter animation comedy family animation comedy family tomhanks timallen donrickles tomhanks timallen donrickles joelcohen andrewstanton josswhedon alecsokolow jealousi toy boy friendship friend rivalri boynextdoor toycometolife
1,8844,Jumanji,joejohnston joejohnston adventure fantasy family adventure fantasy family robinwilliams jonathanhyde kirstendunst robinwilliams jonathanhyde kirstendunst chrisvanallsburg gregtaylor jimstrain jonathanhensleigh boardgame disappear baseonchildrenbook newhome reclus giantinsect
2,15602,Grumpier Old Men,howarddeutch howarddeutch romance comedy romance comedy waltermatthau jacklemmon ann-margret waltermatthau jacklemmon ann-margret markstevenjohnson fish bestfriend duringcreditssting oldmen
3,31357,Waiting to Exhale,forestwhitaker forestwhitaker comedy drama romance comedy drama romance whitneyhouston angelabassett lorettadevine whitneyhouston angelabassett lorettadevine ronaldbass terrymcmillan baseonnovel interracirelationship singlmother divorc chickflick
4,11862,Father of the Bride Part II,charlesshyer charlesshyer comedy comedy stevemartin dianekeaton martinshort stevemartin dianekeaton martinshort alberthackett nancymeyers babi midlifcrisi confid age daughter motherdaughterrelationship pregnanc gynecologist


In [96]:
# Count rows whose 'metadata_soup_all' is null or blank.
# Nulls are mapped to '' first so the whitespace check cannot fail on a
# non-string value, and every such row is counted exactly once.
empty_metadata_soup_count = (
    df_keywords['metadata_soup_all'].fillna('').astype(str).str.strip().eq('').sum()
)
print(f"Number of rows with empty or null metadata_soup: {empty_metadata_soup_count}")

Number of rows with empty or null metadata_soup: 10


People Metadata (Directors, Actors, Writers)

In [97]:
def create_metadata_soup(row):
    """Build the "people" soup string for one movie row.

    Tokens are emitted in the order directors, cast, writers. Directors and
    cast are repeated twice (list * 2) so they count double in a bag-of-words
    vectorization; writers appear once. Every value goes through
    `clean_name`.

    NOTE: this redefines `create_metadata_soup` from an earlier cell.
    """
    weighted_fields = (
        ('directors', 2),
        ('cast', 2),
        ('writers', 1),
    )
    tokens = []
    for field, weight in weighted_fields:
        tokens.extend([clean_name(value) for value in row[field]] * weight)
    return ' '.join(tokens)

# One soup string per movie (row-wise apply)
df_keywords['metadata_soup_people'] = df_keywords.apply(create_metadata_soup, axis=1)

df_keywords.loc[:, ['id', 'title', 'metadata_soup_people']].head()


Unnamed: 0,id,title,metadata_soup_people
0,862,Toy Story,johnlasseter johnlasseter tomhanks timallen donrickles tomhanks timallen donrickles joelcohen andrewstanton josswhedon alecsokolow
1,8844,Jumanji,joejohnston joejohnston robinwilliams jonathanhyde kirstendunst robinwilliams jonathanhyde kirstendunst chrisvanallsburg gregtaylor jimstrain jonathanhensleigh
2,15602,Grumpier Old Men,howarddeutch howarddeutch waltermatthau jacklemmon ann-margret waltermatthau jacklemmon ann-margret markstevenjohnson
3,31357,Waiting to Exhale,forestwhitaker forestwhitaker whitneyhouston angelabassett lorettadevine whitneyhouston angelabassett lorettadevine ronaldbass terrymcmillan
4,11862,Father of the Bride Part II,charlesshyer charlesshyer stevemartin dianekeaton martinshort stevemartin dianekeaton martinshort alberthackett nancymeyers


In [98]:
# Count rows whose 'metadata_soup_people' is null or blank.
# Nulls are mapped to '' first so the whitespace check cannot fail on a
# non-string value, and every such row is counted exactly once.
empty_metadata_soup_count = (
    df_keywords['metadata_soup_people'].fillna('').astype(str).str.strip().eq('').sum()
)
print(f"Number of rows with empty or null metadata_soup: {empty_metadata_soup_count}")

Number of rows with empty or null metadata_soup: 60


Themes Metadata (Title, Genres, Keywords)

In [99]:
def create_metadata_soup(row):
    """Build the "theme" soup string for one movie row.

    The soup starts with the lowercased title (spaces kept, via
    `clean_title`), followed by the genres and then the keywords, each list
    repeated twice (list * 2) and passed through `clean_name`.

    NOTE: this redefines `create_metadata_soup` from earlier cells.
    """
    tokens = [clean_title(row['title'])]  # Add the title
    for field in ('genres', 'keywords'):
        tokens.extend([clean_name(value) for value in row[field]] * 2)
    return ' '.join(tokens)

# One soup string per movie (row-wise apply)
df_keywords['metadata_soup_theme'] = df_keywords.apply(create_metadata_soup, axis=1)

df_keywords.loc[:, ['id', 'title', 'metadata_soup_theme']].head()


Unnamed: 0,id,title,metadata_soup_theme
0,862,Toy Story,toy story animation comedy family animation comedy family jealousi toy boy friendship friend rivalri boynextdoor toycometolife jealousi toy boy friendship friend rivalri boynextdoor toycometolife
1,8844,Jumanji,jumanji adventure fantasy family adventure fantasy family boardgame disappear baseonchildrenbook newhome reclus giantinsect boardgame disappear baseonchildrenbook newhome reclus giantinsect
2,15602,Grumpier Old Men,grumpier old men romance comedy romance comedy fish bestfriend duringcreditssting oldmen fish bestfriend duringcreditssting oldmen
3,31357,Waiting to Exhale,waiting to exhale comedy drama romance comedy drama romance baseonnovel interracirelationship singlmother divorc chickflick baseonnovel interracirelationship singlmother divorc chickflick
4,11862,Father of the Bride Part II,father of the bride part ii comedy comedy babi midlifcrisi confid age daughter motherdaughterrelationship pregnanc gynecologist babi midlifcrisi confid age daughter motherdaughterrelationship pregnanc gynecologist


In [100]:
# Count rows whose 'metadata_soup_theme' is null or blank.
# Nulls are mapped to '' first so the whitespace check cannot fail on a
# non-string value, and every such row is counted exactly once.
empty_metadata_soup_count = (
    df_keywords['metadata_soup_theme'].fillna('').astype(str).str.strip().eq('').sum()
)
print(f"Number of rows with empty or null metadata_soup: {empty_metadata_soup_count}")

Number of rows with empty or null metadata_soup: 0


5. CountVectorizer

In [101]:
# A single vectorizer instance is refit on each soup in turn, so after this
# cell `count_vectorizer` holds the fit for the theme soup only (the last one).
count_vectorizer = CountVectorizer(stop_words='english')

# Bag-of-words count matrices, one row per movie
metadata_matrix_all = count_vectorizer.fit_transform(df_keywords['metadata_soup_all'])
metadata_matrix_people = count_vectorizer.fit_transform(df_keywords['metadata_soup_people'])
metadata_matrix_theme = count_vectorizer.fit_transform(df_keywords['metadata_soup_theme'])

# (movies, vocabulary size) for each soup
print(f"Metadata matrix shape: {metadata_matrix_all.shape}")
print(f"Metadata matrix (people) shape: {metadata_matrix_people.shape}")
print(f"Metadata matrix (themes) shape: {metadata_matrix_theme.shape}")

Metadata matrix shape: (16960, 42151)
Metadata matrix (people) shape: (16960, 35413)
Metadata matrix (themes) shape: (16960, 16337)


In [102]:
# Renumber rows 0..n-1 so index labels coincide with row positions.
df_keywords = df_keywords.reset_index(drop=True)

# Lookup from title -> row index. Titles are not unique (the sample outputs
# show two different "Passengers" rows), so one title can map to several
# entries.
indices = pd.Series(data=df_keywords.index, index=df_keywords['title'])

6. Save Metadata matrices

In [103]:
# Persist the three count matrices and the title -> row lookup.
pickle_targets = {
    'data/metadata_vector_matrix_all.pkl': metadata_matrix_all,
    'data/metadata_vector_matrix_people.pkl': metadata_matrix_people,
    'data/metadata_vector_matrix_theme.pkl': metadata_matrix_theme,
    'data/metadata_indices.pkl': indices,
}
for pickle_path, payload in pickle_targets.items():
    with open(pickle_path, 'wb') as f:
        pickle.dump(payload, f)

7. FAISS

In [104]:
# Function to find movie index
def find_movie_index(title, indices_map):
    """Look up `title` in `indices_map` and return its row index.

    `indices_map` is any mapping supporting `in` and `[]` (e.g. a Series
    indexed by title). When the lookup yields a Series (the title occurs
    more than once) the first entry is returned. Returns None when the title
    is not present.
    """
    if title not in indices_map:
        return None

    match = indices_map[title]
    if isinstance(match, pd.Series):
        # Duplicate titles: take the first occurrence
        return match.iloc[0]
    return match

In [105]:
def get_content_recommendations(title, matrix_dense, faiss_index, n=10):
    """
    Returns the top N most similar movies based ONLY on metadata features using Faiss.

    Parameters
    ----------
    title : str
        Movie title, looked up in the module-level `indices` map (exact match).
    matrix_dense : array-like
        Dense feature matrix; row `idx` is used as the query vector.
        Assumes its rows are in the same order as `df_keywords` and as the
        vectors stored in `faiss_index` -- verify against the caller.
    faiss_index :
        Index object exposing `search(query, k)` returning (scores, ids).
    n : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Columns 'title', 'id' and 'similarity' (score rounded to 2 decimals),
        in the order returned by the index search. The frame is empty (same
        columns) when the title is unknown or no valid neighbour is found.
    """
    empty_result = pd.DataFrame(columns=['title', 'id', 'similarity'])

    # Find the index for the input title
    idx = find_movie_index(title, indices)
    if idx is None:
        # Unknown title: without this guard `matrix_dense[None]` would build a
        # flattened copy of the whole matrix as the "query" and fail later.
        return empty_result
    idx = int(idx)

    # Get the metadata vector for the query movie
    query_vector = matrix_dense[idx].reshape(1, -1)

    # Search the Faiss index for the k nearest neighbors
    # We search for n+1 because the query movie itself is normally among the results
    distances, movie_indices = faiss_index.search(query_vector, n + 1)

    # search() returns one row per query; we sent a single query
    neighbour_ids = movie_indices[0]
    neighbour_scores = distances[0]

    # Keep only real hits (Faiss pads with -1), ids that exist in df_keywords,
    # and everything except the query movie itself (wherever it was ranked)
    keep_mask = (
        (neighbour_ids >= 0)
        & (neighbour_ids < len(df_keywords))
        & (neighbour_ids != idx)
    )

    # Take top N after filtering
    neighbour_ids = neighbour_ids[keep_mask][:n]
    neighbour_scores = neighbour_scores[keep_mask][:n]

    if len(neighbour_ids) == 0:
        return empty_result

    results_df = df_keywords.iloc[neighbour_ids][['title', 'id']].copy()
    results_df['similarity'] = [round(score, 2) for score in neighbour_scores]

    return results_df

All Metadata (Directors, Genres, Actors, Writers, Keywords)

In [106]:
# Faiss works on dense float32 arrays: cast the sparse counts, then densify
metadata_matrix_dense_all = metadata_matrix_all.astype(np.float32).toarray()
embedding_dimension_all = metadata_matrix_dense_all.shape[1]

# L2-normalize the rows in place so that inner product equals cosine similarity
faiss.normalize_L2(metadata_matrix_dense_all)

# Exact inner-product (IP) index over the "all metadata" vectors
faiss_index_all = faiss.IndexFlatIP(embedding_dimension_all)
faiss_index_all.add(metadata_matrix_dense_all)
print(f"Faiss index created with {faiss_index_all.ntotal} vectors.")

Faiss index created with 16960 vectors.


People Metadata (Directors, Actors, Writers)

In [107]:
# Faiss works on dense float32 arrays: cast the sparse counts, then densify
metadata_matrix_dense_people = metadata_matrix_people.astype(np.float32).toarray()
embedding_dimension_people = metadata_matrix_dense_people.shape[1]

# L2-normalize the rows in place so that inner product equals cosine similarity
faiss.normalize_L2(metadata_matrix_dense_people)

# Exact inner-product (IP) index over the "people" vectors
faiss_index_people = faiss.IndexFlatIP(embedding_dimension_people)
faiss_index_people.add(metadata_matrix_dense_people)
print(f"Faiss index created with {faiss_index_people.ntotal} vectors.")

Faiss index created with 16960 vectors.


Theme Metadata (Title, Genres, Keywords)

In [108]:
# Faiss works on dense float32 arrays: cast the sparse counts, then densify
metadata_matrix_dense_theme = metadata_matrix_theme.astype(np.float32).toarray()
embedding_dimension_theme = metadata_matrix_dense_theme.shape[1]

# L2-normalize the rows in place so that inner product equals cosine similarity
faiss.normalize_L2(metadata_matrix_dense_theme)

# Exact inner-product (IP) index over the "theme" vectors
faiss_index_theme = faiss.IndexFlatIP(embedding_dimension_theme)
faiss_index_theme.add(metadata_matrix_dense_theme)
print(f"Faiss index created with {faiss_index_theme.ntotal} vectors.")

Faiss index created with 16960 vectors.


8. Sample Recommendations

In [109]:
# Example 1: A popular sci-fi movie
# Header line -> (dense matrix, Faiss index) for each of the three soups
for header, (dense_matrix, soup_index) in {
    '\n--- Recommendations for "Interstellar" all ---': (metadata_matrix_dense_all, faiss_index_all),
    '\n--- Recommendations for "Interstellar" people ---': (metadata_matrix_dense_people, faiss_index_people),
    '\n--- Recommendations for "Interstellar" theme ---': (metadata_matrix_dense_theme, faiss_index_theme),
}.items():
    print(header)
    display(get_content_recommendations("Interstellar", dense_matrix, soup_index))


--- Recommendations for "Interstellar" all ---


Unnamed: 0,title,id,similarity
11988,The Martian,286217,0.38
6577,Inception,27205,0.3
13954,The Perfect 46,320181,0.29
15644,Passengers,274870,0.28
11373,Cody the Robosapien,187462,0.28
947,Contact,686,0.27
5385,Doctor Who,15691,0.27
13613,Doctor Who: Last Christmas,317182,0.26
11594,Midnight Special,245703,0.26
3320,Babylon 5: A Call to Arms,10916,0.26



--- Recommendations for "Interstellar" people ---


Unnamed: 0,title,id,similarity
4677,The Prestige,1124,0.44
7501,The Dark Knight Rises,49026,0.43
1443,Following,11660,0.42
2087,Memento,77,0.42
5279,The Dark Knight,155,0.41
4110,Batman Begins,272,0.39
16729,Dunkirk,374720,0.38
6577,Inception,27205,0.38
2473,Insomnia,320,0.31
5677,Passengers,13944,0.21



--- Recommendations for "Interstellar" theme ---


Unnamed: 0,title,id,similarity
120,Apollo 13,568,0.34
8823,Love,54320,0.33
5341,Stargate: The Ark of Truth,13001,0.32
15903,Passage to Mars,399623,0.3
8381,The End of Love,84191,0.3
1551,The Astronaut's Wife,2900,0.3
6883,The Scientist,56320,0.29
8569,Oblivion,75612,0.29
1912,Space Cowboys,5551,0.29
11988,The Martian,286217,0.29


In [110]:
# Example 2: A romantic comedy
# Header line -> (dense matrix, Faiss index) for each of the three soups
for header, (dense_matrix, soup_index) in {
    '\n--- Recommendations for "10 Things I Hate About You" all ---': (metadata_matrix_dense_all, faiss_index_all),
    '\n--- Recommendations for "10 Things I Hate About You" people ---': (metadata_matrix_dense_people, faiss_index_people),
    '\n--- Recommendations for "10 Things I Hate About You" theme ---': (metadata_matrix_dense_theme, faiss_index_theme),
}.items():
    print(header)
    display(get_content_recommendations("10 Things I Hate About You", dense_matrix, soup_index))


--- Recommendations for "10 Things I Hate About You" all ---


Unnamed: 0,title,id,similarity
9040,Don Jon,138697,0.53
5931,(500) Days of Summer,19913,0.43
6254,The Matriarch,148077,0.41
705,Angels in the Outfield,24795,0.38
13576,Hello Lonesome,117340,0.38
7367,50/50,40807,0.37
7163,Elektra Luxx,56272,0.36
5177,As You Like It,19103,0.35
1706,Down to You,10472,0.35
8328,Kiss the Bride,19844,0.35



--- Recommendations for "10 Things I Hate About You" people ---


Unnamed: 0,title,id,similarity
9040,Don Jon,138697,0.55
705,Angels in the Outfield,24795,0.37
7367,50/50,40807,0.36
69,The Juror,9623,0.36
5931,(500) Days of Summer,19913,0.35
7963,Looper,59967,0.33
7163,Elektra Luxx,56272,0.33
6577,Inception,27205,0.33
4857,The Lookout,8270,0.33
4461,Brick,9270,0.33



--- Recommendations for "10 Things I Hate About You" theme ---


Unnamed: 0,title,id,similarity
12888,10 Attitudes,19828,0.42
16816,Between Us,390526,0.42
14423,All About E,320639,0.42
3614,Once Around,39233,0.42
3676,Tromeo & Juliet,16233,0.42
1409,The Other Sister,18417,0.41
1588,Romance,171982,0.41
1787,Whatever It Takes,16222,0.41
1595,Molly,44857,0.41
1577,Mumford,24071,0.41


In [111]:
# Example 3: An animated film
# Header line -> (dense matrix, Faiss index) for each of the three soups
for header, (dense_matrix, soup_index) in {
    '\n--- Recommendations for "Toy Story" all ---': (metadata_matrix_dense_all, faiss_index_all),
    '\n--- Recommendations for "Toy Story" people ---': (metadata_matrix_dense_people, faiss_index_people),
    '\n--- Recommendations for "Toy Story" theme ---': (metadata_matrix_dense_theme, faiss_index_theme),
}.items():
    print(header)
    display(get_content_recommendations("Toy Story", dense_matrix, soup_index))


--- Recommendations for "Toy Story" all ---


Unnamed: 0,title,id,similarity
9141,Toy Story of Terror!,213121,0.62
1656,Toy Story 2,863,0.59
6529,Toy Story 3,10193,0.56
10658,Partysaurus Rex,130925,0.49
10660,Toy Story That Time Forgot,256835,0.48
15777,Dexter's Laboratory: Ego Trip,56828,0.42
1348,A Bug's Life,9487,0.41
10177,Small Fry,82424,0.4
3380,Garfield,8920,0.38
7195,Cars 2,49013,0.38



--- Recommendations for "Toy Story" people ---


Unnamed: 0,title,id,similarity
1656,Toy Story 2,863,0.55
9141,Toy Story of Terror!,213121,0.51
6529,Toy Story 3,10193,0.5
10658,Partysaurus Rex,130925,0.4
10660,Toy Story That Time Forgot,256835,0.39
713,That Thing You Do!,9591,0.39
7170,Larry Crowne,59861,0.38
6816,Crazy on the Outside,35458,0.37
1348,A Bug's Life,9487,0.33
10177,Small Fry,82424,0.28



--- Recommendations for "Toy Story" theme ---


Unnamed: 0,title,id,similarity
9141,Toy Story of Terror!,213121,0.56
10660,Toy Story That Time Forgot,256835,0.48
14092,The Tangerine Bear: Home in Time for Christmas!,36301,0.47
16861,Banana,54551,0.47
11450,Stitch! The Movie,15567,0.45
7132,Open Season 3,51170,0.45
1123,Meet the Deedles,40688,0.45
6529,Toy Story 3,10193,0.44
14400,Hammy's Boomerang Adventure,85693,0.44
12254,Big Top Scooby-Doo!,119321,0.44
