In [1]:
#import libraries
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer 
from sklearn.metrics.pairwise import cosine_similarity 


In [2]:
# Read the pre-cleaned movie table from disk and preview its first rows
df = pd.read_csv(filepath_or_buffer="./cleaned_data.csv")
df.head(n=5)

Unnamed: 0,id,genres,keywords,original_language,original_title,overview,popularity,release_date,runtime,spoken_languages,tagline,vote_average,vote_count,cast,director,combined_features
0,0,"['Action', 'Adventure', 'Fantasy', 'Science', ...",culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",4.050184,2009-12-10 00:00:00,2.441353,"[{'iso_639_1': 'en', 'name': 'English'}, {'iso...",Enter the World of Pandora.,0.929848,8.993617,"['Sam', 'Worthington', 'Zoe', 'Saldana', 'Sigo...",James Cameron,Avatar culture clash future space war space co...
1,1,"['Adventure', 'Fantasy', 'Action']",ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",3.693426,2007-05-19 00:00:00,2.751591,"[{'iso_639_1': 'en', 'name': 'English'}]","At the end of the world, the adventure begins.",0.677787,3.083496,"['Johnny', 'Depp', 'Orlando', 'Bloom', 'Keira'...",Gore Verbinski,Pirates of the Caribbean: At World's End ocean...
2,2,"['Action', 'Adventure', 'Crime']",spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,2.697272,2015-10-26 00:00:00,1.820876,"[{'iso_639_1': 'fr', 'name': 'Français'}, {'is...",A Plan No One Escapes,0.173665,3.055969,"['Daniel', 'Craig', 'Christoph', 'Waltz', 'L\\...",Sam Mendes,Spectre spy based on novel secret agent sequel...
3,3,"['Action', 'Crime', 'Drama', 'Thriller']",dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,2.852359,2012-07-16 00:00:00,2.574312,"[{'iso_639_1': 'en', 'name': 'English'}]",The Legend Ends,1.265929,6.812539,"['Christian', 'Bale', 'Michael', 'Caine', 'Gar...",Christopher Nolan,The Dark Knight Rises dc comics crime fighter ...
4,4,"['Action', 'Adventure', 'Science', 'Fiction']",based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",0.703765,2012-03-07 00:00:00,1.111759,"[{'iso_639_1': 'en', 'name': 'English'}]","Lost in our world, found in another.",0.005625,1.159873,"['Taylor', 'Kitsch', 'Lynn', 'Collins', 'Saman...",Andrew Stanton,John Carter based on novel mars medallion spac...


In [3]:
# Turn each movie's combined text features into a TF-IDF weighted vector
tfidfvec = TfidfVectorizer()
tfidf_matrix = tfidfvec.fit_transform(df["combined_features"])

# Pairwise cosine similarity of every movie against every other movie
# (with a single argument, the matrix is compared against itself)
sim_matrix = cosine_similarity(tfidf_matrix)

In [4]:
def recommendations(movie_title, sim=sim_matrix, top_n=5):
    """
    Return the titles of the movies most similar to the given movie.

    params:
        movie_title: exact title to look up in df['original_title']
        sim: similarity matrix; row/column positions are assumed to line up
             with the row positions of df (defaults to sim_matrix as it was
             when this function was defined)
        top_n: maximum number of recommendations to return (default 5)
    returns: up to top_n titles, ordered from most to least similar,
             never including the queried movie itself
    rtype: list
    raises: IndexError if movie_title is not present in df['original_title'],
            ValueError if top_n is negative
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Locate the movie by row POSITION (not index label): `sim` is indexed
    # positionally, so this stays correct even if df's index is not 0..n-1.
    title_matches = (df['original_title'] == movie_title).to_numpy().nonzero()[0]
    if len(title_matches) == 0:
        # Same exception type an unknown title raised before, but with a usable message
        raise IndexError(f"movie title not found in dataset: {movie_title!r}")
    # When several rows share the title, use the first one
    idx = int(title_matches[0])

    # Pair every other movie's position with its similarity to the target.
    # The target is excluded explicitly rather than by assuming it sorts first:
    # a row with identical features ties with it, and an all-zero feature row
    # does not even score highest against itself.
    candidates = [
        (position, score)
        for position, score in enumerate(sim[idx])
        if position != idx
    ]

    # Highest similarity first; sorted() is stable, so ties keep dataset order
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # Translate the best-scoring positions back into movie titles
    return [df['original_title'].iloc[position] for position, _ in candidates[:top_n]]


In [5]:
# Example query: movies most similar to "John Carter"
recommendations(movie_title="John Carter")

['Mission to Mars',
 'The Princess and the Frog',
 'Dear John',
 'Princess Kaiulani',
 'The Last Days on Mars']