In [75]:
import pandas as pd
import numpy as np
import ast
import nltk
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib


In [76]:
movies = pd.read_csv("tmdb_5000_movies.csv")
credits = pd.read_csv("tmdb_5000_credits.csv")

movies = movies.merge(credits, on="title")


In [77]:
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]
movies.dropna(inplace=True)


In [78]:
def convert(obj):
    return [i['name'] for i in ast.literal_eval(obj)]

def convert_cast(obj):
    return [i['name'] for i in ast.literal_eval(obj)[:3]]

def fetch_director(obj):
    for i in ast.literal_eval(obj):
        if i['job'] == 'Director':
            return [i['name']]
    return []


In [79]:
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)
movies['cast'] = movies['cast'].apply(convert_cast)
movies['crew'] = movies['crew'].apply(fetch_director)
movies['overview'] = movies['overview'].apply(lambda x: x.split())

for feature in ['genres', 'keywords', 'cast', 'crew']:
    movies[feature] = movies[feature].apply(lambda x: [i.replace(" ", "") for i in x])


In [80]:
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
new_df = movies[['movie_id', 'title', 'tags', 'genres']]
new_df['tags'] = new_df['tags'].apply(lambda x: " ".join(x)).str.lower()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(lambda x: " ".join(x)).str.lower()


In [81]:
ps = PorterStemmer()
def stem(text):
    return " ".join([ps.stem(word) for word in text.split()])

new_df['tags'] = new_df['tags'].apply(stem)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags'] = new_df['tags'].apply(stem)


In [82]:
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
vectors = tfidf.fit_transform(new_df['tags']).toarray()
similarity = cosine_similarity(vectors)


In [None]:
def recommend(movie):
    movie = movie.lower()
    all_titles = new_df['title'].str.lower().tolist()
    
    match = difflib.get_close_matches(movie, all_titles, n=1, cutoff=0.6)
    if not match:
        print("Movie not found.")
        return []

    movie_index = new_df[new_df['title'].str.lower() == match[0]].index[0]
    movie_genres = set(new_df.loc[movie_index, 'genres'])

    distances = list(enumerate(similarity[movie_index]))
    sorted_movies = sorted(distances, key=lambda x: x[1], reverse=True)[1:25]

    recommended = []
    for i in sorted_movies:
        idx = i[0]
        other_genres = set(new_df.loc[idx, 'genres'])
        if movie_genres & other_genres:
            recommended.append(new_df.iloc[idx].title)
        if len(recommended) == 5:
            break

    print(f"\n🎬 Top 5 recommendations for '{new_df.iloc[movie_index].title}':\n")
    for rec in recommended:
        print("👉", rec)

    return recommended


In [84]:
recommend("The Conjuring")


🎬 Top 5 recommendations for 'The Conjuring':

👉 The Conjuring 2
👉 Ouija
👉 The Amityville Horror
👉 Grave Encounters
👉 The Vatican Exorcisms


['The Conjuring 2',
 'Ouija',
 'The Amityville Horror',
 'Grave Encounters',
 'The Vatican Exorcisms']

In [85]:
recommend("Pirates of the Caribbean: The Curse of the Black Pearl")


🎬 Top 5 recommendations for 'Pirates of the Caribbean: The Curse of the Black Pearl':

👉 Pirates of the Caribbean: Dead Man's Chest
👉 VeggieTales: The Pirates Who Don't Do Anything
👉 The Pirates! In an Adventure with Scientists!
👉 Pirates of the Caribbean: At World's End
👉 Pirates of the Caribbean: On Stranger Tides


["Pirates of the Caribbean: Dead Man's Chest",
 "VeggieTales: The Pirates Who Don't Do Anything",
 'The Pirates! In an Adventure with Scientists!',
 "Pirates of the Caribbean: At World's End",
 'Pirates of the Caribbean: On Stranger Tides']

In [86]:
recommend("Mad Max: Fury Road")


🎬 Top 5 recommendations for 'Mad Max: Fury Road':

👉 The Book of Eli
👉 9
👉 Terminator Salvation
👉 Zombie Hunter
👉 Resident Evil: Extinction


['The Book of Eli',
 '9',
 'Terminator Salvation',
 'Zombie Hunter',
 'Resident Evil: Extinction']

In [87]:
recommend("Mean Girls")


🎬 Top 5 recommendations for 'Mean Girls':

👉 Sleepover
👉 Freaky Friday
👉 Napoleon Dynamite
👉 Bandslam
👉 Confessions of a Teenage Drama Queen


['Sleepover',
 'Freaky Friday',
 'Napoleon Dynamite',
 'Bandslam',
 'Confessions of a Teenage Drama Queen']

In [88]:
recommend("Inception")


🎬 Top 5 recommendations for 'Inception':

👉 12 Rounds
👉 Premium Rush
👉 Stolen
👉 Looper
👉 The Walk


['12 Rounds', 'Premium Rush', 'Stolen', 'Looper', 'The Walk']

In [89]:
recommend("The Avengers")


🎬 Top 5 recommendations for 'The Avengers':

👉 Avengers: Age of Ultron
👉 Captain America: Civil War
👉 Iron Man 3
👉 Captain America: The First Avenger
👉 Captain America: The Winter Soldier


['Avengers: Age of Ultron',
 'Captain America: Civil War',
 'Iron Man 3',
 'Captain America: The First Avenger',
 'Captain America: The Winter Soldier']

In [90]:
recommend("Titanic")


🎬 Top 5 recommendations for 'Titanic':

👉 Ghost Ship
👉 Poseidon
👉 The Rose
👉 Dear Frankie
👉 The Bounty


['Ghost Ship', 'Poseidon', 'The Rose', 'Dear Frankie', 'The Bounty']

In [91]:
recommend("Toy Story")



🎬 Top 5 recommendations for 'Toy Story':

👉 Toy Story 3
👉 Toy Story 2
👉 Small Soldiers
👉 The 41–Year–Old Virgin Who Knocked Up Sarah Marshall and Felt Superbad About It
👉 Child's Play


['Toy Story 3',
 'Toy Story 2',
 'Small Soldiers',
 'The 41–Year–Old Virgin Who Knocked Up Sarah Marshall and Felt Superbad About It',
 "Child's Play"]