In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import ast
import difflib
import nltk
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

In [2]:
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

In [3]:
movies = movies.merge(credits,on ='movie_id')

In [4]:
movies= movies[['movie_id','title_x','genres','keywords','overview','cast','crew']]

In [5]:
movies.dropna(inplace=True)

In [6]:
def convert(text):
    l=[]
    for i in ast.literal_eval(text):
        l.append(i['name'])
    return l

In [7]:
movies['genres']=movies['genres'].apply(convert)
movies['keywords']=movies['keywords'].apply(convert)

In [8]:
def convert_cast(text):
    l=[]
    counter=0
    for i in ast.literal_eval(text):
        if counter < 5:
            l.append(i['name'])
            counter += 1
    return l

In [9]:
movies['cast']=movies['cast'].apply(convert_cast)

In [10]:
def fetch_director(text):
    l=[]
    for i in ast.literal_eval(text):
        if i['job']== 'Director' :
            l.append(i['name'])
            break
    return l

In [11]:
movies['crew']=movies['crew'].apply(fetch_director)
movies['overview']=movies['overview'].apply(lambda x:x.split())

In [12]:
def remove_space(word):
    l = []
    for i in word:
        l.append(i.replace(" ","_"))
    return l

In [13]:
movies['crew']=movies['crew'].apply(remove_space)
movies['cast']=movies['cast'].apply(remove_space)
movies['genres']=movies['genres'].apply(remove_space)
movies['keywords']=movies['keywords'].apply(remove_space)

In [14]:
movies['tags'] = movies['overview']+movies['genres']+movies['keywords']+movies['cast']+movies['crew']

In [15]:
new_df=movies[['movie_id','title_x','tags']]

In [16]:
new_df['tags']=new_df['tags'].apply(lambda x: " ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(lambda x: " ".join(x))


In [17]:
new_df['tags']=new_df['tags'].apply(lambda x:x.lower())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(lambda x:x.lower())


In [18]:
ps = PorterStemmer()

def stems(text):
    l = []
    for i in text.split():
        l.append(ps.stem(i))
    return " ".join(l)   

In [19]:
new_df['tags']=new_df['tags'].apply(stems)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['tags']=new_df['tags'].apply(stems)


In [20]:
cv =CountVectorizer(max_features=5000,stop_words='english')
vector = cv.fit_transform(new_df['tags']).toarray()

In [21]:
similarly = cosine_similarity(vector)

In [22]:
import difflib

def normalize_name(name):
    # Lowercase, remove underscores, and strip spaces for comparison
    return name.replace('_', ' ').lower().strip()

def recommend(query, n_recommendations=5):
    query_norm = normalize_name(query)
    titles = new_df['title_x'].str.lower().str.strip()
    matches = new_df[titles == query_norm]

    # Flatten all cast and crew names for searching
    all_cast = set()
    all_crew = set()
    cast_to_titles = {}
    crew_to_titles = {}

    for idx, row in movies.iterrows():
        for cast_member in row['cast']:
            norm_cast = normalize_name(cast_member)
            all_cast.add(norm_cast)
            cast_to_titles.setdefault(norm_cast, []).append(row['title_x'])
        for crew_member in row['crew']:
            norm_crew = normalize_name(crew_member)
            all_crew.add(norm_crew)
            crew_to_titles.setdefault(norm_crew, []).append(row['title_x'])
            
    # Check for exact match in cast
    if query_norm in all_cast:
        print(f"Movies featuring '{query.title()}':")
        for title in cast_to_titles[query_norm]:
            print(f"- {title}")
        return
        
    # Check for exact match in crew
    if query_norm in all_crew:
        print(f"Movies directed by '{query.title()}':")
        for title in crew_to_titles[query_norm]:
            print(f"- {title}")
        return

    # Suggest close matches for cast
    close_cast = difflib.get_close_matches(query_norm, all_cast, n=4, cutoff=0.7)
    if close_cast:
        print("Did you mean (cast):")
        for idx, match in enumerate(close_cast, 1):
            print(f"{idx}. {match.title()}")
        return
        
    # Suggest close matches for crew
    close_crew = difflib.get_close_matches(query_norm, all_crew, n=4, cutoff=0.7)
    if close_crew:
        print("Did you mean (crew):")
        for idx, match in enumerate(close_crew, 1):
            print(f"{idx}. {match.title()}")
        return
        
    # Usual title-based recommendation
    if matches.empty:
        close_matches = difflib.get_close_matches(query_norm, titles, n=4, cutoff=0.7)
        if close_matches:
            print("Did you mean (title):")
            for idx, match in enumerate(close_matches, 1):
                orig_title = new_df.iloc[titles.tolist().index(match)]['title_x']
                print(f"{idx}. {orig_title}")
        else:
            print("Movie, cast, or crew not found and no similar names found.")
        return

    index = matches.index[0]
    distances = sorted(list(enumerate(similarly[index])), reverse=True, key=lambda x: x[1])
    recommendations = []
    for i in distances[1:n_recommendations+1]:
        rec_title = new_df.iloc[i[0]].title_x
        print(f"- {rec_title}")
        recommendations.append(rec_title)

In [23]:
recommend("Spider-Man")

- Spider-Man 3
- Spider-Man 2
- The Amazing Spider-Man 2
- Arachnophobia
- The Amazing Spider-Man


In [24]:
recommend("SpiderMan")

Did you mean (title):
1. Spider-Man
2. Spider-Man 3
3. Spider-Man 2
4. Superman


In [25]:
recommend("Johnny Depp")

Movies featuring 'Johnny Depp':
- Pirates of the Caribbean: At World's End
- Pirates of the Caribbean: Dead Man's Chest
- The Lone Ranger
- Pirates of the Caribbean: On Stranger Tides
- Alice in Wonderland
- Alice Through the Looking Glass
- Charlie and the Chocolate Factory
- Dark Shadows
- Rango
- Pirates of the Caribbean: The Curse of the Black Pearl
- Public Enemies
- The Tourist
- Transcendence
- Mortdecai
- Black Mass
- From Hell
- Secret Window
- The Ninth Gate
- Donnie Brasco
- The Astronaut's Wife
- Blow
- Corpse Bride
- The Imaginarium of Doctor Parnassus
- Once Upon a Time in Mexico
- Chocolat
- Finding Neverland
- Don Juan DeMarco
- The Libertine
- Edward Scissorhands
- Fear and Loathing in Las Vegas
- Ed Wood
- What's Eating Gilbert Grape


In [26]:
import pickle

pickle.dump(new_df,open('movie_list.pkl','wb'))
pickle.dump(similarly,open('similarity.pkl','wb'))

pickle.dump(new_df,open('D:/Projects/Movie_Recommendation/backend/movie_list.pkl','wb'))
pickle.dump(similarly,open('D:/Projects/Movie_Recommendation/backend/similarity.pkl','wb'))

In [27]:
recommend("Iron Man")

Did you mean (cast):
1. Ron Morgan
2. Byron Mann
3. Ron Perlman
4. Ron Leibman


In [28]:
recommend("IronMan")

Did you mean (cast):
1. Ron Morgan
2. Byron Mann


In [29]:
recommend("Interstellar")

- Silent Running
- Guardians of the Galaxy
- Space Cowboys
- Apollo 13
- A.I. Artificial Intelligence


In [34]:
recommend("Leonardo DiCaprio")

Movies featuring 'Leonardo Dicaprio':
- Titanic
- The Great Gatsby
- Inception
- The Revenant
- The Aviator
- Django Unchained
- Blood Diamond
- The Wolf of Wall Street
- Gangs of New York
- The Departed
- Shutter Island
- Body of Lies
- Catch Me If You Can
- The Beach
- Revolutionary Road
- The Man in the Iron Mask
- J. Edgar
- The Quick and the Dead
- Marvin's Room
- Romeo + Juliet
- Celebrity
- What's Eating Gilbert Grape


In [None]:
recommend("Leonardo DiCaprio")

Movies featuring 'Leonardo Dicaprio':
- Titanic
- The Great Gatsby
- Inception
- The Revenant
- The Aviator
- Django Unchained
- Blood Diamond
- The Wolf of Wall Street
- Gangs of New York
- The Departed
- Shutter Island
- Body of Lies
- Catch Me If You Can
- The Beach
- Revolutionary Road
- The Man in the Iron Mask
- J. Edgar
- The Quick and the Dead
- Marvin's Room
- Romeo + Juliet
- Celebrity
- What's Eating Gilbert Grape
