<a href="https://colab.research.google.com/github/Indra282002/Movie-Recommendation-System/blob/main/Movie_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Data Preprocessing**

In [1]:
# Importing necessary libraries
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Reading the datasets.
# Both CSV files are looked up inside DATA_DIR. Point it at the folder that
# holds the two files (for example an uploaded or mounted folder on Colab)
# when they are not in the kernel's current working directory; the default
# '.' keeps the original lookup location.
DATA_DIR = Path('.')
movies = pd.read_csv(DATA_DIR / 'tmdb_5000_movies.csv')
credits = pd.read_csv(DATA_DIR / 'tmdb_5000_credits.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'tmdb_5000_movies.csv'

In [None]:
# Merging the datasets on the movie id instead of on 'title'.
# Titles are not guaranteed to be unique, and a title join pairs every movie
# with every credits row that shares its title (wrong cast/crew, extra rows).
# Assumes the TMDB 5000 schema: `movies` has an 'id' column and `credits` has
# 'movie_id' plus its own copy of 'title' -- verify against the CSV headers.
# The credits copy of 'title' is dropped first so the merged frame keeps a
# single, unsuffixed 'title' column.
movies = movies.merge(
    credits.drop(columns='title'),
    left_on='id',
    right_on='movie_id',
)

In [None]:
# Keep just the columns the recommender needs; everything else is discarded
movies = movies.loc[
    :,
    ['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew'],
]

In [None]:
# Dropping rows with missing values.
# The index is rebuilt afterwards so that index labels stay equal to row
# positions: the similarity matrix computed later is addressed by position,
# and the gaps left by dropped rows would make a label-based lookup point at
# the wrong movie (or past the end of the matrix).
movies = movies.dropna().reset_index(drop=True)

In [None]:
# Per-column count of missing values still present in the frame
movies.isna().sum()

In [None]:
# Number of rows that exactly repeat an earlier row
movies.duplicated(keep='first').sum()

In [None]:
# Importing the abstract syntax tree module to safely evaluate string expressions
import ast

# Function to extract genres and keywords
def fetch_genre_keywords(obj):
  """Return the 'name' value of every entry in a stringified list of dicts.

  `obj` is parsed with ast.literal_eval; the names are returned in the
  order the entries appear.
  """
  return [entry['name'] for entry in ast.literal_eval(obj)]

In [None]:
# Replace the raw genre / keyword strings with plain lists of names
movies['genres'] = movies['genres'].map(fetch_genre_keywords)
movies['keywords'] = movies['keywords'].map(fetch_genre_keywords)

In [None]:
# Function to extract the top 3 cast members
def fetch_cast(obj, top_n=3):
  """Return the names of the first `top_n` cast entries.

  Parameters
  ----------
  obj : str
      String form of a list of dicts, each carrying a 'name' key; it is
      parsed with ast.literal_eval.
  top_n : int, default 3
      How many leading entries to keep. Values below zero are treated as 0.

  Returns
  -------
  list of str
      Names in their original order; shorter than `top_n` when the list
      holds fewer entries.
  """
  # Slice before extracting names so entries past `top_n` are never visited
  # (the earlier loop kept walking the whole list after it had its names).
  leading_entries = ast.literal_eval(obj)[:max(top_n, 0)]
  return [member['name'] for member in leading_entries]

In [None]:
# Replace the raw cast string with the list of leading cast names
movies['cast'] = movies['cast'].map(fetch_cast)

In [None]:
# Function to extract the director's name
def fetch_director(obj):
  """Return the 'name' of every crew entry whose 'job' is 'Director'.

  `obj` is a stringified list of dicts parsed with ast.literal_eval; the
  result is a list because several entries may carry the 'Director' job.
  """
  crew_entries = ast.literal_eval(obj)
  return [member['name'] for member in crew_entries if member['job'] == 'Director']

In [None]:
# Replace the raw crew string with the list of director names
movies['crew'] = movies['crew'].map(fetch_director)

In [None]:
# Turn each overview string into a list of its whitespace-separated words
movies['overview'] = movies['overview'].map(str.split)

In [None]:
# Removing spaces within elements of genres, keywords, cast, and crew, so a
# multi-word entry such as "Science Fiction" becomes the single token
# "ScienceFiction". One helper and one loop replace four copy-pasted lines.
def _remove_spaces(items):
  """Return a new list with every space character removed from each string."""
  return [item.replace(" ", "") for item in items]

for _column in ('genres', 'keywords', 'cast', 'crew'):
  movies[_column] = movies[_column].apply(_remove_spaces)

In [None]:
# Build 'tags': for each movie, one list made of its overview words followed
# by its genres, keywords, cast and crew entries
movies['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)

In [None]:
# Selecting only relevant columns for final analysis.
# .copy() makes movies_df an independent frame, so the column assignments in
# the following cells modify it directly instead of raising pandas'
# SettingWithCopyWarning for writing into a slice of `movies`.
movies_df = movies[['movie_id','title','tags']].copy()

In [None]:
# Collapse each list of tags into one space-separated string
movies_df['tags'] = movies_df['tags'].map(" ".join)

In [None]:
# Lowercase every tag string
movies_df['tags'] = movies_df['tags'].str.lower()

### **Stemming the text**

In [None]:
# Importing the Porter stemmer from the Natural Language Toolkit (nltk)
from nltk.stem.porter import PorterStemmer
# Single module-level stemmer instance; the `stem` function below calls ps.stem()
ps = PorterStemmer()

In [None]:
# Function to apply stemming to the tags
def stem(text):
  """Stem each whitespace-separated token of `text` with the module-level
  stemmer `ps` and return the stemmed tokens joined by single spaces."""
  return " ".join(ps.stem(token) for token in text.split())

In [None]:
# Run the stemmer over every tag string
movies_df['tags'] = movies_df['tags'].map(stem)

# **Text Vectorization**


In [None]:
# Count vectorizer capped at 5000 features, using the built-in 'english' stop-word list
from sklearn.feature_extraction.text import CountVectorizer

cv = CountVectorizer(
    stop_words='english',
    max_features=5000,
)

In [None]:
# Fit the vectorizer on the tags and materialise the result as a dense array
# (one row per movie in movies_df, in the same order)
movies_vector = (
    cv
    .fit_transform(movies_df['tags'])
    .toarray()
)

In [None]:
# Calculating cosine similarity between every pair of rows in movies_vector.
# Called with a single argument, so the matrix is compared against itself:
# similarity[i][j] is the score between the movies at row positions i and j.
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(movies_vector)

# **Movies Recommendation Function**

In [None]:
def Movie_Recommendation(movie, top_n=5):
  """
  Recommends movies similar to the input movie based on cosine similarity.

  Prints the titles of the `top_n` rows of movies_df whose similarity score
  against `movie` is highest, best match first.

  Parameters
  ----------
  movie : str
      Title to look up; compared for exact equality with movies_df['title'].
      When several rows share the title, the first one is used.
  top_n : int, default 5
      Number of recommendations to print. Values below zero are treated as 0.

  Raises
  ------
  IndexError
      If no row of movies_df has the given title.
  """
  # Row *position* (not index label) of the requested title. `similarity` is
  # a plain array addressed by position, and index labels differ from
  # positions whenever rows were dropped without resetting the index.
  matches = np.flatnonzero((movies_df['title'] == movie).to_numpy())
  if matches.size == 0:
    raise IndexError(f"Movie {movie!r} not found in movies_df['title']")
  movie_pos = int(matches[0])
  # Similarity scores of the requested movie against every row
  distances = similarity[movie_pos]
  # Rank every other row by score, highest first. The query row is excluded
  # explicitly instead of assuming it sorts first: a tie with another movie
  # could otherwise put the query itself into the recommendations.
  ranked = sorted(
      ((pos, score) for pos, score in enumerate(distances) if pos != movie_pos),
      key=lambda pair: pair[1],
      reverse=True,
  )
  # Printing recommended movie titles
  for pos, _score in ranked[:max(top_n, 0)]:
    print(movies_df['title'].iloc[pos])

### **Testing**

In [None]:
# Recommend movies similar to "Superman"
Movie_Recommendation('Superman')

Superman Returns
Superman II
Iron Man 2
Superman III
Superman IV: The Quest for Peace
