<a href="https://colab.research.google.com/github/ShaileeGavnekar/Movie_Recommendation_System/blob/main/Movie_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the credits table and the movie metadata from the working directory.
credits = pd.read_csv('tmdb_5000_credits.csv')
movies = pd.read_csv('tmdb_5000_movies.csv')

# Join both tables on their shared 'title' column, keep only the columns used
# downstream, and discard every row that has a missing value in any of them.
# NOTE(review): joining on 'title' would duplicate rows if two films share a
# title -- confirm against the data whether a key-based join is preferable.
movies = pd.merge(movies, credits, on='title')[
    ['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']
].dropna()

# Define helper functions with error handling for malformed data
def convert(text):
    """Extract the ``name`` field from a stringified list of dicts.

    Args:
        text: String containing a Python-literal list of dicts, e.g.
            ``'[{"id": 28, "name": "Action"}]'``.

    Returns:
        list: The ``name`` values in their original order. An empty list is
        returned when ``text`` cannot be parsed or does not evaluate to a
        list/tuple; individual entries that are not dicts or lack a ``name``
        key are skipped.
    """
    try:
        items = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Best-effort parsing: a malformed cell simply contributes no names.
        return []

    # A valid literal that is not a sequence (e.g. "5" or "None") would fail
    # on iteration, so treat it like any other malformed cell.
    if not isinstance(items, (list, tuple)):
        return []

    return [
        item['name']
        for item in items
        if isinstance(item, dict) and 'name' in item
    ]

def convert3(text, limit=3):
    """Extract the first few ``name`` values from a stringified list of dicts.

    Args:
        text: String containing a Python-literal list of dicts, e.g.
            ``'[{"name": "Sam Worthington"}, ...]'``.
        limit: Maximum number of names to return (defaults to 3).

    Returns:
        list: Up to ``limit`` ``name`` values in their original order. An
        empty list is returned when ``text`` cannot be parsed or does not
        evaluate to a list/tuple; entries that are not dicts or lack a
        ``name`` key are skipped and do not count towards ``limit``.
    """
    try:
        items = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Best-effort parsing: a malformed cell simply contributes no names.
        return []

    if not isinstance(items, (list, tuple)):
        return []

    names = []
    for item in items:
        # Stop as soon as enough names are collected instead of scanning the
        # remainder of the list.
        if len(names) >= limit:
            break
        if isinstance(item, dict) and 'name' in item:
            names.append(item['name'])
    return names

def fetch_director(text):
    """Extract the names of crew members whose ``job`` is ``'Director'``.

    Args:
        text: String containing a Python-literal list of crew dicts, each
            expected to carry ``job`` and ``name`` keys.

    Returns:
        list: Names of all entries with ``job == 'Director'``, in their
        original order. An empty list is returned when ``text`` cannot be
        parsed or does not evaluate to a list/tuple; entries that are not
        dicts or lack a ``job``/``name`` key are skipped.
    """
    try:
        crew = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Best-effort parsing: a malformed cell simply yields no director.
        return []

    if not isinstance(crew, (list, tuple)):
        return []

    return [
        member['name']
        for member in crew
        # .get() tolerates entries without a 'job' key instead of raising.
        if isinstance(member, dict)
        and member.get('job') == 'Director'
        and 'name' in member
    ]

def collapse(L):
    """Return a new list in which every space is removed from each string of ``L``."""
    squeezed = []
    for token in L:
        squeezed.append(token.replace(" ", ""))
    return squeezed

# Parse each stringified column into a plain list of names, using the helper
# that matches the column's content.
for column, parser in (
    ('genres', convert),
    ('keywords', convert),
    ('cast', convert3),
    ('crew', fetch_director),
):
    movies[column] = movies[column].apply(parser)

# Strip spaces inside every name so a multi-word name becomes a single token.
for column in ('cast', 'crew', 'genres', 'keywords'):
    movies[column] = movies[column].apply(collapse)

# Tokenise the overview on whitespace, then concatenate all per-movie lists
# into a single 'tags' list.
movies['overview'] = movies['overview'].apply(lambda text: text.split())
movies['tags'] = (
    movies['overview']
    + movies['genres']
    + movies['keywords']
    + movies['cast']
    + movies['crew']
)

# Create a new DataFrame 'new' with only the relevant columns. Take an
# explicit copy so the assignment below writes to an independent frame rather
# than to a slice of `movies` (which raises pandas' SettingWithCopyWarning).
new = movies[['movie_id', 'title', 'tags']].copy()

# Turn each list of tokens into one space-separated string.
new['tags'] = new['tags'].apply(" ".join)

# Drop rows whose tag string is empty, then renumber the rows 0..n-1 so that
# index labels coincide with row positions in any matrix built from `new`.
new = new[new['tags'] != ''].reset_index(drop=True)

# Bag-of-words model over the tag strings: English stop words are removed and
# the vocabulary is capped at 5000 features.
cv = CountVectorizer(stop_words='english', max_features=5000)

# Fit on the tags and materialise the sparse count matrix as a dense array
# (one row per movie in `new`).
tag_counts = cv.fit_transform(new['tags'])
vector = tag_counts.toarray()

# Pairwise cosine similarity between every pair of movie rows.
similarity = cosine_similarity(vector)

# Define recommendation function with a check for movie existence
def recommend(movie):
    """Print the five movies whose tags are most similar to ``movie``.

    The title is looked up in the module-level ``new`` frame and compared
    against every other row of the module-level ``similarity`` matrix.

    Args:
        movie: Exact title to look up (must equal a value in ``new['title']``).

    Returns:
        None. The recommendations, or a not-found message, are printed.
    """
    # `similarity` is addressed by row position, so locate the title by
    # position rather than by index label: labels can have gaps after rows
    # were dropped, which would select the wrong row or raise IndexError.
    matches = (new['title'] == movie).to_numpy().nonzero()[0]
    if matches.size == 0:
        print(f"Movie '{movie}' not found in dataset.")
        return

    position = matches[0]

    # Rank every other row by similarity, highest first. The queried movie is
    # excluded explicitly instead of assuming it always sorts to the top
    # (ties with identical tag vectors would otherwise break that assumption).
    ranked = sorted(
        (pair for pair in enumerate(similarity[position]) if pair[0] != position),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print(f"Recommendations for '{movie}':")
    for row, _score in ranked[:5]:
        print(new.iloc[row].title)

# Example usage
# NOTE: recommend() matches titles exactly. 'Minion' is not a title in the
# dataset, so this call prints the "not found" message instead of a list of
# recommendations.
recommend('Minion')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new['tags'] = new['tags'].apply(lambda x: " ".join(x))


Movie 'Minion' not found in dataset.
