In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
import difflib

# Read only the seven columns this notebook works with from the movie CSV.
data = pd.read_csv(
    'bigmovies.csv',
    usecols=[
        'id',
        'title',
        'backdrop_path',
        'tagline',
        'overview',
        'genres',
        'keywords',
    ],
)
data.shape

(1234214, 7)

In [2]:
# Drop every row that has a missing value in any loaded column, then renumber
# the rows 0..n-1. Without the reset the frame keeps its original CSV index
# labels, which no longer equal row positions; any positional lookup into a
# matrix built from this frame would then address the wrong row.
data = data.dropna().reset_index(drop=True)
data.shape

(54875, 7)

In [3]:
# Per-column count of missing values still present in the frame.
data.isna().sum()

id               0
title            0
backdrop_path    0
overview         0
tagline          0
genres           0
keywords         0
dtype: int64

In [4]:
# Text columns that feed the similarity model; replace any missing value in
# them with a single space so string concatenation cannot produce NaN.
selected_data = ['title', 'genres', 'overview', 'tagline', 'keywords']
data[selected_data] = data[selected_data].fillna(' ')
data.shape

(54875, 7)

In [5]:
# One space-separated text document per movie, built from its text columns.
combined_data = (
    data['title'] + ' '
    + data['genres'] + ' '
    + data['overview'] + ' '
    + data['tagline'] + ' '
    + data['keywords']
)

In [6]:
# TF-IDF representation of the combined per-movie text, English stop words removed.
vector = TfidfVectorizer(stop_words='english')
feature_set = vector.fit_transform(combined_data)

# Query neighbours on demand with brute-force cosine distance rather than
# materialising a full pairwise cosine-similarity matrix.
nn_model = NearestNeighbors(algorithm='brute', metric='cosine').fit(feature_set)
print(nn_model)



NearestNeighbors(algorithm='brute', metric='cosine')


In [8]:
def recommend(movie_name, n_recommendations=5):
    """Return the movies most similar to the closest title match for ``movie_name``.

    The query is fuzzy-matched against ``data['title']`` with
    ``difflib.get_close_matches``. The best match's row of ``feature_set`` is
    then passed to ``nn_model.kneighbors`` to find its nearest neighbours.

    Parameters
    ----------
    movie_name : str
        Title (or approximate title) to look up.
    n_recommendations : int, default 5
        Maximum number of similar movies to return.

    Returns
    -------
    list of list
        One ``[title, genres, overview, backdrop_path]`` entry per neighbour,
        nearest first. Empty when no title is a close match.
    """
    list_of_all_titles = data['title'].tolist()
    find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

    if not find_close_match:
        print("No close match found. Please try again.")
        return []

    close_match = find_close_match[0]
    print(f"\nShowing results for: {close_match}")

    # feature_set is addressed by row position, while data.index still carries
    # labels that stop matching positions once rows have been dropped. Resolve
    # the movie by position instead of by index label.
    position = int(np.flatnonzero(data['title'].to_numpy() == close_match)[0])

    # Ask for one extra neighbour (the query movie itself is normally returned
    # too) and never request more neighbours than there are rows.
    limit = max(int(n_recommendations), 0)
    n_neighbors = min(limit + 1, feature_set.shape[0])
    _, indices = nn_model.kneighbors(feature_set[position], n_neighbors=n_neighbors)

    # Exclude the query movie explicitly: with tied distances (e.g. duplicate
    # entries) it is not guaranteed to be the first neighbour.
    neighbour_positions = [int(i) for i in indices.flatten() if i != position][:limit]

    columns = ['title', 'genres', 'overview', 'backdrop_path']
    return data.iloc[neighbour_positions][columns].values.tolist()



In [17]:
def search(movie_name, n_recommendations=5):
    """Return the movies most similar to the closest title match for ``movie_name``.

    Neighbours are ranked with the module-level ``nn_model`` over
    ``feature_set``. This function no longer refits the shared ``vector`` on
    titles or builds a second model: neither was ever queried, and the refit
    left ``vector`` inconsistent with ``feature_set`` after every call.

    Parameters
    ----------
    movie_name : str
        Title (or approximate title) to look up.
    n_recommendations : int, default 5
        Maximum number of similar movies to return.

    Returns
    -------
    list of list
        One ``[title, genres, overview, backdrop_path]`` entry per neighbour,
        nearest first. Empty when no title is a close match.
    """
    list_of_all_titles = data['title'].tolist()
    find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
    if not find_close_match:
        print("No close match found. Please try again.")
        return []

    close_match = find_close_match[0]
    print(f"\nShowing results for: {close_match}")

    # feature_set is addressed by row position, while data.index still carries
    # labels that stop matching positions once rows have been dropped. Resolve
    # the movie by position instead of by index label.
    position = int(np.flatnonzero(data['title'].to_numpy() == close_match)[0])

    # Ask for one extra neighbour (the query movie itself is normally returned
    # too) and never request more neighbours than there are rows.
    limit = max(int(n_recommendations), 0)
    n_neighbors = min(limit + 1, feature_set.shape[0])
    _, indices = nn_model.kneighbors(feature_set[position], n_neighbors=n_neighbors)

    # Exclude the query movie explicitly: with tied distances (e.g. duplicate
    # entries) it is not guaranteed to be the first neighbour.
    neighbour_positions = [int(i) for i in indices.flatten() if i != position][:limit]

    columns = ['title', 'genres', 'overview', 'backdrop_path']
    return data.iloc[neighbour_positions][columns].values.tolist()

In [18]:
# Smoke-test the lookup with a differently-cased query.
print(search(movie_name="Iron man"))


Showing results for: Iron Man
[['Iron Man 3', 'Action, Adventure, Science Fiction', "When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.", '/aFTYFqrWp4RS46Twm87l5e0ItYb.jpg'], ['Iron Man 2', 'Adventure, Action, Science Fiction', "With the world now aware of his dual life as the armored superhero Iron Man, billionaire inventor Tony Stark faces pressure from the government, the press and the public to share his technology with the military. Unwilling to let go of his invention, Stark, with Pepper Potts and James 'Rhodey' Rhodes at his side, must forge new alliances – and confront powerful enemies.", '/jesRqfL9v6HNnowe795xjmuKUXl.jpg'], ['Inhumans: The First Chapter', 'Science Fiction, Action, Adventure', 'An isolated community of superhumans fight to protect themselves.', '/zJ4xmyVLYU9Lu3yfYiJNFiLL51D.jpg'], ['Captain Marvel', 'Action, Adventure, Science Fiction', 'The story follows Carol Danvers as she

In [12]:
import pickle

# Persist the cleaned frame, the TF-IDF matrix, the fitted vectoriser and the
# fitted nearest-neighbour model. Each file is opened in a `with` block so the
# handle is flushed and closed deterministically instead of being leaked.
with open("bigmovies.pkl", "wb") as fh:
    pickle.dump(data, fh)
with open("features.pkl", "wb") as fh:
    pickle.dump(feature_set, fh)
with open("vectorizer.pkl", "wb") as fh:
    pickle.dump(vector, fh)
with open("model.pkl", "wb") as fh:
    pickle.dump(nn_model, fh)

In [40]:
# Preview the first five rows of the cleaned frame.
data.head(n=5)

Unnamed: 0,id,title,backdrop_path,overview,tagline,genres,keywords
0,27205,Inception,/8ZTVqvKDQ8emSGUEMjsS4yHAwrp.jpg,"Cobb, a skilled thief who commits corporate es...",Your mind is the scene of the crime.,"Action, Science Fiction, Adventure","rescue, mission, dream, airplane, paris, franc..."
1,157336,Interstellar,/pbrkL804c8yAv3zBZR4QPEafpAR.jpg,The adventures of a group of explorers who mak...,Mankind was born on Earth. It was never meant ...,"Adventure, Drama, Science Fiction","rescue, future, spacecraft, race against time,..."
2,155,The Dark Knight,/nMKdUUepR0i5zn0y1T4CsSB5chy.jpg,Batman raises the stakes in his war on crime. ...,Welcome to a world without rules.,"Drama, Action, Crime, Thriller","joker, sadism, chaos, secret identity, crime f..."
3,19995,Avatar,/vL5LR6WdxWPjLPFRLe133jXWsh5.jpg,"In the 22nd century, a paraplegic Marine is di...",Enter the world of Pandora.,"Action, Adventure, Fantasy, Science Fiction","future, society, culture clash, space travel, ..."
4,24428,The Avengers,/9BBTo63ANSmhC4e6r62OJFuK2GL.jpg,When an unexpected enemy emerges and threatens...,Some assembly required.,"Science Fiction, Action, Adventure","new york city, superhero, shield, based on com..."
