In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load the movie metadata table from the CSV file into a DataFrame.
movies_data = pd.read_csv('movies.csv')
# Bare last expression so the notebook displays the (rows, columns) tuple.
movies_data.shape

(4803, 24)

In [4]:
# Display the column labels to see which fields are available for modelling.
movies_data.columns



Index(['index', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'cast', 'crew', 'director'],
      dtype='object')

In [5]:
# Text columns whose contents describe a movie for the similarity model.
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
print(selected_features)

# Replace missing values with empty strings in all chosen columns at once,
# so every selected column holds a string in every row.
movies_data[selected_features] = movies_data[selected_features].fillna('')

['genres', 'keywords', 'tagline', 'cast', 'director']


In [6]:
# Build one space-separated text blob per movie by appending the descriptive
# columns left to right: genres, keywords, tagline, cast, director.
text_columns = ['genres', 'keywords', 'tagline', 'cast', 'director']
combined_features = movies_data[text_columns[0]]
for column in text_columns[1:]:
    combined_features = combined_features + ' ' + movies_data[column]
print(combined_features)


0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [7]:
# Learn a TF-IDF vocabulary from the combined text and encode it as weighted
# term vectors (one row per entry of combined_features). Default vectorizer
# settings are used.
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)


In [10]:
# Recommend movies similar to a title typed by the user.
#
# Steps: compute pairwise cosine similarity between all TF-IDF rows, fuzzy-match
# the typed name against the known titles, then print the highest-scoring
# movies for the best match. `difflib` comes from the notebook's import cell.

# Number of suggestions to print (ranks 1..29, matching the earlier `i < 30` guard).
MAX_SUGGESTIONS = 29

# Square matrix: entry [a, b] is the similarity between row a and row b of
# feature_vectors, i.e. between the a-th and b-th rows of movies_data.
similarity = cosine_similarity(feature_vectors)

movie_name = input('Enter your favourite movie name: ')

list_of_all_titles = movies_data['title'].tolist()

# Candidates come back ordered best match first; the list is empty when nothing
# is close enough.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if not find_close_match:
    print("No close match found. Please try a different movie name.")
else:
    close_match = find_close_match[0]

    # Rows of `similarity` are positional, so locate the match by its row
    # position rather than via the CSV's 'index' column or the DataFrame index
    # labels; those only agree with positions while the frame is unfiltered
    # and unsorted. The first occurrence wins when a title appears twice.
    index_of_the_movie = list_of_all_titles.index(close_match)

    similarity_score = list(enumerate(similarity[index_of_the_movie]))

    # Stable sort: movies with equal scores keep their original row order.
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    print('Movies suggested for you:\n')

    # Only the top entries are needed, so slice before looking up titles
    # instead of scanning the whole DataFrame once per movie.
    # NOTE: rank 1 is normally the queried movie itself (similarity 1.0).
    for rank, (index, _score) in enumerate(sorted_similar_movies[:MAX_SUGGESTIONS], start=1):
        title_from_index = list_of_all_titles[index]
        print(rank, '.', title_from_index)


Movies suggested for you:

1 . Mission: Impossible
2 . Mission: Impossible II
3 . Raising Cain
4 . Clear and Present Danger
5 . Agent Cody Banks 2: Destination London
6 . Dr. No
7 . Mission: Impossible - Rogue Nation
8 . RED 2
9 . Femme Fatale
10 . The Da Vinci Code
11 . Flushed Away
12 . The Ghost Writer
13 . Ronin
14 . Jack Ryan: Shadow Recruit
15 . London Has Fallen
16 . Body Double
17 . A View to a Kill
18 . The Spy Who Loved Me
19 . Live and Let Die
20 . Scarface
21 . Around the World in 80 Days
22 . Tomorrow Never Dies
23 . The Untouchables
24 . National Treasure
25 . Dressed to Kill
26 . 8 Women
27 . Agent Cody Banks
28 . Mission to Mars
29 . Mission: Impossible III
