In [11]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# read the movie metadata CSV (resolved relative to the working directory) into a dataframe
movies_data = pd.read_csv(filepath_or_buffer="movies.csv")

In [12]:

# report the dataframe dimensions as (rows, columns)
print(movies_data.shape)

# text columns that feed the recommender
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
print(selected_features)

# missing entries become empty strings so the columns can be concatenated
for column in selected_features:
    movies_data[column] = movies_data[column].fillna('')

# build one space-separated text per row by appending the columns in order
first_column, *remaining_columns = selected_features
combined_features = movies_data[first_column]
for column in remaining_columns:
    combined_features = combined_features + ' ' + movies_data[column]

# turn the combined text into TF-IDF feature vectors
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)

# cosine similarity of every row's feature vector against every other row's
similarity = cosine_similarity(feature_vectors)

# getting the movie name from the user
movie_name = input('Enter your favorite movie name: ')

# how many recommendations to list
number_of_recommendations = 30

# finding the closest match for the movie name given by the user;
# missing titles are dropped because difflib can only compare strings
find_close_match = difflib.get_close_matches(movie_name, movies_data['title'].dropna())

if len(find_close_match) == 0:
    # report and fall through to the end of the cell instead of calling exit(),
    # which would shut down the notebook kernel and discard its state
    print("No close match found for the given movie name.")
else:
    # get_close_matches returns its candidates most-similar first
    close_match = find_close_match[0]

    # row POSITION (not index label) of the matched title, because the rows of
    # `similarity` are addressed by position
    index_of_the_movie = int(np.flatnonzero((movies_data['title'] == close_match).to_numpy())[0])

    # getting a list of (position, score) pairs, most similar first
    similarity_scores = list(enumerate(similarity[index_of_the_movie]))
    sorted_similar_movies = sorted(similarity_scores, key=lambda x: x[1], reverse=True)

    # printing the recommended movies, skipping the queried movie itself
    print('Movies suggested for you:\n')
    recommended_positions = [
        position for position, _ in sorted_similar_movies if position != index_of_the_movie
    ]
    for i, position in enumerate(recommended_positions[:number_of_recommendations], start=1):
        # positional lookup keeps this consistent with the enumerate() positions above
        print(i, '.', movies_data['title'].iloc[position])


(4803, 24)
['genres', 'keywords', 'tagline', 'cast', 'director']
Enter your favorite movie name: Batman
Movies suggested for you:

1 . Batman Returns
2 . Batman & Robin
3 . The Dark Knight Rises
4 . Batman Begins
5 . The Dark Knight
6 . A History of Violence
7 . Superman
8 . Beetlejuice
9 . Bedazzled
10 . Mars Attacks!
11 . The Sentinel
12 . Planet of the Apes
13 . Man of Steel
14 . Suicide Squad
15 . The Mask
16 . Salton Sea
17 . Spider-Man 3
18 . The Postman Always Rings Twice
19 . Hang 'em High
20 . Spider-Man 2
21 . Dungeons & Dragons: Wrath of the Dragon God
22 . Superman Returns
23 . Jonah Hex
24 . Exorcist II: The Heretic
25 . Superman II
26 . Green Lantern
27 . Superman III
28 . Something's Gotta Give
29 . Reds
30 . Batman Forever
