# Importing Libraries

In [37]:
import numpy as nu
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Preprocessing

In [38]:
# Read the movie metadata CSV into a pandas DataFrame.
movies_csv_path = '/content/movies.csv'
movies_data = pd.read_csv(movies_csv_path)

In [39]:
#movies_data.head()

In [40]:
# Display the dataset dimensions as (number of rows, number of columns).
movies_data.shape

(4803, 24)

In [41]:
# Text columns whose contents are used to judge how alike two movies are.
selected_features = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'director',
]
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [42]:
# Replace missing values in the selected feature columns with empty strings
# so that the columns can be concatenated as text afterwards.
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [43]:
# Combine the text of every selected feature column into one string per movie,
# with a single space between consecutive columns. Building this from
# selected_features (rather than repeating the column names) keeps the combined
# text in sync with the list of columns chosen for the recommendation.
combined_features = movies_data[selected_features[0]]
for feature in selected_features[1:]:
  combined_features = combined_features + ' ' + movies_data[feature]

In [44]:
#print(combined_features)

In [45]:
# Create a TF-IDF vectorizer with its default settings; it is used to turn the
# combined text of each movie into a numeric feature vector.

vectorizer = TfidfVectorizer()

In [46]:
# Fit the vectorizer on the combined text and transform that same text into
# TF-IDF feature vectors in one step.
feature_vectors = vectorizer.fit_transform(combined_features)

In [47]:
#print(feature_vectors)

Cosine Similarity

In [48]:
# Compute the cosine similarity between the feature vectors. The result is
# indexed by movie row below: similarity[i] holds the scores of movie i
# against every movie in the dataset.

similarity = cosine_similarity(feature_vectors)

In [49]:
#print(similarity)

Getting the movie name as input

In [50]:
# Ask the user for a movie title. The text does not have to be exact: it is
# fuzzy-matched against the dataset's titles with difflib further down.

movie_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : John Carter


In [51]:
# Build a plain Python list of every title in the dataset; this is the pool of
# candidates that the user's input is matched against.

list_of_all_titles = movies_data['title'].tolist()
#print(list_of_all_titles)

In [52]:
# Find the dataset titles that most resemble the name typed by the user.
# With its default arguments difflib returns at most three candidates, ordered
# from most to least similar; the list is empty when nothing is close enough.

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['John Carter', 'Coach Carter', 'Get Carter']


In [53]:
# Use the best-ranked candidate as the movie to base the recommendations on.
# get_close_matches returns an empty list when no title is similar enough, so
# stop with an explicit message instead of an opaque IndexError.
if not find_close_match:
  raise ValueError(f'No movie title close to {movie_name!r} was found in the dataset.')

close_match = find_close_match[0]
print(close_match)

John Carter


In [54]:
# Find the row position of the matched movie inside movies_data.
# The similarity matrix is computed from the rows of movies_data in order, so
# the movie's row position is what selects its row of similarity scores. Using
# the position directly avoids depending on the CSV's 'index' column happening
# to agree with the row order. The first matching row is used if a title
# occurs more than once.
title_matches = (movies_data['title'] == close_match).to_numpy()
index_of_the_movie = title_matches.nonzero()[0][0]
print(index_of_the_movie)

4


In [55]:
# Pair every movie's row position with its similarity to the chosen movie,
# giving a list of (row_position, score) tuples.

similarity_score = list(enumerate(similarity[index_of_the_movie]))
#print(similarity_score)

In [56]:
# Order the (row_position, score) pairs from the highest score to the lowest.
# The sort is stable, so movies with equal scores keep their dataset order.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda scored: scored[1], reverse=True)

In [57]:
# Print the titles of the highest-scoring movies, numbered from 1.
# Only the top entries are visited: the list is already sorted by score, so
# there is no need to look up a title for every movie in the dataset.
# In the recorded output the chosen movie itself is listed first.
print('Movies suggested for you : \n')

number_of_suggestions = 50  # how many titles to display

for rank, (row, _score) in enumerate(sorted_similar_movies[:number_of_suggestions], start=1):
  # `row` is a row position (it comes from enumerating a similarity row),
  # so the title is fetched positionally.
  title_from_index = movies_data['title'].iloc[row]
  print(rank, '.', title_from_index)

Movies suggested for you : 

1 . John Carter
2 . Heaven is for Real
3 . Alien
4 . The Specials
5 . The Helix... Loaded
6 . Finding Nemo
7 . Transformers
8 . Mission to Mars
9 . The Astronaut's Wife
10 . American Psycho
11 . Max
12 . The English Patient
13 . The Last Temptation of Christ
14 . Enter Nowhere
15 . The Martian
16 . Notes on a Scandal
17 . Sideways
18 . Spider-Man 3
19 . Daddy's Home
20 . We Bought a Zoo
21 . George of the Jungle
22 . Treasure Planet
23 . Don McKay
24 . Auto Focus
25 . Savages
26 . The Covenant
27 . X-Men Origins: Wolverine
28 . Daybreakers
29 . Gravity
30 . Lone Survivor
31 . Imagine That
32 . Battleship
33 . Paper Towns
34 . Killing Them Softly
35 . Star Trek IV: The Voyage Home
36 . Shrek 2
37 . Avatar
38 . Beastmaster 2: Through the Portal of Time
39 . The Host
40 . Morvern Callar
41 . Silent Running
42 . Minority Report
43 . Synecdoche, New York
44 . Pandaemonium
45 . Black Knight
46 . All About Steve
47 . The Last Days on Mars
48 . eXistenZ
49 . Flight