In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the movie metadata table from the CSV file in the Colab working directory.
csv_path = '/content/movies.csv'
movies_data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to get a feel for the available columns.
movies_data.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [None]:
# Dataset dimensions as a (rows, columns) tuple.
movies_data.shape

(729, 24)

In [None]:
# Text columns that are merged into one descriptive string per movie.
selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [None]:
# Count the missing values in every column.
movies_data.isna().sum()

Unnamed: 0,0
index,0
budget,0
genres,0
homepage,312
id,0
keywords,7
original_language,0
original_title,0
overview,0
popularity,0


In [None]:
# Replace missing values in all selected text columns with empty strings in one step.
movies_data[selected_features] = movies_data[selected_features].fillna('')


In [None]:
# Build one descriptive text string per movie by joining the selected feature
# columns with single spaces. The column list comes from `selected_features`
# so the two cannot drift apart, and missing values are blanked here as well so
# a missing value in one column cannot turn the whole combined string into NaN.
combined_features = (
    movies_data[selected_features]
    .fillna('')
    .astype(str)
    .agg(' '.join, axis=1)
)
print(combined_features)

0      Action Adventure Fantasy Science Fiction cultu...
1      Adventure Fantasy Action ocean drug abuse exot...
2      Action Adventure Crime spy based on novel secr...
3      Action Crime Drama Thriller dc comics crime fi...
4      Action Adventure Science Fiction based on nove...
                             ...                        
724    Action Drama Thriller Crime mexico cia kidnapp...
725    Comedy Family father son relationship parents ...
726    Comedy Crime informant jumping from a rooftop ...
727    Family Comedy holiday christmas party santa cl...
728    Fantasy Action Science Fiction black magic fis...
Length: 729, dtype: object


In [None]:
# TF-IDF vectorizer with default settings; it is fitted on the combined text in the next cell.
v = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and encode every movie as a TF-IDF vector.
feature_vectors = v.fit_transform(combined_features)
print(feature_vectors)

In [None]:
# Pairwise cosine similarity between the TF-IDF vectors of all movies.
similarity = cosine_similarity(X=feature_vectors)
print(similarity, similarity.shape, sep='\n')

[[1.         0.06232851 0.03394713 ... 0.0050107  0.         0.0288348 ]
 [0.06232851 1.         0.02700888 ... 0.01334382 0.01351851 0.02103366]
 [0.03394713 0.02700888 1.         ... 0.01424466 0.03770737 0.00362831]
 ...
 [0.0050107  0.01334382 0.01424466 ... 1.         0.03253733 0.03244655]
 [0.         0.01351851 0.03770737 ... 0.03253733 1.         0.        ]
 [0.0288348  0.02103366 0.00362831 ... 0.03244655 0.         1.        ]]
(729, 729)


In [None]:
# Ask the user for a movie title. Surrounding whitespace is stripped because
# stray spaces lower the fuzzy-match score against the dataset titles.
movie_name = input('Enter a movie name: ').strip()

Enter a movie name: batman


In [None]:
# Collect every title in the dataset; this list is the candidate pool for fuzzy matching.
list_of_all_titles = movies_data['title'].tolist()
# Preview only the first few titles plus the total count instead of dumping the whole list.
print(list_of_all_titles[:10])
print(len(list_of_all_titles))

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [None]:
# Fuzzy-match the user's input against the dataset titles, ignoring case.
# difflib compares characters exactly, so lowercase input such as 'batman' would
# otherwise be penalized against capitalized titles. Matching is done on
# lowercased titles and the results are mapped back to their original spelling.
lowercase_to_title = {}
for title in list_of_all_titles:
    if isinstance(title, str):
        # Keep the first occurrence when two titles differ only by case.
        lowercase_to_title.setdefault(title.lower(), title)

find_close_match = [
    lowercase_to_title[matched_key]
    for matched_key in difflib.get_close_matches(movie_name.lower(), list(lowercase_to_title))
]
print(find_close_match)

['Catwoman']


In [None]:
# Take the best fuzzy match. Fail with an explanatory message when nothing in
# the dataset is close enough, instead of a bare IndexError on the empty list.
if not find_close_match:
    raise ValueError(
        f'No title in the dataset resembles {movie_name!r}; try a different name.'
    )
close_match = find_close_match[0]
print(close_match)

Catwoman


In [None]:
# Positional row number of the matched movie. The similarity matrix is ordered by row
# position, so the position is derived from the title column directly rather than
# read from the CSV's stored `index` column, which only agrees with the row position
# while the file is unfiltered and in its original order.
matching_rows = np.flatnonzero((movies_data['title'] == close_match).to_numpy())
index_of_the_movie = int(matching_rows[0])
print(index_of_the_movie)

303


In [None]:
# Pair each movie's row position with its similarity to the chosen movie.
similarity_score = list(enumerate(similarity[index_of_the_movie]))
# Preview the first few pairs only; the full list has one entry per movie.
print(similarity_score[:10])

[(0, 0.003786749318692379), (1, 0.0033614531063075504), (2, 0.01829179043226519), (3, 0.10857915651966515), (4, 0.03103389157385462), (5, 0.003865726865823218), (6, 0.0), (7, 0.004113846226694305), (8, 0.0), (9, 0.07062228340021316), (10, 0.07937270392173218), (11, 0.014570663282828647), (12, 0.004084314408671391), (13, 0.003457469820742283), (14, 0.07113630558219275), (15, 0.0), (16, 0.003976831211504625), (17, 0.003727887165471395), (18, 0.017376796008896647), (19, 0.0035086153429607267), (20, 0.03907138200176589), (21, 0.003304184215591979), (22, 0.0), (23, 0.0), (24, 0.0032473391838204906), (25, 0.03772525238911107), (26, 0.004074788739236216), (27, 0.003594094390761118), (28, 0.003867105258227064), (29, 0.003986835265508587), (30, 0.018526868709845695), (31, 0.0038504900671854523), (32, 0.013683896473326448), (33, 0.08610954740655628), (34, 0.0), (35, 0.03511052607100554), (36, 0.0038439313804885644), (37, 0.015551055264816767), (38, 0.040447294264047404), (39, 0.00365214200352506

In [None]:
# Rank all movies from most to least similar to the chosen movie.
sorted_similar_movies = sorted(similarity_score, key=lambda pair: pair[1], reverse=True)
# Preview the top of the ranking only; the full list has one entry per movie.
print(sorted_similar_movies[:10])

[(303, 1.0000000000000002), (608, 0.14274636826738665), (567, 0.11564079963366951), (3, 0.10857915651966515), (506, 0.10732624496791915), (468, 0.10228388492449494), (347, 0.10176077759309982), (119, 0.10162227221530877), (65, 0.0973837663301878), (210, 0.09139358299010512), (511, 0.08892941135346556), (440, 0.08869805467511878), (163, 0.08804700125794714), (549, 0.08782378454406603), (428, 0.08694774035499456), (527, 0.086466003114192), (33, 0.08610954740655628), (299, 0.08431251194724101), (72, 0.08400130289375751), (41, 0.08212715168019272), (207, 0.08186442701782316), (46, 0.07979785390486663), (10, 0.07937270392173218), (301, 0.078571724372892), (487, 0.07849459623427417), (203, 0.07512069512623218), (441, 0.07291139038232104), (147, 0.072686514657982), (14, 0.07113630558219275), (9, 0.07062228340021316), (234, 0.07034330421430424), (442, 0.06732745249249097), (237, 0.06698338034616423), (298, 0.06290419143177295), (588, 0.058360700882532576), (568, 0.054540902844280745), (195, 0.

In [None]:
# Print the highest-ranked movies as a numbered list.
# The queried movie always ranks first (similarity 1.0 with itself), so it is
# excluded to avoid recommending the movie the user just asked about. Slicing
# the ranking up front also avoids walking every remaining row after the last
# suggestion has been printed.
max_suggestions = 4  # same number of lines the original counter (i < 5) produced
suggested_positions = [
    position
    for position, _score in sorted_similar_movies
    if position != index_of_the_movie
][:max_suggestions]

print('Movies suggested for you: \n')
for rank, position in enumerate(suggested_positions, start=1):
    title_from_index = movies_data.iloc[position]['title']
    print(rank , '.' , title_from_index)

Movies suggested for you: 

1 . Catwoman
2 . Basic Instinct 2
3 . Runaway Bride
4 . The Dark Knight Rises
