In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# loading the data from the csv file to apandas dataframe
movies_data = pd.read_csv('/content/movies.csv')


In [4]:
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [5]:
movies_data.shape

(4803, 24)

In [6]:
# selecting the relevant features for recommendation

selected_features = ['genres','keywords','tagline','cast','director']
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [7]:
for feature in selected_features:
  movies_data[feature] = movies_data[feature].fillna('')

In [8]:
# combining all the 5 selected features

combined_features = movies_data['genres']+' '+movies_data['keywords']+' '+movies_data['tagline']+' '+movies_data['cast']+' '+movies_data['director']

In [9]:
print(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [10]:
vectorizer = TfidfVectorizer()

In [11]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [12]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)

In [13]:
print(similarity)

[[1.         0.07219487 0.037733   ... 0.         0.         0.        ]
 [0.07219487 1.         0.03281499 ... 0.03575545 0.         0.        ]
 [0.037733   0.03281499 1.         ... 0.         0.05389661 0.        ]
 ...
 [0.         0.03575545 0.         ... 1.         0.         0.02651502]
 [0.         0.         0.05389661 ... 0.         1.         0.        ]
 [0.         0.         0.         ... 0.02651502 0.         1.        ]]


In [14]:
print(similarity.shape)

(4803, 4803)


In [15]:
# getting the movie name from the user

movie_name = input(' Enter your favourite movie name : ')

 Enter your favourite movie name : Don


In [16]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movies_data['title'].tolist()
print(list_of_all_titles)

['Avatar', "Pirates of the Caribbean: At World's End", 'Spectre', 'The Dark Knight Rises', 'John Carter', 'Spider-Man 3', 'Tangled', 'Avengers: Age of Ultron', 'Harry Potter and the Half-Blood Prince', 'Batman v Superman: Dawn of Justice', 'Superman Returns', 'Quantum of Solace', "Pirates of the Caribbean: Dead Man's Chest", 'The Lone Ranger', 'Man of Steel', 'The Chronicles of Narnia: Prince Caspian', 'The Avengers', 'Pirates of the Caribbean: On Stranger Tides', 'Men in Black 3', 'The Hobbit: The Battle of the Five Armies', 'The Amazing Spider-Man', 'Robin Hood', 'The Hobbit: The Desolation of Smaug', 'The Golden Compass', 'King Kong', 'Titanic', 'Captain America: Civil War', 'Battleship', 'Jurassic World', 'Skyfall', 'Spider-Man 2', 'Iron Man 3', 'Alice in Wonderland', 'X-Men: The Last Stand', 'Monsters University', 'Transformers: Revenge of the Fallen', 'Transformers: Age of Extinction', 'Oz: The Great and Powerful', 'The Amazing Spider-Man 2', 'TRON: Legacy', 'Cars 2', 'Green Lant

In [17]:
# finding the close match for the movie name given by the user

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

['Domino', 'Dwegons', 'Don Jon']


In [18]:
close_match = find_close_match[0]
print(close_match)

Domino


In [19]:

index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]
print(index_of_the_movie)

992


In [20]:
# getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.005882443389772549), (1, 0.08119590444240977), (2, 0.014623127935721337), (3, 0.025349180373243303), (4, 0.0057280146151972395), (5, 0.006093750807362723), (6, 0.0), (7, 0.005937563614967788), (8, 0.0), (9, 0.005801001931944013), (10, 0.0066163945641860376), (11, 0.01269676591182876), (12, 0.09498736083746777), (13, 0.005422831998491422), (14, 0.027269558270639944), (15, 0.0), (16, 0.005630072454567156), (17, 0.07147937658795112), (18, 0.005620209739748476), (19, 0.005101248810585095), (20, 0.022610640027331687), (21, 0.019054957723263825), (22, 0.0), (23, 0.0), (24, 0.005549521660965293), (25, 0.0), (26, 0.00612808201864943), (27, 0.005358613293938588), (28, 0.005883125028397064), (29, 0.006154624783473235), (30, 0.0059286178002153685), (31, 0.005780957857553358), (32, 0.019952384570525304), (33, 0.00590067175220264), (34, 0.0), (35, 0.006194229046744529), (36, 0.005581494681824201), (37, 0.0), (38, 0.023171999365490713), (39, 0.005539868060326553), (40, 0.0), (41, 0.0056666272

In [21]:
len(similarity_score)

4803

In [22]:
# sorting the movies based on their similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 
print(sorted_similar_movies)

[(992, 1.0), (308, 0.24644188555287366), (1627, 0.18758470731074023), (3788, 0.1717369252775517), (570, 0.17058621707684343), (738, 0.16733521058293427), (402, 0.16672169233525716), (2273, 0.15291582744467327), (3917, 0.14285706155211825), (132, 0.13956914877720783), (4250, 0.132881521983033), (2384, 0.1259644628397295), (774, 0.11824131145523452), (1894, 0.11784541926519886), (3305, 0.11685288716663696), (1701, 0.11526808709410952), (3282, 0.11511016418725241), (2745, 0.1127393146739905), (3055, 0.10927063449773214), (1337, 0.10714922517703669), (1002, 0.10519615861263718), (2577, 0.10446758430748214), (3326, 0.10426205087561068), (2234, 0.10349647364120992), (273, 0.10171614729942134), (645, 0.10125401198619324), (1387, 0.10117303916401774), (1254, 0.10013215523507701), (669, 0.10011391904482736), (3709, 0.09941716459914023), (2694, 0.09575615155450064), (1177, 0.09498871786162076), (12, 0.09498736083746777), (994, 0.0948884614668458), (372, 0.0937476667505794), (412, 0.0921372654940

In [23]:
# print the name of similar movies based on the index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

Movies suggested for you : 

1 . Domino
2 . Point Break
3 . Deliver Us from Evil
4 . Guten Tag, Ramón
5 . Ransom
6 . Joy
7 . The Rundown
8 . Hands of Stone
9 . Phat Girlz
10 . Wrath of the Titans
11 . Wal-Mart: The High Cost of Low Price
12 . Carlos
13 . The Last Castle
14 . War
15 . The Blood of Heroes
16 . Once Upon a Time in Mexico
17 . Begin Again
18 . This Christmas
19 . Seeking a Friend for the End of the World
20 . Twilight
21 . The One
22 . Tuck Everlasting
23 . Black November
24 . A Dangerous Method
25 . Gone in Sixty Seconds
26 . Broken Arrow
27 . Malcolm X
28 . Get Carter
29 . Sin City: A Dame to Kill For


In [24]:
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.title == close_match]['index'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['title'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite movie name : Joy
Movies suggested for you : 

1 . Joy
2 . Silver Linings Playbook
3 . Hands of Stone
4 . Guten Tag, Ramón
5 . American Hustle
6 . Deliver Us from Evil
7 . The Hunger Games: Catching Fire
8 . Domino
9 . Winter's Tale
10 . The Hunger Games: Mockingjay - Part 2
11 . Wrath of the Titans
12 . Point Break
13 . Carlos
14 . X-Men: Apocalypse
15 . Dreamer: Inspired By a True Story
16 . Dallas Buyers Club
17 . Black Mass
18 . Legend
19 . The Bridge of San Luis Rey
20 . The Conjuring
21 . GoodFellas
22 . Martin Lawrence Live: Runteldat
23 . Wild
24 . To Write Love on Her Arms
25 . Stardust
26 . Flawless
27 . Free State of Jones
28 . The Intern
29 . Valentine's Day
