In [1]:
# Include The Dependencies

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import difflib
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

In [3]:
# Load Dataset and Preprocessing

In [4]:
# Load the movie metadata from 'movies.csv' (path is relative to the working directory).
movie_data = pd.read_csv('movies.csv')

In [5]:
# Preview the first five rows to check which columns were loaded.
movie_data.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [6]:
# Count the missing values in each column.
movie_data.isna().sum()

index                      0
budget                     0
genres                    28
homepage                3091
id                         0
keywords                 412
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
cast                      43
crew                       0
director                  30
dtype: int64

In [7]:
# (rows, columns) of the loaded frame.
movie_data.shape

(4803, 24)

In [8]:
# Check all features

In [9]:
# Inspect the director column on its own.
movie_data.loc[:, 'director']

0           James Cameron
1          Gore Verbinski
2              Sam Mendes
3       Christopher Nolan
4          Andrew Stanton
              ...        
4798     Robert Rodriguez
4799         Edward Burns
4800          Scott Smith
4801          Daniel Hsia
4802     Brian Herzlinger
Name: director, Length: 4803, dtype: object

In [10]:
# Select features

In [11]:
# Text columns that are combined into the per-movie description used for similarity.
select_feature = [
    'genres',
    'keywords',
    'tagline',
    'title',
    'cast',
    'director',
]

In [12]:
# Echo the selected column names.
select_feature

['genres', 'keywords', 'tagline', 'title', 'cast', 'director']

In [13]:
# Replace missing values in the selected text columns with a single space so that
# the string concatenation performed later never meets a NaN.
movie_data[select_feature] = movie_data[select_feature].fillna(' ')

In [14]:
# Combine all features

In [15]:
# Build one space-separated text per movie by appending the selected columns
# left to right, in the order given by select_feature.
combine_all_feature = movie_data[select_feature[0]]
for feature_name in select_feature[1:]:
    combine_all_feature = combine_all_feature + ' ' + movie_data[feature_name]

In [16]:
# Echo the combined text series.
combine_all_feature

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance   A newlywed couple's honeymoon...
4800    Comedy Drama Romance TV Movie date love at fir...
4801        A New Yorker in Shanghai Shanghai Calling ...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object

In [17]:
# Convert the features to vectors

In [18]:
# TF-IDF vectorizer created with its default settings.
tfvector = TfidfVectorizer()

In [19]:
# Fit the vectorizer on the combined text and transform that same text into TF-IDF features.
vector_featurs = tfvector.fit_transform(combine_all_feature)

In [20]:
# print(vector_featurs)

In [21]:
# Getting the similarity scores using cosine_similarity

In [22]:
# Cosine similarity of the feature matrix against itself: one row and one column per movie.
similarity = cosine_similarity(vector_featurs)

In [23]:
# Echo the similarity matrix.
similarity

array([[1.        , 0.07294698, 0.03533251, ..., 0.        , 0.        ,
        0.        ],
       [0.07294698, 1.        , 0.02792771, ..., 0.04419983, 0.        ,
        0.        ],
       [0.03533251, 0.02792771, 1.        , ..., 0.        , 0.04636139,
        0.        ],
       ...,
       [0.        , 0.04419983, 0.        , ..., 1.        , 0.        ,
        0.05551043],
       [0.        , 0.        , 0.04636139, ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.05551043, 0.        ,
        1.        ]], shape=(4803, 4803))

In [24]:
# Dimensions of the similarity matrix.
similarity.shape

(4803, 4803)

In [25]:
# Getting the user input

In [26]:
# Ask the user for a movie name; the text is used as typed (no stripping or case folding).
movie_name = input('Enter name')

Enter name pad man


In [27]:
# Creating a list with all movie titles from the dataset

In [28]:
# Plain Python list of every title, in row order, for difflib to search.
list_of_moive_title = movie_data['title'].to_list()

In [29]:
# Echo the title list.
# NOTE(review): this renders every title in the list; a slice such as [:10] would keep the output short.
list_of_moive_title

['Avatar',
 "Pirates of the Caribbean: At World's End",
 'Spectre',
 'The Dark Knight Rises',
 'John Carter',
 'Spider-Man 3',
 'Tangled',
 'Avengers: Age of Ultron',
 'Harry Potter and the Half-Blood Prince',
 'Batman v Superman: Dawn of Justice',
 'Superman Returns',
 'Quantum of Solace',
 "Pirates of the Caribbean: Dead Man's Chest",
 'The Lone Ranger',
 'Man of Steel',
 'The Chronicles of Narnia: Prince Caspian',
 'The Avengers',
 'Pirates of the Caribbean: On Stranger Tides',
 'Men in Black 3',
 'The Hobbit: The Battle of the Five Armies',
 'The Amazing Spider-Man',
 'Robin Hood',
 'The Hobbit: The Desolation of Smaug',
 'The Golden Compass',
 'King Kong',
 'Titanic',
 'Captain America: Civil War',
 'Battleship',
 'Jurassic World',
 'Skyfall',
 'Spider-Man 2',
 'Iron Man 3',
 'Alice in Wonderland',
 'X-Men: The Last Stand',
 'Monsters University',
 'Transformers: Revenge of the Fallen',
 'Transformers: Age of Extinction',
 'Oz: The Great and Powerful',
 'The Amazing Spider-Man 2',

In [30]:
# Finding the closest matches for the movie title given by the user

In [31]:
# Fuzzy-match the typed name against the known titles. n and cutoff are written
# out with difflib's default values so the tuning knobs are visible.
closed_movies = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_moive_title,
    n=3,
    cutoff=0.6,
)

In [32]:
# Echo the candidate titles returned by difflib.
closed_movies

['Bad Company', 'Hard Rain', 'Bad Santa']

In [33]:
# get_close_matches returns an empty list when no title clears its similarity
# cutoff, so fail with a readable message instead of an opaque IndexError.
if not closed_movies:
    raise ValueError(
        f"No movie title similar to {movie_name!r} was found; try another spelling."
    )

# The candidates are ordered best match first, so take the first one.
close_match = closed_movies[0]

In [34]:
# Echo the title chosen as the best match.
close_match

'Bad Company'

In [35]:
# Finding the index of the movie with that title

In [36]:
# Read the stored 'index' value of the first row whose title equals the chosen match.
title_mask = movie_data['title'] == close_match
find_index_of_movie = movie_data.loc[title_mask, 'index'].iloc[0]
find_index_of_movie

np.int64(594)

In [37]:
# Getting a list of similar movies

In [38]:
# Pair every movie position with its similarity to the chosen movie.
movie_similarity_row = similarity[find_index_of_movie]
similarity_score = [
    (position, score) for position, score in enumerate(movie_similarity_row)
]

In [39]:
# Number of (position, score) pairs collected for the chosen movie.
len(similarity_score)

4803

In [40]:
# NOTE(review): prints every (position, score) pair for the chosen movie; consider printing a slice instead.
print(similarity_score)

[(0, np.float64(0.013402689813283235)), (1, np.float64(0.060032253375753165)), (2, np.float64(0.026946516222886498)), (3, np.float64(0.011827656287944927)), (4, np.float64(0.013344494182252462)), (5, np.float64(0.027409297186256794)), (6, np.float64(0.007659676538346292)), (7, np.float64(0.04391930302756164)), (8, np.float64(0.007358329135374548)), (9, np.float64(0.012118350744188596)), (10, np.float64(0.014497084393550306)), (11, np.float64(0.01633572338511871)), (12, np.float64(0.012570516654952655)), (13, np.float64(0.01165217751307579)), (14, np.float64(0.013135440376675307)), (15, np.float64(0.006773488999228482)), (16, np.float64(0.045109717816452524)), (17, np.float64(0.012146373683786377)), (18, np.float64(0.009396471633774762)), (19, np.float64(0.024573836591215068)), (20, np.float64(0.013013105432756793)), (21, np.float64(0.01098598767145983)), (22, np.float64(0.019028875577019745)), (23, np.float64(0.01901561465735379)), (24, np.float64(0.02843478204049724)), (25, np.float64

In [41]:
# Sorting the movies based on their similarity score

In [42]:
# Rank the pairs from most to least similar. Work on a copy so that
# similarity_score keeps its original positional order.
sorted_movie = list(similarity_score)
sorted_movie.sort(key=lambda pair: pair[1], reverse=True)

In [43]:
# NOTE(review): prints the whole ranked list; consider printing a slice instead.
print(sorted_movie)

[(594, np.float64(1.0)), (2695, np.float64(0.19767953000401445)), (129, np.float64(0.12553224657357157)), (1772, np.float64(0.11943181637130872)), (248, np.float64(0.11688065461900764)), (4310, np.float64(0.115071500003844)), (433, np.float64(0.11408311733212784)), (2528, np.float64(0.11298623931874485)), (3048, np.float64(0.10629644335506885)), (186, np.float64(0.10559541773177117)), (3077, np.float64(0.10556497275094756)), (869, np.float64(0.1054493811045307)), (1220, np.float64(0.1053158434318501)), (4685, np.float64(0.10471140826849504)), (1638, np.float64(0.10452571800005844)), (2985, np.float64(0.09901344747067901)), (1515, np.float64(0.09852557966373636)), (782, np.float64(0.097838640565537)), (299, np.float64(0.09764224900641132)), (115, np.float64(0.09600061013680782)), (392, np.float64(0.09573062293594972)), (1445, np.float64(0.09518506838542021)), (4083, np.float64(0.09301742236095434)), (1355, np.float64(0.09159833066014686)), (210, np.float64(0.09153877594871096)), (2799, 

In [44]:
# Recommending similar movie names based on the index

In [45]:
print("Movie suggest for you :\n")

# Number of ranked entries to show. The top entry is the matched movie itself,
# because its similarity with itself is 1.0.
max_suggestions = 19

# Slice before looping: only the first `max_suggestions` entries are ever printed,
# so there is no need to walk the whole ranking or to scan the full frame with a
# boolean mask on every iteration. A label lookup fetches the title directly.
for i, (index, _score) in enumerate(sorted_movie[:max_suggestions], start=1):
    title_index_movie = movie_data.loc[index, 'title']
    print(i, '.', title_index_movie)

Movie suggest for you :

1 . Bad Company
2 . I Think I Love My Wife
3 . Thor
4 . Flawless
5 . Mr. & Mrs. Smith
6 . Grand Theft Parsons
7 . RED 2
8 . Bad Grandpa
9 . Very Bad Things
10 . Bad Boys II
11 . Malone
12 . You, Me and Dupree
13 . 8MM
14 . The Case of the Grinning Cat
15 . Love & Other Drugs
16 . The Color of Money
17 . Laws of Attraction
18 . The Spirit
19 . Batman Forever


In [46]:
# Movie Recommendation System

In [47]:
# End-to-end recommendation: read a movie name, fuzzy-match it to a known title,
# then list the titles whose combined text features are most similar to it.
movie_name = input("Enter the movie name from the user:\n ")

list_of_all_movies = movie_data['title'].tolist()

closed_movie = difflib.get_close_matches(movie_name, list_of_all_movies)
# difflib returns an empty list when nothing is close enough; report that clearly
# instead of failing with an IndexError on the [0] lookup.
if not closed_movie:
    raise ValueError(
        f"No movie title similar to {movie_name!r} was found; try another spelling."
    )
closed_matches = closed_movie[0]

# Stored 'index' value of the first row carrying the matched title; it selects
# the row of the similarity matrix that belongs to that movie.
find_movie_index = movie_data.loc[movie_data['title'] == closed_matches, 'index'].iloc[0]
similiraty_scores = list(enumerate(similarity[find_movie_index]))
sorted_movies = sorted(similiraty_scores, key=lambda x: x[1], reverse=True)

print("Now suggest move for you : \n")

# Only the first 21 ranked entries are printed, so slice instead of looping over
# every movie and masking the whole frame on each pass.
max_suggestions = 21
for i, (index, _score) in enumerate(sorted_movies[:max_suggestions], start=1):
    recommendent_movie = movie_data.loc[index, 'title']
    print(i, '.', recommendent_movie)

Enter the movie name from the user:
  pad man


Now suggest move for you : 

1 . Bad Company
2 . I Think I Love My Wife
3 . Thor
4 . Flawless
5 . Mr. & Mrs. Smith
6 . Grand Theft Parsons
7 . RED 2
8 . Bad Grandpa
9 . Very Bad Things
10 . Bad Boys II
11 . Malone
12 . You, Me and Dupree
13 . 8MM
14 . The Case of the Grinning Cat
15 . Love & Other Drugs
16 . The Color of Money
17 . Laws of Attraction
18 . The Spirit
19 . Batman Forever
20 . Hancock
21 . Safe House
