In [1]:
import pandas as pd
import numpy as np
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity 

In [2]:
movie = pd.read_csv("movies.csv")

In [3]:
movie.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


### Selecting the relevant features from the data

In [4]:
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
print(selected_features)

['genres', 'keywords', 'tagline', 'cast', 'director']


In [5]:
# replacing the null values
for feature in selected_features:
    movie[feature] = movie[feature].fillna('')

In [6]:
combined_features = movie['genres']+' '+movie['keywords']+' '+movie['tagline']+' '+movie['cast']+' '+movie['director']
print(combined_features)

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object


In [7]:
# text to feature vectors
vectorizer = TfidfVectorizer()

In [8]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [1]:
# print(feature_vectors)

In [9]:
# getting simarity score
similarity = cosine_similarity(feature_vectors)

In [2]:
# similarity

In [10]:
# Movie Name
movie_name = input('Enter Your Favourite Movi Name : ')

In [11]:
# list with all the movie names given in the dataset
list_of_all_movies = movie['title'].tolist()

In [12]:
print(len(list_of_all_movies))

4803


In [13]:
# Finding Close Match
find_close_match = difflib.get_close_matches(movie_name, list_of_all_movies)
print(find_close_match)

['Avatar']


In [14]:
close_match = find_close_match[0]
print(close_match)

Avatar


In [None]:
# movie[movie.title == close_match]['index'].values[0]

In [15]:
# finding the index of movie with title
index_of_movie = movie[movie.title == close_match]['index'].values[0]

In [16]:
# Getting List of similar Movies
# len(list(enumerate(similarity[index_of_movie])))
similarity_score = list(enumerate(similarity[index_of_movie]))

In [3]:
# print(similarity_score)

In [17]:
# Sorting the similarity score
similar_movies = sorted(similarity_score, key = lambda x: x[1], reverse =True)
# print(similar_movies)

In [18]:
movie[movie.index== 79]['title'].values[0]

'Iron Man 2'

In [19]:
# printing the names of similar movies
print("Movie Suggested For You : ")
i=1
for mv in similar_movies[1:]:
    index = mv[0]
    title = movie[movie.index== index]['title'].values[0]
    if i<=10:
        print(i, '.', title)
        i+=1

Movie Suggested For You : 
1 . Alien
2 . Aliens
3 . Guardians of the Galaxy
4 . Star Trek Beyond
5 . Star Trek Into Darkness
6 . Galaxy Quest
7 . Alien³
8 . Cargo
9 . Trekkies
10 . Gravity


### Predictive System 
#### Movie Recommendation System

In [21]:
list_of_all_movies = movie['title'].tolist()
movie_name = input('Enter Your Favourite Movie Name : ')
find_close_match = difflib.get_close_matches(movie_name, list_of_all_movies)
close_match = find_close_match[0]
index_of_movie = movie[movie.title == close_match]['index'].values[0]
similarity_score = list(enumerate(similarity[index_of_movie]))
similar_movies = sorted(similarity_score, key = lambda x: x[1], reverse =True)
print("Movie Suggested For You : ")
i=1
for mv in similar_movies:
    index = mv[0]
    title = movie[movie.index== index]['title'].values[0]
    if i<=10:
        print(i, '.', title)
        i+=1

Movie Suggested For You : 
1 . Iron Man
2 . Iron Man 2
3 . Iron Man 3
4 . Avengers: Age of Ultron
5 . The Avengers
6 . Captain America: Civil War
7 . Captain America: The Winter Soldier
8 . Ant-Man
9 . X-Men
10 . Made
