Importing the dependencies

In [1]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data collection and Pre-processing

In [3]:
# Load the movie metadata from 'movies.csv' (path is relative to the notebook's working directory) into a pandas DataFrame
movies_data = pd.read_csv('movies.csv')

In [None]:
# Preview the first rows to check which columns were loaded
movies_data.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [4]:
# (number of rows, number of columns) of the loaded dataset
movies_data.shape

(4803, 24)

In [5]:
# Text columns that describe a movie's content; only these drive the recommendations

selected_features = [
    'genres',
    'keywords',
    'tagline',
    'cast',
    'director',
]

In [6]:
# Replace missing values in the selected text columns with an empty string,
# so the columns can be concatenated as text in the next step

movies_data[selected_features] = movies_data[selected_features].fillna('')

In [7]:
# Build one space-separated text document per movie from the selected feature columns.
# The columns come from `selected_features` rather than a second hand-written list, so this
# step cannot drift from the columns that were null-filled earlier. str.join raises on
# non-string values (e.g. NaN), so the null-fill cell must run before this one.

combined_features = movies_data[selected_features].agg(' '.join, axis=1)

In [8]:
# TF-IDF vectorizer (library default settings) used to turn each movie's combined text into a numeric feature vector

vectorizer = TfidfVectorizer()

In [9]:
# Fit the vectorizer on the combined text and transform it in one step; one row of features per movie
feature_vectors = vectorizer.fit_transform(combined_features)

Cosine Similarity

In [10]:
# Pairwise cosine similarity between all movie vectors; row i holds movie i's score against every movie
# (the rows are later indexed by a movie's row position in movies_data)

similarity = cosine_similarity(feature_vectors)

In [11]:
# Ask the user for a movie title; it does not have to be exact because it is fuzzy-matched against the dataset titles in a later cell

movie_name = input('Enter your favourite movie name: ')

Enter your favourite movie name: interstellar


In [12]:
# Plain Python list of every title in the dataset, in row order

list_of_all_titles = movies_data.loc[:, 'title'].tolist()

In [13]:
# Fuzzy-match the typed name against every dataset title (difflib defaults: up to 3 candidates, best first)

find_close_match = difflib.get_close_matches(
    word=movie_name,
    possibilities=list_of_all_titles,
)
find_close_match

['Interstellar', 'Cinderella', 'Cinderella Man']

In [14]:
# Take the best fuzzy match. get_close_matches returns an empty list when no title is
# similar enough, so fail with a descriptive message instead of a bare "list index out of
# range". The exception type stays IndexError, as a plain `find_close_match[0]` would raise.
if not find_close_match:
    raise IndexError(
        'No movie title similar to {!r} was found in the dataset'.format(movie_name)
    )

close_match = find_close_match[0]
close_match

'Interstellar'

In [15]:
# Row position of the matched title (first occurrence if the title appears more than once).
# The similarity matrix is built from movies_data row by row, so the position is taken from
# the frame itself rather than from the CSV's 'index' column, which is only correct while
# that column happens to equal the row number.

index_of_the_movie = np.flatnonzero((movies_data['title'] == close_match).to_numpy())[0]
print(index_of_the_movie)

95


In [16]:
# Pair every movie's row position with its similarity to the chosen movie

similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_the_movie])
]


In [17]:
# Rank the (row position, score) pairs from most to least similar
sorted_similar_movies = sorted(
    similarity_score,
    key=lambda pair: pair[1],
    reverse=True,
)


In [18]:
print('Movies suggested for you : \n')

# Number of titles to display (the original `i < 30` guard printed entries 1..29).
number_of_suggestions = 29

# Slice the ranking first so only the displayed entries are visited, and look each title up
# by row position instead of scanning the whole frame with a boolean mask per entry.
# The chosen movie itself scores highest against itself, so it normally appears as entry 1.
for i, (movie_position, _score) in enumerate(
        sorted_similar_movies[:number_of_suggestions], start=1):
    title_from_index = movies_data['title'].iloc[movie_position]
    print(i, '.', title_from_index)

Movies suggested for you : 

1 . Interstellar
2 . The Dark Knight Rises
3 . The Matrix
4 . The Martian
5 . Dear Frankie
6 . Argo
7 . The Matrix Revolutions
8 . The Matrix Reloaded
9 . The Terminator
10 . Armageddon
11 . Terminator Genisys
12 . Contact
13 . Terminator Salvation
14 . The Killer Inside Me
15 . Gandhi, My Father
16 . The Tree of Life
17 . Get Smart
18 . Back to the Future
19 . Terminator 3: Rise of the Machines
20 . The Prestige
21 . Batman Begins
22 . Dragonslayer
23 . WarGames
24 . Little Nicky
25 . Superman III
26 . The Other Side of Heaven
27 . House at the End of the Street
28 . Good Deeds
29 . Mortal Kombat: Annihilation


Movie Recommendation System

In [19]:
def recommend_movies(movie_name, movies_data, similarity, top_n=29):
    """Return the titles most similar to the dataset title closest to ``movie_name``.

    Parameters
    ----------
    movie_name : str
        Title typed by the user; it is fuzzy-matched against the dataset titles.
    movies_data : pandas.DataFrame
        Frame with a 'title' column, in the same row order as ``similarity``.
    similarity : 2-D array-like
        Square matrix in which row ``i`` holds the similarity of movie ``i``
        to every movie (indexed by row position).
    top_n : int, default 29
        Maximum number of titles to return.

    Returns
    -------
    list
        Titles ordered from most to least similar. The matched movie itself
        scores highest against itself, so it normally comes first. The list is
        empty when no dataset title is close enough to ``movie_name``.
    """
    titles = movies_data['title'].tolist()

    matches = difflib.get_close_matches(movie_name, titles)
    if not matches:
        # Nothing similar enough: report "no result" instead of raising IndexError.
        return []

    # Row position of the best match (first occurrence for duplicate titles).
    position = titles.index(matches[0])

    ranked = sorted(
        enumerate(similarity[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )
    # Only the requested number of entries is materialised; lookups are by position.
    return [titles[row] for row, _score in ranked[:top_n]]


movie_name = input('Enter your favourite movie name: ')

suggested_titles = recommend_movies(movie_name, movies_data, similarity)

if suggested_titles:
    print('Movies suggested for you : \n')
    for i, title in enumerate(suggested_titles, start=1):
        print(i, '.', title)
else:
    print('No movie title similar to', repr(movie_name), 'was found in the dataset.')

Enter your favourite movie name: goodfellas
Movies suggested for you : 

1 . GoodFellas
2 . Casino
3 . Raging Bull
4 . Cape Fear
5 . Cop Land
6 . New York, New York
7 . Killing Them Softly
8 . Taxi Driver
9 . Analyze That
10 . Mean Streets
11 . Once Upon a Time in America
12 . Youth in Revolt
13 . The Departed
14 . Blow
15 . Silver Linings Playbook
16 . Bound
17 . A Walk Among the Tombstones
18 . Midnight Run
19 . The Bridge of San Luis Rey
20 . Medicine Man
21 . Radio Flyer
22 . The Host
23 . Flipper
24 . Stardust
25 . Kill the Messenger
26 . Shutter Island
27 . Analyze This
28 . 8 Heads in a Duffel Bag
29 . The Untouchables
