<a href="https://colab.research.google.com/github/Shivanand512/-Movie-Recommendation-System-using-Machine-Learning-with-Python/blob/master/Movie_Recommendation_System_using_Machine_Learning_with_Python.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Importing the dependencies

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

Data Collection and Pre-Processing

In [None]:
# loading the data from the csv file to our pandas dataframe
# The path is relative, so movies.csv must be present in the notebook's working directory
movies_data=pd.read_csv('movies.csv')

In [None]:
# Preview the first 5 rows of the dataframe to check that the file loaded as expected
movies_data.head(n=5)

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [None]:
# number of rows and columns in the data frame, displayed as a (rows, columns) tuple
movies_data.shape

In [None]:
# Columns whose text is used to describe each movie for the recommendation
selected_features = [
  'genres',
  'keywords',
  'tagline',
  'cast',
  'director',
]
print(selected_features)

In [None]:
# Replace missing values in every selected column with the empty string in one step,
# so the string concatenation in the next cell never meets a NaN
movies_data[selected_features] = movies_data[selected_features].fillna('')

In [None]:
# Build one space-separated text per movie out of the 5 selected features
combined_features = (
  movies_data['genres'] + ' '
  + movies_data['keywords'] + ' '
  + movies_data['tagline'] + ' '
  + movies_data['cast'] + ' '
  + movies_data['director']
)

In [None]:
# Inspect the combined text that was built for each movie
print(combined_features)

In [None]:
# converting the text data to feature vectors
# The vectorizer is created with its default settings here and fitted on the combined text in the next cell
vectorizer=TfidfVectorizer()

In [None]:
# Fit the TF-IDF vectorizer on the combined text and turn each movie's text into a feature vector
feature_vectors=vectorizer.fit_transform(combined_features)

In [None]:
# Inspect the TF-IDF feature vectors produced by the vectorizer
print(feature_vectors)

Cosine Similarity

In [None]:
# getting the similarity scores using cosine similarity
# Called with a single argument, so the rows of feature_vectors are compared with each other:
# similarity[i][j] is the score between the movie in row i and the movie in row j
similarity=cosine_similarity(feature_vectors)
print(similarity)

In [None]:
# Check the dimensions of the similarity matrix
print(similarity.shape)

(4803, 4803)


In [None]:
# getting the movie name from the user
# The text does not have to be an exact title; it is fuzzy-matched against the dataset titles further down
movie_name=input("Enter your favourite movie name: ")

Enter your favourite movie name: iron man


In [None]:
# Collect every movie title from the data set into a plain Python list (used for fuzzy matching)
list_of_all_titles = list(movies_data['title'])

In [None]:
# Fuzzy-match the name typed by the user against all known titles.
# n=3 and cutoff=0.6 are difflib's defaults, spelled out here so the limits are visible.
find_close_match = difflib.get_close_matches(
  movie_name,
  list_of_all_titles,
  n=3,
  cutoff=0.6,
)
print(find_close_match)

['Iron Man', 'Iron Man 3', 'Iron Man 2']


In [None]:
# Take the first candidate returned by difflib as the movie the user meant.
# difflib returns an empty list when no title is similar enough, so stop here with a clear
# message instead of letting find_close_match[0] fail with a bare IndexError.
if not find_close_match:
  raise ValueError(f"No movie title similar to {movie_name!r} was found; please check the spelling.")
close_match = find_close_match[0]
print(close_match)

Iron Man


In [None]:
# Find the row position of the movie with the matched title.
# The similarity matrix is indexed by row position, so the position is computed directly from
# the title column instead of being read from the CSV's 'index' column, which only matches
# as long as the file is neither filtered nor reordered.
title_matches = (movies_data['title'] == close_match).to_numpy()
# If several rows share the title, the first one is used
index_of_the_movie = np.flatnonzero(title_matches)[0]
print(index_of_the_movie)

68


In [None]:
# Pair every movie's row position with its similarity to the chosen movie
similarity_score = [
  (position, score)
  for position, score in enumerate(similarity[index_of_the_movie])
]

In [None]:
# Number of rows in the similarity matrix
len(similarity)

4803

In [None]:
# Sort the (position, score) pairs by score, most similar movie first
sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
# Show only the best few pairs: the full list holds one entry per movie and would flood the output
print(sorted_similar_movies[:10])

In [None]:
# Print the titles of the most similar movies, best match first.
# Only the entries that will actually be printed are visited; previously the loop kept going
# over every movie and ran a full boolean-mask scan of the dataframe for each one.
print('Movies Suggested for you : \n')
number_of_suggestions = 29
for i, (index, _score) in enumerate(sorted_similar_movies[:number_of_suggestions], start=1):
  # index is a row position (it comes from enumerating a similarity row), so look it up positionally
  title_from_index = movies_data['title'].iloc[index]
  print(i, ' .', title_from_index)

Movies Suggested for you : 

1  . Iron Man
2  . Iron Man 2
3  . Iron Man 3
4  . Avengers: Age of Ultron
5  . The Avengers
6  . Captain America: Civil War
7  . Captain America: The Winter Soldier
8  . Ant-Man
9  . X-Men
10  . Made
11  . X-Men: Apocalypse
12  . X2
13  . The Incredible Hulk
14  . The Helix... Loaded
15  . X-Men: First Class
16  . X-Men: Days of Future Past
17  . Captain America: The First Avenger
18  . Kick-Ass 2
19  . Guardians of the Galaxy
20  . Deadpool
21  . Thor: The Dark World
22  . G-Force
23  . X-Men: The Last Stand
24  . Duets
25  . Mortdecai
26  . The Last Airbender
27  . Southland Tales
28  . Zathura: A Space Adventure
29  . Sky Captain and the World of Tomorrow


Movie Recommendation System

In [None]:
# End-to-end recommendation: ask for a movie, fuzzy-match it to a known title and
# print the most similar movies according to the precomputed similarity matrix.

# getting the movie name from the user
movie_name = input("Enter your favourite movie name: ")

list_of_all_titles = movies_data['title'].tolist()

# difflib returns the closest titles, best match first; the list is empty when nothing is similar enough
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if not find_close_match:
  # Nothing to base a recommendation on: tell the user instead of failing with an IndexError
  print('No movie matching', repr(movie_name), 'was found. Please check the spelling and try again.')
else:
  close_match = find_close_match[0]

  # Row position of the matched title. The similarity matrix is indexed by row position, so the
  # position is computed from the title column rather than read from the CSV's 'index' column.
  index_of_the_movie = np.flatnonzero((movies_data['title'] == close_match).to_numpy())[0]

  similarity_score = list(enumerate(similarity[index_of_the_movie]))

  # most similar movie first
  sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

  print('Movies Suggested for you : \n')
  number_of_suggestions = 29
  # Visit only the entries that are printed instead of scanning the whole ranking
  for i, (index, _score) in enumerate(sorted_similar_movies[:number_of_suggestions], start=1):
    title_from_index = movies_data['title'].iloc[index]
    print(i, ' .', title_from_index)

Enter your favourite movie name: bat man
Movies Suggested for you : 

1  . Batman
2  . Batman Returns
3  . Batman & Robin
4  . The Dark Knight Rises
5  . Batman Begins
6  . The Dark Knight
7  . A History of Violence
8  . Superman
9  . Beetlejuice
10  . Bedazzled
11  . Mars Attacks!
12  . The Sentinel
13  . Planet of the Apes
14  . Man of Steel
15  . Suicide Squad
16  . The Mask
17  . Salton Sea
18  . Spider-Man 3
19  . The Postman Always Rings Twice
20  . Hang 'em High
21  . Spider-Man 2
22  . Dungeons & Dragons: Wrath of the Dragon God
23  . Superman Returns
24  . Jonah Hex
25  . Exorcist II: The Heretic
26  . Superman II
27  . Green Lantern
28  . Superman III
29  . Something's Gotta Give
