## Importing Essential Libraries

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

## Functions to get Info

In [2]:
def get_title(index):
	return df[df.index == index]["title"].values[0]

def get_index(title):
	return df[df.title == title]["index"].values[0]

In [3]:
df = pd.read_csv('movie_dataset.csv')
df.columns

Index(['index', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'cast', 'crew', 'director'],
      dtype='object')

## Selecting Features

In [4]:
features = df[['keywords', 'genres', 'cast', 'director', 'title', 'tagline']]

In [5]:
features.head()

Unnamed: 0,keywords,genres,cast,director,title,tagline
0,culture clash future space war space colony so...,Action Adventure Fantasy Science Fiction,Sam Worthington Zoe Saldana Sigourney Weaver S...,James Cameron,Avatar,Enter the World of Pandora.
1,ocean drug abuse exotic island east india trad...,Adventure Fantasy Action,Johnny Depp Orlando Bloom Keira Knightley Stel...,Gore Verbinski,Pirates of the Caribbean: At World's End,"At the end of the world, the adventure begins."
2,spy based on novel secret agent sequel mi6,Action Adventure Crime,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,Sam Mendes,Spectre,A Plan No One Escapes
3,dc comics crime fighter terrorist secret ident...,Action Crime Drama Thriller,Christian Bale Michael Caine Gary Oldman Anne ...,Christopher Nolan,The Dark Knight Rises,The Legend Ends
4,based on novel mars medallion space travel pri...,Action Adventure Science Fiction,Taylor Kitsch Lynn Collins Samantha Morton Wil...,Andrew Stanton,John Carter,"Lost in our world, found in another."


In [6]:
for i in features:
    features[i] = features[i].fillna('')

In [7]:
def combine(row):
    return row['keywords']+ " "+row['genres']+" "+row['cast']+" "+row['director'] + " " +row['title'] + " " +row['tagline']

In [8]:
features['combined_features'] = features.apply(combine, axis = 1)
print(features['combined_features'].head())

0    culture clash future space war space colony so...
1    ocean drug abuse exotic island east india trad...
2    spy based on novel secret agent sequel mi6 Act...
3    dc comics crime fighter terrorist secret ident...
4    based on novel mars medallion space travel pri...
Name: combined_features, dtype: object


## Applying Cosine Similarity

In [22]:
cv = CountVectorizer()
cm = cv.fit_transform(features['combined_features'])

In [23]:
cosine_similarity = cosine_similarity(cm)
user = 'Avatar'

In [24]:
index = get_index(user)

In [25]:
similar = list(enumerate(cosine_similarity[index]))
sorted_movies = sorted(similar, key = lambda x: x[1], reverse = True)

## Getting Top 5 Recommended Movies

In [26]:
for i, movie in enumerate(sorted_movies):
    print(get_title(movie[0]))
    if i > 5:
        break

Avatar
Guardians of the Galaxy
Aliens
Alien
Space Cowboys
The League of Extraordinary Gentlemen
Zathura: A Space Adventure


In [33]:
print('Movie Recommender System \n')
user = input('Enter Movie Name: ')
print('\nRecommended Movies:')
index = get_index(user)
similar = list(enumerate(cosine_similarity[index]))
sorted_movies = sorted(similar, key = lambda x: x[1], reverse = True)
for i, movie in enumerate(sorted_movies):
    print('\n \t',get_title(movie[0]))
    if i > 5:
        break

Movie Recommender System 

Enter Movie Name: Avatar

Recommended Movies:

 	 Avatar

 	 Guardians of the Galaxy

 	 Aliens

 	 Alien

 	 Space Cowboys

 	 The League of Extraordinary Gentlemen

 	 Zathura: A Space Adventure
