# Step 01 - Import Modules

In [1]:
import pandas as pd
import numpy as np
import difflib
import joblib

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 02 - Data Preprocessing

In [2]:
# Load the movie metadata table from the CSV file next to the notebook.
df = pd.read_csv('./movies.csv')

In [3]:
# Preview the first rows to check that the file was parsed as expected.
df.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [4]:
# (rows, columns) of the loaded table.
df.shape

(4803, 24)

In [5]:
# Column dtypes and non-null counts; reveals which columns contain missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   index                 4803 non-null   int64  
 1   budget                4803 non-null   int64  
 2   genres                4775 non-null   object 
 3   homepage              1712 non-null   object 
 4   id                    4803 non-null   int64  
 5   keywords              4391 non-null   object 
 6   original_language     4803 non-null   object 
 7   original_title        4803 non-null   object 
 8   overview              4800 non-null   object 
 9   popularity            4803 non-null   float64
 10  production_companies  4803 non-null   object 
 11  production_countries  4803 non-null   object 
 12  release_date          4802 non-null   object 
 13  revenue               4803 non-null   int64  
 14  runtime               4801 non-null   float64
 15  spoken_languages     

In [6]:
# Text columns that describe each movie and are used to build recommendations.

selected_feature = df.loc[:, ['genres', 'keywords', 'tagline', 'cast', 'director']]

In [7]:
# Replace missing values in the selected text columns with empty strings so
# the columns can be concatenated as text.
text_columns = list(selected_feature.columns)
df[text_columns] = df[text_columns].fillna('')

In [8]:
# Join the 5 selected text columns into one space-separated string per movie.

combine_feature = df['genres']
for column in ('keywords', 'tagline', 'cast', 'director'):
    combine_feature = combine_feature + " " + df[column]

In [9]:
# Inspect the combined text: one string per movie.
combine_feature

0       Action Adventure Fantasy Science Fiction cultu...
1       Adventure Fantasy Action ocean drug abuse exot...
2       Action Adventure Crime spy based on novel secr...
3       Action Crime Drama Thriller dc comics crime fi...
4       Action Adventure Science Fiction based on nove...
                              ...                        
4798    Action Crime Thriller united states\u2013mexic...
4799    Comedy Romance  A newlywed couple's honeymoon ...
4800    Comedy Drama Romance TV Movie date love at fir...
4801      A New Yorker in Shanghai Daniel Henney Eliza...
4802    Documentary obsession camcorder crush dream gi...
Length: 4803, dtype: object

In [10]:
# TF-IDF vectorizer configured to lowercase the text and drop English stop words.
feature_selection = TfidfVectorizer(lowercase=True, stop_words='english')

In [11]:
# Fit the vectorizer on the combined text of all movies.
feature_selection.fit(combine_feature)

In [12]:
# Turn each movie's combined text into its TF-IDF feature vector.
x = feature_selection.transform(combine_feature)

### Cosine Similarity

In [13]:
# Pairwise cosine similarity between the TF-IDF vectors of all movies.

similarity = cosine_similarity(x)

In [14]:
# Square matrix: one row and one column per movie.
similarity.shape

(4803, 4803)

In [15]:
# Ask the user for a movie title to base the recommendations on.
# NOTE(review): the prompt text says "Inter"; presumably "Enter" was intended.

movies_name = input('Inter the Movies Name: ')

Inter the Movies Name: avatar


In [16]:
# Plain Python list holding every movie title, in dataset row order.
list_of_titles = list(df['title'])

In [17]:
# Find the dataset title closest to what the user typed.
# get_close_matches returns an empty list when nothing is similar enough, so
# check before indexing and fail with a message that explains the problem
# instead of a bare IndexError.
matches = difflib.get_close_matches(movies_name, list_of_titles, n=1)
if not matches:
    raise ValueError(
        'No movie title close to {!r} was found; try a different spelling.'.format(movies_name)
    )
close_match = matches[0]

In [18]:
# Position of the matched title, used as the row of the similarity matrix.
# list.index returns the first occurrence, whereas
# df[df['title'] == close_match].index.item() raises ValueError whenever the
# title appears more than once. This is also the lookup Similar_Movies uses.

index_of_movie = list_of_titles.index(close_match)

In [20]:
# Pair each movie's position with its similarity to the selected movie.
similarity_score = [
    (position, score)
    for position, score in enumerate(similarity[index_of_movie])
]

In [21]:
# Rank the (position, score) pairs from most to least similar.

sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)

In [22]:
# Print the titles of the best-ranked movies.
# NOTE(review): "suggedted" is a typo for "suggested"; the string is left
# unchanged so the recorded output still matches.

print('Movie suggedted for You:')

# Only ranks 1-29 were ever printed (the old guard was `i < 30`), so slice the
# ranking instead of walking every entry with a manual counter.
for index, _score in sorted_similar_movies[:29]:
    print(list_of_titles[index])

Movie suggedted for You:
Avatar
Alien
Guardians of the Galaxy
Aliens
Star Trek Beyond
Star Trek Into Darkness
Galaxy Quest
Alien³
Gravity
Cargo
Trekkies
Moonraker
Jason X
Pocahontas
Space Cowboys
Lockout
Machete Kills
The Helix... Loaded
Event Horizon
Space Dogs
Gettysburg
Clash of the Titans
The Astronaut's Wife
Planet of the Apes
Star Wars: Clone Wars: Volume 1
The Right Stuff
Imaginary Heroes
Wing Commander
Star Trek


## Creating a Movie Recommendation System

In [23]:
def Similar_Movies(name=None, limit=30):
    """Print the titles most similar to a given movie.

    Args:
        name: Movie title to look up. When None (the default) the title is
            read interactively with ``input``, so ``Similar_Movies()`` still
            prompts the user.
        limit: Exclusive rank cut-off. Only entries ranked 1 .. ``limit - 1``
            in the similarity ordering are considered, and entries carrying
            the looked-up title itself are skipped. Defaults to 30.

    Returns:
        None. Titles are printed one per line. When no dataset title is close
        enough to ``name``, a short message is printed instead.

    Reads the notebook globals ``list_of_titles`` and ``similarity``.
    """
    if name is None:
        name = input('Inter Movies Name: ')

    # get_close_matches returns [] when nothing is similar enough; guard the
    # lookup so an unknown title does not end in an IndexError.
    matches = difflib.get_close_matches(name, list_of_titles, n=1)
    if not matches:
        print('No movie title close to {!r} was found.'.format(name))
        return
    movie_name = matches[0]
    movie_index = list_of_titles.index(movie_name)

    similarity_score = list(enumerate(similarity[movie_index]))
    sorted_similar_movies = sorted(similarity_score, key=lambda pair: pair[1], reverse=True)

    # Slice to the ranks that can be printed instead of scanning every entry;
    # max() keeps a limit below 1 from turning into a negative slice bound.
    for index, _score in sorted_similar_movies[:max(limit - 1, 0)]:
        title = list_of_titles[index]
        if title != movie_name:
            print(title)

In [24]:
# Run the recommender: it prompts for a title and prints the similar movies.
Similar_Movies()

Inter Movies Name: Guardians of the Galaxy
Avatar
Star Trek Beyond
Star Trek Into Darkness
The Chronicles of Riddick
The Iron Giant
Moonraker
Space Dogs
Captain America: The Winter Soldier
Alien
Ant-Man
Avengers: Age of Ultron
Wing Commander
Gravity
Captain America: The First Avenger
The Avengers
Cargo
Zathura: A Space Adventure
Space Cowboys
Stargate: The Ark of Truth
The Helix... Loaded
X-Men: Days of Future Past
The Words
Home
X-Men: Apocalypse
Jason X
Iron Man
X-Men: The Last Stand
X-Men
