# Movie Recommendation System


"""
This project builds a content-based Movie Recommendation System using Python, Pandas, and Scikit-Learn.
It processes movie metadata, extracts relevant features (genres, keywords, cast, and crew),
and applies Machine learning (ML) techniques to suggest similar movies.
"""

In [1]:
# importing Libraries
import os
import pandas as pd
import numpy as np

In [5]:
# importing dataset
movies = pd.read_csv('/Users/jestin/Documents/data scince/machine learning/datasets/movies.csv')
movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


In [6]:
movies.shape

(4803, 20)

In [7]:
# checking for duplicates
movies.duplicated().sum()

0

In [8]:
# checking for missing values in total
movies.isnull().sum().sum()

3941

In [9]:
# checking missing values columnwise
movies.isnull().sum()

budget                     0
genres                     0
homepage                3091
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
dtype: int64

In [10]:
# importing second datset 
credits= pd.read_csv('/Users/jestin/Documents/data scince/machine learning/datasets/credits.csv')
credits.head(3)

Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [19]:
# size of the dataset
credits.shape

(4803, 4)

In [20]:
# checking for duplicates
credits.duplicated().sum()

0

In [21]:
# checking for missing values
credits.isnull().sum()

movie_id    0
title       0
cast        0
crew        0
dtype: int64

In [22]:
movies.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count'],
      dtype='object')

In [23]:
credits.columns

Index(['movie_id', 'title', 'cast', 'crew'], dtype='object')

### merging both tables

In [11]:
# merging both tables based on title column
movies = movies.merge(credits,on = 'title')
movies.head(2)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."


In [25]:
# after merging
movies.shape

(4809, 23)

In [26]:
movies.isnull().sum()

budget                     0
genres                     0
homepage                3096
id                         0
keywords                   0
original_language          0
original_title             0
overview                   3
popularity                 0
production_companies       0
production_countries       0
release_date               1
revenue                    0
runtime                    2
spoken_languages           0
status                     0
tagline                  844
title                      0
vote_average               0
vote_count                 0
movie_id                   0
cast                       0
crew                       0
dtype: int64

In [27]:
movies.isnull().sum()/len(movies)*100

budget                   0.000000
genres                   0.000000
homepage                64.379289
id                       0.000000
keywords                 0.000000
original_language        0.000000
original_title           0.000000
overview                 0.062383
popularity               0.000000
production_companies     0.000000
production_countries     0.000000
release_date             0.020794
revenue                  0.000000
runtime                  0.041589
spoken_languages         0.000000
status                   0.000000
tagline                 17.550426
title                    0.000000
vote_average             0.000000
vote_count               0.000000
movie_id                 0.000000
cast                     0.000000
crew                     0.000000
dtype: float64

In [28]:
movies.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4809 entries, 0 to 4808
Data columns (total 23 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4809 non-null   int64  
 1   genres                4809 non-null   object 
 2   homepage              1713 non-null   object 
 3   id                    4809 non-null   int64  
 4   keywords              4809 non-null   object 
 5   original_language     4809 non-null   object 
 6   original_title        4809 non-null   object 
 7   overview              4806 non-null   object 
 8   popularity            4809 non-null   float64
 9   production_companies  4809 non-null   object 
 10  production_countries  4809 non-null   object 
 11  release_date          4808 non-null   object 
 12  revenue               4809 non-null   int64  
 13  runtime               4807 non-null   float64
 14  spoken_languages      4809 non-null   object 
 15  status               

In [29]:
movies['status'].value_counts()

status
Released           4801
Rumored               5
Post Production       3
Name: count, dtype: int64

In [6]:
# Given that the majority of movies in the dataset are in English, language may not be a significant feature
#for the recommendation model
(movies['original_language'].value_counts()/len(movies)*100).head()

original_language
en    93.782491
fr     1.455604
es     0.665419
zh     0.561447
de     0.561447
Name: count, dtype: float64

## Selecting significent Features for Recommendation Systems
#### droping other feautes

In [31]:
movies.columns

Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'vote_average',
       'vote_count', 'movie_id', 'cast', 'crew'],
      dtype='object')

In [32]:
# 'genres','movie_id','keywords','title','overview','cast','crew'

In [12]:
movies = movies[['genres','movie_id','keywords','title','overview','cast','crew']]
movies.tail(2)

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
4807,[],126186,[],Shanghai Calling,When ambitious New York attorney Sam is sent t...,"[{""cast_id"": 3, ""character"": ""Sam"", ""credit_id...","[{""credit_id"": ""52fe4ad9c3a368484e16a36b"", ""de..."
4808,"[{""id"": 99, ""name"": ""Documentary""}]",25975,"[{""id"": 1523, ""name"": ""obsession""}, {""id"": 224...",My Date with Drew,Ever since the second grade when he first saw ...,"[{""cast_id"": 3, ""character"": ""Herself"", ""credi...","[{""credit_id"": ""58ce021b9251415a390165d9"", ""de..."


In [8]:
movies['genres'][285]

'[{"id": 28, "name": "Action"}, {"id": 35, "name": "Comedy"}, {"id": 80, "name": "Crime"}]'

In [36]:
movies['genres'][300]

'[{"id": 12, "name": "Adventure"}, {"id": 28, "name": "Action"}, {"id": 53, "name": "Thriller"}, {"id": 878, "name": "Science Fiction"}]'

In [15]:
# import package ast (abstract syntax Trees)
import ast

In [25]:


def convert(text):
    synt =[]
    for i in ast.literal_eval(text):
        synt.append(i['name'])
    return synt

In [17]:
movies['genres']= movies['genres'].apply(convert)

In [18]:
movies['genres'][285]

['Action', 'Comedy', 'Crime']

In [19]:
movies['genres'][300]

['Adventure', 'Action', 'Thriller', 'Science Fiction']

In [41]:
movies['keywords'][1]

'[{"id": 270, "name": "ocean"}, {"id": 726, "name": "drug abuse"}, {"id": 911, "name": "exotic island"}, {"id": 1319, "name": "east india trading company"}, {"id": 2038, "name": "love of one\'s life"}, {"id": 2052, "name": "traitor"}, {"id": 2580, "name": "shipwreck"}, {"id": 2660, "name": "strong woman"}, {"id": 3799, "name": "ship"}, {"id": 5740, "name": "alliance"}, {"id": 5941, "name": "calypso"}, {"id": 6155, "name": "afterlife"}, {"id": 6211, "name": "fighter"}, {"id": 12988, "name": "pirate"}, {"id": 157186, "name": "swashbuckler"}, {"id": 179430, "name": "aftercreditsstinger"}]'

In [26]:
movies['keywords']= movies['keywords'].apply(convert)

In [32]:
movies['keywords'][1]

['ocean',
 'drug abuse',
 'exotic island',
 'east india trading company',
 "love of one's life",
 'traitor',
 'shipwreck',
 'strong woman',
 'ship',
 'alliance',
 'calypso',
 'afterlife',
 'fighter',
 'pirate',
 'swashbuckler',
 'aftercreditsstinger']

In [33]:
movies['cast'][0]

['Sam Worthington',
 'Zoe Saldana',
 'Sigourney Weaver',
 'Stephen Lang',
 'Michelle Rodriguez']

In [28]:
def convert5(text):
    syntex = []
    counter = 0
    for i in ast.literal_eval(text):
        if counter <5:
            syntex.append(i['name'])
        counter +=1
    return syntex
        

In [29]:
movies['cast'] = movies['cast'].apply(convert5)

In [30]:
movies['cast'][0]

['Sam Worthington',
 'Zoe Saldana',
 'Sigourney Weaver',
 'Stephen Lang',
 'Michelle Rodriguez']

In [20]:
movies.head(3)

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
0,"[Action, Adventure, Fantasy, Science Fiction]",19995,"[culture clash, future, space war, space colon...",Avatar,"In the 22nd century, a paraplegic Marine is di...","[Sam Worthington, Zoe Saldana, Sigourney Weave...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,"[Adventure, Fantasy, Action]",285,"[ocean, drug abuse, exotic island, east india ...",Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Johnny Depp, Orlando Bloom, Keira Knightley, ...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,"[Action, Adventure, Crime]",206647,"[spy, based on novel, secret agent, sequel, mi...",Spectre,A cryptic message from Bond’s past sends him o...,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."


In [22]:
#Extracting only  Director Names from the Crew
def director(text):
    dir =[]
    for i in ast.literal_eval(text):
        if i['job']== 'Director':
            dir.append(i['name'])
    return dir
        
        

In [20]:
movies['crew'] = movies['crew'].apply(director)

In [31]:
movies.head(4)

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
0,"[Action, Adventure, Fantasy, Science Fiction]",19995,"[culture clash, future, space war, space colon...",Avatar,"In the 22nd century, a paraplegic Marine is di...","[Sam Worthington, Zoe Saldana, Sigourney Weave...",[James Cameron]
1,"[Adventure, Fantasy, Action]",285,"[ocean, drug abuse, exotic island, east india ...",Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Johnny Depp, Orlando Bloom, Keira Knightley, ...",[Gore Verbinski]
2,"[Action, Adventure, Crime]",206647,"[spy, based on novel, secret agent, sequel, mi...",Spectre,A cryptic message from Bond’s past sends him o...,"[Daniel Craig, Christoph Waltz, Léa Seydoux, R...",[Sam Mendes]
3,"[Action, Crime, Drama, Thriller]",49026,"[dc comics, crime fighter, terrorist, secret i...",The Dark Knight Rises,Following the death of District Attorney Harve...,"[Christian Bale, Michael Caine, Gary Oldman, A...",[Christopher Nolan]


In [23]:
movies['crew'][1]

['Gore Verbinski']

In [34]:
# This function combines first names and last names into a single string   
# by removing any spaces. This helps prevent collisions during keyword searches.  

def collapse(A):  
    return [i.replace(" ", "") for i in A]  

In [35]:
movies.columns

Index(['genres', 'movie_id', 'keywords', 'title', 'overview', 'cast', 'crew'], dtype='object')

In [37]:
movies['genres']=movies['genres'].apply(collapse)
movies['keywords']=movies['keywords'].apply(collapse)
movies['cast']=movies['cast'].apply(collapse)
movies['crew']=movies['crew'].apply(collapse)

In [38]:
movies.head()

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
0,"[Action, Adventure, Fantasy, ScienceFiction]",19995,"[cultureclash, future, spacewar, spacecolony, ...",Avatar,"In the 22nd century, a paraplegic Marine is di...","[SamWorthington, ZoeSaldana, SigourneyWeaver, ...",[JamesCameron]
1,"[Adventure, Fantasy, Action]",285,"[ocean, drugabuse, exoticisland, eastindiatrad...",Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[JohnnyDepp, OrlandoBloom, KeiraKnightley, Ste...",[GoreVerbinski]
2,"[Action, Adventure, Crime]",206647,"[spy, basedonnovel, secretagent, sequel, mi6, ...",Spectre,A cryptic message from Bond’s past sends him o...,"[DanielCraig, ChristophWaltz, LéaSeydoux, Ralp...",[SamMendes]
3,"[Action, Crime, Drama, Thriller]",49026,"[dccomics, crimefighter, terrorist, secretiden...",The Dark Knight Rises,Following the death of District Attorney Harve...,"[ChristianBale, MichaelCaine, GaryOldman, Anne...",[ChristopherNolan]
4,"[Action, Adventure, ScienceFiction]",49529,"[basedonnovel, mars, medallion, spacetravel, p...",John Carter,"John Carter is a war-weary, former military ca...","[TaylorKitsch, LynnCollins, SamanthaMorton, Wi...",[AndrewStanton]


In [41]:
# searching null values

movies.isnull().sum()/len(movies)*100

genres      0.000000
movie_id    0.000000
keywords    0.000000
title       0.000000
overview    0.062383
cast        0.000000
crew        0.000000
dtype: float64

In [42]:
movies.dropna(inplace = True)

In [43]:
movies.isnull().sum()/len(movies)*100

genres      0.0
movie_id    0.0
keywords    0.0
title       0.0
overview    0.0
cast        0.0
crew        0.0
dtype: float64

In [47]:
movies['overview'][1000]

"A highly-evolved planet, whose denizens feel no emotion and reproduce by cloning, plans to take over Earth from the inside by sending an operative, fashioned with a humming, mechanical penis, to impregnate an earthling and stay until the birth. The alien, Harold Anderson, goes to Phoenix as a banker and sets to work finding a mate. His approaches to women are inept, and the humming phallus doesn't help, but on the advice of a banking colleague, he cruises an AA meeting, meets Susan, and somehow convinces her to marry. The clock starts to tick: will she conceive, have a baby, and lose Harold (and the child) to his planet before he discovers emotion and starts to care?"

In [48]:
# transform the content of the overview column in a DataFrame
# by splitting each overview text into a list of words.  

movies['overview'] = movies['overview'].apply(lambda x: x.split())

In [49]:
movies['overview'][1000]

['A',
 'highly-evolved',
 'planet,',
 'whose',
 'denizens',
 'feel',
 'no',
 'emotion',
 'and',
 'reproduce',
 'by',
 'cloning,',
 'plans',
 'to',
 'take',
 'over',
 'Earth',
 'from',
 'the',
 'inside',
 'by',
 'sending',
 'an',
 'operative,',
 'fashioned',
 'with',
 'a',
 'humming,',
 'mechanical',
 'penis,',
 'to',
 'impregnate',
 'an',
 'earthling',
 'and',
 'stay',
 'until',
 'the',
 'birth.',
 'The',
 'alien,',
 'Harold',
 'Anderson,',
 'goes',
 'to',
 'Phoenix',
 'as',
 'a',
 'banker',
 'and',
 'sets',
 'to',
 'work',
 'finding',
 'a',
 'mate.',
 'His',
 'approaches',
 'to',
 'women',
 'are',
 'inept,',
 'and',
 'the',
 'humming',
 'phallus',
 "doesn't",
 'help,',
 'but',
 'on',
 'the',
 'advice',
 'of',
 'a',
 'banking',
 'colleague,',
 'he',
 'cruises',
 'an',
 'AA',
 'meeting,',
 'meets',
 'Susan,',
 'and',
 'somehow',
 'convinces',
 'her',
 'to',
 'marry.',
 'The',
 'clock',
 'starts',
 'to',
 'tick:',
 'will',
 'she',
 'conceive,',
 'have',
 'a',
 'baby,',
 'and',
 'lose',
 

In [50]:
movies.columns

Index(['genres', 'movie_id', 'keywords', 'title', 'overview', 'cast', 'crew'], dtype='object')

In [56]:

movies['tag']= movies['overview']+ movies['keywords']+ movies['genres']+ movies['cast']+ movies['crew']

In [57]:
movies.head()

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew,tag
0,"[Action, Adventure, Fantasy, ScienceFiction]",19995,"[cultureclash, future, spacewar, spacecolony, ...",Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[SamWorthington, ZoeSaldana, SigourneyWeaver, ...",[JamesCameron],"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,"[Adventure, Fantasy, Action]",285,"[ocean, drugabuse, exoticisland, eastindiatrad...",Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[JohnnyDepp, OrlandoBloom, KeiraKnightley, Ste...",[GoreVerbinski],"[Captain, Barbossa,, long, believed, to, be, d..."
2,"[Action, Adventure, Crime]",206647,"[spy, basedonnovel, secretagent, sequel, mi6, ...",Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[DanielCraig, ChristophWaltz, LéaSeydoux, Ralp...",[SamMendes],"[A, cryptic, message, from, Bond’s, past, send..."
3,"[Action, Crime, Drama, Thriller]",49026,"[dccomics, crimefighter, terrorist, secretiden...",The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[ChristianBale, MichaelCaine, GaryOldman, Anne...",[ChristopherNolan],"[Following, the, death, of, District, Attorney..."
4,"[Action, Adventure, ScienceFiction]",49529,"[basedonnovel, mars, medallion, spacetravel, p...",John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[TaylorKitsch, LynnCollins, SamanthaMorton, Wi...",[AndrewStanton],"[John, Carter, is, a, war-weary,, former, mili..."


In [60]:
# assigning a new name 

New_movies = movies.drop(columns=['genres','keywords','overview','cast','crew'])

In [62]:
New_movies

Unnamed: 0,movie_id,title,tag
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin..."
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d..."
2,206647,Spectre,"[A, cryptic, message, from, Bond’s, past, send..."
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney..."
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili..."
...,...,...,...
4804,9367,El Mariachi,"[El, Mariachi, just, wants, to, play, his, gui..."
4805,72766,Newlyweds,"[A, newlywed, couple's, honeymoon, is, upended..."
4806,231617,"Signed, Sealed, Delivered","[""Signed,, Sealed,, Delivered"", introduces, a,..."
4807,126186,Shanghai Calling,"[When, ambitious, New, York, attorney, Sam, is..."


In [65]:
New_movies['tag']= New_movies['tag'].apply(lambda x: " ".join(x))

In [67]:
New_movies.head()

Unnamed: 0,movie_id,title,tag
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...
4,49529,John Carter,"John Carter is a war-weary, former military ca..."


# Now we have cleaned data for building recommendation System

In [83]:
# feature engeenering required to convert non-numeric data to numeric data 

from sklearn.feature_extraction.text import CountVectorizer  
cv = CountVectorizer(max_features = 5000,stop_words='english',binary = True)
vector = cv.fit_transform(New_movies['tag']).toarray()

In [84]:
vector.shape

(4806, 5000)

# building model- cosine similarity

In [85]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vector)
similarity.shape

(4806, 4806)

In [86]:
similarity[0]

array([1.        , 0.08827348, 0.06465082, ..., 0.02787473, 0.03178209,
       0.        ])

In [88]:
New_movies.head()

Unnamed: 0,movie_id,title,tag
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha..."
2,206647,Spectre,A cryptic message from Bond’s past sends him o...
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...
4,49529,John Carter,"John Carter is a war-weary, former military ca..."


In [89]:
# to find the index of a specific movie
New_movies[New_movies['title']== 'Avatar'].index[0]

0

In [91]:
#to sort the similarities of a specific item in a similarity matrix 
sorted(list(enumerate(similarity[0])),reverse = True)

[(4805, 0.0),
 (4804, 0.03178208630818641),
 (4803, 0.027874733666903028),
 (4802, 0.0657951694959769),
 (4801, 0.033501260508640406),
 (4800, 0.0),
 (4799, 0.06356417261637282),
 (4798, 0.027186362391351845),
 (4797, 0.0),
 (4796, 0.0),
 (4795, 0.0),
 (4794, 0.0),
 (4793, 0.0657951694959769),
 (4792, 0.0),
 (4791, 0.0),
 (4790, 0.03481553119113957),
 (4789, 0.02686076546751268),
 (4788, 0.0),
 (4787, 0.02566635011696727),
 (4786, 0.0),
 (4785, 0.0),
 (4784, 0.04652421051992355),
 (4783, 0.0),
 (4782, 0.030303030303030307),
 (4781, 0.06963106238227915),
 (4780, 0.025949964805384102),
 (4779, 0.0),
 (4778, 0.0),
 (4777, 0.055048188256318034),
 (4776, 0.0),
 (4775, 0.03629770043203139),
 (4774, 0.0),
 (4773, 0.037113480951260276),
 (4772, 0.03481553119113957),
 (4771, 0.04222003309207491),
 (4770, 0.0),
 (4769, 0.0),
 (4768, 0.0),
 (4767, 0.03993615319154359),
 (4766, 0.025391835805951283),
 (4765, 0.0),
 (4764, 0.0),
 (4763, 0.0),
 (4762, 0.0),
 (4761, 0.03413943709994595),
 (4760, 0.0)

# Building Recommendation System

In [97]:
def recommend (movie):
    index = New_movies[New_movies['title']==movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True, key = lambda x: x[1])
    for i in distances[1:11]:
        print(New_movies.iloc[i[0]].title)

In [99]:
recommend('Avatar')

Small Soldiers
Jupiter Ascending
Star Trek Into Darkness
Ender's Game
The Fifth Element
Moonraker
The Book of Life
BloodRayne
The Helix... Loaded
Beowulf


In [100]:
recommend('The Dark Knight Rises')

The Dark Knight
Batman Begins
Batman
Batman
Batman Forever
Batman & Robin
Nighthawks
Carlos
Amidst the Devil's Wings
Batman Returns


In [101]:
recommend('Kung Fu Panda 3')

Kung Fu Panda
Kung Fu Panda 2
How to Train Your Dragon 2
Teenage Mutant Ninja Turtles III
My Big Fat Greek Wedding 2
Shark Tale
The Helix... Loaded
Shrek
Alpha and Omega: The Legend of the Saw Tooth Cave
Ishtar
