
##Content-Based Recommendation System








In [None]:
# +-------------------------------+
# |           Start               |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Load datasets: movies.csv &   |
# | credits.csv                   |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Merge datasets on 'title'     |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Select relevant columns:       |
# | movie_id, title, overview,    |
# | genres, keywords, cast, crew  |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Preprocess data:               |
# | - Convert JSON strings to     |
# |   lists                       |
# | - Keep top 3 cast members     |
# | - Extract director            |
# | - Remove spaces from names    |
# | - Tokenize overview text      |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Combine text fields into a     |
# | single 'tags' column           |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Vectorize 'tags' using         |
# | CountVectorizer (max 5000 features) |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Compute cosine similarity      |
# | matrix for all movies          |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Define recommendation function |
# | - Find movie index             |
# | - Retrieve similarity scores   |
# | - Sort and select top 5 movies |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Test recommendation function   |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# | Save movie list and similarity |
# | matrix using pickle            |
# +-------------------------------+
#               |
#               v
# +-------------------------------+
# |             End                |
# +-------------------------------+


In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Importing required libraries

In [10]:

import numpy as np  # for numerical operations
import pandas as pd  # for data manipulation and analysis (mainly using DataFrames)
import os  # for interacting with the operating system (e.g., reading files)


## Reading the movies and credits data

In [11]:

movies = pd.read_csv('/content/tmdb_5000_movies.csv')
credits = pd.read_csv("/content/tmdb_5000_credits.csv")


## Display first 2 rows of the movies dataframe

In [12]:

movies.head(2)


Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500


## Checking shape (number of rows and columns) of movies data

In [13]:

movies.shape



(4803, 20)

## Previewing the credits dataset

In [14]:

credits.head()


Unnamed: 0,movie_id,title,cast,crew
0,19995,Avatar,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,285,Pirates of the Caribbean: At World's End,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,206647,Spectre,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,49026,The Dark Knight Rises,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,49529,John Carter,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


## Merging both datasets on the "title" column to get a single dataset


In [15]:
movies = movies.merge(credits, on='title')


## Displaying the combined dataset

In [16]:

movies.head()


Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,...,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,movie_id,cast,crew
0,237000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.avatarmovie.com/,19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...",...,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,19995,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,300000000,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",http://disney.go.com/disneypictures/pirates/,285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...",...,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,285,"[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,245000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://www.sonypictures.com/movies/spectre/,206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",en,Spectre,A cryptic message from Bondâ€™s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...",...,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,206647,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,250000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",http://www.thedarkknightrises.com/,49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...",...,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,49026,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,260000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",http://movies.disney.com/john-carter,49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]",...,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,49529,"[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


## Selecting only the required columns for the recommendation system


In [17]:
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew',"homepage"	]]
movies.head()


Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",http://www.avatarmovie.com/
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",http://disney.go.com/disneypictures/pirates/
2,206647,Spectre,A cryptic message from Bondâ€™s past sends him o...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",http://www.sonypictures.com/movies/spectre/
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",http://www.thedarkknightrises.com/
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",http://movies.disney.com/john-carter


## Importing Abstract Syntax Trees (for safely evaluating strings that look like lists/dictionaries)


In [18]:
import ast



## Function to extract only the 'name' field from a stringified list of dictionaries (e.g., genres, keywords)


In [19]:
def convert_to_list(text):
    tempL = []
    for i in ast.literal_eval(text):
        tempL.append(i['name'])
    return tempL


## Dropping rows with missing (NaN) values to clean the data

In [20]:

movies.dropna(inplace=True)



## Applying the convert function to 'genres' column

In [21]:


movies['genres'] = movies['genres'].apply(convert_to_list)
movies.head()



Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",http://www.avatarmovie.com/
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",http://disney.go.com/disneypictures/pirates/
2,206647,Spectre,A cryptic message from Bondâ€™s past sends him o...,"[Action, Adventure, Crime]","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",http://www.sonypictures.com/movies/spectre/
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",http://www.thedarkknightrises.com/
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[{""id"": 818, ""name"": ""based on novel""}, {""id"":...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",http://movies.disney.com/john-carter


## Applying the same function to 'keywords' column


In [22]:


movies['keywords'] = movies['keywords'].apply(convert_to_list)
movies.head()




Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, Science Fiction]","[culture clash, future, space war, space colon...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de...",http://www.avatarmovie.com/
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drug abuse, exotic island, east india ...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...",http://disney.go.com/disneypictures/pirates/
2,206647,Spectre,A cryptic message from Bondâ€™s past sends him o...,"[Action, Adventure, Crime]","[spy, based on novel, secret agent, sequel, mi...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...",http://www.sonypictures.com/movies/spectre/
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dc comics, crime fighter, terrorist, secret i...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...",http://www.thedarkknightrises.com/
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[Action, Adventure, Science Fiction]","[based on novel, mars, medallion, space travel...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de...",http://movies.disney.com/john-carter


## Function to get top 3 cast members from the list


In [23]:

def convert_to_list_1(text):
    tempL = []
    counter = 0
    for i in ast.literal_eval(text):
        if counter < 3:
            tempL.append(i['name'])
        counter += 1
    return tempL



## Extracting top 3 cast members


In [24]:


movies['cast'] = movies['cast'].apply(convert_to_list_1)
movies['cast'] = movies['cast'].apply(lambda x: x[0:3])




## Function to extract the director from the crew list


In [25]:
def fetch_director(text):
    tempL = []
    for i in ast.literal_eval(text):
        if i['job'] == 'Director':
            tempL.append(i['name'])
    return tempL


## Applying the function to get directors


In [26]:
movies['crew'] = movies['crew'].apply(fetch_director)



## Sample records to see how the data looks


In [27]:
movies.sample(15)



Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
4654,78307,On the Outs,Follows the choices made by three young women ...,[Drama],"[prison, drug dealer, single mother, teenage p...","[Judy Marte, Paola Mendoza, Flaco Navaja]","[Lori Silverbush, Michael Skolnik]",http://www.ontheouts.com/
2419,9792,The Hills Have Eyes,Based on Wes Craven's 1977 suspenseful cult cl...,"[Horror, Thriller]","[ambush, new mexico, van, family holiday, axe ...","[Aaron Stanford, Kathleen Quinlan, Vinessa Shaw]",[Alexandre Aja],http://www.foxsearchlight.com/thehillshaveeyes/
2060,44113,Stone,Parole officer Jack Mabry has only a few weeks...,[Drama],"[prison, fire, manipulation, seduction, vegeta...","[Frances Conroy, Robert De Niro, Edward Norton]",[John Curran],http://www.stonemovie.com/stone.html
1547,10637,Remember the Titans,After leading his football team to 15 winning ...,[Drama],"[ku klux klan, american football, trainer, spo...","[Denzel Washington, Will Patton, Ryan Hurst]",[Boaz Yakin],http://movies.disney.com/remember-the-titans
2121,15655,The Tigger Movie,"As it happens, everybody - Pooh, Piglet, Eeyor...","[Animation, Family]","[owl, donkey, tiger, piglet, bear, rabbit, wom...","[Jim Cummings, Nikita Hopkins, Ken Sansom]",[Jun Falkenstein],http://www.tigger.com
1746,4964,Knocked Up,"For fun loving party animal Ben Stone, the las...","[Comedy, Romance, Drama]","[alcohol, one-night stand, bed, pregnancy and ...","[Seth Rogen, Katherine Heigl, Leslie Mann]",[Judd Apatow],http://knockedupmovie.com/
4525,41144,To Save A Life,Jake Taylor has everything. He has a beautiful...,[Drama],"[christian, independent film]","[Randy Wayne, Deja Kreutzberg, Joshua Weigel]",[Brian Baugh],http://tosavealifemovie.com/
2380,37903,The White Ribbon,Strange events happen in a small village in th...,"[Crime, Drama, Mystery]","[germany, child abuse, pastor, propaganda, chi...","[Ursina Lardi, Ulrich Tukur, Burghart KlauÃŸner]",[Michael Haneke],http://www.sonyclassics.com/thewhiteribbon/
1702,315664,Florence Foster Jenkins,"The story of Florence Foster Jenkins, a New Yo...","[Comedy, Drama]","[opera, biography, singing false]","[Meryl Streep, Hugh Grant, Rebecca Ferguson]",[Stephen Frears],http://www.florencefosterjenkinsmovie.com/
2151,6933,One Missed Call,Several people start receiving voice-mails fro...,"[Horror, Mystery, Thriller]","[candy, loss of sister, mobile phone, answerin...","[Shannyn Sossamon, Edward Burns, Ana Claudia T...",[Eric Valette],http://onemissedcallmovie.warnerbros.com/


## Function to remove spaces in multi-word names (e.g., "Tom Cruise" â†’ "TomCruise")


In [28]:
def collapse(L):
    L1 = []
    for i in L:
        L1.append(i.replace(" ", ""))
    return L1



## Removing spaces in names to treat them as single tokens


In [29]:
movies['cast'] = movies['cast'].apply(collapse)
movies['crew'] = movies['crew'].apply(collapse)
movies['genres'] = movies['genres'].apply(collapse)
movies['keywords'] = movies['keywords'].apply(collapse)


In [30]:
movies['genres'][0]

['Action', 'Adventure', 'Fantasy', 'ScienceFiction']

In [31]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
0,19995,Avatar,"In the 22nd century, a paraplegic Marine is di...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],http://www.avatarmovie.com/
1,285,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],http://disney.go.com/disneypictures/pirates/
2,206647,Spectre,A cryptic message from Bondâ€™s past sends him o...,"[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LÃ©aSeydoux]",[SamMendes],http://www.sonypictures.com/movies/spectre/
3,49026,The Dark Knight Rises,Following the death of District Attorney Harve...,"[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],http://www.thedarkknightrises.com/
4,49529,John Carter,"John Carter is a war-weary, former military ca...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton],http://movies.disney.com/john-carter


## Splitting the overview text into individual words


In [32]:

movies['overview'] = movies['overview'].apply(lambda x: x.split())




In [33]:
movies['overview'][0]

['In',
 'the',
 '22nd',
 'century,',
 'a',
 'paraplegic',
 'Marine',
 'is',
 'dispatched',
 'to',
 'the',
 'moon',
 'Pandora',
 'on',
 'a',
 'unique',
 'mission,',
 'but',
 'becomes',
 'torn',
 'between',
 'following',
 'orders',
 'and',
 'protecting',
 'an',
 'alien',
 'civilization.']

In [34]:
movies.head()

Unnamed: 0,movie_id,title,overview,genres,keywords,cast,crew,homepage
0,19995,Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[Action, Adventure, Fantasy, ScienceFiction]","[cultureclash, future, spacewar, spacecolony, ...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron],http://www.avatarmovie.com/
1,285,Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[Adventure, Fantasy, Action]","[ocean, drugabuse, exoticisland, eastindiatrad...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski],http://disney.go.com/disneypictures/pirates/
2,206647,Spectre,"[A, cryptic, message, from, Bondâ€™s, past, send...","[Action, Adventure, Crime]","[spy, basedonnovel, secretagent, sequel, mi6, ...","[DanielCraig, ChristophWaltz, LÃ©aSeydoux]",[SamMendes],http://www.sonypictures.com/movies/spectre/
3,49026,The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[Action, Crime, Drama, Thriller]","[dccomics, crimefighter, terrorist, secretiden...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan],http://www.thedarkknightrises.com/
4,49529,John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[Action, Adventure, ScienceFiction]","[basedonnovel, mars, medallion, spacetravel, p...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton],http://movies.disney.com/john-carter


## Creating a new column 'tags' that combines overview, genres, keywords, cast, and crew


In [35]:

movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']



In [36]:
movies['tags'][0]

['In',
 'the',
 '22nd',
 'century,',
 'a',
 'paraplegic',
 'Marine',
 'is',
 'dispatched',
 'to',
 'the',
 'moon',
 'Pandora',
 'on',
 'a',
 'unique',
 'mission,',
 'but',
 'becomes',
 'torn',
 'between',
 'following',
 'orders',
 'and',
 'protecting',
 'an',
 'alien',
 'civilization.',
 'Action',
 'Adventure',
 'Fantasy',
 'ScienceFiction',
 'cultureclash',
 'future',
 'spacewar',
 'spacecolony',
 'society',
 'spacetravel',
 'futuristic',
 'romance',
 'space',
 'alien',
 'tribe',
 'alienplanet',
 'cgi',
 'marine',
 'soldier',
 'battle',
 'loveaffair',
 'antiwar',
 'powerrelations',
 'mindandsoul',
 '3d',
 'SamWorthington',
 'ZoeSaldana',
 'SigourneyWeaver',
 'JamesCameron']

## Dropping the original columns that were combined into 'tags'


In [37]:
new = movies.drop(columns=['overview', 'genres', 'keywords', 'cast', 'crew'])


## Joining the list of words in 'tags' into a single string (space-separated)


In [38]:
new['tags'] = new['tags'].apply(lambda x: " ".join(x))
new.head()


Unnamed: 0,movie_id,title,homepage,tags
0,19995,Avatar,http://www.avatarmovie.com/,"In the 22nd century, a paraplegic Marine is di..."
1,285,Pirates of the Caribbean: At World's End,http://disney.go.com/disneypictures/pirates/,"Captain Barbossa, long believed to be dead, ha..."
2,206647,Spectre,http://www.sonypictures.com/movies/spectre/,A cryptic message from Bondâ€™s past sends him o...
3,49026,The Dark Knight Rises,http://www.thedarkknightrises.com/,Following the death of District Attorney Harve...
4,49529,John Carter,http://movies.disney.com/john-carter,"John Carter is a war-weary, former military ca..."


In [39]:
new['tags']

Unnamed: 0,tags
0,"In the 22nd century, a paraplegic Marine is di..."
1,"Captain Barbossa, long believed to be dead, ha..."
2,A cryptic message from Bondâ€™s past sends him o...
3,Following the death of District Attorney Harve...
4,"John Carter is a war-weary, former military ca..."
...,...
4790,Josh's life is pretty much in the toilet. He's...
4797,Recently dumped by his girlfirend for another ...
4802,Friends/fledgling entrepreneurs invent a devic...
4806,"""Signed, Sealed, Delivered"" introduces a dedic..."


## Converting text data into numerical vectors using Bag of Words model (with max 5000 features and removing English stopwords)


In [40]:
from sklearn.feature_extraction.text import CountVectorizer
cv = CountVectorizer(max_features=5000, stop_words='english')
vector = cv.fit_transform(new['tags']).toarray()



In [41]:
vector

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

## Checking shape of the resulting matrix (rows = number of movies, columns = vocabulary size)


In [42]:
vector.shape


(1713, 5000)

## Calculating cosine similarity between all movie vectors


In [43]:
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(vector)



## Sample similarity values


In [44]:
similarity[0]

array([1.        , 0.08257228, 0.05504819, ..., 0.05423261, 0.0225877 ,
       0.0243975 ])

* Because cosine_similarity computes similarity between every pair of rows (movies) in vector.

* Each row corresponds to one movieâ€™s feature vector.

* So you get similarity scores for all pairs of 4806 movies.

In [45]:
similarity.shape


(1713, 1713)

## Helper function to recommend similar movies based on cosine similarity


In [48]:

def recommend(movie):
    # Find index of the given movie
    index = new[new['title'] == movie].index[0]
    # Get similarity scores for that movie with all other movies
    distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
    # Print top 5 most similar movies (excluding the input movie itself)
    for i in distances[1:6]:
        print(new.iloc[i[0]].title)



##This is how recommend function works:

###Step-by-step explanation of the **`recommend`** function

---

### Purpose:

This function **finds and prints movies similar to the given movie** based on their content similarity (using cosine similarity scores).

---

### Breakdown of the code:

```python
def recommend(movie):
```

* This defines a function named `recommend` that takes one input parameter, `movie` â€” the movie title you want recommendations for.

---

```python
index = new[new['title'] == movie].index[0]
```

* This line finds the **row index** of the movie in the dataset `new` where the title matches the given `movie` name.
* `new['title'] == movie` creates a boolean filter.
* `.index[0]` extracts the first matching index.
* This index will be used to look up similarity scores for that movie.

---

```python
distances = sorted(list(enumerate(similarity[index])), reverse=True, key=lambda x: x[1])
```

* `similarity` is a matrix where each row corresponds to a movie, and each column contains similarity scores between that movie and every other movie.
* `similarity[index]` gives the similarity scores of the input movie with all other movies.
* `enumerate()` pairs each similarity score with its movie index, creating tuples like `(movie_index, similarity_score)`.
* `list()` converts the enumerate object into a list of tuples.
* `sorted(..., reverse=True, key=lambda x: x[1])` sorts this list **in descending order** by the similarity score (`x[1]`), so the most similar movies come first.

---

```python
for i in distances[1:6]:
    print(new.iloc[i[0]].title)
```

* `distances[1:6]` slices the sorted list to get the **top 5 movies** after the first one.
* We skip the first element `distances[0]` because that is the movie itself (with similarity = 1).
* The loop goes through these 5 most similar movies.
* `new.iloc[i[0]].title` fetches the title of the movie using the index `i[0]` from the original dataframe.
* `print()` displays the recommended movie titles.

---

### Summary:

* You give the function a movie name.
* It finds the similarity scores of that movie with all others.
* It sorts the movies by similarity score, highest first.
* It prints the top 5 similar movie titles (excluding the movie you searched for).


## Example usage of the recommendation system


In [53]:
recommend('Avatar')

Aliens vs Predator: Requiem
Battle: Los Angeles
Ender's Game
Apollo 18
Jupiter Ascending


## Saving the data and similarity matrix to files so that they can be used in a web app or deployed system


In [54]:
import pickle
pickle.dump(new, open('movie_list.pkl', 'wb'))
pickle.dump(similarity, open('similarity.pkl', 'wb'))


## Explaination for Recommend function

In [55]:
new[new['title'] == "X-Men: Days of Future Past"].index[0]

np.int64(46)

In [56]:
list(enumerate(similarity[46]))

[(0, np.float64(0.04303314829119352)),
 (1, np.float64(0.04737793696791343)),
 (2, np.float64(0.04737793696791343)),
 (3, np.float64(0.046225016352102424)),
 (4, np.float64(0.062257280636469035)),
 (5, np.float64(0.07622159339667059)),
 (6, np.float64(0.01571348402636772)),
 (7, np.float64(0.07939884455672182)),
 (8, np.float64(0.024056261216234404)),
 (9, np.float64(0.08114408259335795)),
 (10, np.float64(0.09938079899999065)),
 (11, np.float64(0.04103049699311091)),
 (12, np.float64(0.050539869571926896)),
 (13, np.float64(0.056613851707229795)),
 (14, np.float64(0.15660307954993902)),
 (15, np.float64(0.04303314829119352)),
 (16, np.float64(0.06751595780557779)),
 (17, np.float64(0.036369648372665396)),
 (18, np.float64(0.06600983619844496)),
 (19, np.float64(0.07804881763180782)),
 (20, np.float64(0.12634967392981725)),
 (21, np.float64(0.04888237167378443)),
 (22, np.float64(0.05237828008789241)),
 (23, np.float64(0.04593152121746254)),
 (24, np.float64(0.08948747402853657)),
 (25

In [57]:
sorted(list(enumerate(similarity[46])), reverse=True, key=lambda x: x[1])[0:5]

[(46, np.float64(0.9999999999999998)),
 (891, np.float64(0.3132061590998779)),
 (526, np.float64(0.2697815494246991)),
 (775, np.float64(0.26057865332352387)),
 (387, np.float64(0.18257418583505539))]