**Importing the necassary libraries**

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

*Importing and analyzing the dataset*

In [4]:
from google.colab import files
uploaded = files.upload()

Saving movies.csv to movies.csv


In [5]:
my_df = pd.read_csv("movies.csv")

In [None]:
my_df.head()

Unnamed: 0,index,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count,cast,crew,director
0,0,237000000,Action Adventure Fantasy Science Fiction,http://www.avatarmovie.com/,19995,culture clash future space war space colony so...,en,Avatar,"In the 22nd century, a paraplegic Marine is di...",150.437577,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2009-12-10,2787965087,162.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Enter the World of Pandora.,Avatar,7.2,11800,Sam Worthington Zoe Saldana Sigourney Weaver S...,"[{'name': 'Stephen E. Rivkin', 'gender': 0, 'd...",James Cameron
1,1,300000000,Adventure Fantasy Action,http://disney.go.com/disneypictures/pirates/,285,ocean drug abuse exotic island east india trad...,en,Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...",139.082615,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2007-05-19,961000000,169.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"At the end of the world, the adventure begins.",Pirates of the Caribbean: At World's End,6.9,4500,Johnny Depp Orlando Bloom Keira Knightley Stel...,"[{'name': 'Dariusz Wolski', 'gender': 2, 'depa...",Gore Verbinski
2,2,245000000,Action Adventure Crime,http://www.sonypictures.com/movies/spectre/,206647,spy based on novel secret agent sequel mi6,en,Spectre,A cryptic message from Bond’s past sends him o...,107.376788,"[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",2015-10-26,880674609,148.0,"[{""iso_639_1"": ""fr"", ""name"": ""Fran\u00e7ais""},...",Released,A Plan No One Escapes,Spectre,6.3,4466,Daniel Craig Christoph Waltz L\u00e9a Seydoux ...,"[{'name': 'Thomas Newman', 'gender': 2, 'depar...",Sam Mendes
3,3,250000000,Action Crime Drama Thriller,http://www.thedarkknightrises.com/,49026,dc comics crime fighter terrorist secret ident...,en,The Dark Knight Rises,Following the death of District Attorney Harve...,112.31295,"[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-07-16,1084939099,165.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The Legend Ends,The Dark Knight Rises,7.6,9106,Christian Bale Michael Caine Gary Oldman Anne ...,"[{'name': 'Hans Zimmer', 'gender': 2, 'departm...",Christopher Nolan
4,4,260000000,Action Adventure Science Fiction,http://movies.disney.com/john-carter,49529,based on novel mars medallion space travel pri...,en,John Carter,"John Carter is a war-weary, former military ca...",43.926995,"[{""name"": ""Walt Disney Pictures"", ""id"": 2}]","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2012-03-07,284139100,132.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,"Lost in our world, found in another.",John Carter,6.1,2124,Taylor Kitsch Lynn Collins Samantha Morton Wil...,"[{'name': 'Andrew Stanton', 'gender': 2, 'depa...",Andrew Stanton


In [6]:
my_df.columns

Index(['index', 'budget', 'genres', 'homepage', 'id', 'keywords',
       'original_language', 'original_title', 'overview', 'popularity',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'tagline', 'title',
       'vote_average', 'vote_count', 'cast', 'crew', 'director'],
      dtype='object')

**Feature Selection for Recommending Movies**

In [7]:
features = ['keywords','cast','genres','director']

*Data preprocessing for empty values*

In [8]:
for feature in features:
    my_df[feature] = my_df[feature].fillna('') 

*Combining the selected features to process*

In [9]:
def combine_features(row):
    return row['keywords']+" "+row['cast']+" "+row['genres']+" "+row['director']

In [10]:
my_df["selected_features"] = my_df.apply(combine_features,axis=1)

*Creating the dataframe of combined features*

In [11]:
sf = CountVectorizer() 
count_matrix = sf.fit_transform(my_df["selected_features"])

*Computing the cosine similarity*

In [12]:
cosine_sim = cosine_similarity(count_matrix)

*Let's say user like the movie "The Dark Knight Rises"*

In [13]:
liked_movie = "The Dark Knight Rises"

In [14]:
liked_movie_index = my_df[my_df.title == liked_movie]["index"].values[0]

In [15]:
liked_movie_index

3

*Now we will need to return the movies in descending order of their similarity in order to take out most similar movies.*

In [16]:
reco_movies = list(enumerate(cosine_sim[liked_movie_index]))

In [17]:
reco_movies

[(0, 0.03774256780481986),
 (1, 0.03580574370197164),
 (2, 0.16357216402190616),
 (3, 0.9999999999999997),
 (4, 0.03922322702763681),
 (5, 0.07844645405527362),
 (6, 0.0),
 (7, 0.03580574370197164),
 (8, 0.08362420100070908),
 (9, 0.12009611535381537),
 (10, 0.11766968108291043),
 (11, 0.23533936216582085),
 (12, 0.04089304100547654),
 (13, 0.03922322702763681),
 (14, 0.14824986333222023),
 (15, 0.0),
 (16, 0.03522349768381735),
 (17, 0.04181210050035454),
 (18, 0.07161148740394328),
 (19, 0.04181210050035454),
 (20, 0.04003203845127179),
 (21, 0.04003203845127179),
 (22, 0.0),
 (23, 0.0),
 (24, 0.06933752452815364),
 (25, 0.08770580193070293),
 (26, 0.03706246583305506),
 (27, 0.08006407690254358),
 (28, 0.08006407690254358),
 (29, 0.1254363015010636),
 (30, 0.07692307692307693),
 (31, 0.07844645405527362),
 (32, 0.07412493166611012),
 (33, 0.07412493166611012),
 (34, 0.0),
 (35, 0.08559209850218259),
 (36, 0.08006407690254358),
 (37, 0.0),
 (38, 0.038461538461538464),
 (39, 0.1153846

In [18]:
des_reco_movies = sorted(reco_movies,key=lambda x:x[1], reverse = True)[1:]

**Now let's recommend the top 5 similar movies from the list**

In [19]:
i=0 #Counter

In [20]:
for row in des_reco_movies:
  title = my_df[my_df.index == row[0]]["title"].values[0]
  print(title)
  i+=1
  if i==5:
    break

Batman Begins
The Dark Knight
Amidst the Devil's Wings
The Killer Inside Me
The Prestige


In [21]:
firstRec = "Battleship"

In [22]:
firstRecIndex=my_df[my_df.title==firstRec]["index"].values[0]

In [23]:
firstRecIndex

27

In [24]:
recMovies = list(enumerate(cosine_sim[firstRecIndex]))

In [25]:
recMovies

[(0, 0.15713484026367724),
 (1, 0.14907119849998599),
 (2, 0.08512565307587487),
 (3, 0.08006407690254358),
 (4, 0.24494897427831788),
 (5, 0.08164965809277262),
 (6, 0.0),
 (7, 0.14907119849998599),
 (8, 0.04351941398892446),
 (9, 0.08333333333333336),
 (10, 0.16329931618554525),
 (11, 0.12247448713915893),
 (12, 0.17025130615174974),
 (13, 0.08164965809277262),
 (14, 0.1543033499620919),
 (15, 0.04003203845127179),
 (16, 0.14664711502135333),
 (17, 0.08703882797784893),
 (18, 0.1118033988749895),
 (19, 0.1305582419667734),
 (20, 0.08333333333333336),
 (21, 0.08333333333333336),
 (22, 0.0936585811581694),
 (23, 0.045643546458763846),
 (24, 0.10825317547305485),
 (25, 0.045643546458763846),
 (26, 0.1543033499620919),
 (27, 0.9999999999999997),
 (28, 0.2083333333333334),
 (29, 0.1305582419667734),
 (30, 0.08006407690254358),
 (31, 0.16329931618554525),
 (32, 0.03857583749052298),
 (33, 0.1928791874526149),
 (34, 0.0),
 (35, 0.17817416127494962),
 (36, 0.1666666666666667),
 (37, 0.045643

In [26]:
orderedMovieRecs = sorted(recMovies, key=lambda x:x[1], reverse=True)[1:]

In [27]:
i=0

In [28]:
for row in recMovies:
  title=my_df[my_df.index==row[0]]["title"].values[0]
  print(title)
  i+=1
  if i==5:
    break

Avatar
Pirates of the Caribbean: At World's End
Spectre
The Dark Knight Rises
John Carter
