In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity, cosine_distances

### Discovering dataset1

In [2]:
dataset = pd.read_csv(r"Dataset/tmdb_5000_credits.csv")
dataset.sample(5)

Unnamed: 0,movie_id,title,cast,crew
2498,12410,Good,"[{""cast_id"": 2, ""character"": ""Halder"", ""credit...","[{""credit_id"": ""52fe44dd9251416c75043a39"", ""de..."
1965,1788,Footloose,"[{""cast_id"": 1, ""character"": ""Ren McCormack"", ...","[{""credit_id"": ""52fe4315c3a36847f8039007"", ""de..."
3389,31535,Chairman of the Board,"[{""cast_id"": 1, ""character"": ""Edison"", ""credit...","[{""credit_id"": ""52fe44859251416c91013979"", ""de..."
2278,581,Dances with Wolves,"[{""cast_id"": 12, ""character"": ""Lieutenant Dunb...","[{""credit_id"": ""52fe4255c3a36847f801630d"", ""de..."
414,9637,Scooby-Doo,"[{""cast_id"": 13, ""character"": ""Fred Jones"", ""c...","[{""credit_id"": ""52fe4514c3a36847f80bb51f"", ""de..."


##### Null vals

In [12]:
dataset.isna().sum()

movie_id    0
title       0
cast        0
crew        0
dtype: int64

##### Duplicates

### Discovering dataset2

In [6]:
dataset2 = pd.read_csv(r"Dataset/tmdb_5000_movies.csv")
dataset2.sample(5)

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
1189,40000000,"[{""id"": 35, ""name"": ""Comedy""}]",http://www.gethimtothegreek.net/,32823,"[{""id"": 179430, ""name"": ""aftercreditsstinger""}]",en,Get Him to the Greek,Pinnacle records has the perfect plan to get t...,17.869443,"[{""name"": ""Universal Pictures"", ""id"": 33}, {""n...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2010-06-04,90029656,109.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,Aaron Green has 72 hours to get a Rock Star fr...,Get Him to the Greek,5.9,612
4644,0,"[{""id"": 27, ""name"": ""Horror""}]",,325123,[],en,Teeth and Blood,A beautiful diva is murdered on the set of hor...,0.055325,[],[],2015-03-10,0,96.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,,Teeth and Blood,3.0,1
4094,2000000,"[{""id"": 18, ""name"": ""Drama""}, {""id"": 10402, ""n...",,43839,"[{""id"": 1691, ""name"": ""dance""}, {""id"": 167411,...",en,Alexander's Ragtime Band,"Roger Grant, a classical violinist, disappoint...",0.483974,"[{""name"": ""Twentieth Century Fox Film Corporat...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1938-05-24,4000000,106.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,An American Cavalcade!,Alexander's Ragtime Band,4.8,6
3253,8000000,"[{""id"": 18, ""name"": ""Drama""}, {""id"": 35, ""name...",,14429,"[{""id"": 6270, ""name"": ""high school""}, {""id"": 1...",en,Drive Me Crazy,Nicole and Chase live next door to each other ...,4.482714,"[{""name"": ""Twentieth Century Fox Film Corporat...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1999-10-01,22593409,91.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The last guy she wants is the only one she needs.,Drive Me Crazy,5.8,62
1788,26000000,"[{""id"": 27, ""name"": ""Horror""}, {""id"": 53, ""nam...",,11460,"[{""id"": 1562, ""name"": ""hostage""}, {""id"": 2250,...",en,Red Eye,After attending the funeral of her grandmother...,8.802626,"[{""name"": ""DreamWorks SKG"", ""id"": 27}, {""name""...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2005-08-04,57891803,85.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Fear takes flight.,Red Eye,6.2,455


In [7]:
dataset2.shape

(4803, 20)

##### Handling Null vals

In [8]:
# Percentages of empty vals
((dataset2.isna().sum())/dataset2.shape[0])*100

budget                   0.000000
genres                   0.000000
homepage                64.355611
id                       0.000000
keywords                 0.000000
original_language        0.000000
original_title           0.000000
overview                 0.062461
popularity               0.000000
production_companies     0.000000
production_countries     0.000000
release_date             0.020820
revenue                  0.000000
runtime                  0.041641
spoken_languages         0.000000
status                   0.000000
tagline                 17.572351
title                    0.000000
vote_average             0.000000
vote_count               0.000000
dtype: float64

In [265]:
# Dropping homepage and tagline: homepage is ~64% empty and tagline ~18% empty
# (see the percentages above), and neither column is selected for the final feature set.
# Reassigning with errors="ignore" (instead of inplace=True) keeps this cell safe to re-run:
# a second execution no longer raises KeyError because the columns are already gone.
dataset2 = dataset2.drop(columns=["homepage", "tagline"], errors="ignore")

##### Handling duplicates

In [266]:
# Finding the unique elements
dataset.nunique()

movie_id    4803
title       4800
cast        4761
crew        4776
dtype: int64

In [13]:
# To find the total number of rows which are duplicates
dataset2.duplicated().sum()

0

In [14]:
duplicates = dataset2[dataset2.title.duplicated()].title

In [15]:
duplicates

2877           The Host
3693    Out of the Blue
4267             Batman
Name: title, dtype: object

In [16]:
dataset2[dataset2.title.isin(duplicates)]

Unnamed: 0,budget,genres,homepage,id,keywords,original_language,original_title,overview,popularity,production_companies,production_countries,release_date,revenue,runtime,spoken_languages,status,tagline,title,vote_average,vote_count
972,44000000,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",,72710,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",en,The Host,A parasitic alien soul is injected into the bo...,42.933027,"[{""name"": ""Nick Wechsler Productions"", ""id"": 8...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",2013-03-22,63327201,125.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,You will be one of us,The Host,6.0,1817
1359,35000000,"[{""id"": 14, ""name"": ""Fantasy""}, {""id"": 28, ""na...",,268,"[{""id"": 848, ""name"": ""double life""}, {""id"": 84...",en,Batman,The Dark Knight of Gotham City begins his war ...,44.104469,"[{""name"": ""PolyGram Filmed Entertainment"", ""id...","[{""iso_3166_1"": ""GB"", ""name"": ""United Kingdom""...",1989-06-23,411348924,126.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}, {""iso...",Released,Have you ever danced with the devil in the pal...,Batman,7.0,2096
2877,11000000,"[{""id"": 27, ""name"": ""Horror""}, {""id"": 18, ""nam...",http://www.hostmovie.com/,1255,"[{""id"": 1261, ""name"": ""river""}, {""id"": 1880, ""...",ko,괴물,Gang-du is a dim-witted man working at his fat...,27.65527,"[{""name"": ""Cineclick Asia"", ""id"": 685}, {""name...","[{""iso_3166_1"": ""KR"", ""name"": ""South Korea""}]",2006-07-27,88489643,119.0,"[{""iso_639_1"": ""ko"", ""name"": ""\ud55c\uad6d\uc5...",Released,Monsters are real.,The Host,6.7,537
3647,0,"[{""id"": 18, ""name"": ""Drama""}]",,39269,"[{""id"": 4470, ""name"": ""punk""}, {""id"": 10183, ""...",en,Out of the Blue,Dennis Hopper is a hard-drinking truck driver ...,0.679351,"[{""name"": ""Robson Street"", ""id"": 71953}]","[{""iso_3166_1"": ""CA"", ""name"": ""Canada""}]",1980-05-01,0,94.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,A harrowing drama from the director of Easy Rider,Out of the Blue,6.5,17
3693,0,"[{""id"": 18, ""name"": ""Drama""}]",,10844,"[{""id"": 2658, ""name"": ""new zealand""}, {""id"": 3...",en,Out of the Blue,Ordinary people find extraordinary courage in ...,0.706355,[],"[{""iso_3166_1"": ""NZ"", ""name"": ""New Zealand""}]",2006-10-12,0,103.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,The true story of a small town massacre,Out of the Blue,5.9,18
4267,1377800,"[{""id"": 10751, ""name"": ""Family""}, {""id"": 12, ""...",,2661,"[{""id"": 339, ""name"": ""submarine""}, {""id"": 849,...",en,Batman,The Dynamic Duo faces four super-villains who ...,9.815394,"[{""name"": ""Twentieth Century Fox Film Corporat...","[{""iso_3166_1"": ""US"", ""name"": ""United States o...",1966-07-30,0,105.0,"[{""iso_639_1"": ""en"", ""name"": ""English""}]",Released,He's Here Big As Life In A Real Bat-Epic,Batman,6.1,203


### Merging the datasets

In [17]:
dataset2.rename(columns={'id': 'movie_id'},inplace = True)

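The best way to merge the two datasets is on the movie id, but the id column is named differently in the two tables (`movie_id` in the credits table, `id` in the movies table), so `id` is renamed to `movie_id` before merging.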

In [18]:
# The best way to merge the dataset is based on the id but the name of the same columns differ in the 2 tables
Mergeddataset = pd.merge(dataset,dataset2,on = "movie_id")

In [20]:
Mergeddataset.shape

(4803, 23)

In [21]:
Mergeddataset.columns

Index(['movie_id', 'title_x', 'cast', 'crew', 'budget', 'genres', 'homepage',
       'keywords', 'original_language', 'original_title', 'overview',
       'popularity', 'production_companies', 'production_countries',
       'release_date', 'revenue', 'runtime', 'spoken_languages', 'status',
       'tagline', 'title_y', 'vote_average', 'vote_count'],
      dtype='object')

In [22]:
# as both the dataset contains title hence its showing title_x and title_y
Mergeddataset.drop(columns = ["title_y"],inplace=True)
Mergeddataset.rename(columns = {"title_x":"title"},inplace=True)
Mergeddataset.columns

Index(['movie_id', 'title', 'cast', 'crew', 'budget', 'genres', 'homepage',
       'keywords', 'original_language', 'original_title', 'overview',
       'popularity', 'production_companies', 'production_countries',
       'release_date', 'revenue', 'runtime', 'spoken_languages', 'status',
       'tagline', 'vote_average', 'vote_count'],
      dtype='object')

##### Removing columns which are not necessary

#### We didn't include the numerical columns, as they can disturb the workflow
release_date, revenue, popularity, vote_average

#### Taking the following columns
genres, movie_id, keywords, title, overview, cast, crew (director)

In [24]:
Mergeddataset = Mergeddataset[["genres","movie_id","keywords","title","overview","cast","crew"]]

In [25]:
Mergeddataset.head()

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
0,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",19995,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...",Avatar,"In the 22nd century, a paraplegic Marine is di...","[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""credit_id"": ""52fe48009251416c750aca23"", ""de..."
1,"[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...",285,"[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...",Pirates of the Caribbean: At World's End,"Captain Barbossa, long believed to be dead, ha...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de..."
2,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",206647,"[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...",Spectre,A cryptic message from Bond’s past sends him o...,"[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de..."
3,"[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...",49026,"[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...",The Dark Knight Rises,Following the death of District Attorney Harve...,"[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de..."
4,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...",49529,"[{""id"": 818, ""name"": ""based on novel""}, {""id"":...",John Carter,"John Carter is a war-weary, former military ca...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


In [26]:
Mergeddataset.isnull().sum()

genres      0
movie_id    0
keywords    0
title       0
overview    3
cast        0
crew        0
dtype: int64

In [28]:
# Dropping the rows which don't have overview
Mergeddataset.dropna(inplace = True)

In [29]:
import ast
ast.literal_eval(Mergeddataset.iloc[0].genres)

[{'id': 28, 'name': 'Action'},
 {'id': 12, 'name': 'Adventure'},
 {'id': 14, 'name': 'Fantasy'},
 {'id': 878, 'name': 'Science Fiction'}]

In [30]:
def convert(obj):
    """Parse a stringified list of dicts and return every entry's "name" value.

    `obj` is a string containing a Python-style literal such as
    '[{"id": 28, "name": "Action"}]'; it is parsed with ast.literal_eval
    and the "name" of each dict is collected, in order, into a new list.
    """
    return [entry["name"] for entry in ast.literal_eval(obj)]

In [31]:
Mergeddataset["genres"] = Mergeddataset.genres.apply(convert)

In [32]:
Mergeddataset["keywords"] = Mergeddataset.keywords.apply(convert)

In [33]:
def convertCast(obj, limit=3):
    """Return the names of the first `limit` cast members listed in `obj`.

    Parameters
    ----------
    obj : str
        String containing a literal list of dicts, each with a "name" key
        (parsed with ast.literal_eval).
    limit : int, default 3
        Maximum number of names to return (expected to be non-negative).
        The default keeps the original "top 3 actors" behaviour.

    Returns
    -------
    list of str
        At most `limit` names, in the order they appear in `obj`.
    """
    cast_members = ast.literal_eval(obj)
    # Slice first so entries beyond the limit are never visited.
    return [member["name"] for member in cast_members[:limit]]

In [34]:
# Taking into considerations the top 3 actors only
Mergeddataset["cast"] = Mergeddataset.cast.apply(convertCast)

In [35]:
# Want to add only director name from the list of dictionary
ast.literal_eval(Mergeddataset.iloc[0].crew)

def convertCrew(obj):
    """Return the names of all crew members whose "job" is exactly "Director".

    `obj` is a string containing a literal list of dicts (parsed with
    ast.literal_eval); each dict is read for its "job" and "name" keys.
    """
    crew_members = ast.literal_eval(obj)
    return [member["name"] for member in crew_members if member["job"] == "Director"]

convertCrew(Mergeddataset.iloc[0].crew)

['James Cameron']

In [36]:
Mergeddataset["crew"] = Mergeddataset.crew.apply(convertCrew)

In [37]:
# Splitting Overview and taking only the important words into consideration
Mergeddataset["overview"] = Mergeddataset.overview.apply(lambda x: x.split())

In [38]:
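# Remove the spaces inside each multi-word entry so that every name/keyword becomes a single token,
# e.g. "Saksham Verma" -> "SakshamVerma". Otherwise it would be tokenized as "Saksham" and "Verma",
# and "Verma" would wrongly match a different person such as "Shubham Verma".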

def combineSpaceSep(obj):
    """Return a new list with every space removed from each string in `obj`.

    Strings are immutable, so str.replace returns a new string rather than
    modifying the element in place; the results must therefore be collected
    into a new list (the input list itself is left untouched).
    """
    return [item.replace(" ", "") for item in obj]
        
Mergeddataset[["genres","keywords","overview","cast","crew"]] = Mergeddataset[["genres","keywords","overview","cast","crew"]].applymap(lambda x:[i.replace(" ","") for i in x])

In [39]:
Mergeddataset.head()

Unnamed: 0,genres,movie_id,keywords,title,overview,cast,crew
0,"[Action, Adventure, Fantasy, ScienceFiction]",19995,"[cultureclash, future, spacewar, spacecolony, ...",Avatar,"[In, the, 22nd, century,, a, paraplegic, Marin...","[SamWorthington, ZoeSaldana, SigourneyWeaver]",[JamesCameron]
1,"[Adventure, Fantasy, Action]",285,"[ocean, drugabuse, exoticisland, eastindiatrad...",Pirates of the Caribbean: At World's End,"[Captain, Barbossa,, long, believed, to, be, d...","[JohnnyDepp, OrlandoBloom, KeiraKnightley]",[GoreVerbinski]
2,"[Action, Adventure, Crime]",206647,"[spy, basedonnovel, secretagent, sequel, mi6, ...",Spectre,"[A, cryptic, message, from, Bond’s, past, send...","[DanielCraig, ChristophWaltz, LéaSeydoux]",[SamMendes]
3,"[Action, Crime, Drama, Thriller]",49026,"[dccomics, crimefighter, terrorist, secretiden...",The Dark Knight Rises,"[Following, the, death, of, District, Attorney...","[ChristianBale, MichaelCaine, GaryOldman]",[ChristopherNolan]
4,"[Action, Adventure, ScienceFiction]",49529,"[basedonnovel, mars, medallion, spacetravel, p...",John Carter,"[John, Carter, is, a, war-weary,, former, mili...","[TaylorKitsch, LynnCollins, SamanthaMorton]",[AndrewStanton]


In [40]:
# Combining all the columns
Mergeddataset["tag"] = Mergeddataset["genres"]+Mergeddataset["keywords"]+Mergeddataset["overview"]+Mergeddataset["cast"]+ Mergeddataset["crew"]

In [41]:
# .copy() makes Finaldata an independent DataFrame rather than a selection of
# Mergeddataset, so assigning to its columns afterwards does not raise pandas'
# SettingWithCopyWarning.
Finaldata = Mergeddataset[["movie_id","title","tag"]].copy()
Finaldata.sample(5)

Unnamed: 0,movie_id,title,tag
54,105864,The Good Dinosaur,"[Adventure, Animation, Family, tyrannosaurusre..."
4619,118452,"First Love, Last Rites","[Drama, sex, misfit, boring, Joey, and, Sissel..."
161,205584,Gods of Egypt,"[Fantasy, egypt, underworld, fight, mythology,..."
3914,821,Judgment at Nuremberg,"[Drama, History, judge, concentrationcamp, wor..."
3187,12621,Hamlet 2,"[Comedy, Music, musicteacher, musical, theatre..."


In [42]:
# Joining all the tokens of each tag list into one space-separated string
Finaldata.tag = Finaldata.tag.apply(lambda x : " ".join(x))
Finaldata.sample(5)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


Unnamed: 0,movie_id,title,tag
2335,160588,Blue Jasmine,Comedy Drama sanfrancisco sistersisterrelation...
373,2067,Mission to Mars,ScienceFiction mars spacecraft spacetravel ali...
1047,10731,The Client,Drama Thriller Crime Mystery suicide brotherbr...
837,175574,Free Birds,Animation Comedy Family holiday thanksgiving f...
4598,14278,Murderball,Documentary paralympics wheelchair sport rugby...


### Vectorization + Stop word removal + Rootwords/Stemming

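##### Stop words:
* Very common words such as is, am, are, was, were, etc. They carry little meaning on their own and are removed before vectorization.

##### Root words / Stemming:
* Reduce each word to its root form so that variants count as the same token, e.g. loves, loving --> love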

In [61]:
# 5000 was achieved via hyperparameter tuning and stopwords are removed
from sklearn.feature_extraction.text import CountVectorizer
from nltk.stem.porter import PorterStemmer

# Stem the tags *before* fitting the vectorizer. Fitting on the raw tags would
# build the vocabulary from unstemmed words (e.g. "ability"/"abilities" as two
# separate features), and stemming the column afterwards would have no effect
# on the already-computed count matrix.
_tag_stemmer = PorterStemmer()
stemmed_tags = Finaldata['tag'].apply(
    lambda text: " ".join(_tag_stemmer.stem(word) for word in text.split())
)

cv = CountVectorizer(max_features=5000, stop_words='english')

# fit_transform gives a sparse matrix; .toarray() converts it to a dense array
vector = cv.fit_transform(stemmed_tags).toarray()

In [62]:
# Here each row is one movie's vector of word counts over the vocabulary kept by CountVectorizer (at most 5000 terms)
vector

array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [63]:
cv.get_feature_names()

['000',
 '007',
 '10',
 '100',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '17th',
 '18',
 '18th',
 '18thcenturi',
 '19',
 '1910',
 '1920',
 '1930',
 '1940',
 '1944',
 '1950',
 '1950s',
 '1960',
 '1960s',
 '1970',
 '1970s',
 '1971',
 '1974',
 '1976',
 '1980',
 '1985',
 '1990',
 '1999',
 '19th',
 '19thcenturi',
 '20',
 '200',
 '2003',
 '2009',
 '20th',
 '21st',
 '23',
 '24',
 '25',
 '30',
 '300',
 '3d',
 '40',
 '50',
 '500',
 '60',
 '70',
 '80',
 'aaron',
 'aaroneckhart',
 'abandon',
 'abduct',
 'abigailbreslin',
 'abil',
 'abl',
 'aboard',
 'abov',
 'abu',
 'academ',
 'academi',
 'accept',
 'access',
 'accid',
 'accident',
 'acclaim',
 'accompani',
 'accomplish',
 'account',
 'accu',
 'ace',
 'achiev',
 'acquaint',
 'act',
 'action',
 'actionhero',
 'activ',
 'activist',
 'activities',
 'actor',
 'actress',
 'actual',
 'ad',
 'adam',
 'adamsandl',
 'adamshankman',
 'adapt',
 'add',
 'addict',
 'adjust',
 'admir',
 'admit',
 'adolesc',
 'adopt',
 'ador',
 'adrienbrodi',
 'adult',


In [64]:
from nltk.stem.porter import PorterStemmer
ps = PorterStemmer()

# This method helps in applying ps.stem(word) to each element of the sentence
def stemming(text):
    """Stem every whitespace-separated word of `text` with the notebook-level stemmer `ps`.

    The stemmed words are joined back together with single spaces and returned
    as one string.
    """
    return " ".join(ps.stem(word) for word in text.split())

In [65]:
Finaldata.tag = Finaldata.tag.apply(stemming)

In [66]:
Finaldata.tag

0       action adventur fantasi sciencefict culturecla...
1       adventur fantasi action ocean drugabu exoticis...
2       action adventur crime spi basedonnovel secreta...
3       action crime drama thriller dccomic crimefight...
4       action adventur sciencefict basedonnovel mar m...
                              ...                        
4798    action crime thriller unitedstates–mexicobarri...
4799    comedi romanc A newlyw couple' honeymoon is up...
4800    comedi drama romanc tvmovi date loveatfirstsig...
4801    when ambiti new york attorney sam is sent to s...
4802    documentari obsess camcord crush dreamgirl eve...
Name: tag, Length: 4800, dtype: object

In [67]:
cv.get_feature_names()

['000',
 '007',
 '10',
 '100',
 '11',
 '12',
 '13',
 '14',
 '15',
 '16',
 '17',
 '17th',
 '18',
 '18th',
 '18thcenturi',
 '19',
 '1910',
 '1920',
 '1930',
 '1940',
 '1944',
 '1950',
 '1950s',
 '1960',
 '1960s',
 '1970',
 '1970s',
 '1971',
 '1974',
 '1976',
 '1980',
 '1985',
 '1990',
 '1999',
 '19th',
 '19thcenturi',
 '20',
 '200',
 '2003',
 '2009',
 '20th',
 '21st',
 '23',
 '24',
 '25',
 '30',
 '300',
 '3d',
 '40',
 '50',
 '500',
 '60',
 '70',
 '80',
 'aaron',
 'aaroneckhart',
 'abandon',
 'abduct',
 'abigailbreslin',
 'abil',
 'abl',
 'aboard',
 'abov',
 'abu',
 'academ',
 'academi',
 'accept',
 'access',
 'accid',
 'accident',
 'acclaim',
 'accompani',
 'accomplish',
 'account',
 'accu',
 'ace',
 'achiev',
 'acquaint',
 'act',
 'action',
 'actionhero',
 'activ',
 'activist',
 'activities',
 'actor',
 'actress',
 'actual',
 'ad',
 'adam',
 'adamsandl',
 'adamshankman',
 'adapt',
 'add',
 'addict',
 'adjust',
 'admir',
 'admit',
 'adolesc',
 'adopt',
 'ador',
 'adrienbrodi',
 'adult',


In [70]:
# In high-dimensional spaces Euclidean distance is not a very good measure, so cosine similarity is used instead
similarity = cosine_similarity(vector)

In [71]:
similarity

array([[1.        , 0.08346223, 0.0860309 , ..., 0.04499213, 0.        ,
        0.        ],
       [0.08346223, 1.        , 0.06063391, ..., 0.02378257, 0.        ,
        0.02615329],
       [0.0860309 , 0.06063391, 1.        , ..., 0.02451452, 0.        ,
        0.        ],
       ...,
       [0.04499213, 0.02378257, 0.02451452, ..., 1.        , 0.03962144,
        0.04229549],
       [0.        , 0.        , 0.        , ..., 0.03962144, 1.        ,
        0.08714204],
       [0.        , 0.02615329, 0.        , ..., 0.04229549, 0.08714204,
        1.        ]])

In [75]:
Finaldata[Finaldata['title'] == 'The Lego Movie'].index[0]

744

In [78]:
# enumerate() yields (row position, similarity score) pairs; key = lambda x: x[1]
# sorts on the score (element 1), not on the position (element 0).
def recommend(movie, top_n=5):
    """Print the titles of the movies most similar to `movie`.

    Parameters
    ----------
    movie : str
        Exact title to look up in Finaldata['title']. If several rows share
        the title, the first one is used.
    top_n : int, default 5
        Number of recommendations to print.

    Raises
    ------
    IndexError
        If no row of Finaldata has this title (the same exception type the
        previous unguarded `.index[0]` lookup raised, now with a clear message).
    """
    # `similarity` is addressed by row *position*, whereas Finaldata.index holds
    # *labels* that have gaps because rows with missing values were dropped.
    # Using a label as a row number would pick the wrong movie (or run past the
    # end of the matrix), so locate the row by position instead.
    matches = np.flatnonzero((Finaldata['title'] == movie).values)
    if matches.size == 0:
        raise IndexError("movie {!r} not found in Finaldata['title']".format(movie))
    position = int(matches[0])

    ranked = sorted(enumerate(similarity[position]), reverse=True, key=lambda x: x[1])
    # Exclude the queried movie explicitly instead of assuming it is ranked first
    # (another movie can tie with it on score).
    closest = [pos for pos, _score in ranked if pos != position][:top_n]
    for pos in closest:
        print(Finaldata.iloc[pos].title)

In [79]:
recommend('Gandhi')

Gandhi, My Father
Guiana 1838
The Wind That Shakes the Barley
Mr. Turner
A Passage to India
