In [64]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

In [65]:
links_small = pd.read_csv('links_small.csv')
links_small = links_small[links_small['tmdbId'].notnull()]['tmdbId'].astype('int')

In [66]:
md = pd.read_csv('movies_metadata.csv', low_memory=False)
md = md.drop([19730, 29503, 35587])

In [67]:
md['id'] = md['id'].astype('int')

In [68]:
smd = md[md['id'].isin(links_small)]
smd.shape

(9099, 24)

# Movie Description based

In [69]:
smd['tagline'] = smd['tagline'].fillna('')
smd['description'] = smd['tagline'] + smd['overview']
smd['description'] = smd['description'].fillna('')

In [70]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(analyzer = 'word', ngram_range = (1,2), min_df = 0, stop_words = 'english')
tfidf_matrix = tf.fit_transform(smd['description'])

In [71]:
tfidf_matrix.shape

(9099, 269262)

In [72]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

In [73]:
cosine_sim[0]

array([1.        , 0.00680204, 0.        , ..., 0.        , 0.00344826,
       0.        ])

In [74]:
smd = smd.reset_index()
titles = smd['title']
indices = pd.Series(smd.index, index=smd['title'])

In [75]:
def get_recommendations(title):
    idx = indices[title]
    sim_scores  = list(enumerate(cosine_sim[idx]))
    sim_scores  = sorted(sim_scores, key=lambda x: x[1], reverse = True)
    sim_scores  = sim_scores[1:31]
    movie_indices = [i[0] for i in sim_scores]
    return titles.iloc[movie_indices]

In [76]:
get_recommendations('The Godfather').head(10)

973      The Godfather: Part II
8387                 The Family
3509                       Made
4196         Johnny Dangerously
5667                       Fury
29               Shanghai Triad
2412             American Movie
1582    The Godfather: Part III
2159              Summer of Sam
4221                    8 Women
Name: title, dtype: object

In [77]:
get_recommendations('The Dark Knight').head(10)

7931                      The Dark Knight Rises
132                              Batman Forever
1113                             Batman Returns
8227    Batman: The Dark Knight Returns, Part 2
7565                 Batman: Under the Red Hood
524                                      Batman
7901                           Batman: Year One
2579               Batman: Mask of the Phantasm
2696                                        JFK
8165    Batman: The Dark Knight Returns, Part 1
Name: title, dtype: object