In [1]:
import pandas as pd
# Load the movie table from disk and preview the first rows of its
# free-text `overview` column, which the rest of the notebook vectorizes.
movies_df = pd.read_csv(filepath_or_buffer='movies.csv')
movies_df.loc[:, 'overview'].head()

0    In the 22nd century, a paraplegic Marine is di...
1    Captain Barbossa, long believed to be dead, ha...
2    A cryptic message from Bond’s past sends him o...
3    Following the death of District Attorney Harve...
4    John Carter is a war-weary, former military ca...
Name: overview, dtype: object

In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Replace missing overviews with empty strings first, so every row handed to
# the vectorizer is a str rather than NaN.
movies_df['overview'] = movies_df['overview'].fillna('')

# Build the TF-IDF document-term matrix, dropping scikit-learn's built-in
# English stop words. One row per movie, one column per vocabulary term.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(movies_df['overview'])
tfidf_matrix.shape

(4803, 20978)

In [3]:
from sklearn.metrics.pairwise import linear_kernel
# Pairwise similarity between every pair of movies. TfidfVectorizer
# L2-normalises each row by default, so the plain dot product computed by
# linear_kernel already equals the cosine similarity.
cos_similarity = linear_kernel(X=tfidf_matrix, Y=tfidf_matrix)
cos_similarity

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.02160533, 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.01488159, 0.        ,
        0.        ],
       ...,
       [0.        , 0.02160533, 0.01488159, ..., 1.        , 0.01609091,
        0.00701914],
       [0.        , 0.        , 0.        , ..., 0.01609091, 1.        ,
        0.01171696],
       [0.        , 0.        , 0.        , ..., 0.00701914, 0.01171696,
        1.        ]])

In [4]:
# Reverse lookup: movie title -> row label in movies_df.
# NOTE: assumes movies_df still has the default 0..n-1 index produced by
# read_csv, because these labels are later used as positions into
# cos_similarity and with .iloc.
indices = pd.Series(movies_df.index, index=movies_df['original_title'])

# Series.drop_duplicates() compares the *values* (the row labels, which are
# already unique), so it never removed repeated titles. Deduplicate on the
# index labels instead, keeping the first movie for each title, so that
# indices[title] always yields a single scalar.
indices = indices[~indices.index.duplicated(keep='first')]
indices

original_title
Avatar                                            0
Pirates of the Caribbean: At World's End          1
Spectre                                           2
The Dark Knight Rises                             3
John Carter                                       4
Spider-Man 3                                      5
Tangled                                           6
Avengers: Age of Ultron                           7
Harry Potter and the Half-Blood Prince            8
Batman v Superman: Dawn of Justice                9
Superman Returns                                 10
Quantum of Solace                                11
Pirates of the Caribbean: Dead Man's Chest       12
The Lone Ranger                                  13
Man of Steel                                     14
The Chronicles of Narnia: Prince Caspian         15
The Avengers                                     16
Pirates of the Caribbean: On Stranger Tides      17
Men in Black 3                                   18

In [5]:
def get_recommendations(title, cos_similarity=cos_similarity, top_n=10):
    """Return the titles of the movies most similar to ``title``.

    Parameters
    ----------
    title : str
        Label to look up in the module-level ``indices`` Series (exact match).
    cos_similarity : 2-D array-like, optional
        Pairwise similarity matrix; row ``i`` holds the scores of movie ``i``
        against every movie. Defaults to the matrix computed in this notebook.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        ``original_title`` values of the best-scoring movies, most similar
        first. The queried movie itself is never included.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    try:
        idx = indices[title]
    except KeyError:
        raise KeyError(f"No movie titled {title!r} found in `indices`") from None

    # A title that occurs more than once in `indices` comes back as a Series
    # of row labels; use the first match so `idx` is always a single row.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # Drop the queried movie by position rather than assuming it sorts first:
    # an empty overview has self-similarity 0, and another movie with an
    # identical overview can tie at 1.0 and sort ahead of it.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cos_similarity[idx])
        if position != idx
    ]
    # Stable sort: ties keep their original row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    movies_indices = [position for position, _ in sim_scores[:top_n]]
    return movies_df['original_title'].iloc[movies_indices]

In [6]:
# Example query: movies whose overviews are closest to Avatar's.
get_recommendations(title='Avatar')

3604                       Apollo 18
2130                    The American
634                       The Matrix
1341                Obitaemyy Ostrov
529                 Tears of the Sun
1610                           Hanna
311     The Adventures of Pluto Nash
847                         Semi-Pro
775                        Supernova
2628             Blood and Chocolate
Name: original_title, dtype: object

In [7]:
# Example query: movies whose overviews are closest to that of Newlyweds.
get_recommendations(title='Newlyweds')

3969                         Something Wicked
616                                     Ted 2
2689                       Our Family Wedding
1576                               Bride Wars
2290                             Just Married
504                   The Secret Life of Pets
866                        Bullet to the Head
4576                                Blue Ruin
3025    The Second Best Exotic Marigold Hotel
3155                              Melancholia
Name: original_title, dtype: object

In [8]:
# Example query: a franchise entry, to see whether its sibling films surface.
get_recommendations(title='The Hobbit: The Desolation of Smaug')

98                      The Hobbit: An Unexpected Journey
19              The Hobbit: The Battle of the Five Armies
262     The Lord of the Rings: The Fellowship of the Ring
329         The Lord of the Rings: The Return of the King
160                            How to Train Your Dragon 2
1306                          Dragon Nest: Warriors' Dawn
4612                                              Old Joy
1524                                George and the Dragon
2207                                            12 Rounds
292                                                Eragon
Name: original_title, dtype: object

In [9]:
# Example query: another franchise entry (The Amazing Spider-Man).
get_recommendations(title='The Amazing Spider-Man')

38       The Amazing Spider-Man 2
159                    Spider-Man
5                    Spider-Man 3
1572    Forgetting Sarah Marshall
30                   Spider-Man 2
572                          Hook
143                           Pan
4246                      Trucker
1179              I Love You, Man
641                      Due Date
Name: original_title, dtype: object