In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

#'movies' dataframe
movies = pd.read_csv("C:\\Users\\Aisling\\Documents\\Python Exercises\\walt_disney_animated_movies_1990_present.csv")

In [2]:
movies['overview'][0]

'The R.A.S. agents, Miss Bianca and Bernard, race to Australia to save a little boy and a rare golden eagle from a murderous poacher.'

In [3]:
##Recommend movies based on plot##

tfidf = TfidfVectorizer(stop_words='english')

movies['overview'] = movies['overview'].fillna('')

overview_matrix = tfidf.fit_transform(movies['overview'])

overview_matrix.shape

(78, 856)

In [4]:
similarity_matrix = linear_kernel(overview_matrix, overview_matrix)
similarity_matrix

array([[1.        , 0.        , 0.        , ..., 0.01603651, 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.04522255, 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.01603651, 0.04522255, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [5]:
#movies index mapping

mapping = pd.Series(movies.index, index = movies['title'])
mapping

title
The Rescuers Down Under      0
Beauty and the Beast         1
Aladdin                      2
The Lion King                3
A Goofy Movie                4
                            ..
Onward                      73
Soul                        74
Raya and the Last Dragon    75
NaN                         76
NaN                         77
Length: 78, dtype: int64

In [6]:
def recommend_movies_basedon_plot(movie_input):
    
    movie_index = mapping[movie_input]
    
    #get similarity values with other movies
    similarity_score = list(enumerate(similarity_matrix[movie_index]))
    
    #sort in descending order the similarity score of the movie inputted with the other movies
    similarity_score = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    
    #get the scores of the 15 most similar movies
    similarity_score = similarity_score[1:15]
    
    #return movie names using the mapping series
    movie_indices = [i[0] for i in similarity_score]
    return (movies['title'].iloc[movie_indices])

In [7]:
recommend_movies_basedon_plot('Raya and the Last Dragon')

19               Monsters, Inc.
57          Monsters University
22                Spirited Away
48                      Tangled
1          Beauty and the Beast
6                     Toy Story
47          Tales from Earthsea
30              The Incredibles
65                 Finding Dory
74                         Soul
63            The Good Dinosaur
68                         Coco
69                Incredibles 2
70    Ralph Breaks the Internet
Name: title, dtype: object

In [8]:
recommend_movies_basedon_plot('Bolt')

65                 Finding Dory
4                 A Goofy Movie
30              The Incredibles
50                       Cars 2
40                       WALL-E
72                    Frozen II
25           Piglet's Big Movie
70    Ralph Breaks the Internet
17         Recess: School's Out
29            Home on the Range
9                         Mulan
0       The Rescuers Down Under
10                 A Bug's Life
69                Incredibles 2
Name: title, dtype: object

In [9]:
recommend_movies_basedon_plot('Wreck-It Ralph')

70    Ralph Breaks the Internet
58                       Planes
73                       Onward
3                 The Lion King
8                      Hercules
45    The Princess and the Frog
67                       Cars 3
31       Pooh's Heffalump Movie
1          Beauty and the Beast
26                 Finding Nemo
25           Piglet's Big Movie
54                        Brave
61                   Big Hero 6
33                      Valiant
Name: title, dtype: object

In [10]:
recommend_movies_basedon_plot('Tangled')

12                      Tarzan
30             The Incredibles
27                Brother Bear
65                Finding Dory
75    Raya and the Last Dragon
72                   Frozen II
63           The Good Dinosaur
67                      Cars 3
58                      Planes
62                  Inside Out
52                    Arrietty
20        Return to Never Land
22               Spirited Away
54                       Brave
Name: title, dtype: object

In [11]:
recommend_movies_basedon_plot('Zootopia')

29              Home on the Range
62                     Inside Out
19                 Monsters, Inc.
0         The Rescuers Down Under
1            Beauty and the Beast
2                         Aladdin
3                   The Lion King
4                   A Goofy Movie
5                      Pocahontas
6                       Toy Story
7     The Hunchback of Notre Dame
8                        Hercules
9                           Mulan
10                   A Bug's Life
Name: title, dtype: object

In [12]:
##Recommend movies based on director##

tfidf = TfidfVectorizer(stop_words='english')

movies['Directed by'] = movies['Directed by'].fillna('')

dir_matrix = tfidf.fit_transform(movies['Directed by'])

dir_matrix.shape

(78, 118)

In [13]:
similarity_matrix_dir = linear_kernel(dir_matrix, dir_matrix)
similarity_matrix_dir

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [14]:
def recommend_movies_same_director(movie_input):
    
    #use mapping from earlier in code
    movie_index = mapping[movie_input]
    
    #get movies with the same director
    similarity_score_dir = list(enumerate(similarity_matrix_dir[movie_index]))
    
    #sort in descending order the similarity score of the movie inputted with the other movies
    similarity_score_dir = sorted(similarity_score_dir, key=lambda x: x[1], reverse=True)
    
    #get the scores of the 5 movies with same director
    similarity_score_dir = similarity_score_dir[1:5]
    
    #return movie names using the mapping series
    movie_indices = [i[0] for i in similarity_score_dir]
    return (movies['title'].iloc[movie_indices])

In [15]:
recommend_movies_same_director('Tangled')

41                       Bolt
64                   Zootopia
0     The Rescuers Down Under
1        Beauty and the Beast
Name: title, dtype: object

In [16]:
recommend_movies_same_director('Toy Story')

10    A Bug's Life
13     Toy Story 2
36            Cars
50          Cars 2
Name: title, dtype: object

In [17]:
recommend_movies_same_director('Ponyo')

32       Howl's Moving Castle
43                      Ponyo
47        Tales from Earthsea
0     The Rescuers Down Under
Name: title, dtype: object

In [18]:
recommend_movies_same_director('Monsters, Inc.')

42                         Up
62                 Inside Out
74                       Soul
0     The Rescuers Down Under
Name: title, dtype: object