In [5]:
import pandas as pd
import seaborn as sns 
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)
# Import Song Title Data Set.
movie  = pd.read_csv("movies.csv")
movie.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [6]:
new_movie = movie

new_movie.head(2)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy


In [7]:
# Null Values
new_movie.isnull().sum()

movieId    0
title      0
genres     0
dtype: int64

In [8]:
# Info
new_movie.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9742 entries, 0 to 9741
Data columns (total 3 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   movieId  9742 non-null   int64 
 1   title    9742 non-null   object
 2   genres   9742 non-null   object
dtypes: int64(1), object(2)
memory usage: 228.5+ KB


In [9]:
# Stats
new_movie.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
movieId,9742.0,42200.353623,52160.494854,1.0,3248.25,7300.0,76232.0,193609.0


In [10]:
# Data Processing
new_movie['genres'] = new_movie['genres'].apply(lambda x: ' '.join(sorted(x.split('|'))))
films = new_movie

In [11]:
# Drop Duplicates
films =  films.drop_duplicates()

In [12]:
films

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure Animation Children Comedy Fantasy
1,2,Jumanji (1995),Adventure Children Fantasy
2,3,Grumpier Old Men (1995),Comedy Romance
3,4,Waiting to Exhale (1995),Comedy Drama Romance
4,5,Father of the Bride Part II (1995),Comedy
...,...,...,...
9737,193581,Black Butler: Book of the Atlantic (2017),Action Animation Comedy Fantasy
9738,193583,No Game No Life: Zero (2017),Animation Comedy Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action Animation


In [13]:
# Import libraries
from sklearn.feature_extraction.text import TfidfVectorizer

tfv = TfidfVectorizer(stop_words = "english")

tfidf = tfv.fit_transform(films["genres"])

In [14]:
from sklearn.metrics.pairwise import linear_kernel
cosine_sim = linear_kernel(tfidf,tfidf)

In [15]:
# recommendations
def get_recommendations(title, cosine_sim=cosine_sim, df=films):
    idx = df[df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:11]
    movie_indices = [i[0] for i in sim_scores]
    return df['title'].iloc[movie_indices]

# Example
get_recommendations('No Game No Life: Zero (2017)')

4558    Triplets of Belleville, The (Les triplettes de...
4841                                    Cool World (1992)
7199                Mickey's Once Upon a Christmas (1999)
7368                   South Park: Imaginationland (2008)
8725                           Daddy, I'm A Zombie (2012)
8999                                     Anomalisa (2015)
9665                            Porky in Wackyland (1938)
9738                         No Game No Life: Zero (2017)
870                                   Lesson Faust (1994)
5842    Pom Poko (a.k.a. Raccoon War, The) (Heisei tan...
Name: title, dtype: object