In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import missingno as msno
%matplotlib inline

In [3]:
# Location of the Netflix titles CSV. The original absolute path only exists on one
# machine, so allow it to be overridden through the NETFLIX_TITLES_CSV environment
# variable; when the variable is unset the original path is used unchanged.
import os

DATA_PATH = os.environ.get(
    "NETFLIX_TITLES_CSV",
    r"C:\Users\DELL\Desktop\Datasets\netflix_titles.csv",
)

netflix = pd.read_csv(DATA_PATH)
netflix.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,"August 14, 2020",2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,"December 23, 2016",2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,"December 20, 2018",2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow..."
3,s4,Movie,9,Shane Acker,"Elijah Wood, John C. Reilly, Jennifer Connelly...",United States,"November 16, 2017",2009,PG-13,80 min,"Action & Adventure, Independent Movies, Sci-Fi...","In a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,Robert Luketic,"Jim Sturgess, Kevin Spacey, Kate Bosworth, Aar...",United States,"January 1, 2020",2008,PG-13,123 min,Dramas,A brilliant group of students become card-coun...


## Movie Recommendation System

Content-based movie recommendations can be generated from the following columns of the dataframe:

1. Movie description 
2. Category in which the movie has been listed 

In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF vectoriser that will encode the combined text of each title.
# stop_words='english' makes it ignore scikit-learn's built-in English stop-word list.
tfidf = TfidfVectorizer(stop_words='english')

In [46]:
# combined_features builds the text used for similarity from a single row by
# merging its description and listed_in (category) values.

def combined_features(row):
    """Return the row's description and listed_in text joined by a single space.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row as a pandas Series)
        Must provide the keys 'description' and 'listed_in'.

    Returns
    -------
    str
        'description listed_in'. A missing value (None / NaN) is left out
        instead of being stringified, so the literal token 'nan' never ends up
        in the text that is later vectorised.
    """
    parts = (row['description'], row['listed_in'])
    return ' '.join(str(part) for part in parts if not pd.isna(part))


# Add the combined_features column to the original dataframe.
# Null description / listed_in values are replaced with an empty string *before*
# the rows are combined, so a missing value can never reach the text as the
# literal token 'nan'. (The previous standalone `.fillna('')` call discarded its
# result and ran after the values were already strings, so it had no effect.)
netflix['combined_features'] = (
    netflix[['description', 'listed_in']]
    .fillna('')
    .apply(combined_features, axis=1)
)

netflix['combined_features'].iloc[0]  # Displaying the first value of the column

'In a future where the elite inhabit an island paradise far from the crowded slums, you get one chance to join the 3% saved from squalor. International TV Shows, TV Dramas, TV Sci-Fi & Fantasy'

In [47]:
netflix.head(3) #Preview the first three rows; combined_features is now a column of the original dataframe.

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,Movie_release_type,Month,combined_features
0,s1,TV Show,3%,,"João Miguel, Bianca Comparato, Michel Gomes, R...",Brazil,2020-08-14,2020,TV-MA,4 Seasons,"International TV Shows, TV Dramas, TV Sci-Fi &...",In a future where the elite inhabit an island ...,New,8,In a future where the elite inhabit an island ...
1,s2,Movie,7:19,Jorge Michel Grau,"Demián Bichir, Héctor Bonilla, Oscar Serrano, ...",Mexico,2016-12-23,2016,TV-MA,93 min,"Dramas, International Movies",After a devastating earthquake hits Mexico Cit...,New,12,After a devastating earthquake hits Mexico Cit...
2,s3,Movie,23:59,Gilbert Chan,"Tedd Chan, Stella Chung, Henley Hii, Lawrence ...",Singapore,2018-12-20,2011,R,78 min,"Horror Movies, International Movies","When an army recruit is found dead, his fellow...",Early 21st century,12,"When an army recruit is found dead, his fellow..."


In [48]:
# Learn the vocabulary and IDF weights from combined_features and encode every
# title as one TF-IDF row vector.
tfidf_matrix = tfidf.fit_transform(netflix['combined_features'])  

# (number of titles, number of vocabulary terms)
tfidf_matrix.shape

(7787, 17905)

In [49]:
from sklearn.metrics.pairwise import linear_kernel
# Pairwise dot products between all TF-IDF rows. TfidfVectorizer L2-normalises each
# row by default (norm='l2'), so the dot product is the cosine similarity.
# NOTE(review): the result is a dense (n_titles x n_titles) array, which grows
# quadratically with the catalogue size - keep an eye on memory.
cosine_similarity = linear_kernel(tfidf_matrix, tfidf_matrix)

In [50]:
# Reverse lookup: title -> row label in `netflix`.
# Series.drop_duplicates() compares the *values* (the row labels, which are already
# unique), so it never removed repeated titles. De-duplicate on the index instead,
# keeping the first occurrence, so that indices[title] is always a single label.
indices = pd.Series(netflix.index, index=netflix['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [51]:
def get_recommendations(title, cosine_sim=cosine_similarity, top_n=10):
    """Return the titles most similar to `title`.

    Parameters
    ----------
    title : str
        Title to look up in the module-level `indices` Series.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row i holds the similarity of title i to
        every other title. Defaults to the `cosine_similarity` matrix as it
        existed when this function was defined (re-run this cell after
        recomputing the matrix).
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series
        Up to `top_n` entries of netflix['title'], most similar first. The
        requested title itself is never included.

    Raises
    ------
    KeyError
        If `title` is not present in `indices`.
    """
    index = indices[title]
    # A title that occurs more than once makes the lookup return a Series of
    # row labels; fall back to the first occurrence.
    if isinstance(index, pd.Series):
        index = index.iloc[0]

    # Pair each row position with its similarity to the requested title.
    # The requested title is excluded by position rather than by assuming it
    # sorts first: a tie at the top score (or an all-zero vector) could
    # otherwise drop a genuine match and return the title itself.
    # NOTE(review): this treats the row label as a position in cosine_sim and
    # in netflix, which holds for the default RangeIndex - confirm if the
    # dataframe is ever re-indexed.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[index])
        if position != index
    ]

    # Most similar first; sorted() is stable, so ties keep dataframe order.
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[:top_n]

    movie_indices = [position for position, _ in sim_scores]

    return netflix['title'].iloc[movie_indices]

In [52]:
# Ten titles whose description + category text is most similar to 'Sherlock Holmes'.
get_recommendations('Sherlock Holmes')

5775                                Spy Kids 3: Game Over
5651                                            Skiptrace
2963                                    In Family I Trust
3110                                         Jagga Jasoos
960                                            Black Rose
4636                                           One 2 Ka 4
1049                                          Borderliner
6432                                The Happytime Murders
5368                                               Samson
6048    The 101-Year-Old Man Who Skipped Out on the Bi...
Name: title, dtype: object

In [53]:
# Ten titles whose description + category text is most similar to 'Queen'.
get_recommendations('Queen')

7746            دفعة القاهرة
6676          The Mirror Boy
619                 Atypical
1263       Chalay Thay Saath
641           Away From Home
4400      Nappily Ever After
7117             To the Bone
3649              Lion Pride
346            Ahista Ahista
2969    In Search of Fellini
Name: title, dtype: object

In [57]:
# Ten titles whose description + category text is most similar to 'Django Unchained'.
get_recommendations('Django Unchained' )

6162                     The Bounty Hunter
5303                     Running for Grace
6434                     The Hateful Eight
5671                             Slow West
4928             Power Rangers Dino Charge
2399                           Ghost Rider
2991    Indiana Jones and the Last Crusade
6194                         The Cakemaker
2403                        Ghost Whispers
393                    Alice in Borderland
Name: title, dtype: object