### Import dependencies

In [2]:
import pandas as pd
import numpy as np
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

### Data Collection and Pre-Processing

In [3]:
#loading the data from the csv file to pandas dataframe
movies_data=pd.read_csv('netflix_titles.csv')

In [4]:
#printing the first 5 row of the dataframe
movies_data.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [5]:
#number of rows and columns in the dataframe
movies_data.shape

(8807, 12)

In [6]:

movies_data.drop(['show_id'], axis=1,inplace=True)
movies_data['show_id'] = range(0, len(movies_data))
movies_data['show_id'] = movies_data['show_id'].astype(int)


In [7]:
movies_data.head()

Unnamed: 0,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,show_id
0,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm...",0
1,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...",1
2,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,2
3,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...",3
4,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,4


In [8]:
#selecting the relevant features for recommendation

selected_features = ['type','title','director','cast','listed_in','description']
print(selected_features)

['type', 'title', 'director', 'cast', 'listed_in', 'description']


In [9]:
#replacing the null values with null strings

for feature in selected_features:
  movies_data[feature] = movies_data[feature].fillna('')

In [10]:
#combining all the selected features

combined_features = movies_data['type']+' '+movies_data['title']+' '+movies_data['director']+' '+movies_data['cast']+' '+movies_data['listed_in']+' '+movies_data['description']

In [11]:
print(combined_features)

0       Movie Dick Johnson Is Dead Kirsten Johnson  Do...
1       TV Show Blood & Water  Ama Qamata, Khosi Ngema...
2       TV Show Ganglands Julien Leclercq Sami Bouajil...
3       TV Show Jailbirds New Orleans   Docuseries, Re...
4       TV Show Kota Factory  Mayur More, Jitendra Kum...
                              ...                        
8802    Movie Zodiac David Fincher Mark Ruffalo, Jake ...
8803    TV Show Zombie Dumb   Kids' TV, Korean TV Show...
8804    Movie Zombieland Ruben Fleischer Jesse Eisenbe...
8805    Movie Zoom Peter Hewitt Tim Allen, Courteney C...
8806    Movie Zubaan Mozez Singh Vicky Kaushal, Sarah-...
Length: 8807, dtype: object


In [12]:
#converting text data into feature vectors

vectorizer = TfidfVectorizer()

In [13]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [14]:
print(feature_vectors)

  (0, 22104)	0.2556859262996421
  (0, 15544)	0.15094741664682876
  (0, 6584)	0.1579628976262392
  (0, 47378)	0.12351771541799113
  (0, 20362)	0.1247288598319088
  (0, 47832)	0.044536801847971125
  (0, 50928)	0.1718942644051687
  (0, 9957)	0.23596062222387937
  (0, 2301)	0.043946082243071144
  (0, 22446)	0.23596062222387937
  (0, 21958)	0.0515613920271563
  (0, 11672)	0.13505627861763453
  (0, 44874)	0.21184856001311006
  (0, 16263)	0.17396482088700804
  (0, 27337)	0.09352851400781047
  (0, 20802)	0.12827463314809864
  (0, 33973)	0.047619874040973786
  (0, 14650)	0.16370752352904935
  (0, 47349)	0.07789891710835083
  (0, 32794)	0.2315738640888728
  (0, 15869)	0.12279723140676213
  (0, 20460)	0.07838138761190905
  (0, 3250)	0.0860500304624441
  (0, 13057)	0.09426938750072429
  (0, 25371)	0.4197703771718649
  :	:
  (8806, 28782)	0.14360452251227612
  (8806, 24661)	0.14755462727478516
  (8806, 527)	0.09531000287048755
  (8806, 6698)	0.10874727805012618
  (8806, 51189)	0.09451431619617702
 

### Cosine Similarity

In [15]:
#getting similarity score useing cosine similarity

similarity = cosine_similarity(feature_vectors)

In [16]:
print(similarity)

[[1.00000000e+00 8.83517028e-03 1.75928996e-02 ... 9.08296903e-03
  1.28637404e-02 2.67691092e-02]
 [8.83517028e-03 1.00000000e+00 3.19758777e-02 ... 9.70620312e-04
  0.00000000e+00 2.58480494e-03]
 [1.75928996e-02 3.19758777e-02 1.00000000e+00 ... 5.10271618e-03
  9.66815781e-03 2.89455286e-02]
 ...
 [9.08296903e-03 9.70620312e-04 5.10271618e-03 ... 1.00000000e+00
  5.40989505e-02 5.59727737e-03]
 [1.28637404e-02 0.00000000e+00 9.66815781e-03 ... 5.40989505e-02
  1.00000000e+00 9.72197645e-03]
 [2.67691092e-02 2.58480494e-03 2.89455286e-02 ... 5.59727737e-03
  9.72197645e-03 1.00000000e+00]]


In [17]:
similarity.shape

(8807, 8807)

In [18]:
# getting the movie name from the user

movie_name = input('Enter your favourite movie name : ')

Enter your favourite movie name : baki


In [19]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movies_data['title'].tolist()
print(list_of_all_titles)



In [20]:
# finding the close match for the movies name given by the user

find_close_match = difflib.get_close_matches(movie_name,list_of_all_titles)
print (find_close_match)

['Takki', 'Sakhi', 'Kalki']


In [21]:
close_match = find_close_match[0]
print(close_match)

Takki


In [22]:
# finding the index of the movie with title

index_of_the_movie = movies_data[movies_data.title == close_match]['show_id'].values[0]
print(index_of_the_movie)

2125


In [23]:
#getting a list of similar movies

similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

[(0, 0.031019236421534847), (1, 0.02221980252508847), (2, 0.04144397735375523), (3, 0.020598790644138564), (4, 0.046276317215720064), (5, 0.028088639993409277), (6, 0.0071649805448052864), (7, 0.008458564087183339), (8, 0.03363584837111221), (9, 0.006115783848922149), (10, 0.0370502492070419), (11, 0.02182636770971823), (12, 0.012334299482623514), (13, 0.0055466915023838536), (14, 0.0382588473441081), (15, 0.02582708432348197), (16, 0.0059806433894067125), (17, 0.037854365892767194), (18, 0.00201397995664909), (19, 0.035274335431914), (20, 0.03830277724115968), (21, 0.029232902794772714), (22, 0.012506338233657502), (23, 0.007439233594395506), (24, 0.005302624963843397), (25, 0.038097179550639744), (26, 0.0037158215617506666), (27, 0.004482428035060989), (28, 0.0069183084178307515), (29, 0.004372968933985868), (30, 0.009961733621848063), (31, 0.016358372730001257), (32, 0.032543432223191095), (33, 0.03756163685580366), (34, 0.031388421921285374), (35, 0.007424580771562667), (36, 0.0036

In [24]:
# sorting the movies based on similarity score

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
print(sorted_similar_movies)

[(2125, 1.0000000000000002), (5804, 0.1428491119337441), (2877, 0.11813334244229912), (1569, 0.10913815148201586), (8741, 0.10898832083548854), (1038, 0.10846612673498568), (1746, 0.0880980207426632), (4012, 0.08691485410745015), (3245, 0.08183548641649298), (5030, 0.08181105607982428), (366, 0.08122749257470127), (1229, 0.0810492163199209), (4265, 0.07806712061465146), (1623, 0.0773514502873715), (368, 0.07711489056794545), (2441, 0.0742290282196945), (1638, 0.07372863572796752), (2699, 0.07229795643627145), (7593, 0.07109790396405617), (4199, 0.06902057016337196), (8199, 0.06670485186916035), (3968, 0.06648970209726371), (3824, 0.06645736974673935), (4035, 0.0664350175797549), (5700, 0.06417696118063645), (3328, 0.06410999858564208), (8255, 0.06395565052990305), (95, 0.06389515169503265), (538, 0.06387600310988), (1058, 0.06377769066598457), (4257, 0.06303842070124442), (3400, 0.06295573927610093), (2402, 0.0628989310720302), (912, 0.06287720195257676), (6508, 0.06282424570888702), (

In [25]:
# print the name of similar movies based on index

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_form_index = movies_data[movies_data.show_id==index]['title'].values[0]
  if (i<11):
    print(i, '.',title_form_index)
    i+=1

Movies suggested for you : 

1 . Takki
2 . Saudi Arabia Uncovered
3 . Six Windows in the Desert
4 . Masameer Classics
5 . Wild Arabia
6 . Dancing Angels
7 . Can You Hear Me?
8 . My Pride
9 . Singapore Social
10 . Hitler's Circle of Evil


### Movie Recommendation System

In [26]:
movie_name = input('Enter your favourite movie name : ')

list_of_all_titles = movies_data['title'].tolist()

find_close_match = difflib.get_close_matches(movie_name,list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.title == close_match]['show_id'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_form_index = movies_data[movies_data.show_id==index]['title'].values[0]
  if (i<11):
    print(i, '.',title_form_index)
    i+=1

Enter your favourite movie name : Baki
Movies suggested for you : 

1 . Banyuki
2 . Seven Souls in the Skull Castle 2011
3 . Seven Souls in the Skull Castle: Season Wind
4 . Seven Souls in the Skull Castle: Season Moon Jogen
5 . Seven Souls in the Skull Castle: Season Bird
6 . Seven Souls in the Skull Castle: Season Moon Kagen
7 . SWORDGAI The Animation
8 . Seven Souls in the Skull Castle: Season Flower
9 . My Heroes Were Cowboys
10 . High & Low The Red Rain
