In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
## loading the data
movie_data=pd.read_csv('netfilx_datasets.csv')

In [5]:
movie_data

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...
...,...,...,...,...,...,...,...,...,...,...,...,...
8802,s8803,Movie,Zodiac,David Fincher,"Mark Ruffalo, Jake Gyllenhaal, Robert Downey J...",United States,"November 20, 2019",2007,R,158 min,"Cult Movies, Dramas, Thrillers","A political cartoonist, a crime reporter and a..."
8803,s8804,TV Show,Zombie Dumb,,,,"July 1, 2019",2018,TV-Y7,2 Seasons,"Kids' TV, Korean TV Shows, TV Comedies","While living alone in a spooky town, a young g..."
8804,s8805,Movie,Zombieland,Ruben Fleischer,"Jesse Eisenberg, Woody Harrelson, Emma Stone, ...",United States,"November 1, 2019",2009,R,88 min,"Comedies, Horror Movies",Looking to survive in a world taken over by zo...
8805,s8806,Movie,Zoom,Peter Hewitt,"Tim Allen, Courteney Cox, Chevy Chase, Kate Ma...",United States,"January 11, 2020",2006,PG,88 min,"Children & Family Movies, Comedies","Dragged from civilian life, a former superhero..."


In [6]:
## number of rows and columns in the data frame
movie_data.shape

(8807, 12)

In [7]:
movie_data.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [8]:
## select the relevant features for recommendation
selected_features=['type','title', 'director', 'cast','rating','description']
print(selected_features)

['type', 'title', 'director', 'cast', 'rating', 'description']


In [9]:
## replace null values with null string
for feature in selected_features:
  movie_data[feature]=movie_data[feature].fillna('')

In [10]:
## combining all the selected features
combined_features=movie_data['type']+''+movie_data['title']+''+movie_data['director']+''+movie_data['cast']+''+movie_data['rating']+''+movie_data['description']

In [11]:
print(combined_features)

0       MovieDick Johnson Is DeadKirsten JohnsonPG-13A...
1       TV ShowBlood & WaterAma Qamata, Khosi Ngema, G...
2       TV ShowGanglandsJulien LeclercqSami Bouajila, ...
3       TV ShowJailbirds New OrleansTV-MAFeuds, flirta...
4       TV ShowKota FactoryMayur More, Jitendra Kumar,...
                              ...                        
8802    MovieZodiacDavid FincherMark Ruffalo, Jake Gyl...
8803    TV ShowZombie DumbTV-Y7While living alone in a...
8804    MovieZombielandRuben FleischerJesse Eisenberg,...
8805    MovieZoomPeter HewittTim Allen, Courteney Cox,...
8806    MovieZubaanMozez SinghVicky Kaushal, Sarah-Jan...
Length: 8807, dtype: object


In [12]:
## converting the text data to feature vectors
vectorizer=TfidfVectorizer()

In [13]:
feature_vectors=vectorizer.fit_transform(combined_features)

In [14]:
print(feature_vectors)

<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 348219 stored elements and shape (8807, 72100)>
  Coords	Values
  (0, 41364)	0.27640862223414514
  (0, 29557)	0.32937188979611814
  (0, 28348)	0.09102204735822172
  (0, 14972)	0.27640862223414514
  (0, 29574)	0.2644734947218758
  (0, 46)	0.22622826034777613
  (0, 25786)	0.08117197891748024
  (0, 19929)	0.1273878654442927
  (0, 45082)	0.23953273458648947
  (0, 64622)	0.0856494898999822
  (0, 18497)	0.17175465419639294
  (0, 46466)	0.049263056281837275
  (0, 26208)	0.13275354734365372
  (0, 34071)	0.09819228704766944
  (0, 20384)	0.18165893194583943
  (0, 31698)	0.2213109153974224
  (0, 61678)	0.21912949934054576
  (0, 15006)	0.14100000886217096
  (0, 27695)	0.05699968535675494
  (0, 28212)	0.24407025947593208
  (0, 3465)	0.04546220759044402
  (0, 12837)	0.24407025947593208
  (0, 68912)	0.17832302884865828
  (0, 65228)	0.04633920333080741
  (0, 25664)	0.12921317207313707
  :	:
  (8806, 1428)	0.09717690671208404
  (8806, 30809)

In [15]:
## getting the similarity score using cosine similarity
similarity= cosine_similarity(feature_vectors)

In [16]:
print(similarity)

[[1.         0.00923361 0.01727876 ... 0.00868384 0.01282541 0.02728346]
 [0.00923361 1.         0.00157484 ... 0.00102281 0.         0.        ]
 [0.01727876 0.00157484 1.         ... 0.00411564 0.00594397 0.03111679]
 ...
 [0.00868384 0.00102281 0.00411564 ... 1.         0.05115852 0.00311158]
 [0.01282541 0.         0.00594397 ... 0.05115852 1.         0.00273854]
 [0.02728346 0.         0.03111679 ... 0.00311158 0.00273854 1.        ]]


In [17]:
print(similarity.shape)

(8807, 8807)


In [18]:
## getting the movie name from the user
movie_name=input("Enter your favourite movie name : ")

Enter your favourite movie name : blood and water


In [19]:
## creating a movie list with all the movie name given in the dataset
list_of_all_titles=movie_data['title'].tolist()
print(list_of_all_titles)



In [20]:
## finding the close match for the movie name given by the user
find_close_match=difflib.get_close_matches(movie_name,list_of_all_titles)
print(find_close_match)

['Blood & Water', 'Blood Father', 'Good on Paper']


In [21]:
close_match=find_close_match[0]
print(close_match)

Blood & Water


In [26]:
## finding the index of movie with title
index_of_the_movie=movie_data[movie_data.title==close_match].index[0]
print(index_of_the_movie)

1


In [27]:
## getting a list of similar movies
similarity_score=list(enumerate(similarity[index_of_the_movie]))## enumerate is used to run a loop over a list
print(similarity_score)

[(0, np.float64(0.009233611211584999)), (1, np.float64(1.0)), (2, np.float64(0.0015748357183453766)), (3, np.float64(0.01388800259925696)), (4, np.float64(0.002660279300497351)), (5, np.float64(0.009109401630428324)), (6, np.float64(0.01388373187379412)), (7, np.float64(0.003537079537081358)), (8, np.float64(0.001927085714962281)), (9, np.float64(0.008168387343405543)), (10, np.float64(0.0033782952422368217)), (11, np.float64(0.0016037506501811434)), (12, np.float64(0.015928802100363296)), (13, np.float64(0.009001382422662702)), (14, np.float64(0.0023319764886128268)), (15, np.float64(0.018708366981099414)), (16, np.float64(0.01201021755304195)), (17, np.float64(0.00562240834542503)), (18, np.float64(0.012423434649572272)), (19, np.float64(0.007610711063634698)), (20, np.float64(0.0020680534755802937)), (21, np.float64(0.002178346696649292)), (22, np.float64(0.003866489468193109)), (23, np.float64(0.020742312802864524)), (24, np.float64(0.0)), (25, np.float64(0.007164784661745988)), (2

In [28]:
len(similarity_score)

8807

In [29]:
## sorting the movies based on their similarity score
sorted_similar_movies=sorted(similarity_score,key=lambda x:x[1],reverse=True) ## x=similarity_score, x[1]=similarity_score_value
print(sorted_similar_movies)

[(1, np.float64(1.0)), (1514, np.float64(0.09122532808000564)), (1593, np.float64(0.06562111746884386)), (5943, np.float64(0.05723756019457152)), (4271, np.float64(0.05461383143076959)), (1884, np.float64(0.05218495204289411)), (4258, np.float64(0.052090710759035694)), (5854, np.float64(0.051400757676430125)), (4692, np.float64(0.0502074700614505)), (6405, np.float64(0.048606458066621755)), (4176, np.float64(0.04827430916925888)), (7489, np.float64(0.04791548137893476)), (6238, np.float64(0.04786146969495543)), (4475, np.float64(0.04741086702538868)), (4202, np.float64(0.04668147212700582)), (7262, np.float64(0.04627082275761468)), (4123, np.float64(0.04619411804053482)), (5344, np.float64(0.04608070622235162)), (4487, np.float64(0.04604268003553155)), (4373, np.float64(0.0460419242379595)), (3220, np.float64(0.04504643103553803)), (4285, np.float64(0.044978619276932655)), (6277, np.float64(0.04486852278260492)), (715, np.float64(0.044280999014774135)), (1905, np.float64(0.043104656829

In [30]:
## print the name of similar movies based on the index
print("movies suggested for you : \n")

i=1
for movie in sorted_similar_movies:
  index=movie[0]
  title_from_index=movie_data[movie_data.index==index]['title'].values[0]
  if(i<20):
    print(i,'.',title_from_index)
    i+=1

movies suggested for you : 

1 . Blood & Water
2 . Diamond City
3 . Kings of Jo'Burg
4 . Adam: His Song Continues
5 . Lion Pride
6 . Walk Away from Love
7 . The Birth Reborn 3
8 . Frank and Cindy
9 . Mom
10 . Calico Critters: A Town of Dreams
11 . FYRE: The Greatest Party That Never Happened
12 . Monster High 13 Wishes
13 . Barbie Star Light Adventure
14 . Shirkers
15 . Horrid Henry
16 . Lady in the Water
17 . ReMastered: The Two Killings of Sam Cooke
18 . Message from the King
19 . Accidentally in Love


**Movie Recommendation System**

In [31]:
movie_name=input("Enter your favourite movie name : ")

list_of_all_titles=movie_data['title'].tolist()

find_close_match=difflib.get_close_matches(movie_name,list_of_all_titles)

close_match=find_close_match[0]

index_of_the_movie=movie_data[movie_data.title==close_match].index[0]

similarity_score=list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies=sorted(similarity_score,key=lambda x:x[1],reverse=True)

print("movies suggested for you : \n")

i=1
for movie in sorted_similar_movies:
  index=movie[0]
  title_from_index=movie_data[movie_data.index==index]['title'].values[0]
  if(i<20):
    print(i,'.',title_from_index)
    i+=1

Enter your favourite movie name : kota factory
movies suggested for you : 

1 . Kota Factory
2 . Girls Hostel
3 . Betaal
4 . Chaman Bahaar
5 . Yeh Meri Family
6 . Sat Sri Akal
7 . The Creative Indians
8 . Code 8
9 . Once Upon a Time in Mumbai Dobaara!
10 . The Great Gambler
11 . Chef's Table
12 . Guru Aur Bhole
13 . Mahi NRI
14 . Engineering Girls
15 . English Babu Desi Mem
16 . Saheb Biwi Aur Gangster Returns
17 . Paharganj
18 . Paan Singh Tomar
19 . Jagat


this is the top 20 similar movies as kota factory