# Netflix Movies Recommender System using Content Based Filtering

In [3]:
import numpy as np
import numpy.ma as ma
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

In [4]:
df=pd.read_csv('n_movies.csv')
df.head()

Unnamed: 0,title,year,certificate,duration,genre,rating,description,stars,votes
0,Cobra Kai,(2018– ),TV-14,30 min,"Action, Comedy, Drama",8.5,Decades after their 1984 All Valley Karate Tou...,"['Ralph Macchio, ', 'William Zabka, ', 'Courtn...",177031
1,The Crown,(2016– ),TV-MA,58 min,"Biography, Drama, History",8.7,Follows the political rivalries and romance of...,"['Claire Foy, ', 'Olivia Colman, ', 'Imelda St...",199885
2,Better Call Saul,(2015–2022),TV-MA,46 min,"Crime, Drama",8.9,The trials and tribulations of criminal lawyer...,"['Bob Odenkirk, ', 'Rhea Seehorn, ', 'Jonathan...",501384
3,Devil in Ohio,(2022),TV-MA,356 min,"Drama, Horror, Mystery",5.9,When a psychiatrist shelters a mysterious cult...,"['Emily Deschanel, ', 'Sam Jaeger, ', 'Gerardo...",9773
4,Cyberpunk: Edgerunners,(2022– ),TV-MA,24 min,"Animation, Action, Adventure",8.6,A Street Kid trying to survive in a technology...,"['Zach Aguilar, ', 'Kenichiro Ohashi, ', 'Emi ...",15413


In [6]:
df.isna().sum()

title             0
year            527
certificate    3453
duration       2036
genre            73
rating         1173
description       0
stars             0
votes          1173
dtype: int64

In [7]:
df.dropna(inplace=True)

In [11]:
df.duplicated().sum()

np.int64(0)

In [12]:
df.head()

Unnamed: 0,title,year,certificate,duration,genre,rating,description,stars,votes
0,Cobra Kai,(2018– ),TV-14,30 min,"Action, Comedy, Drama",8.5,Decades after their 1984 All Valley Karate Tou...,"['Ralph Macchio, ', 'William Zabka, ', 'Courtn...",177031
1,The Crown,(2016– ),TV-MA,58 min,"Biography, Drama, History",8.7,Follows the political rivalries and romance of...,"['Claire Foy, ', 'Olivia Colman, ', 'Imelda St...",199885
2,Better Call Saul,(2015–2022),TV-MA,46 min,"Crime, Drama",8.9,The trials and tribulations of criminal lawyer...,"['Bob Odenkirk, ', 'Rhea Seehorn, ', 'Jonathan...",501384
3,Devil in Ohio,(2022),TV-MA,356 min,"Drama, Horror, Mystery",5.9,When a psychiatrist shelters a mysterious cult...,"['Emily Deschanel, ', 'Sam Jaeger, ', 'Gerardo...",9773
4,Cyberpunk: Edgerunners,(2022– ),TV-MA,24 min,"Animation, Action, Adventure",8.6,A Street Kid trying to survive in a technology...,"['Zach Aguilar, ', 'Kenichiro Ohashi, ', 'Emi ...",15413


In [13]:
data=df[['title', 'description', 'votes', 'rating']]


In [14]:
data.head()

Unnamed: 0,title,description,votes,rating
0,Cobra Kai,Decades after their 1984 All Valley Karate Tou...,177031,8.5
1,The Crown,Follows the political rivalries and romance of...,199885,8.7
2,Better Call Saul,The trials and tribulations of criminal lawyer...,501384,8.9
3,Devil in Ohio,When a psychiatrist shelters a mysterious cult...,9773,5.9
4,Cyberpunk: Edgerunners,A Street Kid trying to survive in a technology...,15413,8.6


In [15]:
data.sort_values(by='rating', ascending=False)

Unnamed: 0,title,description,votes,rating
9444,BoJack Horseman,BoJack reconnects with faces from his past.,16066,9.9
8259,Dexter,"In the Season One finale, Dexter follows the c...",10604,9.6
8161,Avatar: The Last Airbender,The heroes work together to stop Azula's destr...,5221,9.6
8557,JoJo's Bizarre Adventure,In the aftermath of the battle between Risotto...,1442,9.6
8907,Avatar: The Last Airbender,As the Fire Nation continues its assault on th...,3953,9.6
...,...,...,...,...
3612,The Hype House,Get an inside look at social media's least tal...,2955,2.1
3357,Thomas & Friends: All Engines Go,"All aboard, everyone. It's all engines go as T...",604,2.1
6033,Hajwala: The Missing Engine,Khalid and Kehailan are two team leaders who a...,261,2.0
4987,Way of the Vampire,"After defeating Dracula, Van Helsing is grante...",1649,2.0


In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf=TfidfVectorizer(stop_words='english')
data['description']=data['description'].fillna(' ')
tfidf_matrix=tfidf.fit_transform(data['description'])
tfidf_matrix.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['description']=data['description'].fillna(' ')


(5754, 16423)

In [17]:
from sklearn.metrics.pairwise import linear_kernel

cosine_sim=linear_kernel(tfidf_matrix, tfidf_matrix)

In [18]:
indices=pd.Series(data.index, index=data['title']).drop_duplicates()

In [24]:
def get_recommendations(title, cosine_sim=cosine_sim):
    idx = indices[title]
    
    sim_scores = list(enumerate(cosine_sim[idx]))
    
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    sim_scores = sim_scores[1:11]
    
    movie_indices = [i[0] for i in sim_scores]
    
    return data['title'].iloc[movie_indices]

In [35]:
data.head()

Unnamed: 0,title,description,votes,rating
0,Cobra Kai,Decades after their 1984 All Valley Karate Tou...,177031,8.5
1,The Crown,Follows the political rivalries and romance of...,199885,8.7
2,Better Call Saul,The trials and tribulations of criminal lawyer...,501384,8.9
3,Devil in Ohio,When a psychiatrist shelters a mysterious cult...,9773,5.9
4,Cyberpunk: Edgerunners,A Street Kid trying to survive in a technology...,15413,8.6


In [36]:
get_recommendations("Better Call Saul")

211     El Camino: A Breaking Bad Movie
28                        Modern Family
9939                      Modern Family
436      A Series of Unfortunate Events
144                          Doc Martin
4210      Jimmy Carr: His Dark Material
722             I Am Not Okay with This
205                        White Collar
3087                          Irmandade
5793            Springsteen on Broadway
Name: title, dtype: object