In [7]:
# Netflix recommender system: content-based recommendations from the TF-IDF similarity of show descriptions

import pandas as pd #data handling
import numpy as np 
from sklearn.feature_extraction.text import TfidfVectorizer #to convert text to numbers
from sklearn.metrics.pairwise import cosine_similarity #to find similarity

#importing libraries

In [8]:
# Load the dataset

# Read the Netflix titles CSV into a DataFrame, then preview the first five rows.
df = pd.read_csv(filepath_or_buffer='/kaggle/input/netflix-shows/netflix_titles.csv')
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [9]:
# Clean the data

# One chained pass: discard rows with a missing title or description, keep a
# single row per (case-sensitive) title, and renumber the index from 0 so that
# index labels coincide with row positions.
df = (
    df.dropna(subset=['title', 'description'])
    .drop_duplicates(subset='title')
    .reset_index(drop=True)
)


In [10]:
# Turn each show description into a TF-IDF vector.
# stop_words='english' tells the vectorizer to ignore common English words
# such as "the" and "and".

tfidf = TfidfVectorizer(stop_words="english")
# Row i of tfidf_matrix is the vector for the description in row i of df.
tfidf_matrix = tfidf.fit_transform(raw_documents=df['description'])


In [11]:
# Compute pairwise similarity between all descriptions

# Called with a single matrix, cosine_similarity compares every row against
# every row of that same matrix, so cosine_sim[i][j] is the similarity between
# descriptions i and j.
cosine_sim = cosine_similarity(tfidf_matrix)


In [12]:
#recommendation function

# Case-insensitive lookup: normalized title -> row position in df (and hence
# row of the similarity matrix). Positions are generated with range() rather
# than taken from df.index so the mapping stays correct whatever df's index is.
indices = pd.Series(range(len(df)), index=df['title'].str.strip().str.lower())
# Titles are de-duplicated case-sensitively during cleaning, so two titles can
# still collide once normalized. Keep the first occurrence so every key maps
# to exactly one integer instead of a Series of several.
indices = indices[~indices.index.duplicated(keep='first')]

def get_recommendations(title, cosine_sim=cosine_sim, top_n=10):
    """Return the titles most similar to ``title``.

    Parameters
    ----------
    title : str
        Title to look up. Matching ignores case and surrounding whitespace.
    cosine_sim : 2-D array-like, optional
        Pairwise similarity matrix whose row/column order matches the rows
        of ``df``. Defaults to the matrix computed earlier in the notebook.
    top_n : int, optional
        Maximum number of recommendations to return (default 10).

    Returns
    -------
    list of str
        Up to ``top_n`` titles ordered from most to least similar; ties keep
        their original row order. The queried title itself is never included.
        If the title is unknown, the one-element list
        ``["Title not found in the database."]`` is returned.
    """
    not_found = ["Title not found in the database."]

    # Non-string input (e.g. None) cannot match any title.
    if not isinstance(title, str):
        return not_found

    key = title.strip().lower()
    if key not in indices.index:
        return not_found

    idx = int(indices[key])

    # Exclude the queried show by position, not by assuming it sorts first:
    # rows with identical descriptions tie with it, and a description made up
    # only of stop words has self-similarity 0, so slicing off element 0 could
    # drop a genuine neighbour and leave the queried title in the results.
    scored = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    # list.sort is stable, so equal scores stay in ascending row order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    # Clamp so a negative top_n yields an empty list rather than a tail slice.
    top_positions = [pos for pos, _ in scored[:max(top_n, 0)]]

    return df['title'].iloc[top_positions].tolist()


In [13]:
# Smoke test: look up recommendations for one known title

get_recommendations(title='Sherlock Holmes')


['Spy Kids 3: Game Over',
 'In Family I Trust',
 'Borderliner',
 'Skiptrace',
 'Monster High: Fright On!',
 'Jagga Jasoos',
 'Sprinter',
 'Truth or Dare',
 'Domino',
 'The 101-Year-Old Man Who Skipped Out on the Bill and Disappeared']