In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
df = pd.read_csv('/content/netflix_titles.csv.zip')
df.head()


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [4]:
# Drop missing descriptions
df = df.dropna(subset=['description'])

# Keep only 'title' and 'description'
df = df[['title', 'description']]

# Reset index for clean output
df = df.reset_index(drop=True)

# Preview
df.head()


Unnamed: 0,title,description
0,Dick Johnson Is Dead,"As her father nears the end of his life, filmm..."
1,Blood & Water,"After crossing paths at a party, a Cape Town t..."
2,Ganglands,To protect his family from a powerful drug lor...
3,Jailbirds New Orleans,"Feuds, flirtations and toilet talk go down amo..."
4,Kota Factory,In a city of coaching centers known to train I...


In [5]:
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['description'])

cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)


In [6]:
# Create a mapping of movie/show title to its index
indices = pd.Series(df.index, index=df['title']).drop_duplicates()

# Function to get recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    # Get the index of the given movie title
    idx = indices[title]

    # Get similarity scores for all titles with that index
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort shows based on similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Top 10 most similar (excluding the movie itself)
    sim_scores = sim_scores[1:11]

    # Get the indexes of those titles
    movie_indices = [i[0] for i in sim_scores]

    # Return their titles
    return df['title'].iloc[movie_indices]


In [7]:
get_recommendations('Kota Factory')


Unnamed: 0,title
2272,Drishyam
266,The Creative Indians
2091,The Bridge Curse
4076,She's Dating the Gangster
805,Racket Boys
2690,Code 8
1727,Girl's Revenge
8232,The Bye Bye Man
8612,Train of the Dead
2362,The Politician


In [8]:
get_recommendations('Narcos')
get_recommendations('The Crown')
get_recommendations('Delhi Crime')
get_recommendations('Breaking Bad')  # If dataset has it


Unnamed: 0,title
1920,The Book of Sun
1798,Alice Junior
4118,Iron Ladies
6616,Dismissed
2388,Alexa & Katie
1237,Mariposa
8303,The Five Venoms
2491,The School of Mischief
1559,The Mess You Leave Behind
1022,Taare Zameen Par


In [9]:
print(get_recommendations('Kota Factory').to_string(index=False))


                 Drishyam
     The Creative Indians
         The Bridge Curse
She's Dating the Gangster
              Racket Boys
                   Code 8
           Girl's Revenge
          The Bye Bye Man
        Train of the Dead
           The Politician


In [10]:
recs = get_recommendations('Drishyam')
for i, title in enumerate(recs, 1):
    print(f"{i}. {title}")


1. Catch.er
2. Red
3. Punjab 1984
4. Ugly
5. The Father Who Moves Mountains
6. El Reemplazante
7. Thottappan
8. Safe
9. Maroon
10. Bigfoot Family


In [11]:
def get_recommendations_with_desc(title, cosine_sim=cosine_sim):
    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:11]
    movie_indices = [i[0] for i in sim_scores]

    return df[['title', 'description']].iloc[movie_indices]

get_recommendations_with_desc('Kota Factory')


Unnamed: 0,title,description
2272,Drishyam,An unexceptional man becomes a murder suspect ...
266,The Creative Indians,From chefs and comedians to filmmakers and fas...
2091,The Bridge Curse,A group of university students decides to test...
4076,She's Dating the Gangster,"To make another woman jealous, a campus heartt..."
805,Racket Boys,A city kid is brought to the countryside by hi...
2690,Code 8,In a city where super-powered people are ostra...
1727,Girl's Revenge,After a sex video subjects her friend to mocke...
8232,The Bye Bye Man,Three college students move into an off-campus...
8612,Train of the Dead,Five teenagers on the lam hide out in an empty...
2362,The Politician,Rich kid Payton has always known he's going to...
