In [13]:
pip install seaborn

Note: you may need to restart the kernel to use updated packages.


In [14]:
pip install streamlit

Note: you may need to restart the kernel to use updated packages.


In [15]:
# 📦 Step 1: Import Required Libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): this also hides deprecation / chained-assignment warnings from
# the libraries used below — consider narrowing the filter while developing.
warnings.filterwarnings('ignore')

In [16]:
# 📥 Step 2: Load Dataset
# Read the Netflix titles CSV into `df`, then preview its first five rows.
# NOTE(review): the path is an absolute location on one machine — adjust it
# when running the notebook elsewhere.

df = pd.read_csv(
    r'C:\VITB\Personal Projects\Netflix\dataset\netflix_titles.csv'
)
df.head(n=5)

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [17]:
# 🧹 Step 3: Basic Cleaning
# Build the cleaned frame as a single chain in which every step returns a new
# object. In-place calls on a column selection (or on a single column pulled
# out of the frame) are chained assignment: pandas does not guarantee they
# write back to `df`, and under copy-on-write they are silently a no-op.
df = (
    df[['title', 'description', 'listed_in', 'type']]  # Keep only useful columns
    .dropna(subset=['description'])                    # Drop rows with missing descriptions
    .fillna({'listed_in': ''})                         # Fill missing genre values (just in case)
    .reset_index(drop=True)                            # Reset index after dropping
)

print("Data shape after cleaning:", df.shape)


Data shape after cleaning: (8807, 4)


In [18]:
# 🗓️ Extract year from 'date_added'
# Reload the full dataset: 'date_added' was dropped when the columns were
# trimmed during cleaning.
original_df = pd.read_csv(r'C:\VITB\Personal Projects\Netflix\dataset\netflix_titles.csv')

# Apply the same row filter as the cleaning step (drop rows without a
# description, renumber from 0) so the dates line up row-for-row with `df`.
# Assigning the raw column directly aligns on index labels, which stop
# matching the source rows as soon as cleaning removes any row.
date_added = (
    original_df.dropna(subset=['description'])['date_added']
    .reset_index(drop=True)
)
assert len(date_added) == len(df), "df is out of sync with the source CSV"

# Convert to datetime. Surrounding whitespace is stripped first so padded
# values are parsed rather than coerced to NaT; anything still unparseable
# becomes NaT.
df['date_added'] = pd.to_datetime(date_added.str.strip(), errors='coerce')

# Year of 'date_added', with 0 standing in for missing/unparseable dates.
# Computed as one expression instead of an in-place fillna on a column
# selection, which is chained assignment and not guaranteed to update `df`.
# NOTE(review): despite its name, this column holds the year taken from
# 'date_added', not the CSV's own 'release_year' column (dropped during
# cleaning) — confirm this is the meaning intended for the `year` filter.
df['release_year'] = df['date_added'].dt.year.fillna(0).astype(int)


In [19]:
# 🔧 Step 4: Feature Engineering
# The text used for vectorisation is each title's description followed by its
# genre list, joined with a single space.

df['content'] = df['description'].str.cat(df['listed_in'], sep=' ')


In [20]:
# 📊 Step 5: TF-IDF Vectorization
# Fit a TF-IDF vectoriser (English stop words removed) on the combined text
# and keep both the fitted vectoriser and the resulting document-term matrix.

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(raw_documents=df['content'])

# Report (number of titles, vocabulary size)
print("TF-IDF matrix shape:", tfidf_matrix.shape)


TF-IDF matrix shape: (8807, 18895)


In [21]:
# 🤖 Step 6: Cosine Similarity Matrix
# Pairwise similarity of every title against every other title; with a single
# argument the matrix is compared against itself.

cosine_sim = cosine_similarity(tfidf_matrix)

In [22]:
# 🔍 Step 7: Title Index Mapping
# Map lowercase title to index for lookup

indices = pd.Series(df.index, index=df['title'].str.lower())

# Keep only the first row for each title. Series.drop_duplicates() compares
# the *values* (the row labels, which are already unique), so it never removes
# a repeated title; de-duplicate on the index labels instead so that
# indices[title] always yields a single row label.
indices = indices[~indices.index.duplicated(keep='first')]

In [23]:
# 💡 Step 8: Recommendation Function
def recommend(title, genre=None, year=None, cosine_sim=cosine_sim, top_n=10):
    """Return the titles most similar to `title`, optionally filtered.

    Parameters
    ----------
    title : str
        Title to look up; it is lowercased and matched against `indices`.
    genre : str, optional
        Literal, case-insensitive substring that must occur in `listed_in`.
        Ignored when falsy.
    year : int, optional
        Value the `release_year` column must equal. Ignored when falsy.
    cosine_sim : array-like
        Pairwise similarity matrix; row `i` holds the scores of row `i` of
        `df` against every row. Defaults to the matrix that exists when the
        function is defined.
    top_n : int, default 10
        Maximum number of titles to return.

    Returns
    -------
    list of str
        Up to `top_n` titles, most similar first, after filtering.
    str
        A message when `title` is unknown, or when `year` is given but `df`
        has no `release_year` column.
    """
    title = title.lower()

    if title not in indices:
        return "❌ Sorry, this title is not in the dataset."

    # Get index of the given title. If the title occurs more than once in
    # `indices` the lookup yields a Series; use its first entry.
    idx = indices[title]
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    # Rank every row by similarity, highest first. The stable sort keeps rows
    # with equal scores in their original order.
    scores = np.asarray(cosine_sim[idx]).ravel()
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the input title by position instead of assuming it sorts first:
    # a row with identical content (or a tie) could otherwise take its place
    # and leave the input title in its own recommendations.
    ranked = ranked[ranked != idx]

    recommendations = df.iloc[ranked]

    # Filter by genre and/or year if specified
    if genre:
        # regex=False: genre text is matched literally, so characters such as
        # '(' or '+' are safe; na=False: rows without a genre never match.
        genre_match = (
            recommendations['listed_in']
            .str.lower()
            .str.contains(genre.lower(), regex=False, na=False)
        )
        recommendations = recommendations[genre_match]

    if year:
        # The year filter needs the 'release_year' column
        if 'release_year' in df.columns:
            recommendations = recommendations[recommendations['release_year'] == year]
        else:
            return "⚠️ 'release_year' column is missing from dataset."

    # Return the top titles after filtering
    return recommendations['title'].head(top_n).tolist()



In [24]:
# ✅ Step 9: Test It Out
# Each entry pairs the heading to print with the title to query.

demo_queries = [
    ("Recommendations for 'Narcos':", "Narcos"),
    ("\nRecommendations for 'Black Mirror':", "Black Mirror"),
]

for heading, query in demo_queries:
    print(heading)
    print(recommend(query))


Recommendations for 'Narcos':
['Miss Dynamite', 'Narcos: Mexico', 'El Cartel 2', 'El Chapo', 'Ganglands', 'El final del paraíso', 'Cocaine Cowboys: The Kings of Miami', 'Historia de un clan', 'Sin senos no hay paraíso', 'Top Boy']

Recommendations for 'Black Mirror':
['Til Death Do Us Part', 'Creeped Out', 'Leila', 'Osmosis', 'Degrassi: Next Class', 'Rosario Tijeras', 'Behind Her Eyes', 'Travelers', 'Cleverman', 'The Five']


In [25]:
# Filtered lookups: (heading to print, title to query, keyword filters).
filtered_queries = [
    ("🎬 Top Recommendations like 'Narcos' in genre 'Crime':",
     "Narcos", {"genre": "Crime"}),
    ("\n🎬 Top Recommendations like 'Stranger Things' released in 2016:",
     "Stranger Things", {"year": 2016}),
    ("\n🎬 Top Recommendations like 'Black Mirror' in 'Sci-Fi' released in 2018:",
     "Black Mirror", {"genre": "Sci-Fi", "year": 2018}),
]

for heading, query, filters in filtered_queries:
    print(heading)
    print(recommend(query, **filters))

🎬 Top Recommendations like 'Narcos' in genre 'Crime':
['Miss Dynamite', 'Narcos: Mexico', 'El Cartel 2', 'El Chapo', 'Ganglands', 'El final del paraíso', 'Cocaine Cowboys: The Kings of Miami', 'Historia de un clan', 'Top Boy', 'Cocaine']

🎬 Top Recommendations like 'Stranger Things' released in 2016:
['The Messengers', 'Jonathan Strange & Mr Norrell', 'K', 'Penny Dreadful', 'Kaçak', 'Game Winning Hit', 'Ice Fantasy', 'Sense8', 'Conspiracy', 'Nobel']

🎬 Top Recommendations like 'Black Mirror' in 'Sci-Fi' released in 2018:
['Into the Forest', 'The Titan', 'Mute', 'The Humanity Bureau', 'Blood & Treasures', 'Transfers', 'TAU', 'Neo Yokio', 'The Shannara Chronicles', 'Helix']
