In [17]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [18]:
# model = SentenceTransformer('all-mpnet-base-v2')
# embeddings = model.encode(df['combined_text'], show_progress_bar = True, convert_to_tensor = False)
# embed_sim = cosine_similarity(embeddings)
# np.save('netflix_embeddings.npy', embeddings)
# print('embedding saved')

In [19]:

df = pd.read_csv('dataset/netflix_cleaned.csv')
print(df.isna().sum())
df['combined_text'] = (
    df['director'].fillna('') + ' ' +
    df['cast'].fillna('') + ' ' +
    df['listed_in'].fillna('') + ' ' +
    df['description'].fillna('') + ' ' +
    df['title'].fillna('')
).str.lower()

show_id         0
type            0
title           0
director        0
cast            0
country         0
date_added      0
release_year    0
rating          0
duration        0
listed_in       0
description     0
year_added      0
month_added     0
day_added       0
dtype: int64


In [20]:
### TF-IDF Recommender

tfidf = TfidfVectorizer(stop_words='english',max_features=10_000)
tfidf_matrix = tfidf.fit_transform(df['combined_text'])
tfidf_sim = cosine_similarity(tfidf_matrix)

In [21]:
### Sentence Transformer
pd.set_option('display.max_colwidth', 1000000)


embeddings = np.load('netflix_embeddings.npy')
print(embeddings.shape)
embed_sim = cosine_similarity(embeddings)
print(embed_sim.shape)

(8800, 768)
(8800, 8800)


In [32]:
def recommend(title, similarity_matrix, n=10):
    title_clean = title.lower().strip()
    matches = df[df['title'].str.lower() == title_clean]

    if matches.empty:
        return f" Title '{title}' not found."

    idx = matches.index[0]

    sim_scores = list(enumerate(similarity_matrix[idx]))
    sim_scores = sorted(sim_scores, key = lambda x: x[1], reverse = True)[1:n+1]

    indices = [i[0] for i in sim_scores]

    display(f"{title}", df[df['title'] == title])
    recs = df.iloc[indices][['title', 'listed_in', 'release_year', 'description']].copy()
    recs['similarity'] = [round(similarity_matrix[idx][i], 3) for i in indices]

    print(f"Recommendations based on {title}")
    return recs.reset_index(drop=True)

title = 'Friends'

print(f'TF-IDF Results for {title}')
recommend(title, tfidf_sim)

# print(f'Embedding Based Model Results for {title}')
# recommend(title, embed_sim)

TF-IDF Results for Friends


'Friends'

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,year_added,month_added,day_added,combined_text
6806,s6807,TV Show,Friends,Unkown,"Jennifer Aniston, Courteney Cox, Lisa Kudrow, Matt LeBlanc, Matthew Perry, David Schwimmer",United States,2019-01-01,2003,TV-14,10 Seasons,"Classic & Cult TV, TV Comedies","This hit sitcom follows the merry misadventures of six 20-something pals as they navigate the pitfalls of work, life and love in 1990s Manhattan.",2019,1,1,"unkown jennifer aniston, courteney cox, lisa kudrow, matt leblanc, matthew perry, david schwimmer classic & cult tv, tv comedies this hit sitcom follows the merry misadventures of six 20-something pals as they navigate the pitfalls of work, life and love in 1990s manhattan. friends"


Recommendations based on Friends


Unnamed: 0,title,listed_in,release_year,description,similarity
0,Episodes,"Classic & Cult TV, TV Comedies",2017,"Hoping to create an American version of their hit British sitcom, two screenwriters run into cultural obstacles, starting with the lead character.",0.237
1,Man with a Plan,TV Comedies,2020,"When his wife Andi returns to work, contractor Adam Burns becomes a stay-at-home dad and discovers that parenting is a tougher job than he realized.",0.165
2,"O-Negative, Love Can’t Be Designed","International TV Shows, Romantic TV Shows, TV Dramas",2016,"Five schoolmates who share a blood type navigate the vagaries of friendship, love and university life.",0.154
3,Dad's Army,"British TV Shows, Classic & Cult TV, TV Comedies",1977,This beloved sitcom follows the unlikely heroes of the volunteer British Home Guard as they prepare for German invasion in World War II.,0.149
4,Studio 54,Documentaries,2018,This documentary follows the rapid rise and fall of the Manhattan discotheque and the glittery debauchery that attracted the city's eccentric and elite.,0.147
5,Why Are You Like This,"International TV Shows, TV Comedies",2021,"Three best friends negotiate work, fun, identity politics, hookups and wild nights out in this razor-sharp satire of 20-something life in Melbourne.",0.137
6,Life Story,"British TV Shows, Docuseries, Science & Nature TV",2014,This documentary series follows wild creatures in their native habitats as they navigate the perilous journey from birth to adulthood and beyond.,0.131
7,Thomas and Friends,"British TV Shows, Classic & Cult TV, Kids' TV",2020,"This animated children's series follows the adventures of Thomas, a cheerful blue tank engine who lives on the island of Sodor.",0.127
8,The IT Crowd,"British TV Shows, Classic & Cult TV, International TV Shows",2013,"Aided by a tech-illiterate manager, a pair of basement-dwelling dweebs makes the most of life in IT at a corporation run by a cloddish new owner.",0.119
9,Still Game,"British TV Shows, Classic & Cult TV, International TV Shows",2018,"Scottish pensioners Jack and Victor make the most of their golden years, getting themselves into all kinds of scrapes in their Glasgow suburb.",0.119
