In [38]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/netflix-shows/netflix_titles.csv


### Objective


* Group titles based on genre, rating, and duration.

* Build a content based recommendation system using text similarity.

* Help users find similar shows or movies using data driven methods.

## Import libraries

In [39]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack
import numpy as np

In [42]:
import ipywidgets as widgets
from IPython.display import display


## Load Data

In [33]:

df = pd.read_csv("/kaggle/input/netflix-shows/netflix_titles.csv")

## Cleaning

In [24]:
df.isnull().sum()

show_id                 0
type                    0
title                   0
director             2634
cast                  825
country               831
date_added             10
release_year            0
rating                  4
duration                3
listed_in               0
description             0
combined_features       0
dtype: int64

In [25]:

df['cast'] = df['cast'].fillna('')
df['director'] = df['director'].fillna('')


## TFI-DF 

In [26]:
tfidf_desc = TfidfVectorizer(stop_words='english', max_features=5000)
tfidf_desc_matrix = tfidf_desc.fit_transform(df['description'])

tfidf_cast = TfidfVectorizer(stop_words='english', max_features=2000)
tfidf_cast_matrix = tfidf_cast.fit_transform(df['cast'])

tfidf_dir = TfidfVectorizer(stop_words='english', max_features=1000)
tfidf_dir_matrix = tfidf_dir.fit_transform(df['director'])


## One-hot encode genres

In [27]:
mlb = MultiLabelBinarizer()
genre_matrix = mlb.fit_transform(df['listed_in'])


## Numeric features

Convert duration to number of minutes.

In [28]:
def convert_duration(x):
    if "min" in x:
        return int(x.split()[0])
    return 0

df['duration_num'] = df['duration'].fillna("0 min").apply(convert_duration)

scaler = MinMaxScaler()
duration_scaled = scaler.fit_transform(df[['duration_num']])


## Combine all feature matrices

In [29]:
final_matrix = hstack([
    tfidf_desc_matrix,
    tfidf_cast_matrix,
    tfidf_dir_matrix,
    genre_matrix,
    duration_scaled
])


## Compute cosine similarity

In [30]:
similarity_matrix = cosine_similarity(final_matrix)


## Recommendation function

In [31]:
def recommend(title, df, matrix, top_n=10):
    if title not in df['title'].values:
        return ["Title not found"]
    
    idx = df[df['title'] == title].index[0]
    scores = list(enumerate(matrix[idx]))
    scores = sorted(scores, key=lambda x: x[1], reverse=True)
    
    recs = []
    for i in scores[1: top_n+1]:
        recs.append(df.iloc[i[0]]['title'])
    return recs


> Check - working

In [40]:
recommend("Stranger Things", df, similarity_matrix)


['Nightflyers',
 'Chilling Adventures of Sabrina',
 'Helix',
 'The Umbrella Academy',
 'The Diabolical',
 'Sweetheart',
 'The Vampire Diaries',
 'Await Further Instructions',
 'The OA',
 'Manifest']

--------------

In [36]:
search_box = widgets.Text(
    value='',
    placeholder='Type a show name...',
    description='Search',
    disabled=False
)

output = widgets.Output()

def on_enter_change(change):
    with output:
        output.clear_output()
        title = change['new']
        recs = recommend(title, df, similarity_matrix, top_n=10)
        print(f"Recommendations for: {title}")
        for r in recs:
            print("â€¢", r)

search_box.observe(on_enter_change, names='value')

display(search_box, output)


Text(value='', description='Search', placeholder='Type a show name...')

Output()

 -----

**Conclusion**


The recommendation system returned relevant similar titles using description text.

The project shows how machine learning can enhance content discovery without relying on user ratings.