# Netflix search engine

In [7]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score
from sklearn.preprocessing import MinMaxScaler, MultiLabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer

# Read the Netflix titles table from the local CSV export.
csv_path = r"D:\PYTON PROGRAMMING\PYTHON FILES\Scikit-Learn\PROJECT\DATA\netflix_titles.csv"
netflix_data = pd.read_csv(csv_path)

# Encode the title type numerically. Any value that is not one of the two
# keys below maps to NaN (zero-filled later when the feature matrix is built).
type_codes = {'TV Show': 0, 'Movie': 1}
netflix_data['type_num'] = netflix_data['type'].map(type_codes)

# Rescale the release year with a min-max scaler so it is comparable in
# magnitude to the 0/1 indicator features.
# NOTE: this same `scaler` object is re-fitted on the duration column further
# down, so after that point it no longer holds the release-year range.
scaler = MinMaxScaler()
release_year_frame = netflix_data[['release_year']]
netflix_data['release_year_scaled'] = scaler.fit_transform(release_year_frame)

def extract_duration(val):
    """Return the leading integer of a duration string, or None.

    The value is split on single spaces and the first token is converted
    with ``int`` (e.g. ``"90 min"`` -> ``90``, ``"2 Seasons"`` -> ``2``).

    Args:
        val: The raw duration cell. Expected to be a string; anything else
            (NaN floats, None, bytes, ...) is treated as unparsable.

    Returns:
        The parsed integer, or None when ``val`` is not a string or its
        first token is not an integer.
    """
    try:
        return int(val.split(' ')[0])
    # Only swallow the failures that mean "this cell cannot be parsed":
    #   AttributeError - val has no .split (NaN, None, numbers)
    #   TypeError      - val.split rejects a str separator (e.g. bytes)
    #   ValueError     - first token is not an integer (including "")
    # A bare ``except`` would also hide KeyboardInterrupt/SystemExit.
    except (AttributeError, TypeError, ValueError):
        return None
# Numeric duration: leading integer of the raw `duration` text, then min-max
# scaled. The shared `scaler` is re-fitted here on the duration column.
duration_values = netflix_data['duration'].apply(extract_duration)
netflix_data['duration_num'] = duration_values
netflix_data['duration_num_scaled'] = scaler.fit_transform(
    netflix_data[['duration_num']]
)

# Genres: split the comma-separated `listed_in` text into a list per title,
# then expand into one 0/1 indicator column per distinct genre.
netflix_data['genre_list'] = netflix_data['listed_in'].apply(
    lambda genres: genres.split(', ')
)
mlb = MultiLabelBinarizer()
genre_indicators = mlb.fit_transform(netflix_data['genre_list'])
genre_netflix_data = pd.DataFrame(
    genre_indicators,
    columns=mlb.classes_,
    index=netflix_data.index,
)
netflix_data = pd.concat([netflix_data, genre_netflix_data], axis=1)

# Cast: treat the cast text as a document and keep the 50 highest-ranked
# TF-IDF terms. Missing cast entries become empty strings first.
netflix_data['cast'] = netflix_data['cast'].fillna('')
vectorizer = TfidfVectorizer(max_features=50)
cast_matrix = vectorizer.fit_transform(netflix_data['cast'])

# Assemble the final dense feature matrix, one row per title:
# [type, release year, duration | genre indicators | cast TF-IDF].
# Missing numeric values are replaced by 0.
numeric_columns = ['type_num', 'release_year_scaled', 'duration_num_scaled']
numeric_block = netflix_data[numeric_columns].fillna(0).values
genre_block = genre_netflix_data.values
cast_block = cast_matrix.toarray()
features = np.hstack([numeric_block, genre_block, cast_block])

# Model Training
# Ask for one neighbor more than we intend to show: the query title is
# normally returned among its own nearest neighbors (cosine distance 0) and
# has to be dropped from the recommendations.
n_recommendations = 5
model_knn = NearestNeighbors(n_neighbors=n_recommendations + 1, metric='cosine')
model_knn.fit(features)

# Testing the model
title = "Dark"
title_mask = (netflix_data['title'] == title).to_numpy()
filtered_df = netflix_data[title_mask]

if not filtered_df.empty:
    # Index label of the first match (kept for reference) ...
    target_index = filtered_df.index[0]
    # ... and its row position. `features` is a plain NumPy array and the
    # neighbor indices are read back with `.iloc`, so everything below must
    # be positional; label and position only coincide for a default index.
    target_pos = int(np.flatnonzero(title_mask)[0])

    # Get feature vector for this title
    distances, indices = model_knn.kneighbors([features[target_pos]])

    # Remove the query row explicitly instead of assuming it is the first
    # result: when several titles share an identical feature vector the query
    # is not guaranteed to come first, and slicing off element 0 would drop a
    # real recommendation while printing the query title itself.
    recommended = [i for i in indices[0] if i != target_pos][:n_recommendations]

    print("Recommendations for:", title)
    for i in recommended:
        print(netflix_data.iloc[i]['title'])
else:
    print(f"❌ Title '{title}' not found in the dataset.")




Recommendations for: Dark
Khotey Sikkey
Deadwind
Futmalls.com
She
Sleepless Society: Two Pillows & A Lost Soul
