# 🎥Netflix TV Show and Movie Recommendation System

### 1. Importing the Libraries

In [65]:
import numpy as np
import pandas as pd
import ast
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import spacy
nlp = spacy.load("en_core_web_sm")
from getpass import getpass
from scipy.sparse import csr_matrix



### 2. Loading the Data 

In [98]:
df = pd.read_csv("titles.csv")

In [99]:
df

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score
0,ts300399,Five Came Back: The Reference Films,SHOW,This collection includes 12 World War II-era p...,1945,TV-MA,51,['documentation'],['US'],1.0,,,,0.600,
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"['drama', 'crime']",['US'],,tt0075314,8.2,808582.0,40.965,8.179
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"['drama', 'action', 'thriller', 'european']",['US'],,tt0068473,7.7,107673.0,10.010,7.300
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"['fantasy', 'action', 'comedy']",['GB'],,tt0071853,8.2,534486.0,15.461,7.811
4,tm120801,The Dirty Dozen,MOVIE,12 American military prisoners in World War II...,1967,,150,"['war', 'action']","['GB', 'US']",,tt0061578,7.7,72662.0,20.398,7.600
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5845,tm1014599,Fine Wine,MOVIE,A beautiful love story that can happen between...,2021,,100,"['romance', 'drama']",['NG'],,tt13857480,6.8,45.0,1.466,
5846,tm898842,C/O Kaadhal,MOVIE,A heart warming film that explores the concept...,2021,,134,['drama'],[],,tt11803618,7.7,348.0,,
5847,tm1059008,Lokillo,MOVIE,A controversial TV host and comedian who has b...,2021,,90,['comedy'],['CO'],,tt14585902,3.8,68.0,26.005,6.300
5848,tm1035612,Dad Stop Embarrassing Me - The Afterparty,MOVIE,"Jamie Foxx, David Alan Grier and more from the...",2021,PG-13,37,[],['US'],,,,,1.296,10.000


### 3. Preprocessing Data

In [100]:
def eda(dataframe):
    print(f"""
    
    -- NaN Values --
    
    {dataframe.isnull().sum()}

    -- Info --
    
    {dataframe.dtypes}
    
    -- Shape --
    
    {dataframe.shape}
    
    -- Unique --
    
    {df.apply(lambda x: x.nunique())}
    
    -- Head --
    """)
    
    
    return dataframe.head()
eda(df)


    
    -- NaN Values --
    
    id                         0
title                      1
type                       0
description               18
release_year               0
age_certification       2619
runtime                    0
genres                     0
production_countries       0
seasons                 3744
imdb_id                  403
imdb_score               482
imdb_votes               498
tmdb_popularity           91
tmdb_score               311
dtype: int64

    -- Info --
    
    id                       object
title                    object
type                     object
description              object
release_year              int64
age_certification        object
runtime                   int64
genres                   object
production_countries     object
seasons                 float64
imdb_id                  object
imdb_score              float64
imdb_votes              float64
tmdb_popularity         float64
tmdb_score              float64
dtype: obje

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score
0,ts300399,Five Came Back: The Reference Films,SHOW,This collection includes 12 World War II-era p...,1945,TV-MA,51,['documentation'],['US'],1.0,,,,0.6,
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,"['drama', 'crime']",['US'],,tt0075314,8.2,808582.0,40.965,8.179
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,"['drama', 'action', 'thriller', 'european']",['US'],,tt0068473,7.7,107673.0,10.01,7.3
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,"['fantasy', 'action', 'comedy']",['GB'],,tt0071853,8.2,534486.0,15.461,7.811
4,tm120801,The Dirty Dozen,MOVIE,12 American military prisoners in World War II...,1967,,150,"['war', 'action']","['GB', 'US']",,tt0061578,7.7,72662.0,20.398,7.6


In [101]:
df['genres'] = df['genres'].apply(ast.literal_eval)
df['production_countries'] = df['production_countries'].apply(ast.literal_eval)
df['genres'] = df['genres'].apply(lambda x: ' '.join(x))
df['production_countries'] = df['production_countries'].apply(lambda x: ' '.join(x))


In [138]:
df

Unnamed: 0,id,title,type,description,release_year,age_certification,runtime,genres,production_countries,seasons,imdb_id,imdb_score,imdb_votes,tmdb_popularity,tmdb_score,content,processed_text,processed_genres
0,ts300399,Five Came Back: The Reference Films,SHOW,This collection includes 12 World War II-era p...,1945,TV-MA,51,documentation,US,1.0,,,,0.600,,Five Came Back: The Reference Films This colle...,five came back reference films collection incl...,documentation
1,tm84618,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,114,drama crime,US,,tt0075314,8.2,808582.0,40.965,8.179,Taxi Driver A mentally unstable Vietnam War ve...,taxi driver mentally unstable vietnam war vete...,drama crime
2,tm154986,Deliverance,MOVIE,Intent on seeing the Cahulawassee River before...,1972,R,109,drama action thriller european,US,,tt0068473,7.7,107673.0,10.010,7.300,Deliverance Intent on seeing the Cahulawassee ...,deliverance intent seeing cahulawassee river t...,drama action thriller european
3,tm127384,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,fantasy action comedy,GB,,tt0071853,8.2,534486.0,15.461,7.811,"Monty Python and the Holy Grail King Arthur, a...",monty python holy grail king arthur accompanie...,fantasy action comedy
4,tm120801,The Dirty Dozen,MOVIE,12 American military prisoners in World War II...,1967,,150,war action,GB US,,tt0061578,7.7,72662.0,20.398,7.600,The Dirty Dozen 12 American military prisoners...,dirty dozen 12 american military prisoners wor...,war action
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5845,tm1014599,Fine Wine,MOVIE,A beautiful love story that can happen between...,2021,,100,romance drama,NG,,tt13857480,6.8,45.0,1.466,,Fine Wine A beautiful love story that can happ...,fine wine beautiful love story happen two peop...,romance drama
5846,tm898842,C/O Kaadhal,MOVIE,A heart warming film that explores the concept...,2021,,134,drama,,,tt11803618,7.7,348.0,,,C/O Kaadhal A heart warming film that explores...,co kaadhal heart warming film explores concept...,drama
5847,tm1059008,Lokillo,MOVIE,A controversial TV host and comedian who has b...,2021,,90,comedy,CO,,tt14585902,3.8,68.0,26.005,6.300,Lokillo A controversial TV host and comedian w...,lokillo controversial tv host comedian built c...,comedy
5848,tm1035612,Dad Stop Embarrassing Me - The Afterparty,MOVIE,"Jamie Foxx, David Alan Grier and more from the...",2021,PG-13,37,,US,,,,,1.296,10.000,Dad Stop Embarrassing Me - The Afterparty Jami...,dad stop embarrassing afterparty jamie foxx da...,


In [103]:
df['content'] = df['title'] + ' '+ df['description'] 


In [104]:
stop_words = set(stopwords.words('english'))

def preprocess(text):
    if pd.isna(text) or not isinstance(text, str):  # Skip null or non-string values
        return ''
    
    text = text.lower()  # Lowercase
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    words = word_tokenize(text)  # Tokenization
    words = [word for word in words if word not in stop_words]  # Remove stopwords
    return ' '.join(words)

In [105]:
df['processed_text'] = df['content'].apply(preprocess)
df['processed_genres'] = df['genres'].apply(preprocess)


### 4. TfIDF and cosine similarity

In [114]:

vectorizer = TfidfVectorizer()
tfidf_genre = vectorizer.fit_transform(df['processed_genres'])
tfidf_processed_text = vectorizer.fit_transform(df['processed_text'])


In [115]:
tfidf_processed_text = csr_matrix(tfidf_processed_text)
tfidf_genre = csr_matrix(tfidf_genre)

In [116]:
max_rows = max(tfidf_processed_text.shape[0], tfidf_genre.shape[0])
max_cols = max(tfidf_processed_text.shape[1], tfidf_genre.shape[1])

In [117]:
tfidf_processed_text.resize((max_rows, max_cols))
tfidf_genre.resize((max_rows, max_cols))

In [118]:
# Assign custom weights
CONTENT_WEIGHT = 1.0
GENRE_WEIGHT = 3.0  # More importance to genre


# Apply weights to the vectors
weighted_tfidf = (
    CONTENT_WEIGHT * tfidf_processed_text +
    GENRE_WEIGHT * tfidf_genre
)


In [111]:
def extract_keywords(text):
    doc = nlp(text)
    keywords = [token.lemma_ for token in doc if token.pos_ in ['NOUN', 'ADJ']]
    return ' '.join(keywords)

In [136]:
user_query= "I like movies set in space"
processed_query = extract_keywords(user_query)  # "thrilling action movie space comedic twist"
query_vector = vectorizer.transform([processed_query])
cosine_sim = cosine_similarity(query_vector, weighted_tfidf)
top_indices = np.argsort(cosine_sim[0])[::-1][:10]  # Get top 10 recommendations
recommendations = df.iloc[top_indices][['title', 'description', 'genres']]

### 5. Results

In [137]:
print("\n🔍 Top 10 Recommendations:\n")
for i, row in recommendations.iterrows():
    print(f"{i+1}. 🎬 {row['title']}")
    print(f"      📖 {row['description']}")
    print(f"      🎭 Genres: {row['genres']}")
    print("-" * 50)  # Separator for readability


🔍 Top 10 Recommendations:

5750. 🎬 A StoryBots Space Adventure
      📖 Join the StoryBots and the space travelers of the historic Inspiration4 mission as they search for answers to kids' questions about space.
      🎭 Genres: animation
--------------------------------------------------
5614. 🎬 The Wonderful: Stories from the Space Station
      📖 In unusual circumstances, scientists from different countries work together to achieve a common scientific goal. Locked in their spinning space lab, they are isolated from the world — family and friends - and can only watch from the outside as life on Earth continues without them. The space station is a monument not only to the weaknesses of humanity, but also to its ability to do the impossible for the sake of life in space.
      🎭 Genres: documentation
--------------------------------------------------
4587. 🎬 Fukrey Boyzzz: Space Mein Fukrapanti
      📖 Delhi Boys Going To Space
      🎭 Genres: animation comedy family
--------------------