In [2]:
pip install pandas numpy nltk scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd

# Load Netflix dataset
df = pd.read_csv("netflix_titles.csv")

# Display first 5 rows
df.head()


Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [5]:
# Drop rows where description is missing
df = df.dropna(subset=['description'])
df = df.reset_index(drop=True)

# Check dataset info
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [16]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# Download VADER lexicon
nltk.download('vader_lexicon')

# Initialize SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()

def get_sentiment_vader(text):
    sentiment_score = sia.polarity_scores(text)
    
    if sentiment_score['compound'] >= 0.05:
        return "positive"
    elif sentiment_score['compound'] <= -0.05:
        return "negative"
    else:
        return "neutral"

# Take user input
user_input = input("Enter your text: ")  # Fix: Define user_input
user_mood = get_sentiment_vader(user_input)
print(f"User Mood: {user_mood}")


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Dell\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Enter your text:  Happy


User Mood: positive


In [17]:
# Define mood-to-genre mapping
mood_genre_mapping = {
    "positive": ["Comedy", "Adventure", "Action", "Romance"],
    "neutral": ["Drama", "Sci-Fi", "Fantasy"],
    "negative": ["Horror", "Thriller", "Crime", "Documentary"]
}

# Function to get genres based on mood
def get_genre_by_mood(mood):
    return mood_genre_mapping.get(mood, ["Drama"])  # Default to "Drama" if mood is unknown

# Example: Get genres for detected mood
selected_genres = get_genre_by_mood(user_mood)
print(f"Recommended Genres: {selected_genres}")


Recommended Genres: ['Comedy', 'Adventure', 'Action', 'Romance']


In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Initialize TF-IDF Vectorizer
tfidf = TfidfVectorizer(stop_words="english")

# Convert movie descriptions into TF-IDF vectors
tfidf_matrix = tfidf.fit_transform(df["description"])

# Check shape of the matrix (Rows: Movies, Columns: Unique Words)
tfidf_matrix.shape


(8807, 18895)

In [20]:
from sklearn.metrics.pairwise import cosine_similarity

# Function to recommend movies based on user mood
def recommend_movies(user_input, df):
    # Analyze mood
    user_mood = get_sentiment_vader(user_input)
    selected_genres = get_genre_by_mood(user_mood)
    
    # Filter dataset for selected genres
    filtered_df = df[df["listed_in"].apply(lambda x: any(genre in x for genre in selected_genres))]
    
    # Convert filtered descriptions to TF-IDF
    tfidf_matrix_filtered = tfidf.transform(filtered_df["description"])
    
    # Convert user input to TF-IDF vector
    user_tfidf_vector = tfidf.transform([user_input])
    
    # Compute similarity
    similarity_scores = cosine_similarity(user_tfidf_vector, tfidf_matrix_filtered).flatten()
    
    # Get top 5 recommended movies
    top_indices = similarity_scores.argsort()[-5:][::-1]  # Get top matches
    recommendations = filtered_df.iloc[top_indices]
    
    return recommendations[["title", "listed_in", "description"]]


user_input = "I am happy!"
recommended_movies = recommend_movies(user_input, df)

print(recommended_movies)


                                               title  \
2221                 Jack Whitehall: I'm Only Joking   
6121                             All the Devil's Men   
6808                            From Paris with Love   
8792                                     Young Tiger   
3123  Ronny Chieng: Asian Comedian Destroys America!   

                                     listed_in  \
2221                           Stand-Up Comedy   
6121                        Action & Adventure   
6808  Action & Adventure, International Movies   
8792  Action & Adventure, International Movies   
3123                           Stand-Up Comedy   

                                            description  
2221  Jack Whitehall hits the stage with hilarious t...  
6121  A battle-scarred Special Ops military vet join...  
6808  A low-level intelligence agent gets pulled int...  
8792  Aided only by a tough female police officer, a...  
3123  Ronny Chieng ("The Daily Show," "Crazy Rich As...  


In [12]:
# Show recommended movies
for index, row in recommended_movies.iterrows():
    print(f" {row['title']} ({row['listed_in']})")
    print(f" {row['description']}\n")


 Vir Das: Losing It (Stand-Up Comedy)
 The world's got a lot of problems, but Vir Das has a lot of answers as he discusses travel, religion, his childhood and more in this stand-up special.

 Jo Koy: Live from Seattle (Stand-Up Comedy)
 Between raising a teenage boy and growing up with a Filipino mother, stand-up comic Jo Koy has been through a lot. He's here to tell you all about it.

 Once a Gangster (Action & Adventure, Comedies, International Movies)
 Two former triads are tapped to run in an election for leader of the underworld, but both want normal lives. Each must outwit the other to lose.

 Bumping Mics with Jeff Ross & Dave Attell (Stand-Up Comedy & Talk Shows, TV Comedies)
 When Jeff Ross and Dave Attell take the stage, no one is safe. With the help of special guests, they're packing a lot of laughs into one epic weekend.

 Lynne Koplitz: Hormonal Beast (Stand-Up Comedy)
 Unabashed comedian Lynne Koplitz offers a woman's take on being crazy, the benefits of childlessness and