<a href="https://colab.research.google.com/github/Manya123-max/Netflix_Chatbot/blob/main/Netflix_CHAT_BOT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the third-party packages used by this notebook (shell command run from the cell).
# NOTE(review): `transformers` is not imported in any visible cell - confirm it is still needed.
!pip install transformers nltk



In [None]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import OneHotEncoder
import numpy as np

In [None]:
# Download NLTK data (run this once)
# 'punkt' / 'punkt_tab': tokenizer data (presumably what word_tokenize relies on - which of
# the two is required depends on the installed NLTK version; TODO confirm).
# 'stopwords': the corpus read later through nltk.corpus.stopwords.words('english').
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:
# Load dataset
def load_dataset(path='/content/netflix_titles.csv', encoding='latin-1'):
    """
    Load the Netflix titles dataset from a CSV file.

    Parameters
    ----------
    path : str or path-like, default '/content/netflix_titles.csv'
        Location of the CSV file. The default is the path this notebook
        originally hard-coded; pass a different path to load another copy.
    encoding : str, default 'latin-1'
        Text encoding handed to ``pandas.read_csv``. The default keeps the
        notebook's original behaviour.
        NOTE(review): the sample chatbot output in this notebook shows garbled
        punctuation in a description, which suggests the file may really be
        UTF-8 - confirm and pass ``encoding='utf-8'`` if so.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV. Other cells in this notebook read the columns
        'title', 'listed_in', 'release_year' and 'description' from it.

    Raises
    ------
    FileNotFoundError
        If ``path`` does not exist (raised by ``pandas.read_csv``).
    """
    return pd.read_csv(path, encoding=encoding)

In [None]:
# Preprocessing steps
def preprocess_data(df):
    """
    Add a 'Processed_Description' column holding a normalised form of
    each row's 'description'.

    Each description is:
    1. Lowercased.
    2. Tokenized with ``word_tokenize``.
    3. Filtered to alphanumeric tokens that are not English stopwords.
    4. Stemmed with the Porter stemmer and re-joined with single spaces.

    Rows whose description is not a string (for example a missing value,
    which pandas represents as a float NaN) get an empty string instead of
    raising ``AttributeError`` on ``.lower()``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'description' column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the new column is assigned in place.
    """
    stemmer = PorterStemmer()
    # A set makes the per-token membership test constant-time; the result of
    # the filter is identical to testing against the original list.
    stop_words = set(nltk.corpus.stopwords.words('english'))

    def preprocess_text(text):
        # Guard against NaN / non-text cells so one bad row cannot abort the run.
        if not isinstance(text, str):
            return ""
        tokens = word_tokenize(text.lower())
        # isalnum() drops punctuation-only tokens before the stopword check.
        return " ".join(
            stemmer.stem(token)
            for token in tokens
            if token.isalnum() and token not in stop_words
        )

    df['Processed_Description'] = df['description'].apply(preprocess_text)
    return df

In [None]:
# Build the Bag of Words (BOW) representation
def create_bow(df):
    """
    Fit a CountVectorizer on the 'Processed_Description' column.

    Returns a ``(vectorizer, bow_matrix)`` tuple: the fitted vectorizer and
    the matrix that ``fit_transform`` produced for the column.
    """
    count_vectorizer = CountVectorizer()
    processed_descriptions = df['Processed_Description']
    return count_vectorizer, count_vectorizer.fit_transform(processed_descriptions)

In [None]:
# One-hot encode the genre column
def one_hot_encode_genres(df):
    """
    One-hot encode the 'listed_in' column (the dataset's genre field).

    The column is passed to the encoder as a single feature, so every
    distinct 'listed_in' value becomes one category.

    Returns a ``(encoder, genre_matrix, genre_labels)`` tuple: the fitted
    encoder, the dense encoded matrix, and the category labels the encoder
    learned for that column.
    """
    # handle_unknown='ignore' keeps transform() from raising on a value
    # that was not present when the encoder was fitted.
    genre_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
    encoded_genres = genre_encoder.fit_transform(df[['listed_in']])
    return genre_encoder, encoded_genres, genre_encoder.categories_[0]

In [None]:
# Find best match using cosine similarity
def find_match(user_input, df, vectorizer, bow_matrix, threshold=0.1):
    """
    Return the dataset row whose description is most similar to the query.

    The query is normalised the same way the descriptions were (lowercase,
    tokenize, keep alphanumeric non-stopword tokens, Porter-stem), turned
    into a vector with ``vectorizer`` and compared against every row of
    ``bow_matrix`` by cosine similarity.

    Parameters
    ----------
    user_input : str
        Free-text query typed by the user.
    df : pandas.DataFrame
        Dataset whose rows are in the same positional order as the rows of
        ``bow_matrix`` (the winning row is fetched with ``df.iloc``).
    vectorizer
        Fitted vectorizer exposing ``transform``.
    bow_matrix
        Document-term matrix produced by that vectorizer.
    threshold : float, default 0.1
        The best similarity must be strictly greater than this value for a
        match to be reported. The default is the value that was previously
        hard-coded.

    Returns
    -------
    pandas.Series or None
        The best-matching row, or ``None`` when the query has no usable
        tokens, the corpus is empty, or the best score does not exceed
        ``threshold``.
    """
    from sklearn.metrics.pairwise import cosine_similarity

    stemmer = PorterStemmer()
    # Set membership is constant-time; filtering result matches the list version.
    stop_words = set(nltk.corpus.stopwords.words('english'))

    # Preprocess user input
    tokens = word_tokenize(user_input.lower())
    processed_input = " ".join(
        stemmer.stem(token)
        for token in tokens
        if token.isalnum() and token not in stop_words
    )

    # Nothing to compare: an empty query can never score above zero, and
    # np.argmax would raise ValueError on an empty corpus.
    if not processed_input or bow_matrix.shape[0] == 0:
        return None

    # Transform user input into a BOW vector and score it against every row.
    user_vector = vectorizer.transform([processed_input])
    similarity_scores = cosine_similarity(user_vector, bow_matrix)[0]
    best_match_index = int(np.argmax(similarity_scores))

    if similarity_scores[best_match_index] > threshold:
        return df.iloc[best_match_index]
    return None

In [None]:
# Chatbot logic
def netflix_chatbot():
    """
    Run the interactive Netflix chatbot loop on standard input/output.

    Loads and preprocesses the dataset, builds the Bag-of-Words matrix, then
    repeatedly reads a query and prints the best match returned by
    ``find_match`` (or an apology when nothing matches).

    The loop ends when the user types 'exit' (any letter case), or when
    input is closed or interrupted (EOF / Ctrl-C / kernel interrupt), in
    which case the goodbye message is printed instead of a traceback.
    Blank input is not sent to the matcher; the user is prompted again.

    The one-hot genre matrix is not built here: its result was never used
    by the chat loop.
    """
    # Load and preprocess dataset
    df = load_dataset()
    df = preprocess_data(df)
    vectorizer, bow_matrix = create_bow(df)

    print("Netflix Chatbot: Hi! I can help you find movies or TV shows. Ask me anything!")
    print("Type 'exit' to end the conversation.\n")

    while True:
        try:
            user_input = input("You: ").strip()  # Remove any surrounding whitespace
        except (EOFError, KeyboardInterrupt):
            # Input stream closed or the cell was interrupted: leave cleanly.
            print("\nNetflix Chatbot: Goodbye! Enjoy your streaming!")
            break

        # Check for exit command
        if user_input.lower() == 'exit':
            print("Netflix Chatbot: Goodbye! Enjoy your streaming!")
            break

        # An empty line carries no query, so skip the similarity search.
        if not user_input:
            print("Netflix Chatbot: Please type a question, or 'exit' to quit.\n")
            continue

        # Find best match for user input
        match = find_match(user_input, df, vectorizer, bow_matrix)

        if match is not None:
            response = (
                f"I found something for you!\n"
                f"Title: {match['title']}\n"
                f"Genre: {match['listed_in']}\n"
                f"Release Year: {match['release_year']}\n"
                f"Description: {match['description']}"
            )
        else:
            response = "I'm sorry, I couldn't find anything matching your query. Please try again with a different question."

        print(f"Netflix Chatbot: {response}\n")

# Start the interactive chat loop only when this code runs as the main program
# (not when it is imported as a module).
if __name__ == "__main__":
    netflix_chatbot()

Netflix Chatbot: Hi! I can help you find movies or TV shows. Ask me anything!
Type 'exit' to end the conversation.

You: goa
Netflix Chatbot: I found something for you!
Title: Honeymoon Travels Pvt. Ltd.
Genre: Comedies, Dramas, International Movies
Release Year: 2007
Description: This offbeat comedy-drama follows six quirky newlywed couples as they set off on a bus from Mumbai to Goa on their honeymoons.

You: amaran
Netflix Chatbot: I'm sorry, I couldn't find anything matching your query. Please try again with a different question.

You: horror movie 
Netflix Chatbot: I found something for you!
Title: Scream 3
Genre: Horror Movies
Release Year: 2000
Description: This installment of the tongue-in-cheek horror franchise finds Sidney Prescott once again battling a crazed killer â this time, on a movie set.

You: comdy movie 
Netflix Chatbot: I found something for you!
Title: A Scandall
Genre: International Movies, Thrillers
Release Year: 2016
Description: A film school graduate is int