In [76]:
import pandas as pd
import numpy as np
import re
import nltk
import random
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score

In [77]:
# Emotion corpus: semicolon-separated file with no header row, so the column
# names are supplied here and the first line is read as data.
emotion_df = pd.read_csv(
    '/train.txt',
    sep=';',
    header=None,
    names=['Text', 'Emotion'],
)

# Movie metadata table; later cells read its 'genres', 'original_title' and
# 'vote_average' columns.
movies_df = pd.read_csv('/tmdb_5000_movies.csv')

In [78]:
# Download the NLTK stop-word corpus only when it is not installed yet, so that
# re-running the notebook does not contact the download server for data that is
# already available locally.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Stored as a set so the per-word membership test in clean_text is O(1).
stop_words = set(stopwords.words('english'))

def clean_text(text, stopword_set=None):
    """Normalise a sentence for the emotion classifier.

    The text is lower-cased, every character that is not an ASCII letter or
    whitespace is removed, and the remaining words are filtered against a
    stop-word collection and re-joined with single spaces.

    Parameters
    ----------
    text : str
        Raw sentence. Any non-string value (``None``, a NaN cell produced by
        ``read_csv``, a number, ...) is treated as having no text.
    stopword_set : collection of str, optional
        Words to drop. Defaults to the module-level ``stop_words``.

    Returns
    -------
    str
        The cleaned sentence; an empty string when nothing is left or when
        ``text`` is not a string.
    """
    # Missing cells arrive as float NaN, which has no .lower(); map them to an
    # empty document instead of raising AttributeError mid-apply.
    if not isinstance(text, str):
        return ""

    if stopword_set is None:
        stopword_set = stop_words

    letters_only = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    return " ".join(
        token for token in letters_only.split() if token not in stopword_set
    )

# Build the model's input column by normalising each raw sentence in turn.
emotion_df['Cleaned_Text'] = emotion_df['Text'].map(clean_text)

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [79]:
def map_genre_to_emotion(genres):
    """Pick the emotion label associated with a genre string.

    Matching is a case-insensitive substring test. The rules are tried in
    order and the first one with any keyword present decides the result, so a
    string containing both 'comedy' and 'drama' maps to 'joy'. Strings that
    match no rule map to 'sadness'.

    Parameters
    ----------
    genres : str
        Genre names in a single string.

    Returns
    -------
    str
        One of 'joy', 'fear', 'love', 'surprise', 'anger' or 'sadness'.
    """
    lowered = genres.lower()

    # Ordered (keywords, emotion) rules; earlier entries take precedence.
    rules = (
        (('comedy', 'animation'), 'joy'),
        (('horror', 'thriller'), 'fear'),
        (('drama', 'romance'), 'love'),
        (('action', 'adventure'), 'surprise'),
        (('crime',), 'anger'),
    )

    for keywords, emotion in rules:
        if any(keyword in lowered for keyword in keywords):
            return emotion

    return 'sadness'

def _genre_names(raw_genres):
    """Return the list of genre names held in one 'genres' cell.

    A string cell is scanned for every `"name": "<value>"` pair. A cell that
    is not a string is assumed to be the already-extracted list and is copied
    as-is, which lets this cell be re-run without raising TypeError in
    re.findall.
    """
    if isinstance(raw_genres, str):
        return re.findall(r'"name": "(.*?)"', raw_genres)
    return list(raw_genres)


# 'genres' is replaced by the list of names, 'genres_str' holds the same names
# joined by spaces, and 'assigned_emotion' is the label derived from that string.
movies_df['genres'] = movies_df['genres'].apply(_genre_names)
movies_df['genres_str'] = movies_df['genres'].apply(" ".join)
movies_df['assigned_emotion'] = movies_df['genres_str'].apply(map_genre_to_emotion)

In [80]:
# Features are the cleaned sentences; targets are their emotion labels.
X = emotion_df['Cleaned_Text']
y = emotion_df['Emotion']

# Hold out 20% of the rows; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# TF-IDF vectorisation feeding a multinomial naive Bayes classifier.
model_pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('nb', MultinomialNB())
])

model_pipeline.fit(X_train, y_train)

# Score the fitted pipeline on the held-out rows so the quality of the
# predictions used for recommendations is visible in the notebook.
test_accuracy = accuracy_score(y_test, model_pipeline.predict(X_test))
print(f"Held-out accuracy: {test_accuracy:.3f}")

In [81]:
def recommend_movies(user_input, n_recommendations=5, random_state=None):
    """Print movie suggestions matching the emotion predicted for a sentence.

    The sentence is normalised with ``clean_text``, classified by
    ``model_pipeline``, and a random sample of the rows of ``movies_df`` whose
    'assigned_emotion' equals the prediction is printed, one title per line
    with its 'vote_average'.

    Parameters
    ----------
    user_input : str
        Free-text description of how the user feels.
    n_recommendations : int, optional
        Maximum number of movies to print (default 5). Fewer are printed when
        fewer movies carry the predicted emotion.
    random_state : int or None, optional
        Passed to ``DataFrame.sample``; set it to get the same picks on every
        call. ``None`` (default) keeps the selection random.

    Returns
    -------
    None
        Results are printed, not returned.
    """
    cleaned_input = clean_text(user_input)
    predicted_emotion = model_pipeline.predict([cleaned_input])[0]

    recommended_movies = movies_df[movies_df['assigned_emotion'] == predicted_emotion]

    print(f"\nDetected Emotion: {predicted_emotion.upper()}")
    print("-" * 30)

    if recommended_movies.empty:
        print("No movies found.")
        return

    # Never ask sample() for more rows than are available.
    sample_size = min(n_recommendations, len(recommended_movies))
    top_recom = recommended_movies.sample(n=sample_size, random_state=random_state)
    for _, row in top_recom.iterrows():
        # \U0001F3AC is the clapper-board emoji, written as an escape so the
        # source stays ASCII and cannot be mangled by a wrong file encoding.
        print(f"\U0001F3AC {row['original_title']} | Rating: {row['vote_average']}")

# Ask for a free-text mood description. input() waits for the user to type a
# line, so this cell will not complete in an unattended run.
user_feeling = input("How are you feeling right now?: ")
# Prints the detected emotion and the sampled movie titles; nothing is returned.
recommend_movies(user_feeling)

How are you feeling right now?: i am very hungry

Detected Emotion: JOY
------------------------------
🎬 Everyone Says I Love You | Rating: 6.5
🎬 Puss in Boots | Rating: 6.4
🎬 Small Time Crooks | Rating: 6.4
🎬 Renaissance | Rating: 6.7
🎬 Perrier’s Bounty | Rating: 5.1
