In [1]:
import pandas as pd
import numpy as np
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from textblob import TextBlob

In [2]:
# Read the movies table and the credits table from the working directory.
movies_df, credits_df = (
    pd.read_csv(csv_name)
    for csv_name in ("tmdb_5000_movies.csv", "tmdb_5000_credits.csv")
)

In [3]:
# Attach the credits columns to each movie: movies.id must equal credits.movie_id.
# The join type is the pandas default (inner), spelled out here for the reader.
# NOTE(review): later cells read `title_x`, so both tables presumably carry a
# `title` column that pandas suffixes on merge -- confirm against the CSVs.
movies_df = movies_df.merge(
    credits_df,
    how='inner',
    left_on='id',
    right_on='movie_id',
)

In [4]:
# Flatten a serialized list of {"name": ...} records into one text field
def parse_features(data):
    """Return the ``name`` values of a serialized list of records, space-joined.

    Parameters
    ----------
    data : object
        Normally a string holding a list of dicts, each with a ``name`` key
        (JSON, or a Python-literal repr of the same structure). Anything that
        is not a string (``None``, ``NaN``, ...) is treated as "no data".

    Returns
    -------
    str
        The names in their original order separated by single spaces, or
        ``''`` when the input is not a string, is blank, or does not decode
        to a list. Entries that are not dicts or lack a string ``name`` are
        skipped.

    Raises
    ------
    ValueError, SyntaxError
        If the text is neither valid JSON nor a valid Python literal.
    """
    import json  # local import so this cell does not depend on an edited import cell

    if not isinstance(data, str) or not data.strip():
        return ''

    try:
        # JSON first: ast.literal_eval rejects JSON's null / true / false.
        records = json.loads(data)
    except ValueError:
        # Not JSON (e.g. a single-quoted Python repr): keep the original parser
        # as a fallback so inputs that used to work still do.
        records = ast.literal_eval(data)

    if not isinstance(records, list):
        return ''

    return ' '.join(
        item['name']
        for item in records
        if isinstance(item, dict) and isinstance(item.get('name'), str)
    )

In [5]:
def _first_three_cast_names(raw_cast):
    """Space-join the ``name`` of the first three entries of a literal-encoded cast list.

    Non-string input (e.g. a missing value) yields an empty string.
    """
    if not isinstance(raw_cast, str):
        return ''
    cast_entries = ast.literal_eval(raw_cast)
    return ' '.join(entry['name'] for entry in cast_entries[:3])


def _director_names(raw_crew):
    """Space-join the ``name`` of every crew entry whose ``job`` is ``'Director'``.

    Non-string input (e.g. a missing value) yields an empty string.
    """
    if not isinstance(raw_crew, str):
        return ''
    crew_entries = ast.literal_eval(raw_crew)
    return ' '.join(
        entry['name'] for entry in crew_entries if entry['job'] == 'Director'
    )


# Replace each serialized list column with a plain space-separated string of names.
for list_column in ('genres', 'keywords'):
    movies_df[list_column] = movies_df[list_column].apply(parse_features)
movies_df['cast'] = movies_df['cast'].apply(_first_three_cast_names)
movies_df['crew'] = movies_df['crew'].apply(_director_names)

In [6]:
# Build the single text document that represents one movie
def combine_features(row):
    """Join a row's genres, keywords, cast and crew strings into one string.

    ``row`` is any mapping-like object indexable by those four keys (for
    example a DataFrame row). The values are concatenated in that fixed
    order, separated by single spaces.
    """
    ordered_fields = ('genres', 'keywords', 'cast', 'crew')
    return ' '.join(row[field] for field in ordered_fields)

# Apply combine_features to every row (axis='columns' is the named form of axis=1).
movies_df['features'] = movies_df.apply(combine_features, axis='columns')

In [7]:
# Turn each movie's combined text into a TF-IDF weighted term vector,
# ignoring scikit-learn's built-in English stop words.
feature_corpus = movies_df['features']
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(feature_corpus)


In [8]:
# Pairwise cosine similarity between every pair of movies.
# With a single argument, scikit-learn compares the rows of the matrix with each other.
cosine_sim = cosine_similarity(tfidf_matrix)

In [9]:
# Classify free text as positive / negative / neutral
def get_sentiment(user_input):
    """Label ``user_input`` by the sign of its TextBlob polarity score.

    Returns ``"positive"`` for a score above zero, ``"negative"`` for a score
    below zero, and ``"neutral"`` otherwise.
    """
    polarity = TextBlob(user_input).sentiment.polarity
    if polarity > 0:
        return "positive"
    if polarity < 0:
        return "negative"
    return "neutral"

In [10]:
# Content-based recommendations, trimmed according to the user's mood
def recommend_movies(movie_title, user_mood, num_recommendations=5):
    """Recommend the movies most similar to ``movie_title``.

    Reads the notebook-level ``movies_df`` (column ``title_x``) and the
    ``cosine_sim`` matrix, whose row/column order is assumed to match the row
    order of ``movies_df``.

    Parameters
    ----------
    movie_title : str
        Exact title to look up in ``movies_df['title_x']``. If several rows
        share the title, the first one is used.
    user_mood : str
        ``"positive"`` keeps the first three of the ranked candidates,
        ``"negative"`` keeps the last three of them, and any other value
        keeps all of them.
    num_recommendations : int, default 5
        Number of most-similar candidates to rank before the mood filter.
        Values below zero are treated as zero.

    Returns
    -------
    list of str or str
        Recommended titles ordered from most to least similar, or the
        message ``"Movie not found in dataset."`` when the title is absent.
    """
    # Work with row positions rather than index labels: cosine_sim and .iloc are
    # positional, and labels only coincide with positions for a default index.
    title_hits = np.flatnonzero((movies_df['title_x'] == movie_title).to_numpy())
    if title_hits.size == 0:
        return "Movie not found in dataset."
    query_pos = int(title_hits[0])

    scores = np.asarray(cosine_sim[query_pos], dtype=float)
    # Stable descending sort: ties keep their original row order.
    ranked_positions = np.argsort(-scores, kind='stable').tolist()

    # Exclude the query movie explicitly. Assuming it sits at rank 0 is wrong
    # when another movie ties with it (identical features) or when its own
    # feature vector is empty, in which case it would recommend itself.
    candidates = [pos for pos in ranked_positions if pos != query_pos]
    top_positions = candidates[:max(num_recommendations, 0)]

    recommended_movies = movies_df['title_x'].iloc[top_positions].tolist()

    if user_mood == "positive":
        return recommended_movies[:3]
    if user_mood == "negative":
        return recommended_movies[-3:]
    return recommended_movies

In [11]:
# Example: derive the mood from a short message, then fetch matching recommendations
movie_name = "The Dark Knight"
user_review = "I am feeling great today!"
user_mood = get_sentiment(user_review)
recommended_movies = recommend_movies(movie_title=movie_name, user_mood=user_mood)
print(f"Movies similar to '{movie_name}' based on mood ({user_mood}):", recommended_movies)

Movies similar to 'The Dark Knight' based on mood (positive): ['Batman Begins', 'The Dark Knight Rises', 'Batman Returns']
