In [4]:
# Install required libraries if they are not already installed
!pip install scikit-learn pandas

# Imports
import pandas as pd
import ast
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Read the two source CSV files into DataFrames.
# low_memory=False makes pandas parse the metadata file in a single pass
# rather than in chunks.
movies_df = pd.read_csv(
    filepath_or_buffer="movies_metadata (5).csv",
    low_memory=False,
)
reviews_df = pd.read_csv(filepath_or_buffer="top_1000_movie_reviews (1).csv")

# Turn the stringified genre list into a single space-separated string
def extract_genres(genre_str):
    """Return the genre names found in *genre_str* joined by single spaces.

    *genre_str* is expected to be the text of a Python literal holding a
    list of dicts that each carry a ``'name'`` key, e.g.
    ``"[{'id': 18, 'name': 'Drama'}]"``.

    This is a best-effort parser: any value that cannot be parsed into that
    shape (a missing/NaN cell, malformed text, entries without ``'name'``)
    yields an empty string instead of raising.
    """
    try:
        genres = ast.literal_eval(genre_str)
        return " ".join(g['name'] for g in genres)
    except (ValueError, SyntaxError, TypeError, KeyError, RecursionError):
        # ValueError / SyntaxError / RecursionError: literal_eval rejected
        # the input; TypeError / KeyError: the parsed value is not a list of
        # dicts with string 'name' entries. Only these are swallowed so that
        # KeyboardInterrupt and genuine programming errors still surface.
        return ""

# Add the flattened genre names as their own column.
movies_df = movies_df.assign(
    clean_genres=movies_df['genres'].apply(extract_genres),
)

# Build the text that gets vectorised: the overview (empty when missing)
# followed by the genre names.
movies_df = movies_df.assign(
    description=lambda frame: frame['overview'].fillna('') + " " + frame['clean_genres'],
)

# Hand-picked vocabulary for each supported mood: mood name -> one string of
# space-separated keywords.
mood_keywords = dict([
    ("Relaxed", "calm soothing peaceful mellow gentle serene light-hearted"),
    ("Excited", "thrilling action-packed intense wild energetic fast-paced explosive"),
    ("Focused", "serious intellectual thoughtful deep cerebral complex intricate"),
    ("Adventurous", "adventure journey explore daring bold risky expedition"),
    ("Curious", "mystery investigation secrets unknown twist puzzle reveal"),
    ("Happy", "funny joyful cheerful heartwarming delightful amusing uplifting"),
    ("Nostalgic", "classic retro vintage memory emotional sentimental past"),
    ("Inspired", "motivational uplifting brave triumph perseverance heroic courageous"),
])

# Fit a TF-IDF model (English stop words removed) on the movie descriptions;
# the result has one row per movie.
tfidf = TfidfVectorizer(stop_words='english')
movie_tfidf_matrix = tfidf.fit_transform(movies_df['description'])

# Project each mood's keyword string through the same fitted vectorizer so
# that mood vectors and movie vectors share one vocabulary.
mood_vectors = dict(
    (mood_name, tfidf.transform([phrase]))
    for mood_name, phrase in mood_keywords.items()
)

# Main function to recommend movies
def recommend_movies(mood, top_n=10):
    """Return the movies whose descriptions are most similar to *mood*.

    Args:
        mood: A key of the module-level ``mood_vectors`` mapping.
        top_n: Maximum number of movies to return.

    Returns:
        A DataFrame with the columns ``title``, ``overview`` and
        ``clean_genres``, ordered from highest to lowest cosine similarity
        between the mood vector and each movie's TF-IDF vector. The frame
        is empty when *mood* is unknown or *top_n* is not positive. Movies
        with a similarity of zero are not filtered out, so a mood whose
        keywords match nothing still yields *top_n* rows.
    """
    result_columns = ['title', 'overview', 'clean_genres']

    # A non-positive top_n must short-circuit: the slice `[-0:]` below is
    # the same as `[0:]` and would select every movie instead of none.
    if top_n <= 0 or mood not in mood_vectors:
        return pd.DataFrame(columns=result_columns)

    mood_vector = mood_vectors[mood]
    cosine_sim = cosine_similarity(mood_vector, movie_tfidf_matrix).flatten()

    # argsort is ascending, so the best matches are the last top_n
    # positions; reverse them to put the strongest match first.
    top_indices = cosine_sim.argsort()[-top_n:][::-1]

    return movies_df.iloc[top_indices][result_columns]

# Demo: show the five best matches for the "Excited" mood
recommend_movies(mood="Excited", top_n=5)




Unnamed: 0,title,overview,clean_genres
40376,I'll Sleep When I'm Dead,An energetic and fast-paced bio-doc that exami...,Documentary
19121,On the Road,Dean and Sal are the portrait of the Beat Gene...,Adventure Drama
30589,Run,"RUN is a fast-paced, action/thriller, which ce...",Action Drama
28506,Vuonna 85,"The movie is about the Lokomo laborer, and gro...",Comedy
35264,Two Step,Two Step is a fast-paced Texas thriller in whi...,Thriller Crime Drama
