# Sentiment Analysis for Movie Recommender

## Enhancing recommendations with TMDB review sentiment analysis

In [None]:
import pandas as pd
import numpy as np
import requests
from transformers import pipeline
from tqdm import tqdm
import os

In [None]:
# Root URL shared by every TMDB v3 request built in this notebook.
TMDB_BASE_URL = 'https://api.themoviedb.org/3'

# The key is read from the environment; the placeholder is used when the
# TMDB_API_KEY variable is not set.
TMDB_API_KEY = os.environ.get('TMDB_API_KEY', 'YOUR_API_KEY')

def get_movie_reviews(tmdb_id, max_reviews=5):
    """Fetch review texts for one movie from the TMDB reviews endpoint.

    This is a best-effort helper: any failure is reported with ``print`` and
    an empty list is returned, so a single bad movie does not abort a batch.

    Args:
        tmdb_id: TMDB movie identifier interpolated into the request URL.
        max_reviews: Maximum number of review texts to return.

    Returns:
        A list of at most ``max_reviews`` non-empty review bodies, or ``[]``
        when the request fails, the status is not 200, or no review has text.
    """
    url = f'{TMDB_BASE_URL}/movie/{tmdb_id}/reviews'
    params = {'api_key': TMDB_API_KEY}
    try:
        r = requests.get(url, params=params, timeout=10)
        if r.status_code != 200:
            # Surface the status instead of failing silently: a missing or
            # invalid API key would otherwise look like "no reviews".
            print(f'Error: TMDB returned HTTP {r.status_code} for movie {tmdb_id}')
            return []
        contents = [x.get('content') for x in r.json().get('results', [])]
        # Drop reviews without text before truncating; an empty body carries
        # no sentiment and would only be counted as a neutral review.
        return [c for c in contents if c][:max_reviews]
    except Exception as e:
        # Deliberately broad: network errors, invalid JSON and unexpected
        # payload shapes all degrade to "no reviews".
        message = str(e)
        if TMDB_API_KEY:
            # Exception text from the HTTP layer can contain the request URL,
            # including the api_key query parameter; keep it out of the output.
            message = message.replace(TMDB_API_KEY, '***')
        print(f'Error: {message}')
        return []

In [None]:
# Build the classifier once at cell level; analyze_sentiment() reuses this
# module-level object for every review.
print('Loading sentiment model...')
sentiment_analyzer = pipeline(
    'sentiment-analysis',
    model='distilbert-base-uncased-finetuned-sst-2-english',
    max_length=512,
    truncation=True,
)
print('Ready!')

In [None]:
def analyze_sentiment(text):
    """Score one review text with the module-level ``sentiment_analyzer``.

    Args:
        text: Review text. ``None``, an empty string or a whitespace-only
            string is treated as carrying no sentiment.

    Returns:
        The classifier's score when the predicted label is ``'POSITIVE'``,
        one minus that score for any other label, and the neutral value
        ``0.5`` when there is no text or the analysis fails.
    """
    if not text:
        return 0.5
    if isinstance(text, str) and not text.strip():
        # Whitespace-only text has nothing to classify.
        return 0.5
    try:
        # Only the first 1000 characters are handed to the model.
        snippet = text[:1000]
        result = sentiment_analyzer(snippet)[0]
    except Exception:
        # Best-effort: a failed analysis counts as neutral. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate so a long-running loop can still be stopped.
        return 0.5
    if result['label'] == 'POSITIVE':
        return result['score']
    return 1 - result['score']

def get_movie_sentiment(reviews):
    """Average the per-review sentiment scores of a movie.

    Args:
        reviews: Collection of review texts; may be empty or ``None``.

    Returns:
        The mean of ``analyze_sentiment`` over ``reviews``, or ``0.5`` when
        there are no reviews to score.
    """
    if reviews:
        per_review = list(map(analyze_sentiment, reviews))
        return np.mean(per_review)
    # Nothing to score: fall back to the neutral value.
    return 0.5

In [None]:
# Read the two dataset tables and join them on their shared movieId column,
# so each movie row also carries the columns from links.csv.
links = pd.read_csv('../ml-latest-small/links.csv')
movies = pd.read_csv('../ml-latest-small/movies.csv')
movies_tmdb = pd.merge(movies, links, on='movieId')
print(f'Loaded {len(movies_tmdb)} movies')

In [None]:
# Number of movies to score in this run.
SAMPLE = 50

# Filter first, then cap the sample size by the number of rows that actually
# have a TMDB id. Capping by len(movies_tmdb) makes .sample() raise when fewer
# than SAMPLE rows survive the filter.
candidates = movies_tmdb[movies_tmdb['tmdbId'].notna()]
sample = candidates.sample(n=min(SAMPLE, len(candidates)))

data = []
for _, row in tqdm(sample.iterrows(), total=len(sample)):
    # tmdbId is cast to int before it is interpolated into the request URL.
    reviews = get_movie_reviews(int(row['tmdbId']))
    sentiment = get_movie_sentiment(reviews)
    data.append({'movieId': row['movieId'], 'sentiment_score': sentiment, 'num_reviews': len(reviews)})

os.makedirs('../models', exist_ok=True)
# Explicit columns keep the CSV header present even when no movie was scored.
df = pd.DataFrame(data, columns=['movieId', 'sentiment_score', 'num_reviews'])
df.to_csv('../models/movie_sentiments.csv', index=False)
print('Done!')