In [6]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load dataset
movies_df = pd.read_csv("tmdb_5000_movies.csv")

# Select relevant columns for reviews
movies_df = movies_df[['id', 'original_title', 'vote_average', 'vote_count']]

# Normalize review-related columns
scaler = MinMaxScaler()
movies_df[['vote_average', 'vote_count']] = scaler.fit_transform(movies_df[['vote_average', 'vote_count']])

# Function to compute recommendations based on reviews and user ratings
def recommend_by_reviews_with_ratings(movie_ratings, num_recommendations=10):
    """
    Args:
        movie_ratings: A dictionary where keys are movie titles and values are user ratings.
        num_recommendations: Number of recommendations to return.
        
    Returns:
        A list of recommended movies or an error message.
    """
    # Validate input movies
    input_movies = movies_df[movies_df['original_title'].str.lower().isin([title.lower() for title in movie_ratings.keys()])]
    if input_movies.empty:
        return None, "None of the input movies were found in the dataset."

    # Normalize user ratings to a scale of 0-1
    max_rating = max(movie_ratings.values())
    min_rating = min(movie_ratings.values())
    normalized_ratings = {title: (rating - min_rating) / (max_rating - min_rating) for title, rating in movie_ratings.items()}

    # Compute the weighted profile
    input_movies['weight'] = input_movies['original_title'].str.lower().map(
        lambda title: normalized_ratings.get(title, 0)
    )
    weighted_profile = (input_movies[['vote_average', 'vote_count']].T * input_movies['weight']).sum(axis=1)
    weighted_profile = weighted_profile.values.reshape(1, -1)

    # Calculate cosine similarity with all movies
    similarity_scores = cosine_similarity(weighted_profile, movies_df[['vote_average', 'vote_count']])
    similarity_scores = similarity_scores.flatten()

    # Rank movies based on similarity, excluding input movies
    movies_df['similarity'] = similarity_scores
    recommendations = (
        movies_df[~movies_df['original_title'].str.lower().isin([title.lower() for title in movie_ratings.keys()])]
        .sort_values(by='similarity', ascending=False)
        .head(num_recommendations)
    )

    return recommendations['original_title'].tolist(), None

# Example Usage
user_ratings = {"Avatar": 4, "Titanic": 5}
recommendations, error = recommend_by_reviews_with_ratings(user_ratings)
if error:
    print(error)
else:
    print("Recommended Movies:", recommendations)


Recommended Movies: ["Pirates of the Caribbean: At World's End", 'Skin Trade', 'Awake', 'Polisse', 'Beneath Hill 60', 'The I Inside', 'Ultramarines: A Warhammer 40,000 Movie', 'Crocodile Dundee', 'Veronika Decides to Die', 'La Fille du RER']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  input_movies['weight'] = input_movies['original_title'].str.lower().map(
