# In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

def setup_content_based(movies):
    """Build a pairwise cosine-similarity matrix from the movies' genres.

    The ``genres`` column of ``movies`` is vectorised with TF-IDF (English
    stop words removed, tokens being any run of word characters, including
    single-character ones) and the resulting vectors are compared with each
    other via cosine similarity.

    Args:
        movies: Table exposing a ``genres`` column of text.

    Returns:
        The similarity matrix, or ``None`` if any step raised (the error is
        printed rather than propagated).
    """
    similarity = None
    try:
        genre_vectors = TfidfVectorizer(
            token_pattern=r'(?u)\b\w+\b',
            stop_words='english',
        ).fit_transform(movies['genres'])
        similarity = cosine_similarity(genre_vectors)
    except Exception as e:
        # Best-effort setup: report the problem and hand back None.
        print(f"Error setting up content-based filtering: {str(e)}")
        similarity = None
    return similarity

def get_content_recommendations(movie_title, movies, cosine_sim, n=10):
    """Return up to ``n`` movies most similar to ``movie_title``.

    Args:
        movie_title: Exact title to look up in ``movies['title']``. When the
            title occurs more than once, the first matching row is used.
        movies: DataFrame with ``movieId``, ``title`` and ``genres`` columns.
            Row *positions* are assumed to line up with the rows of
            ``cosine_sim`` -- TODO confirm against how the matrix was built.
        cosine_sim: Similarity matrix indexed by row position.
        n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``movieId``, ``title``, ``genres`` and
        ``content_score``, ordered by descending score (ties keep the
        catalogue order). It is empty when the title is unknown or when any
        error occurs; errors are printed, not raised.
    """
    columns = ['movieId', 'title', 'genres', 'content_score']
    try:
        # Work with row positions, not index labels: both cosine_sim and
        # .iloc below are positional, so a non-default index on `movies`
        # must not leak into the lookup.
        matches = np.flatnonzero((movies['title'] == movie_title).to_numpy())
        if matches.size == 0:
            print(f"Movie '{movie_title}' not found.")
            return pd.DataFrame(columns=columns)
        pos = matches[0]

        scores = np.asarray(cosine_sim[pos], dtype=float).ravel()
        # Stable descending sort keeps catalogue order among equal scores.
        order = np.argsort(-scores, kind='stable')
        # Drop the query movie explicitly. Slicing off the first sorted entry
        # is wrong whenever another movie ties with it (identical genres give
        # a similarity of 1.0) and happens to sort ahead of it.
        order = order[order != pos][:max(n, 0)]

        picked = movies.iloc[order][['movieId', 'title', 'genres']]
        return picked.assign(content_score=scores[order])
    except Exception as e:
        # Best-effort API: report the problem and return an empty result.
        print(f"Error in content-based recommendations: {str(e)}")
        return pd.DataFrame(columns=columns)

def setup_collaborative(ratings):
    """Train an SVD collaborative-filtering model on the given ratings.

    The ``userId``/``movieId``/``rating`` columns are loaded with a rating
    scale of 0.5 to 5.0 and split 80/20 into train and test parts (seed 42).
    An SVD model (seed 42) is fitted on the training part and evaluated on
    the held-out part.

    Args:
        ratings: Table exposing ``userId``, ``movieId`` and ``rating`` columns.

    Returns:
        A ``(model, predictions, testset)`` tuple, or ``(None, None, None)``
        if any step raised (the error is printed rather than propagated).
    """
    outcome = (None, None, None)
    try:
        scale = Reader(rating_scale=(0.5, 5.0))
        dataset = Dataset.load_from_df(
            ratings[['userId', 'movieId', 'rating']],
            scale,
        )
        train_part, held_out = train_test_split(
            dataset, test_size=0.2, random_state=42
        )
        model = SVD(random_state=42)
        model.fit(train_part)
        outcome = (model, model.test(held_out), held_out)
    except Exception as e:
        # Best-effort setup: report the problem and signal failure with Nones.
        print(f"Error setting up collaborative filtering: {str(e)}")
        outcome = (None, None, None)
    return outcome

def get_collab_recommendations(user_id, ratings, movies, svd, n=10):
    """Return the ``n`` unrated movies with the highest predicted rating.

    Args:
        user_id: User to recommend for; must occur in ``ratings['userId']``.
        ratings: DataFrame with ``userId`` and ``movieId`` columns.
        movies: DataFrame with ``movieId``, ``title`` and ``genres`` columns.
        svd: Fitted model exposing ``test(testset)``, which returns
            predictions carrying ``iid`` and ``est`` attributes.
        n: Maximum number of recommendations to return.

    Returns:
        DataFrame with columns ``movieId``, ``title``, ``genres`` and
        ``collab_score``, ordered by descending predicted rating. It is empty
        when the user is unknown, has nothing left to rate, or any error
        occurs; errors are printed, not raised.
    """
    columns = ['movieId', 'title', 'genres', 'collab_score']
    try:
        user_ratings = ratings[ratings['userId'] == user_id]
        if user_ratings.empty:
            print(f"User ID {user_id} not found.")
            return pd.DataFrame(columns=columns)

        unseen = np.setdiff1d(
            movies['movieId'].unique(), user_ratings['movieId'].values
        )
        if not unseen.size:
            print(f"No unrated movies for user {user_id}.")
            return pd.DataFrame(columns=columns)

        # The third field is a placeholder "true" rating; only the estimate
        # (`est`) of each prediction is read below.
        preds = svd.test([[user_id, mid, 4.0] for mid in unseen])
        top_preds = sorted(preds, key=lambda p: p.est, reverse=True)[:n]

        # Select catalogue rows in *rank* order so each score lands on the
        # movie it was predicted for. Filtering with isin() would return rows
        # in catalogue order and misalign them with the sorted scores.
        # Duplicate movieId rows are collapsed so lengths always match.
        catalog = movies.drop_duplicates(subset='movieId')
        positions = pd.Index(catalog['movieId']).get_indexer(
            [p.iid for p in top_preds]
        )
        ranked = catalog.iloc[positions][['movieId', 'title', 'genres']]
        return ranked.assign(collab_score=[p.est for p in top_preds])
    except Exception as e:
        # Best-effort API: report the problem and return an empty result.
        print(f"Error in collaborative recommendations: {str(e)}")
        return pd.DataFrame(columns=columns)