In [1]:
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy


ratings = pd.read_csv('ratings.csv')
movies = pd.read_csv('movies.csv')

# Preprocess genre data (x|y) and rating 
movies['genres'] = movies['genres'].apply(lambda x: x.split('|'))
reader = Reader(rating_scale=(0.5, 5.0))

# dataset into Surprise
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Split the dataset into training and testing sets
trainset, testset = train_test_split(data, test_size=0.3)


model = SVD()
model.fit(trainset)

# Function to get genre-based recommendations
def get_genre_recommendations(user_id, genre, n=10):
    # Filter movies by genre
    genre_movies = movies[movies['genres'].apply(lambda x: genre in x)]
    genre_movie_ids = genre_movies['movieId'].tolist()
    
    # Predict ratings for genre movies
    predictions = []
    for movie_id in genre_movie_ids:
        pred = model.predict(user_id, movie_id)
        predictions.append((movie_id, pred.est))  # predictions->(movie_id, estimated_rating)

    # Sort predictions by estimated rating
    recommendations = sorted(predictions, key=lambda x: x[1], reverse=True)
    
    return recommendations[:n]


# Output recommendations
user_recommendations = get_genre_recommendations(user_id=5, genre='Comedy', n=5)
recommended_movie_ids = [movie_id for movie_id, _ in user_recommendations]
recommended_movies = movies[movies['movieId'].isin(recommended_movie_ids)]

print(recommended_movies)


       movieId                                      title  \
475        480                       Jurassic Park (1993)   
1258      1291  Indiana Jones and the Last Crusade (1989)   
1922      2011          Back to the Future Part II (1989)   
67645   217971           Primal: Tales of Savagery (2020)   
71308   229773             Adventure Time: Islands (2017)   

                                        genres  
475      [Action, Adventure, Sci-Fi, Thriller]  
1258                       [Action, Adventure]  
1922               [Adventure, Comedy, Sci-Fi]  
67645           [Action, Adventure, Animation]  
71308  [Adventure, Animation, Comedy, Fantasy]  


In [3]:
# Evaluate the model
test_predictions = model.test(testset)

# Compute RMSE
rmse = accuracy.rmse(test_predictions)
print(f"RMSE: {rmse}")

# Compute MAE
mae = accuracy.mae(test_predictions)
print(f"MAE: {mae}")

RMSE: 0.7782
RMSE: 0.7782070819626404
MAE:  0.5852
MAE: 0.5851857275920634
