In [2]:
import os
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy


In [3]:
# Resolve the data files against the current working directory.
# os.getcwd() is the process's working directory, which is not necessarily the
# folder that contains this notebook.
current_dir = os.getcwd()
ratings_path = os.path.join(current_dir, 'rating.csv')
movies_path = os.path.join(current_dir, 'movie.csv')

# Load the ratings and the movie metadata
ratings = pd.read_csv(ratings_path)
movies = pd.read_csv(movies_path)

# Train on a fixed-seed random subset of the ratings. The sample size is capped
# at the number of available rows so a small ratings file does not make
# DataFrame.sample raise ValueError.
# NOTE(review): the model only knows users/items that appear in this sample.
# Surprise falls back to a default estimate when it cannot build a
# neighbourhood prediction; the identical 3.51971875 estimates in the outputs
# recorded further down look like that fallback -- confirm, and consider a
# larger or per-user sample if so.
ratings_sample = ratings.sample(n=min(20000, len(ratings)), random_state=42)

# Wrap the (user, item, rating) triples for the Surprise library
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings_sample[['userId', 'movieId', 'rating']], reader)

# Hold out 20% of the sampled ratings for evaluation
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Item-based KNN collaborative filtering with cosine similarity
algo = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})

# Fit on the training split
algo.fit(trainset)

# Predict the held-out ratings
predictions = algo.test(testset)

# accuracy.rmse prints the score itself unless verbose=False; silencing it
# avoids reporting the same RMSE line twice.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse:.4f}")

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 1.0497
RMSE: 1.0497


In [4]:
# def get_movie_recommendations(user_id, num_recommendations=5):
#     # Get a list of all unique movie IDs
#     movie_ids = movies['movieId'].unique()
# 
#     # Get the list of movie IDs the user has already rated
#     user_rated_movies = ratings[ratings['userId'] == user_id]['movieId'].tolist()
# 
#     # Filter out movies the user has already rated
#     unrated_movies = [movie_id for movie_id in movie_ids if movie_id not in user_rated_movies]
# 
#     # Predict ratings for all unrated movies
#     predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movies]
# 
#     # Sort predictions by estimated rating in descending order
#     recommendations = sorted(predictions, key=lambda x: x.est, reverse=True)[:num_recommendations]
# 
#     # Get the movie titles for the recommended movie IDs
#     recommended_movie_ids = [int(pred.iid) for pred in recommendations]
#     recommended_movies = movies[movies['movieId'].isin(recommended_movie_ids)]
# 
#     return recommended_movies[['movieId', 'title']]
import re 
import random
def get_movie_recommendations(user_id, num_recommendations=5, year_range=None, random_state=None):
    """Suggest unrated movies for a user, at most one per distinct ``genres`` value.

    Relies on the module-level ``movies`` and ``ratings`` DataFrames and the
    fitted ``algo`` model. Prints the single highest-estimated unrated movie,
    then prints and returns the selected recommendations.

    Side effect: (re)writes a float ``year`` column on the shared ``movies``
    frame, parsed from the first "(YYYY)" group in each title (NaN if absent).

    Args:
        user_id: Raw user id, matched against ``ratings['userId']``.
        num_recommendations: Maximum number of recommendations to return.
        year_range: Optional ``(start_year, end_year)`` pair, inclusive on both
            ends. Movies without a parsed year are excluded when it is given.
        random_state: Optional seed for the per-genre random pick. When None,
            the global ``random`` module state is used.

    Returns:
        A list of ``(movie_id, title, genres, estimated_rating)`` tuples sorted
        by estimated rating, highest first. Empty when the user has no unrated
        movies left.
    """
    # Extract the release year from the title
    movies['year'] = movies['title'].str.extract(r'\((\d{4})\)', expand=False).astype(float)

    # A set makes the "already rated?" check O(1) per movie instead of a list scan
    rated_ids = set(ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist())
    unrated_movies = [movie_id for movie_id in movies['movieId'].unique() if movie_id not in rated_ids]

    # Nothing to recommend; max() on an empty list would raise ValueError below
    if not unrated_movies:
        print(f"No unrated movies available for User {user_id}.")
        return []

    # Estimate a rating for every unrated movie once; reused for the picks below
    predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movies]
    estimate_by_id = {int(pred.iid): pred.est for pred in predictions}

    # Report the single best estimate
    max_rated_movie = max(predictions, key=lambda pred: pred.est)
    max_movie_id = int(max_rated_movie.iid)
    max_movie_title = movies.loc[movies['movieId'] == max_movie_id, 'title'].values[0]

    print(f"Highest predicted rating movie for User {user_id}: '{max_movie_title}' with a predicted rating of {max_rated_movie.est:.2f}")

    # Apply the year filter if provided (NaN years never satisfy the comparison)
    if year_range:
        start_year, end_year = year_range
        movies_filtered_by_year = movies[movies['year'].between(start_year, end_year)]
    else:
        movies_filtered_by_year = movies

    # Seeded generator for reproducible picks, otherwise the global module state
    rng = random if random_state is None else random.Random(random_state)

    # One random unrated movie per genres value. sort=False keeps the genres in
    # order of first appearance, which decides ties in the stable sort below.
    genre_recommendations = []
    for _, genre_movies in movies_filtered_by_year.groupby('genres', sort=False):
        pool = [movie_id for movie_id in genre_movies['movieId'].tolist() if movie_id not in rated_ids]
        if pool:
            random_movie_id = rng.choice(pool)
            genre_recommendations.append((random_movie_id, estimate_by_id[random_movie_id]))

    # Keep the highest estimates
    genre_recommendations.sort(key=lambda pair: pair[1], reverse=True)
    genre_recommendations = genre_recommendations[:num_recommendations]

    # Look up title and genres for each pick, print it, and collect the result
    recommended_movies = []

    print("\nMovies to Watch Next:")

    for movie_id, rating in genre_recommendations:
        title, genre = movies.loc[movies['movieId'] == int(movie_id), ['title', 'genres']].values[0]
        print(f"Title: '{title}'\n, Genre: {genre}\n, Predicted Rating: {rating:.2f}\n")
        recommended_movies.append((movie_id, title, genre, rating))

    return recommended_movies




In [5]:
#%%
# Try the recommender for one user, restricted to a release-year window
user_id = 1  # example user
# Edit the [first_year, last_year] pair below to change the window
recommended_movies = get_movie_recommendations(
    user_id,
    num_recommendations=5,
    year_range=[2007, 2010],
)
print("Recommended movies for User", user_id)
print(recommended_movies)

Highest predicted rating movie for User 1: 'Toy Story (1995)' with a predicted rating of 3.52
Title: 'Asterix and the Vikings (Astérix et les Vikings) (2006)'
, Genre: Adventure|Animation|Children|Comedy|Fantasy
, Predicted Rating: 3.52

Title: 'Back to the Secret Garden (2001)'
, Genre: Adventure|Children|Fantasy
, Predicted Rating: 3.52

Title: 'Wedding Date, The (2005)'
, Genre: Comedy|Romance
, Predicted Rating: 3.52

Title: 'Tamara Drewe (2010)'
, Genre: Comedy|Drama|Romance
, Predicted Rating: 3.52

Title: 'Sex Up Your Life! (2005)'
, Genre: Comedy
, Predicted Rating: 3.52

Recommended movies for User 1
[(91355, 'Asterix and the Vikings (Astérix et les Vikings) (2006)', 'Adventure|Animation|Children|Comedy|Fantasy', 3.51971875), (105819, 'Back to the Secret Garden (2001)', 'Adventure|Children|Fantasy', 3.51971875), (31433, 'Wedding Date, The (2005)', 'Comedy|Romance', 3.51971875), (80775, 'Tamara Drewe (2010)', 'Comedy|Drama|Romance', 3.51971875), (131068, 'Sex Up Your Life! (200

In [6]:
# Recommendations for a second user, with no release-year restriction
user_id2 = 2  # example user
recommended_movies = get_movie_recommendations(
    user_id2,
    num_recommendations=5,
)
print("Recommended movies for User", user_id2)
print(recommended_movies)

Highest predicted rating movie for User 2: 'Toy Story (1995)' with a predicted rating of 3.52
Title: 'The Magic Crystal (2011)'
, Genre: Adventure|Animation|Children|Comedy|Fantasy
, Predicted Rating: 3.52

Title: 'Thief Lord, The (2006)'
, Genre: Adventure|Children|Fantasy
, Predicted Rating: 3.52

Title: 'Decoy Bride, The (2011)'
, Genre: Comedy|Romance
, Predicted Rating: 3.52

Title: 'Surviving Desire (1991)'
, Genre: Comedy|Drama|Romance
, Predicted Rating: 3.52

Title: 'Stuart Saves His Family (1995)'
, Genre: Comedy
, Predicted Rating: 3.52

Recommended movies for User 2
[(117454, 'The Magic Crystal (2011)', 'Adventure|Animation|Children|Comedy|Fantasy', 3.51971875), (46762, 'Thief Lord, The (2006)', 'Adventure|Children|Fantasy', 3.51971875), (94503, 'Decoy Bride, The (2011)', 'Comedy|Romance', 3.51971875), (26757, 'Surviving Desire (1991)', 'Comedy|Drama|Romance', 3.51971875), (312, 'Stuart Saves His Family (1995)', 'Comedy', 3.51971875)]


In [7]:
# Recommendations for a third user, with no release-year restriction
user_id3 = 3  # example user
recommended_movies = get_movie_recommendations(
    user_id3,
    num_recommendations=5,
)
print("Recommended movies for User", user_id3)
print(recommended_movies)

Highest predicted rating movie for User 3: 'Jumanji (1995)' with a predicted rating of 3.52
Title: 'Shrek the Third (2007)'
, Genre: Adventure|Animation|Children|Comedy|Fantasy
, Predicted Rating: 3.52

Title: 'Bridge to Terabithia (2007)'
, Genre: Adventure|Children|Fantasy
, Predicted Rating: 3.52

Title: 'Friends with Benefits (2011)'
, Genre: Comedy|Romance
, Predicted Rating: 3.52

Title: 'Fools Rush In (1997)'
, Genre: Comedy|Drama|Romance
, Predicted Rating: 3.52

Title: 'Fat People (Gordos) (2009)'
, Genre: Comedy
, Predicted Rating: 3.52

Recommended movies for User 3
[(53121, 'Shrek the Third (2007)', 'Adventure|Animation|Children|Comedy|Fantasy', 3.51971875), (50601, 'Bridge to Terabithia (2007)', 'Adventure|Children|Fantasy', 3.51971875), (88405, 'Friends with Benefits (2011)', 'Comedy|Romance', 3.51971875), (1457, 'Fools Rush In (1997)', 'Comedy|Drama|Romance', 3.51971875), (78919, 'Fat People (Gordos) (2009)', 'Comedy', 3.51971875)]


In [8]:
# Recommendations for user 50, with no release-year restriction
user_id50 = 50  # example user
recommended_movies = get_movie_recommendations(
    user_id50,
    num_recommendations=5,
)
print("Recommended movies for User", user_id50)
print(recommended_movies)

Highest predicted rating movie for User 50: 'Toy Story (1995)' with a predicted rating of 3.52
Title: 'Toy Story (1995)'
, Genre: Adventure|Animation|Children|Comedy|Fantasy
, Predicted Rating: 3.52

Title: 'Return to Oz (1985)'
, Genre: Adventure|Children|Fantasy
, Predicted Rating: 3.52

Title: 'Four more years (Fyra år till) (2010)'
, Genre: Comedy|Romance
, Predicted Rating: 3.52

Title: 'Very Ordinary Couple (Yeonaeui Wondo) (2013)'
, Genre: Comedy|Drama|Romance
, Predicted Rating: 3.52

Title: 'Strange Wilderness (2008)'
, Genre: Comedy
, Predicted Rating: 3.52

Recommended movies for User 50
[(1, 'Toy Story (1995)', 'Adventure|Animation|Children|Comedy|Fantasy', 3.51971875), (2093, 'Return to Oz (1985)', 'Adventure|Children|Fantasy', 3.51971875), (98378, 'Four more years (Fyra år till) (2010)', 'Comedy|Romance', 3.51971875), (114119, 'Very Ordinary Couple (Yeonaeui Wondo) (2013)', 'Comedy|Drama|Romance', 3.51971875), (57536, 'Strange Wilderness (2008)', 'Comedy', 3.51971875)]
