In [2]:
import os
import pandas as pd
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split
from surprise import accuracy


: 

In [None]:
# Resolve the data files against the process's current working directory.
# Note: this is wherever the kernel was started, which is not necessarily
# the folder that contains the notebook.
current_dir = os.getcwd()
ratings_path = os.path.join(current_dir, 'rating.csv')
movies_path = os.path.join(current_dir, 'movie.csv')

# Load MovieLens dataset
ratings = pd.read_csv(ratings_path)
movies = pd.read_csv(movies_path)

# Train on a fixed-seed subsample to keep the similarity matrix small.
# The sample size is capped at the number of available rows because
# DataFrame.sample raises when n exceeds the frame length (replace=False).
ratings_sample = ratings.sample(n=min(50000, len(ratings)), random_state=42)

# Prepare the data for the Surprise library
reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(ratings_sample[['userId', 'movieId', 'rating']], reader)

# Split data into training and test sets (80/20, reproducible)
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Define the KNN collaborative filtering algorithm:
# item-based (user_based=False) with cosine similarity
algo = KNNBasic(sim_options={'name': 'cosine', 'user_based': False})

# Train the algorithm on the training set
algo.fit(trainset)

# Make predictions on the test set
predictions = algo.test(testset)

# accuracy.rmse prints the score itself unless verbose=False; silence it so
# the RMSE is reported exactly once, in the format chosen here.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Pull the true rating (r_ui) and the model's estimate (est) out of every
# test-set prediction so they can be plotted against each other.
actual_ratings = [pred.r_ui for pred in predictions]
predicted_ratings = [pred.est for pred in predictions]

# Draw on an explicit Axes: one point per prediction, plus the y = x
# diagonal that marks where a perfect prediction would fall.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(actual_ratings, predicted_ratings, alpha=0.6, label='Predictions')
ax.plot(
    [0.5, 5],
    [0.5, 5],
    color='red',
    linestyle='--',
    label='Perfect Prediction (y = x)',
)

# Titles, axis labels, and limits matching the 0.5-5 rating scale
ax.set_title("Actual vs Predicted Ratings", fontsize=14)
ax.set_xlabel("Actual Ratings", fontsize=12)
ax.set_ylabel("Predicted Ratings", fontsize=12)
ax.set_xlim(0.5, 5)
ax.set_ylim(0.5, 5)
ax.legend()
ax.grid(True, linestyle='--', alpha=0.6)
plt.show()


In [4]:
import random
def get_movie_recommendations(user_id, num_recommendations=5, year_range=None, seed=None):
    """Recommend unrated movies to a user, one random candidate per genre string.

    Every movie the user has not rated is scored with the module-level ``algo``.
    The single highest-scoring movie is printed. Then, for each distinct value
    of the ``genres`` column, one unrated movie is picked at random; the picks
    are ranked by predicted rating and the best ``num_recommendations`` are
    printed and returned.

    Reads the module-level ``movies`` and ``ratings`` frames and ``algo``.
    Side effect: (re)writes a float ``year`` column on ``movies``, parsed from
    the first parenthesised four-digit number in each title.

    Args:
        user_id: Raw user id, as it appears in ``ratings['userId']``.
        num_recommendations: Maximum number of recommendations to return.
        year_range: Optional ``(start_year, end_year)`` pair (inclusive) that
            restricts the per-genre candidates. A falsy value disables it.
        seed: Optional seed for the per-genre random pick. ``None`` (default)
            uses the global ``random`` module state, as before.

    Returns:
        A list of ``(movie_id, title, genres, predicted_rating)`` tuples sorted
        by predicted rating, highest first. Empty when the user has no unrated
        movies or nothing matches ``year_range``.
    """
    # Extract year from title (kept as a column on the shared frame so code
    # elsewhere that reads movies['year'] keeps working)
    movies['year'] = movies['title'].str.extract(r'\((\d{4})\)', expand=False).astype(float)

    # A set makes the "already rated?" check O(1) per movie instead of a list scan
    rated_ids = set(ratings.loc[ratings['userId'] == user_id, 'movieId'].tolist())
    unrated_movies = [movie_id for movie_id in movies['movieId'].unique() if movie_id not in rated_ids]

    # Nothing left to score: bail out instead of letting max() raise on an empty list
    if not unrated_movies:
        print(f"No unrated movies left for User {user_id}; nothing to recommend.")
        return []

    # Predict ratings for all unrated movies (done once; reused for the picks below)
    predictions = [algo.predict(user_id, movie_id) for movie_id in unrated_movies]
    estimate_by_id = {int(pred.iid): pred.est for pred in predictions}

    # Find the highest-rated movie
    max_rated_movie = max(predictions, key=lambda x: x.est)
    max_movie_id = int(max_rated_movie.iid)
    max_movie_title = movies[movies['movieId'] == max_movie_id]['title'].values[0]

    print(f"Highest predicted rating movie for User {user_id}: '{max_movie_title}' with a predicted rating of {max_rated_movie.est:.2f}")

    # Apply year range filter if provided
    if year_range:
        start_year, end_year = year_range
        movies_filtered_by_year = movies[(movies['year'] >= start_year) & (movies['year'] <= end_year)]
    else:
        movies_filtered_by_year = movies

    # Group the unrated candidates by genre string in one pass rather than
    # re-filtering the whole frame for every genre.
    candidates = movies_filtered_by_year[movies_filtered_by_year['movieId'].isin(unrated_movies)]
    ids_by_genre = {
        genre: group.tolist()
        for genre, group in candidates.groupby('genres', sort=False)['movieId']
    }

    # Dedicated generator when a seed is given; otherwise the shared module state
    picker = random.Random(seed) if seed is not None else random

    # One random pick per genre, visiting genres in order of first appearance
    genre_recommendations = []
    for genre in movies_filtered_by_year['genres'].unique():
        genre_ids = ids_by_genre.get(genre)
        if genre_ids:
            random_movie_id = picker.choice(genre_ids)
            genre_recommendations.append((random_movie_id, estimate_by_id[int(random_movie_id)]))

    # Sort genre recommendations by rating (can remove for more randomness)
    genre_recommendations = sorted(genre_recommendations, key=lambda x: x[1], reverse=True)[:num_recommendations]

    # Get movie titles for the recommended movie IDs
    recommended_movies = []

    print("\nMovies to Watch Next:")

    for movie_id, rating in genre_recommendations:
        movie_row = movies.loc[movies['movieId'] == int(movie_id), ['title', 'genres']].iloc[0]
        title, genre = movie_row['title'], movie_row['genres']
        print(f"Title: '{title}'\n, Genre: {genre}\n, Predicted Rating: {rating:.2f}\n")
        recommended_movies.append((movie_id, title, genre, rating))

    return recommended_movies

In [None]:
# Try the recommender on the first example user, limited to 2007-2010 releases.
# Call shape: get_movie_recommendations(user_id, num_recommendations, year_range)
user_id = 1
recommended_movies = get_movie_recommendations(
    user_id,
    num_recommendations=7,
    year_range=[2007, 2010],
)
print("Recommended movies for User", user_id)
print(recommended_movies)

In [None]:
# Same check for a second example user, with no year restriction.
user_id2 = 2
recommended_movies = get_movie_recommendations(
    user_id2,
    num_recommendations=5,
)
print("Recommended movies for User", user_id2)
print(recommended_movies)

In [None]:
# Third example user: five recommendations across all years.
user_id3 = 3
recommended_movies = get_movie_recommendations(
    user_id3,
    num_recommendations=5,
)
print("Recommended movies for User", user_id3)
print(recommended_movies)

In [None]:
# Fourth example user (id 50): five recommendations across all years.
user_id50 = 50
recommended_movies = get_movie_recommendations(
    user_id50,
    num_recommendations=5,
)
print("Recommended movies for User", user_id50)
print(recommended_movies)

In [None]:
import matplotlib.pyplot as plt

# 1. Histogram of every rating value in the full ratings table
fig, ax = plt.subplots(figsize=(10, 6))
ratings['rating'].hist(ax=ax, bins=10, color='skyblue', edgecolor='black')
ax.set_title('Distribution of User Ratings', fontsize=16)
ax.set_xlabel('Rating', fontsize=14)
ax.set_ylabel('Frequency', fontsize=14)
# Restyle the horizontal grid lines as faint dashes
ax.grid(axis='y', linestyle='--', alpha=0.7)
plt.show()

# 2. Visualize recommendations for a specific user
def visualize_recommendations(user_id, num_recommendations=5):
    """Plot a horizontal bar chart of a user's recommended movies.

    Calls ``get_movie_recommendations(user_id, num_recommendations)``, which
    returns a list of ``(movie_id, title, genres, predicted_rating)`` tuples,
    and draws one bar per movie with the predicted rating as its length.

    Args:
        user_id: Raw user id to recommend for.
        num_recommendations: Maximum number of movies to request and plot.

    Returns:
        None. Shows the figure, or prints a notice when there is nothing to plot.
    """
    # Get movie recommendations
    recommended_movies = get_movie_recommendations(user_id, num_recommendations)

    # Nothing to draw: skip creating an empty figure
    if not recommended_movies:
        print(f"No recommendations to plot for User {user_id}.")
        return

    # The recommender returns tuples, not a DataFrame, so unpack by position.
    # The predicted rating is already in each tuple; no need to predict again.
    titles = [title for _, title, _, _ in recommended_movies]
    predicted_ratings = [rating for _, _, _, rating in recommended_movies]

    # Create a bar chart for the recommendations
    plt.figure(figsize=(10, 6))
    plt.barh(titles, predicted_ratings, color='lightgreen', edgecolor='black')
    plt.title(f'Top {num_recommendations} Recommended Movies for User {user_id}', fontsize=16)
    plt.xlabel('Predicted Rating', fontsize=14)
    plt.ylabel('Movie Titles', fontsize=14)
    plt.gca().invert_yaxis()  # Highest-rated movie at the top
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.show()

# Draw the recommendation chart for the example user (id 1, five movies)
visualize_recommendations(
    user_id=1,
    num_recommendations=5,
)
