Recommender System 1

In [1]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd
from collections import defaultdict

In [2]:
# Create a Surprise Reader
reader = Reader(rating_scale=(1, 5))

In [3]:
# Load MovieLens dataset using Surprise library
data = Dataset.load_builtin('ml-100k')

In [4]:
# Split data into training and testing sets
trainset, testset = train_test_split(data, test_size=0.2)

In [5]:
# Use SVD for collaborative filtering
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x13558292190>

In [6]:
# Get the top N recommendations for a user
def get_top_recommendations(user_id, num_recommendations=10):
    # Build anti-test set for the given user
    anti_testset = trainset.build_anti_testset()
    anti_testset = [elem for elem in anti_testset if elem[0] == user_id]

    # Predict ratings for unrated items
    predictions = model.test(anti_testset)

    # Organize predictions by movie ID
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Sort predictions by estimated rating
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:num_recommendations]

    # Display the top N movie recommendations
    print(f'\nTop {num_recommendations} Recommended Movies for User {user_id}:')
    for i, (movie_id, estimated_rating) in enumerate(top_n[user_id], start=1):
        print(f"{i}. MovieID: {movie_id}, Estimated Rating: {estimated_rating}")

# Example usage
#get_top_recommendations(user_id="15")
#get_top_recommendations(user_id="21")

In [67]:
# Simple Command-Line Interface for User Interaction
while True:
    try:
        # Get user input for userId and movieId
        user_id = str(input("Enter userId (or type 'exit' to end): "))
        if user_id.lower() == 'exit':
            break

        # Get the top N recommendations for the user
        get_top_recommendations(user_id)

    except ValueError as e:
        print(str(e))

Enter userId (or type 'exit' to end): 15

Top 10 Recommended Movies for User 15:
1. MovieID: 64, Estimated Rating: 4.442799511860893
2. MovieID: 318, Estimated Rating: 4.342392217854538
3. MovieID: 22, Estimated Rating: 4.204762073752003
4. MovieID: 427, Estimated Rating: 4.1339288808736026
5. MovieID: 172, Estimated Rating: 4.1032433141901485
6. MovieID: 313, Estimated Rating: 4.093795306573339
7. MovieID: 199, Estimated Rating: 4.066429653129308
8. MovieID: 151, Estimated Rating: 4.039588349976615
9. MovieID: 357, Estimated Rating: 3.9743703780525594
10. MovieID: 12, Estimated Rating: 3.9426108644419773
Enter userId (or type 'exit' to end): exit


Recommender System 1 Evaluation Metrics

In [65]:
# Evaluate the model on the test set
predictions = model.test(testset)# Calculate RMSE
rmse = accuracy.rmse(predictions)
print(f'\nRMSE on the test set: {rmse}')

RMSE: 0.9357

RMSE on the test set: 0.935693575561328


In [66]:
from collections import defaultdict
import numpy as np

# Create a dictionary to store top-N recommendations for each user
top_n = defaultdict(list)
for uid, iid, true_r, est, _ in predictions:
    top_n[uid].append((iid, est))

# Function to calculate Discounted Cumulative Gain (DCG) at k
def dcg_at_k(r, k):
    r = np.asfarray(r)[:k]
    return np.sum((2 ** r - 1) / np.log2(np.arange(2, r.size + 2)))

# Function to calculate Normalized Discounted Cumulative Gain (NDCG) at k
def ndcg_at_k(predictions, k):
    ndcg_sum = 0
    for uid, user_ratings in top_n.items():
        # Extract true ratings for the user
        true_ratings = [true_r for (_, true_r) in user_ratings]

        # Extract predicted ratings for the recommended items
        predicted_ratings = [est for (_, est) in user_ratings]

        # Calculate NDCG for the user
        ndcg = dcg_at_k(predicted_ratings, k) / dcg_at_k(true_ratings, k)

        # Accumulate NDCG for all users
        ndcg_sum += ndcg

    # Average NDCG across all users
    average_ndcg = ndcg_sum / len(top_n)
    return average_ndcg

# Calculate NDCG at k (adjust k as needed)
k_ndcg = 10
average_ndcg = ndcg_at_k(predictions, k_ndcg)
print(f'Average NDCG at {k_ndcg}: {average_ndcg}')

Average NDCG at 10: 1.0


Recommender System 2

In [10]:
import tensorflow as tf

# RS2: Collaborative Filtering with Neural Network
# Convert Surprise trainset to DataFrame for movie names
ratings_df = pd.DataFrame(trainset.all_ratings(), columns=['user', 'item', 'rating'])

# Build a neural network model for collaborative filtering
num_users = trainset.n_users + 1
num_items = trainset.n_items + 1

user_input = tf.keras.layers.Input(shape=(1,))
item_input = tf.keras.layers.Input(shape=(1,))

user_embedding = tf.keras.layers.Embedding(input_dim=num_users, output_dim=50)(user_input)
item_embedding = tf.keras.layers.Embedding(input_dim=num_items, output_dim=50)(item_input)

# Concatenate user and item embeddings
concatenated = tf.keras.layers.Concatenate()([user_embedding, item_embedding])
flatten = tf.keras.layers.Flatten()(concatenated)

dense_layer = tf.keras.layers.Dense(128, activation='relu')(flatten)
output_layer = tf.keras.layers.Dense(1, activation='linear')(dense_layer)  # Changed output activation function

model_rs2 = tf.keras.models.Model(inputs=[user_input, item_input], outputs=output_layer)

model_rs2.compile(optimizer='adam', loss='mean_squared_error')

# Train the neural network model
model_rs2.fit([ratings_df['user'], ratings_df['item']], ratings_df['rating'], epochs=5, verbose=1)






Epoch 1/5

Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x13566236dc0>

In [68]:
import numpy as np

# Function to get movie recommendations for a user using the deep learning model (RS2)
def get_movie_recommendations(user_id, model, trainset, num_recommendations=10):
    # Get all item IDs
    all_item_ids = np.array(list(range(1, trainset.n_items + 1)))

    # Repeat the user ID for all items
    user_ids = np.full_like(all_item_ids, user_id)

    # Predict ratings for the user and all items
    ratings = model.predict([user_ids, all_item_ids])

    # Create DataFrame with item IDs and predicted ratings
    predictions_df = pd.DataFrame({'item': all_item_ids, 'predicted_rating': ratings.flatten()})

    # Exclude items the user has already rated
    user_ratings = trainset.ur[user_id]
    rated_item_ids = [iid for (iid, _) in user_ratings]
    predictions_df = predictions_df[~predictions_df['item'].isin(rated_item_ids)]

    # Sort items by predicted rating in descending order
    top_recommendations = predictions_df.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

    return top_recommendations


user_id_to_recommend = int(input("userID is?:"))
num_recs = 10
recommendations = get_movie_recommendations(user_id_to_recommend, model_rs2, trainset)

# Display the top N movie recommendations
print(f'\nTop {num_recs} Recommended Movies for User {user_id_to_recommend}:')
for i, row in recommendations.iterrows():
    print(f"{i + 1}. MovieID: {row['item']}, Predicted Rating: {row['predicted_rating']}")

userID is?:15

Top 10 Recommended Movies for User 15:
1182. MovieID: 1182.0, Predicted Rating: 4.904898643493652
119. MovieID: 119.0, Predicted Rating: 4.728442192077637
763. MovieID: 763.0, Predicted Rating: 4.693665504455566
486. MovieID: 486.0, Predicted Rating: 4.616649150848389
604. MovieID: 604.0, Predicted Rating: 4.613925933837891
1072. MovieID: 1072.0, Predicted Rating: 4.606298923492432
991. MovieID: 991.0, Predicted Rating: 4.6060991287231445
357. MovieID: 357.0, Predicted Rating: 4.605318069458008
63. MovieID: 63.0, Predicted Rating: 4.58200216293335
1176. MovieID: 1176.0, Predicted Rating: 4.581923961639404


Recommender System 2 Evaluation Metrics

In [69]:
from sklearn.metrics import mean_squared_error
# Convert user_ids and item_ids to integers
user_ids_rs2 = np.array([int(uid) for uid, _, _ in testset])
item_ids_rs2 = np.array([int(iid) for _, iid, _ in testset])

# Ensure that user_ids and item_ids are within the expected range
user_ids_rs2 = np.clip(user_ids_rs2, 0, num_users - 1)
item_ids_rs2 = np.clip(item_ids_rs2, 0, num_items - 1)

# Use model_rs2.predict for each pair of user_id and item_id
predictions_rs2 = model_rs2.predict([user_ids_rs2, item_ids_rs2])
estimated_ratings = [pred[0] for pred in predictions_rs2]

rmse_rs2 = np.sqrt(mean_squared_error(estimated_ratings, [rating for _, _, rating in testset]))
print(f"RMSE for RS2 (Neural Collaborative Filtering): {rmse_rs2}")

RMSE for RS2 (Neural Collaborative Filtering): 1.3237130534847015


In [70]:
from sklearn.metrics import ndcg_score

# Get the true ratings for the test set
true_ratings_rs2 = np.array([rating for _, _, rating in testset])

# Use model_rs2.predict for each pair of user_id and item_id
predictions_rs2 = model_rs2.predict([user_ids_rs2, item_ids_rs2])
estimated_ratings = np.array([pred[0] for pred in predictions_rs2])

# Ensure that estimated ratings are within the valid range (e.g., between 1 and 5)
estimated_ratings = np.clip(estimated_ratings, 1, 5)

# Calculate NDCG
ndcg_rs2 = ndcg_score([true_ratings_rs2], [estimated_ratings])
print(f"NDCG for RS2 (Neural Collaborative Filtering): {ndcg_rs2}")

NDCG for RS2 (Neural Collaborative Filtering): 0.9688771666404502


In [71]:
import traceback

# Function to calculate novelty based on item popularity
def calculate_novelty(trainset):
    try:
        # Count the number of ratings for each item
        item_ratings_count = defaultdict(int)
        for uid, iid, _ in trainset.all_ratings():
            item_ratings_count[iid] += 1

        # Calculate novelty scores for each item
        novelty_scores = {iid: 1 / (1 + item_ratings_count[iid]) for iid in range(1, trainset.n_items + 1)}

        return novelty_scores

    except Exception as e:
        print(f"Error in calculate_novelty: {e}")
        traceback.print_exc()
        return None

# Function to get movie recommendations for a user using the deep learning model (RS2) with novelty scores
def get_movie_recommendations_with_novelty(user_id, model, trainset, num_recommendations=10):
    try:
        # Get all item IDs
        all_item_ids = np.array(list(range(1, trainset.n_items + 1)))

        # Repeat the user ID for all items
        user_ids = np.full_like(all_item_ids, user_id)

        # Predict ratings for the user and all items
        ratings = model.predict([user_ids, all_item_ids])

        # Create DataFrame with item IDs and predicted ratings
        predictions_df = pd.DataFrame({'item': all_item_ids, 'predicted_rating': ratings.flatten()})

        # Exclude items the user has already rated
        user_ratings = trainset.ur[user_id]
        rated_item_ids = [iid for (iid, _) in user_ratings]
        predictions_df = predictions_df[~predictions_df['item'].isin(rated_item_ids)]

        # Sort items by predicted rating in descending order
        top_recommendations = predictions_df.sort_values(by='predicted_rating', ascending=False).head(num_recommendations)

        # Calculate novelty scores for the recommended items
        novelty_scores = calculate_novelty(trainset)

        # Add novelty scores to the recommendations
        top_recommendations['novelty_score'] = top_recommendations['item'].map(novelty_scores)

        return top_recommendations

    except Exception as e:
        print(f"Error in get_movie_recommendations_with_novelty: {e}")
        traceback.print_exc()  # Print the traceback for more details
        return None

user_id_to_recommend = int(input("userID is?:"))
recommendations_with_novelty = get_movie_recommendations_with_novelty(user_id_to_recommend, model_rs2, trainset)

# Display the top N recommended movies with novelty scores
print(f'\nTop {num_recs} Recommended Movies with Novelty Scores for User {user_id_to_recommend}:')
for i, row in recommendations_with_novelty.iterrows():
    print(f"{i + 1}. MovieID: {row['item']}, Predicted Rating: {row['predicted_rating']}, Novelty Score: {row['novelty_score']}")

userID is?:15

Top 10 Recommended Movies with Novelty Scores for User 15:
1182. MovieID: 1182.0, Predicted Rating: 4.904898643493652, Novelty Score: 0.1111111111111111
119. MovieID: 119.0, Predicted Rating: 4.728442192077637, Novelty Score: 0.003952569169960474
763. MovieID: 763.0, Predicted Rating: 4.693665504455566, Novelty Score: 0.011235955056179775
486. MovieID: 486.0, Predicted Rating: 4.616649150848389, Novelty Score: 0.005154639175257732
604. MovieID: 604.0, Predicted Rating: 4.613925933837891, Novelty Score: 0.006134969325153374
1072. MovieID: 1072.0, Predicted Rating: 4.606298923492432, Novelty Score: 0.16666666666666666
991. MovieID: 991.0, Predicted Rating: 4.6060991287231445, Novelty Score: 0.017543859649122806
357. MovieID: 357.0, Predicted Rating: 4.605318069458008, Novelty Score: 0.002136752136752137
63. MovieID: 63.0, Predicted Rating: 4.58200216293335, Novelty Score: 0.010752688172043012
1176. MovieID: 1176.0, Predicted Rating: 4.581923961639404, Novelty Score: 0.0909

Code to find Movie through MovieID using movies.csv

In [62]:
movies_df = pd.read_csv('./ml-latest-small/movies.csv')

# Define a function to get movie details based on MovieID
def get_movie_details(movie_id):
    if movie_id in movies_df['movieId'].unique():
        movie_title = movies_df.loc[movies_df['movieId'] == movie_id, 'title'].values[0]
        return movie_title
    else:
        return f"Movie with ID {movie_id} not found in the dataset."

movie_id_to_check = int(input("Enter MovieID: "))

# Check if MovieID is within the range of MovieIDs in the dataset
if movie_id_to_check <= movies_df['movieId'].max():
    movie_title = get_movie_details(movie_id_to_check)
    print(f"Movie Title for MovieID {movie_id_to_check}: {movie_title}")
else:
    print(f"MovieID {movie_id_to_check} is out of range.")

Enter MovieID: 3
Movie Title for MovieID 3: Grumpier Old Men (1995)
