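This notebook is a basic demonstration of how a recommender system works: it builds a small user-item rating matrix, predicts missing ratings with a simple baseline, and evaluates that baseline.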

In [28]:
import numpy as np
import pandas as pd
# Toy ratings table: one column per item, one entry per user in each column.
# np.nan marks a rating the user has not given.
data = dict(
    Item1=[5, 3, np.nan, 1],
    Item2=[4, np.nan, np.nan, 1],
    Item3=[np.nan, 3, 4, 1],
    Item4=[2, 5, np.nan, 1],
)

# Rows are labelled by user so the printed table reads as user x item.
user_labels = ['User1', 'User2', 'User3', 'User4']
user_item_matrix = pd.DataFrame(data, index=user_labels)
print(user_item_matrix)

       Item1  Item2  Item3  Item4
User1    5.0    4.0    NaN    2.0
User2    3.0    NaN    3.0    5.0
User3    NaN    NaN    4.0    NaN
User4    1.0    1.0    1.0    1.0


This is the prediction function: it predicts a user's unrated items as the mean of the items that user has already rated.

In [29]:
def predict_rating(user_index, item_index, user_item_matrix):
    """Baseline prediction: the mean of everything the user has rated.

    Args:
        user_index: Positional (0-based) row of the user in
            ``user_item_matrix``.
        item_index: Position of the item being predicted. Not used by this
            baseline, so the estimate is the same for every item of a user.
        user_item_matrix: DataFrame of ratings with one row per user, where
            NaN marks a missing rating.

    Returns:
        The mean of the user's non-NaN ratings, or ``np.nan`` when the user
        has no ratings to average.
    """
    known_ratings = user_item_matrix.iloc[user_index].dropna()
    if known_ratings.empty:
        return np.nan
    return known_ratings.mean()


The main loop performs leave-one-out cross-validation by iterating over each user and their items. It checks for existing ratings, storing them for comparison, and temporarily sets one rating to NaN to simulate leaving it out. This tests the model's ability to predict unseen ratings.

In [30]:
# Leave-one-out evaluation: hide each known rating in turn, predict it from
# the user's remaining ratings, and record the (actual, predicted) pair.
actual_ratings = []
predicted_ratings = []

# Work on a float-typed scratch copy: the matrix displayed in earlier cells is
# never modified (even if a prediction raises), and writing NaN cannot trip
# over a column that happens to hold only integers.
loo_matrix = user_item_matrix.astype(float)
n_users, n_items = loo_matrix.shape

for user_index in range(n_users):
    for item_index in range(n_items):
        actual_rating = loo_matrix.iat[user_index, item_index]
        if pd.isna(actual_rating):
            continue  # Skip if there's no rating to hold out

        # Temporarily hide the rating, predict it, and always put it back so
        # the next iteration sees the full set of ratings again.
        loo_matrix.iat[user_index, item_index] = np.nan
        try:
            predicted_rating = predict_rating(user_index, item_index, loo_matrix)
        finally:
            loo_matrix.iat[user_index, item_index] = actual_rating

        # Append both together so the two lists stay aligned by position.
        actual_ratings.append(actual_rating)
        predicted_ratings.append(predicted_rating)

In [31]:
def calculate_mse(actual, predicted):
    """Mean squared error over the positions where both values are present.

    Args:
        actual: Sequence of true ratings.
        predicted: Sequence of predicted ratings, aligned by position with
            ``actual``.

    Returns:
        The mean of the squared differences across the pairs in which neither
        value is NaN, or ``np.nan`` when no such pair exists.
    """
    truth = np.array(actual)
    estimate = np.array(predicted)

    # A pair is usable only when neither side is NaN.
    usable = ~(np.isnan(truth) | np.isnan(estimate))
    if not np.any(usable):
        return np.nan

    errors = truth[usable] - estimate[usable]
    return np.mean(errors ** 2)

# Score the leave-one-out predictions against the held-out ratings.
# calculate_mse ignores any pair in which either value is NaN.
mse = calculate_mse(actual_ratings, predicted_ratings)
print(mse)

1.65


In [32]:
# Show the ratings matrix again to confirm the leave-one-out loop left it unchanged.
print(user_item_matrix)

       Item1  Item2  Item3  Item4
User1    5.0    4.0    NaN    2.0
User2    3.0    NaN    3.0    5.0
User3    NaN    NaN    4.0    NaN
User4    1.0    1.0    1.0    1.0


In [33]:

# Predictions first, then the true ratings; entries line up by position.
# A nan prediction means the user had no other rating left to average.
print(predicted_ratings)
print(actual_ratings)

[3.0, 3.5, 4.5, 4.0, 4.0, 3.0, nan, 1.0, 1.0, 1.0, 1.0]
[5.0, 4.0, 2.0, 3.0, 3.0, 5.0, 4.0, 1.0, 1.0, 1.0, 1.0]
