# Matrix Factorization

In [None]:
import numpy as np

In [None]:
# Toy user-item rating matrix: one row per user, one column per item.
# A zero entry stands for a missing rating, not an actual rating of 0.
R = np.array(
    [
        (5, 3, 0, 1),
        (4, 0, 0, 1),
        (1, 1, 0, 5),
        (1, 0, 0, 4),
        (0, 1, 5, 4),
    ]
)

In [None]:
# Length of the latent feature vector learned for every user and every item.
num_latent_features = 2

# Rows of R are users, columns are items.
(num_users, num_items) = R.shape

In [None]:
# Report the matrix dimensions, one per line.
print(
    f"Number of users: {num_users}",
    f"Number of items: {num_items}",
    sep="\n",
)

Number of users: 5
Number of items: 4


In [None]:
# Seed NumPy's global RNG so the random starting point is identical on every run.
np.random.seed(0)

# Random initial latent factors: U has one row per user, V one row per item,
# and both have one column per latent feature. The generator is consumed in
# order, so U is drawn before V.
U, V = (
    np.random.rand(row_count, num_latent_features)
    for row_count in (num_users, num_items)
)

In [None]:
# Hyperparameters for the gradient-descent factorization
iterations = 1000      # number of full passes over the rating matrix
regularization = 0.02  # weight of the shrinkage term in each factor update
learning_rate = 0.01   # step size applied to each factor update

In [None]:
# Perform matrix factorization with stochastic gradient descent.
# Only observed (non-zero) ratings contribute to the updates. The set of observed
# cells never changes, so it is collected once (in row-major order) instead of
# re-testing every cell of R on every pass.
observed_ratings = [
    (i, j)
    for i in range(num_users)
    for j in range(num_items)
    if R[i, j] > 0
]

for iteration in range(iterations):
    for i, j in observed_ratings:
        # Compute prediction and error for this (user, item) pair
        prediction = np.dot(U[i, :], V[j, :])
        error = R[i, j] - prediction

        # Snapshot the user's factors before they are modified: both updates
        # must be computed from the same (pre-update) point, otherwise the item
        # update would read the already-updated user row.
        user_factors = U[i, :].copy()

        # Update latent features: step along the error, shrink by the regularizer
        U[i, :] += learning_rate * (error * V[j, :] - regularization * user_factors)
        V[j, :] += learning_rate * (error * user_factors - regularization * V[j, :])

In [None]:
# Reconstruct a score for every user-item pair, including the cells that had
# no rating, as the product of the user factors and the transposed item factors.
R_pred = np.dot(U, V.T)

In [None]:
# Show the input ratings and the reconstructed ratings one after the other;
# the leading "\n" in the second title leaves a blank line between them.
print(
    "Original Ratings Matrix:",
    R,
    "\nPredicted Ratings Matrix:",
    R_pred,
    sep="\n",
)

Original Ratings Matrix:
[[5 3 0 1]
 [4 0 0 1]
 [1 1 0 5]
 [1 0 0 4]
 [0 1 5 4]]

Predicted Ratings Matrix:
[[4.95467514 2.96325763 5.85170569 1.00518157]
 [3.96436552 2.38600311 4.86672084 1.00229108]
 [1.0124011  0.96513617 5.61539947 4.94592121]
 [0.99449224 0.88053745 4.68623725 3.9683649 ]
 [1.26732816 1.04045614 4.96844253 3.98078422]]


In [None]:
# Round every predicted rating to zero decimal places (values stay floats)
R_pred_rounded = np.round(R_pred, 0)

# Bare last expression so the notebook displays the rounded matrix
R_pred_rounded

array([[5., 3., 6., 1.],
       [4., 2., 5., 1.],
       [1., 1., 6., 5.],
       [1., 1., 5., 4.],
       [1., 1., 5., 4.]])

# Collaborative Filtering (SVD with the Surprise library)

In [None]:
! pip install scikit-surprise



In [None]:
from surprise import SVD, Dataset, Reader
from surprise.model_selection import train_test_split
from surprise.accuracy import rmse
import pandas as pd

# Example data: user, item, rating (one entry per observed rating)
ratings_dict = {
    'userID': [1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5],
    'itemID': [1, 2, 3, 1, 3, 1, 2, 3, 2, 3, 1, 3],
    'rating': [5, 3, 1, 4, 2, 5, 4, 3, 1, 2, 5, 4],
}

df = pd.DataFrame(ratings_dict)

# Define a Reader and load the dataset
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

# Split the dataset for training and testing.
# A fixed random_state keeps the held-out ratings the same on every run.
trainset, testset = train_test_split(data, test_size=0.25, random_state=0)

# Use the SVD algorithm, seeded so that training is repeatable
algo = SVD(random_state=0)

# Train and test the algorithm
algo.fit(trainset)
predictions = algo.test(testset)

# Compute RMSE. rmse() prints "RMSE: <value>" itself and returns the value, so
# wrapping the call in print() would show the number twice.
print("RMSE on Test Set:")
rmse_val = rmse(predictions)

RMSE on Test Set:
RMSE: 1.1520
1.151980477406613


In [None]:
# Define a Reader and load the dataset
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['userID', 'itemID', 'rating']], reader)

# Split the dataset for training and testing.
# A fixed random_state keeps the held-out ratings the same on every run.
trainset, testset = train_test_split(data, test_size=0.25, random_state=0)

# Use the SVD algorithm, seeded so that training is repeatable
algo = SVD(random_state=0)

# Train and test the algorithm
algo.fit(trainset)
predictions = algo.test(testset)

# Compute RMSE (rmse() prints the "RMSE: <value>" line itself)
print("RMSE on Test Set:")
rmse_val = rmse(predictions)

# Print predictions: one line per held-out rating, showing the true rating
# (r_ui) next to the model's estimate (est)
print("\nPredictions:")
for prediction in predictions:
    print(prediction)

RMSE on Test Set:
RMSE: 1.1389

Predictions:
user: 1          item: 1          r_ui = 5.00   est = 3.41   {'was_impossible': False}
user: 1          item: 2          r_ui = 3.00   est = 2.60   {'was_impossible': False}
user: 5          item: 3          r_ui = 4.00   est = 2.90   {'was_impossible': False}


# Hybrid approach

In [None]:
# Hypothetical content-based predictions: one value per entry of the
# collaborative-filtering `predictions` list, in the same order.
content_based_predictions = [2.5, 3.5, 4.0]  # Example content-based predictions

# zip() silently stops at the shorter input, so a length mismatch would drop
# predictions without any warning; fail loudly instead.
if len(content_based_predictions) != len(predictions):
    raise ValueError(
        f"Expected {len(predictions)} content-based predictions, "
        f"got {len(content_based_predictions)}"
    )

# Hybrid score: plain average of the content-based value and the
# collaborative-filtering estimate (`est`) for the same test entry.
combined_predictions = [
    (cb + cf.est) / 2
    for cb, cf in zip(content_based_predictions, predictions)
]

print("Combined Predictions:")
print(combined_predictions)

Combined Predictions:
[2.9545777454532516, 3.05219344417636, 3.4514390668942605]
