In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

df = pd.read_excel('Food_2024_Fall.xlsx')
# Remove the 'Item' column to keep only the ratings
ratings_matrix = df.drop(columns=['Item']).values
ratings_matrix = ratings_matrix.T
print(ratings_matrix)

[[4 5 5 ... 0 0 4]
 [5 1 0 ... 0 0 5]
 [3 2 5 ... 3 4 3]
 ...
 [4 3 5 ... 2 2 3]
 [2 3 3 ... 0 3 2]
 [3 2 4 ... 0 2 5]]


In [4]:
# Simulating a new user who rated more items out of 30
new_user_ratings = np.array([
    4, 3, np.nan, 5, 3, 4, 2, 3, np.nan, 5,  # First 10 items
    4, 5, np.nan, 1, 4, 3, 2, 5, 3, np.nan,  # Next 10 items
    4, 3, 4, 5, np.nan, 1, 3, 4, 5, np.nan   # Last 10 items
])
# Calculate the mean rating for each item from the existing users
item_means = np.nanmean(ratings_matrix, axis=0)
# Fill missing values in the new user's ratings with the corresponding item mean
new_user_filled_ratings = np.where(np.isnan(new_user_ratings), item_means, new_user_ratings)

In [6]:
# Define the Latent Matrix Factorization (LMF) function using ALS
def LMF_ALS(R, K, steps=5000, alpha=0.0002, beta=0.02):
    num_users, num_items = R.shape
    U = np.random.rand(num_users, K)
    V = np.random.rand(num_items, K)
    
    for step in range(steps):
        for i in range(num_users):
            for j in range(num_items):
                if R[i, j] > 0:
                    # Compute the prediction error
                    eij = R[i, j] - np.dot(U[i, :], V[j, :].T)
                    
                    # Update U and V
                    for k in range(K):
                        U[i][k] = U[i][k] + alpha * (2 * eij * V[j][k] - beta * U[i][k])
                        V[j][k] = V[j][k] + alpha * (2 * eij * U[i][k] - beta * V[j][k])
        
        # Compute the total error
        error = 0
        for i in range(num_users):
            for j in range(num_items):
                if R[i, j] > 0:
                    error += pow(R[i, j] - np.dot(U[i, :], V[j, :].T), 2)
                    for k in range(K):
                        error += (beta / 2) * (pow(U[i][k], 2) + pow(V[j][k], 2))
        if step % 100 == 0:
            print(f'Step: {step}, Error: {error}')
        if error < 0.001:
            break
            
    return U, V

# Apply the LMF function on the rating matrix
K = 2  # Number of latent factors
U, V = LMF_ALS(ratings_matrix, K)

# Predict the full rating matrix
R_pred = np.dot(U, V.T)
print(R_pred)

Step: 0, Error: 7625.958044207806
Step: 100, Error: 1119.2577851178269
Step: 200, Error: 1091.5127598883266
Step: 300, Error: 1075.7954283347199
Step: 400, Error: 1055.734672882699
Step: 500, Error: 1031.3872505837487
Step: 600, Error: 1006.6471455171057
Step: 700, Error: 985.8477155209349
Step: 800, Error: 970.504477108758
Step: 900, Error: 959.6638039844094
Step: 1000, Error: 951.8066966045927
Step: 1100, Error: 945.7646814626747
Step: 1200, Error: 940.797918895277
Step: 1300, Error: 936.4663595863417
Step: 1400, Error: 932.5169191652132
Step: 1500, Error: 928.8126169038088
Step: 1600, Error: 925.2906962385113
Step: 1700, Error: 921.936658357662
Step: 1800, Error: 918.7660179013634
Step: 1900, Error: 915.809704525469
Step: 2000, Error: 913.1018511277964
Step: 2100, Error: 910.6702838827948
Step: 2200, Error: 908.530461240165
Step: 2300, Error: 906.6831808147235
Step: 2400, Error: 905.115583000708
Step: 2500, Error: 903.8043439768069
Step: 2600, Error: 902.7197552758181
Step: 2700, Er

In [8]:
# Solve for the new user's latent vector U_new by using the filled-in ratings
U_new = np.linalg.lstsq(V, new_user_filled_ratings, rcond=None)[0]

# Predict the new user's ratings for all items using the learned U_new
new_user_predicted_ratings = np.dot(U_new, V.T)

# Show the predicted ratings for the new user
print("Predicted Ratings for New User (ALS):")
print(new_user_predicted_ratings)

Predicted Ratings for New User (ALS):
[3.4061905  3.38044769 3.58196664 2.77720205 3.99419266 2.50358651
 3.3079176  2.56230824 2.77477656 2.6368701  3.86376397 3.40999554
 2.72766536 1.62175754 3.3368418  3.30393045 2.58923726 3.63888163
 3.2793618  3.30525625 3.86412942 4.08956669 4.14483388 2.38240012
 4.02493676 3.06218401 3.79334074 2.50595264 3.51642236 3.89340822]


In [10]:
from sklearn.decomposition import NMF
from sklearn.impute import SimpleImputer

# Handle any missing values in the matrix (if 0 represents missing values, we can fill with mean)
imputer = SimpleImputer(missing_values=0, strategy='mean')
ratings_matrix_imputed = imputer.fit_transform(ratings_matrix)

# Define the number of latent factors
K = 2  # You can adjust this based on your needs

# Apply NMF (Non-negative Matrix Factorization)
nmf_model = NMF(n_components=K, init='random', random_state=42,max_iter=500)
Un = nmf_model.fit_transform(ratings_matrix_imputed)  # User latent matrix
Vn = nmf_model.components_  # Item latent matrix (transposed)

# Predict the full rating matrix
R_pred = np.dot(Un, Vn)

# Show the predicted matrix
print("Predicted Ratings Matrix:")
print(R_pred)

# If you want to compare the imputed original matrix with the predicted one
print("\nOriginal Matrix (with missing values filled in):")
print(ratings_matrix_imputed)

Predicted Ratings Matrix:
[[4.33535017 3.72194433 4.30475564 ... 2.36257626 3.25746769 3.22649206]
 [4.32999127 3.80017293 4.33797134 ... 2.49779872 3.44864077 3.46451914]
 [3.26576623 3.25279894 3.4516678  ... 2.528716   3.51219106 3.74269424]
 ...
 [3.86849646 3.49131779 3.92037014 ... 2.39197517 3.3077228  3.37626543]
 [3.37571776 3.19666582 3.49080956 ... 2.33758442 3.24006469 3.38470293]
 [2.95753886 2.97003599 3.13717284 ... 2.33048085 3.23783228 3.4602812 ]]

Original Matrix (with missing values filled in):
[[4.         5.         5.         ... 2.5        3.4516129  4.        ]
 [5.         1.         3.56756757 ... 2.5        3.4516129  5.        ]
 [3.         2.         5.         ... 3.         4.         3.        ]
 ...
 [4.         3.         5.         ... 2.         2.         3.        ]
 [2.         3.         3.         ... 2.5        3.         2.        ]
 [3.         2.         4.         ... 2.5        2.         5.        ]]


In [12]:
new_user_latent = np.linalg.lstsq(Vn.T, new_user_filled_ratings, rcond=None)[0]
new_user_predicted_ratings = np.dot(new_user_latent, Vn)

print("Predicted Ratings for New User (NMF):")
print(new_user_predicted_ratings)

Predicted Ratings for New User (NMF):
[3.43127118 3.35093162 3.59555443 3.04612875 4.23351311 2.44844587
 3.34457783 2.50046271 2.67273208 2.31619922 3.51996748 3.04131972
 2.71008811 1.5123246  3.53526521 3.24778354 3.38994208 3.85296581
 3.28255943 3.23036038 3.57080368 3.84537682 3.99780359 2.47153976
 3.74014109 3.55677292 3.61751418 2.54560058 3.53296036 3.73743729]
