# Model evaluation
We evaluate the proposed KNN and SVD models to determine which one to select.

In [43]:
import sys
import os

sys.path.append('../src')
module_path = os.path.abspath("..")
if module_path not in sys.path:
    sys.path.append(module_path)

from src.evaluation import knn_mean_reciprocal_rank, knn_ndcg_at_k, knn_precision_at_k, knn_recall_at_k
from src.evaluation import svd_mrr, svd_ndcg_at_k, svd_precision_at_k, svd_recall_at_k

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
import pickle
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder
from scipy.stats import rankdata

# Locations of the processed order lines and the pickled KNN model,
# relative to the notebook's working directory.
orders_path = "../data/processed/order_data.parquet"
knn_model_path = "../models/collaborative_filtering/knn_model.pkl"

order_data = pd.read_parquet(orders_path)

# NOTE: unpickling executes arbitrary code; only load model files you trust.
with open(knn_model_path, "rb") as model_file:
    knn_model = pickle.load(model_file)

# Quick look at the first rows to confirm the frame loaded as expected.
print(order_data.head())


                                   id_x order_id  \
0  de08e731-2794-4015-9222-100a0d5103d7   s00051   
1  00575705-ae1d-4c0c-b770-37e86f499da1   s00050   
2  8bb92f20-4b20-41c8-9bc3-1527e96c3886   s00050   
3  12b1bb8a-41e3-421a-9d47-71aff7b9bbcc   s00049   
4  46d37b4c-ab52-48c4-a360-4aecaaeaa40b   s00049   

                                 product_name  quantity  unit_price  subtotal  \
0  celular samsung a35 256gb negro 356ezkggto       1.0     26995.0   26995.0   
1                   sandwichera nikkei 264124       1.0       995.0     995.0   
2                    pan sobao buenhorno 10/1       4.0        60.0     240.0   
3  celular samsung a35 256gb negro 356ezkggto       1.0     26995.0   26995.0   
4         galletas dulces oreo regular 12 und       1.0       198.0     198.0   

  partner_invoice    partner_shipping          date_order state  ...  \
0  lilianna perez  harvard university 2025-01-29 00:23:15  sale  ...   
1   milena garcia           microsoft 2025-01-28 21:39:3

### KNN model evaluation
We use 5-fold cross-validated grid search over K = 1 to 20 to find the number of nearest neighbors that minimizes the mean squared error of the KNN model. Keep in mind that additional features were added to the KNN model to improve its performance.

In [44]:

# Encode the invoiced partner and the product name as integer ids so they can
# be used as numeric KNN features. The encoded columns are written back onto
# order_data.
label_encoder_user = LabelEncoder()
label_encoder_product = LabelEncoder()
order_data["user_id"] = label_encoder_user.fit_transform(order_data["partner_invoice"])
order_data["product_id"] = label_encoder_product.fit_transform(order_data["product_name"])

# Features: (user_id, product_id); target: unit_price.
feature_columns = ["user_id", "product_id"]
X = order_data[feature_columns].values
y = order_data["unit_price"].values

# Hold out 20% of the rows; the seed is fixed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Search K = 1..20 with 5-fold cross-validation, scored by negative MSE.
param_grid = {"n_neighbors": range(1, 21)}
knn = KNeighborsRegressor()
grid_search = GridSearchCV(
    knn,
    param_grid,
    cv=5,
    scoring="neg_mean_squared_error",
)
grid_search.fit(X_train, y_train)

best_k = grid_search.best_params_["n_neighbors"]
print(f"📌 The best value of K is: {best_k}")

📌 The best value of K is: 2


Basically, our model indicates that of the top 2 recommendations, 50% are correct (Precision@2), while only 11.11% of the relevant items appear in the top 2 positions (Recall@2). An MRR of 1.0 means that the first relevant item is ranked in the first position. NDCG@2 also reaches its maximum value of 1.0, which indicates that the relevant hit is ranked as high as possible; it does not mean that both recommendations are relevant, since Precision@2 is only 50%. The R² of -29.7794 shows that the model is performing poorly: a value below zero means it does worse than always predicting the mean. However, it's important to note that this dataset has only about 60 rows and few columns, so KNN doesn't perform well with so little data. The more data and distances available, the better the model will perform, which means it could be useful for larger datasets or as the data set grows.

In [45]:


# Evaluate the pickled KNN model for a single user: ranking metrics
# (Precision/Recall/MRR/NDCG) plus RMSE/MAE/R² on the quantity column.
user = "lilianna perez"

# Boolean mask of the order lines invoiced to the target user (reused below).
user_mask = order_data["partner_invoice"] == user

# Positional row of the user's first order line. The original code used
# `.index[0]`, which is an index *label*; `_fit_X[...]` and `.iloc[...]` below
# are positional, so a label is only correct when order_data has a default
# RangeIndex. Raises IndexError if the user has no rows.
knn_user_index = int(np.flatnonzero(user_mask.to_numpy())[0])

# NOTE(review): assumes knn_model was fitted on rows in the same order as
# order_data, so fit-matrix positions map onto order_data rows — confirm.
# `_fit_X` is a private scikit-learn attribute and may change between versions.
user_vector = knn_model._fit_X[knn_user_index].reshape(1, -1)
distances, indices = knn_model.kneighbors(user_vector)
knn_fav_products = order_data.iloc[indices[0]]["product_name"].tolist()

purchased_products = order_data.loc[user_mask, "product_name"].tolist()

K = 2
precision = knn_precision_at_k(purchased_products, knn_fav_products, K)
recall = knn_recall_at_k(purchased_products, knn_fav_products, K)
mrr = knn_mean_reciprocal_rank(purchased_products, knn_fav_products)
ndcg = knn_ndcg_at_k(purchased_products, knn_fav_products, K)

# NOTE(review): despite their names, these arrays hold the `quantity` column,
# not prices. The names are kept so later cells that reference them still work.
# The two arrays are paired purely by position after truncation, so the error
# metrics below compare unrelated rows — confirm this is the intended check.
actual_prices = order_data[order_data["product_name"].isin(purchased_products)]["quantity"].values
predicted_prices = order_data[order_data["product_name"].isin(knn_fav_products)]["quantity"].values

# Truncate both arrays to a common length so the sklearn metrics accept them.
min_length = min(len(actual_prices), len(predicted_prices))
actual_prices = actual_prices[:min_length]
predicted_prices = predicted_prices[:min_length]

# Both arrays now have exactly min_length elements, so one check is enough.
if min_length > 0:
    rmse = np.sqrt(mean_squared_error(actual_prices, predicted_prices))
    mae = mean_absolute_error(actual_prices, predicted_prices)
    r2 = r2_score(actual_prices, predicted_prices)
else:
    rmse, mae, r2 = np.nan, np.nan, np.nan

print(f"Precision@{K}: {precision:.4f}")
print(f"Recall@{K}: {recall:.4f}")
print(f"MRR: {mrr:.4f}")
print(f"NDCG@{K}: {ndcg:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"R²: {r2:.4f}")


Precision@2: 0.5000
Recall@2: 0.1111
MRR: 1.0000
NDCG@2: 1.0000
RMSE: 6.0954
MAE: 3.6154
R²: -29.7794


### SVD Model evaluation
We load the saved SVD factors and rebuild the matrix of predicted user–product quantities.

In [46]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the pickled SVD factors; the pickle is unpacked into three objects
# named U, Sigma and Vt.
# NOTE: unpickling executes arbitrary code; only load model files you trust.
svd_model_path = "../models/collaborative_filtering/svd_model.pkl"
with open(svd_model_path, "rb") as model_file:
    U, Sigma, Vt = pickle.load(model_file)

# User x product matrix of summed quantities; pairs with no orders become 0.
user_product_matrix = order_data.pivot_table(
    index='partner_invoice',
    columns='product_name',
    values='quantity',
    aggfunc='sum',
    fill_value=0,
)

# Rebuild the approximation U · diag(Sigma) · Vt in two explicit steps.
Sigma_matrix = np.diag(Sigma)
scaled_user_factors = np.dot(U, Sigma_matrix)
predictions = np.dot(scaled_user_factors, Vt)

# Label the reconstruction with the same users/products as the pivot table.
# NOTE(review): assumes the factors were fitted on a matrix with this exact
# row/column order — confirm against the training notebook.
predicted_ratings_df = pd.DataFrame(
    predictions,
    index=user_product_matrix.index,
    columns=user_product_matrix.columns,
)
print(predicted_ratings_df)


product_name        audifonos argom bluetooth arg-hs-2552b  \
partner_invoice                                              
brandon freeman                                  -1.776865   
harvard university                               -1.838017   
humberto marcebo                                  6.794311   
jordana alphy                                    -1.178445   
joshua barom                                      1.726329   
kendry murkan                                     0.019859   
lilianna perez                                   -1.605675   
milena garcia                                    -1.081994   
mylan ramos                                      -1.059503   

product_name        batidora cuisinart pedestal sm-50bc  \
partner_invoice                                           
brandon freeman                               -2.162077   
harvard university                            -1.900093   
humberto marcebo                               6.055864   
jordana alphy         

For now, our model indicates that of the top 12 recommendations, 83.33% are correct (Precision@12), and 100% of the relevant items appear in the first 12 positions (Recall@12). An MRR of 1.0 means that the first relevant item is ranked in the first position, and NDCG@10 reaches its maximum value of 1.0, meaning the top 10 recommendations are ranked in the ideal order. The R² of -0.3857 suggests that the model is doing somewhat poorly in terms of prediction (though better than KNN): a value below zero means it does worse than always predicting the mean. However, it's important to note that this dataset has only around 60 rows, and I made an effort to prevent overfitting, which resulted in a lower R².

This version ensures the relationship between R² and overfitting is clearer.

In [47]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Rebuild the observed and predicted user x product matrices so this cell can
# be run independently of the previous one.
user_product_matrix = order_data.pivot_table(
    index='partner_invoice',
    columns='product_name',
    values='quantity',
    aggfunc='sum',
    fill_value=0,
)

Sigma_matrix = np.diag(Sigma)
scaled_user_factors = np.dot(U, Sigma_matrix)
predictions = np.dot(scaled_user_factors, Vt)
predicted_ratings_df = pd.DataFrame(
    predictions,
    index=user_product_matrix.index,
    columns=user_product_matrix.columns,
)

# Predicted scores and observed quantities for the user under evaluation.
user_name = 'lilianna perez'
user_ratings = predicted_ratings_df.loc[user_name]
actual_ratings = user_product_matrix.loc[user_name]

# Products ordered from highest to lowest predicted score. Both names are kept
# as separate Series, as in the original cell.
user_ratings_sorted = user_ratings.sort_values(ascending=False)
recommended_items = user_ratings.sort_values(ascending=False)

# Ranking metrics, printed as soon as each group is computed.
precision_12 = svd_precision_at_k(actual_ratings, recommended_items, 12)
recall_12 = svd_recall_at_k(actual_ratings, recommended_items, 12)
for metric_label, metric_value in (("Precision@12", precision_12), ("Recall@12", recall_12)):
    print(f"{metric_label}: {metric_value:.4f}")

# NOTE(review): svd_mrr takes (recommended, actual), the reverse of the other
# helpers' argument order — confirm against src.evaluation.
mrr_value = svd_mrr(recommended_items, actual_ratings)
print(f"MRR: {mrr_value:.4f}")

ndcg_10 = svd_ndcg_at_k(actual_ratings, recommended_items, 10)
print(f"NDCG@10: {ndcg_10:.4f}")

# Error metrics restricted to products the user actually bought (quantity >= 1).
purchased_mask = actual_ratings >= 1
observed_quantities = actual_ratings[purchased_mask]
predicted_quantities = user_ratings[purchased_mask]

rmse = np.sqrt(mean_squared_error(observed_quantities, predicted_quantities))
mae = mean_absolute_error(observed_quantities, predicted_quantities)
r2 = r2_score(observed_quantities, predicted_quantities)

for metric_label, metric_value in (("RMSE", rmse), ("MAE", mae), ("R²", r2)):
    print(f"{metric_label}: {metric_value:.4f}")



Precision@12: 0.8333
Recall@12: 1.0000
MRR: 1.0000
NDCG@10: 1.0000
RMSE: 9.9285
MAE: 7.8449
R²: -0.3857
