In [12]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity

In [13]:
# Load models
with open('../models/svd_model.pkl', 'rb') as f:
    svd = pickle.load(f)

with open('../models/content_model.pkl', 'rb') as f:
    content_model = pickle.load(f)

with open('../models/collaborative_model.pkl', 'rb') as f:
    collaborative_model = pickle.load(f)

with open('../models/hybrid_model.pkl', 'rb') as f:
    hybrid_model = pickle.load(f)

In [14]:
# Load test data
test_user_item_matrix = pd.read_csv('../data/test_user_item_matrix.csv')

# Convert to numpy array if needed
actual_ratings = test_user_item_matrix.values


In [15]:
train_df = pd.read_csv('../data/train_user_item_matrix.csv')
print(train_df.head())

                                             user_id  B002PD61Y4  B002SZEOLG  \
0  AESPOE5Z2FMNU577LDO7HKJCEDOA,AGMOIJFKHOE7RTSMQ...         0.0         0.0   
1  AGCWHOWHOTWSN4J2TFAXUEZZUBXQ,AF7COMJXY3YJUCEUE...         0.0         0.0   
2  AG6TQFT2J2BQW67NBTLB4X6XYC5A,AGGFJ5HSIY4FHH4F7...         0.0         0.0   
3  AFGPLYW6L6FYUGQDND7QGCRL2H2Q,AE3W6H2PP2KKMVEDI...         0.0         0.0   
4  AGNE5T4E7SEMJUDM4COI6JBNJQBQ,AES63PZGZP33ZVO55...         0.0         0.0   

   B003B00484  B003L62T7W  B004IO5BMQ  B005FYNT3G  B005LJQMCK  B005LJQMZC  \
0         0.0         0.0         0.0         0.0         0.0         0.0   
1         0.0         0.0         0.0         0.0         0.0         0.0   
2         0.0         0.0         0.0         0.0         0.0         0.0   
3         0.0         0.0         0.0         0.0         0.0         0.0   
4         0.0         0.0         0.0         0.0         0.0         0.0   

   B006LW0WDQ  ...  B0BNXFDTZ2  B0BP18W8TM  B0BP7XLX48  

In [16]:
print(test_user_item_matrix.head())


                                             user_id  B002PD61Y4  B002SZEOLG  \
0  AH6ZYHC4ECJ56T4GGZCL6MITCTMA,AE4TXVJAXHK5R7IEL...         0.0         0.0   
1  AEVL6TZWDKICBU5K36HGBG65WXKQ,AFF5UYOSGQEATDUIO...         0.0         0.0   
2  AEWV35IS77VEIX4T7GIMS7WUPLLA,AHJJPJEYJI7CTBSWR...         0.0         0.0   
3  AEJYXUANEWSM2G7VHRZDANCYLPYQ,AGGUTNEUBYL4DWFU3...         0.0         0.0   
4  AFVTO4K2IG5AYWZPOAEA2QGPZZ4A,AHWTCTHLUKPJP2IUI...         0.0         0.0   

   B003B00484  B003L62T7W  B004IO5BMQ  B005FYNT3G  B005LJQMCK  B005LJQMZC  \
0         0.0         0.0         0.0         0.0         0.0         0.0   
1         0.0         0.0         0.0         0.0         0.0         0.0   
2         0.0         0.0         0.0         0.0         0.0         0.0   
3         0.0         0.0         0.0         0.0         0.0         0.0   
4         0.0         0.0         0.0         0.0         0.0         0.0   

   B006LW0WDQ  ...  B0BNXFDTZ2  B0BP18W8TM  B0BP7XLX48  

In [17]:
# SVD Model Evaluation
def predict_ratings_svd(user_item_matrix, svd_model):
    user_item_matrix_svd = svd_model.transform(user_item_matrix)
    svd_matrix = np.dot(user_item_matrix_svd, svd_model.components_)
    return np.clip(svd_matrix, 0, 5)

predicted_ratings_svd = predict_ratings_svd(test_user_item_matrix, svd)
mse_svd = mean_squared_error(actual_ratings, predicted_ratings_svd)
print(f"SVD Model MSE: {mse_svd}")

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- user_id


In [19]:
# Content-Based Model Evaluation
item_profiles = content_model
item_similarity = cosine_similarity(item_profiles)

def predict_ratings_content(user_item_matrix, item_similarity):
    return np.dot(user_item_matrix, item_similarity)

predicted_ratings_content = predict_ratings_content(test_user_item_matrix, item_similarity)
mse_content = mean_squared_error(actual_ratings, predicted_ratings_content)
print(f"Content-Based Model MSE: {mse_content}")

ValueError: shapes (239,1349) and (1462,1462) not aligned: 1349 (dim 1) != 1462 (dim 0)

In [20]:
# Collaborative Filtering Model Evaluation
def predict_ratings_collaborative(user_item_matrix, model):
    distances, indices = model.kneighbors(user_item_matrix, n_neighbors=10)
    return np.mean(user_item_matrix.values[indices], axis=1)

predicted_ratings_collaborative = predict_ratings_collaborative(test_user_item_matrix, collaborative_model)
mse_collaborative = mean_squared_error(actual_ratings, predicted_ratings_collaborative)
print(f"Collaborative Filtering Model MSE: {mse_collaborative}")

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- user_id


In [21]:
# Hybrid Model Evaluation
def predict_ratings_hybrid(user_item_matrix, hybrid_model):
    user_item_matrix_svd = hybrid_model['svd'].transform(user_item_matrix)
    svd_predictions = np.dot(user_item_matrix_svd, hybrid_model['svd'].components_)
    content_predictions = np.dot(user_item_matrix, hybrid_model['item_profiles'])
    combined_predictions = (svd_predictions + content_predictions) / 2
    return np.clip(combined_predictions, 0, 5)

predicted_ratings_hybrid = predict_ratings_hybrid(test_user_item_matrix, hybrid_model)
mse_hybrid = mean_squared_error(actual_ratings, predicted_ratings_hybrid)
print(f"Hybrid Model MSE: {mse_hybrid}")

ValueError: The feature names should match those that were passed during fit.
Feature names unseen at fit time:
- user_id
