In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append('..')
from src.recommenders.dnn_recommender import DNNRecommender
from src.utils.data_preprocessing import preprocess2

books = pd.read_csv('../data/Books.csv', delimiter=';', dtype={'ISBN': str, 'Title': str, 'Author': str, 'Year': np.int16, 'Publisher': str})
users = pd.read_csv('../data/Users.csv', delimiter=';', low_memory=False)
ratings = pd.read_csv('../data/Ratings.csv', delimiter=';', dtype={'User-ID': np.int32, 'ISBN': str, 'Rating': np.int8})

books, users, ratings = preprocess2(items=books, users=users, ratings=ratings)

recommender = DNNRecommender()
data, ratings = recommender.preprocess(items=books, users=users, ratings=ratings)

features_to_normalize = ['Age', 'Year'] 
scaler = StandardScaler()
data[features_to_normalize] = scaler.fit_transform(data[features_to_normalize])

train_data, test_data, train_ratings, test_ratings = train_test_split(
    data,
    ratings,
    test_size=0.2,
    random_state=42
)
train_users = train_data.values[:, :2]
train_items = train_data.values[:, 2:]

test_users = test_data.values[:, :2]
test_items = test_data.values[:, 2:]

In [2]:
recommender.fit(items=train_items, users=train_users, ratings=train_ratings.values)

Epoch 1/20, Loss: 2746.76611328125
Epoch 2/20, Loss: 105474.7890625
Epoch 3/20, Loss: 1758.8427734375
Epoch 4/20, Loss: 71249.3046875
Epoch 5/20, Loss: 42192.67578125
Epoch 6/20, Loss: 2255.166015625
Epoch 7/20, Loss: 1171.29248046875
Epoch 8/20, Loss: 543.1190185546875
Epoch 9/20, Loss: 269.3381042480469
Epoch 10/20, Loss: 267.4319763183594
Epoch 11/20, Loss: 446.2208251953125
Epoch 12/20, Loss: 467.2278137207031
Epoch 13/20, Loss: 483.6711730957031
Epoch 14/20, Loss: 495.6233825683594
Epoch 15/20, Loss: 503.2602233886719
Epoch 16/20, Loss: 506.82843017578125
Epoch 17/20, Loss: 506.80841064453125
Epoch 18/20, Loss: 506.44757080078125
Epoch 19/20, Loss: 505.77703857421875
Epoch 20/20, Loss: 504.8271179199219


In [3]:
metrics = recommender.eval(users=test_users, items=test_items, ratings=test_ratings.values)
print(metrics)

Test Loss: 506.1771545410156
Average Precision: 0.003188494543937089
None


In [4]:
import numpy as np

def precision_at_k(y_true, y_pred, k=10):
    assert len(y_true) == len(y_pred)
    precision_scores = []
    
    for true, pred in zip(y_true, y_pred):
        top_k_pred = np.argsort(pred)[-k:]  # Get top k predictions
        true_set = set(true)  # Set of true items
        pred_set = set(top_k_pred)  # Set of predicted items
        
        # Calculate precision for the current user/item
        precision_scores.append(len(true_set & pred_set) / float(k))
    
    return np.mean(precision_scores)

# Assuming you have true_ratings and predictions as numpy arrays
true_ratings = test_ratings.values.reshape(-1, 1)
predictions = recommender.predict(test_users, test_items)
predictions = predictions.reshape(-1, 1)

# Calculate precision at k
precision_k = precision_at_k(true_ratings, predictions, k=10)
print(f'Precision at 10: {precision_k}')

Precision at 10: 0.0
