In [1]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import sys
sys.path.append('..')
from src.recommenders.dnn_recommender import DNNRecommender
from src.utils.data_preprocessing import preprocess2

books = pd.read_csv('../data/Books.csv', delimiter=';', dtype={'ISBN': str, 'Title': str, 'Author': str, 'Year': np.int16, 'Publisher': str})
users = pd.read_csv('../data/Users.csv', delimiter=';', low_memory=False)
ratings = pd.read_csv('../data/Ratings.csv', delimiter=';', dtype={'User-ID': np.int32, 'ISBN': str, 'Rating': np.int8})

books, users, ratings = preprocess2(items=books, users=users, ratings=ratings)

recommender = DNNRecommender()
data, ratings = recommender.preprocess(items=books, users=users, ratings=ratings)

features_to_normalize = ['Age', 'Year'] 
scaler = StandardScaler()
data[features_to_normalize] = scaler.fit_transform(data[features_to_normalize])

train_data, test_data, train_ratings, test_ratings = train_test_split(
    data,
    ratings,
    test_size=0.2,
    random_state=42
)
train_users = train_data.values[:, :2]
train_items = train_data.values[:, 2:]

test_users = test_data.values[:, :2]
test_items = test_data.values[:, 2:]

In [2]:
recommender.fit(items=train_items, users=train_users, ratings=train_ratings.values)

Epoch 1/20, Loss: 62464.55859375
Epoch 2/20, Loss: 8650783.0
Epoch 3/20, Loss: 697115.375
Epoch 4/20, Loss: 925614.875
Epoch 5/20, Loss: 1439362.375
Epoch 6/20, Loss: 757093.125
Epoch 7/20, Loss: 678043.4375
Epoch 8/20, Loss: 586239.25
Epoch 9/20, Loss: 490250.6875
Epoch 10/20, Loss: 395429.59375
Epoch 11/20, Loss: 307166.125
Epoch 12/20, Loss: 298733.0625
Epoch 13/20, Loss: 290178.53125
Epoch 14/20, Loss: 281548.5
Epoch 15/20, Loss: 272881.1875
Epoch 16/20, Loss: 264210.46875
Epoch 17/20, Loss: 263338.71875
Epoch 18/20, Loss: 262456.03125
Epoch 19/20, Loss: 261563.09375
Epoch 20/20, Loss: 260660.625


In [3]:
recommender.eval(users=test_users, items=test_items, ratings=test_ratings.values)

Test Loss: 259574.609375
Average Precision: 0.0034363360933488047


In [4]:
import numpy as np
from sklearn.metrics import precision_score

def precision_at_k(y_true, y_pred, k=10):
    assert len(y_true) == len(y_pred)
    precision_scores = []
    
    for true, pred in zip(y_true, y_pred):
        top_k_pred = np.argsort(pred)[-k:]  # Get top k predictions
        true_set = set(true)  # Set of true items
        pred_set = set(top_k_pred)  # Set of predicted items
        
        # Calculate precision for the current user/item
        precision_scores.append(len(true_set & pred_set) / float(k))
    
    return np.mean(precision_scores)

# Assuming you have true_ratings and predictions as numpy arrays
true_ratings = test_ratings.values.reshape(-1, 1)
predictions = recommender.predict(test_users, test_items)
predictions = predictions.reshape(-1, 1)

# Calculate precision at k
precision_k = precision_at_k(true_ratings, predictions, k=10)
print(f'Precision at 10: {precision_k}')


Precision at 10: 0.0
