In [1]:
# Put the project checkout on sys.path; the `src.*` imports below presumably
# rely on it.
# NOTE(review): this is a hard-coded absolute path to one machine's checkout,
# so it has to be adjusted wherever the repository lives somewhere else.
import sys
sys.path.append('/home/alexabades/recsys')

import torch
from torch import nn, optim
from torch.utils.data import DataLoader

from src.models.contextNFC.context_nfc import DeepNCF


from src.data.BinaryClassifictionDataLoader import \
    ContextDataLoaderBinaryClasifictaion
from src.utils.tools.tools import ROOT_PATH

In [2]:
# --- Run configuration ------------------------------------------------------
batch_size = 100
_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Datasets and loaders ---------------------------------------------------
# Both splits are read from the same processed directory.
frappe_dir = ROOT_PATH + "/data/processed/frappeCtxA"

train_data = ContextDataLoaderBinaryClasifictaion(frappe_dir, split='train')
test_data = ContextDataLoaderBinaryClasifictaion(
    frappe_dir, split='test', num_negative_samples=10
)

train_loader = DataLoader(train_data, batch_size=batch_size)
test_loader = DataLoader(test_data, batch_size=batch_size)

# --- Model ------------------------------------------------------------------
# The user / item / context counts reported by the training split size the model.
num_users, num_items, num_context = (
    train_data.num_users,
    train_data.num_items,
    train_data.num_context,
)

model = DeepNCF(
    num_users=num_users,
    num_items=num_items,
    num_context=num_context,
    mf_dim=8,
    layers=[31, 10, 20],
)
model = model.to(_device)



In [3]:
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
# TODO: Be careful: the layer sizes minus the number of contextual features
# have to give an even number.

# Smoke test: run only the first training batch through the model. `ratings`
# and `output` stay defined afterwards and are read by later cells. No weights
# are updated; the optimisation step is left commented out.
for batch in train_loader:
    user_input, item_input, context_input, ratings = (
        batch[field].to(_device)
        for field in ("user", "item", "context", "rating")
    )
    output = model(user_input, item_input, context_input)

    # loss = loss_fn(output, ratings)
    # optimizer.zero_grad()
    # loss.backward()
    # optimizer.step()
    break

In [42]:
import numpy as np
from collections import defaultdict
from src.utils.eval import getHR,  getRR, getBinaryIDCG, getBinaryDCG


def evaluate_model(model_pos, data_loader, topK: int = 5):
    """
    Rank each user's candidate items by predicted score and report the mean
    top-K ranking metrics over all users.

    Parameters:
        - model_pos: Model called as ``model_pos(user, item, context)``. It is
          switched to evaluation mode with ``.eval()`` and left in that mode.
        - data_loader: Iterable of batches; each batch is a mapping holding
          tensors under the keys "user", "item", "context" and "gtItem".
        - topK (int): Length of the ranked item list handed to the metric
          helpers.

    Returns:
        - Tuple ``(mean getHR, mean getRR, mean getBinaryDCG)`` over users.

    Raises:
        - ValueError: If ``data_loader`` yields no batches.
    """
    # Send inputs to wherever the model's weights live instead of relying on
    # notebook state; fall back to the notebook-level `_device` only when the
    # model exposes no parameters.
    try:
        device = next(model_pos.parameters()).device
    except (AttributeError, StopIteration):
        device = _device

    model_pos.eval()

    users, items, scores, gt_items = [], [], [], []
    with torch.no_grad():
        for batch in data_loader:
            user_input = batch["user"].to(device)
            item_input = batch["item"].to(device)
            context_input = batch["context"].to(device)

            batch_scores = model_pos(user_input, item_input, context_input)

            scores.append(batch_scores.cpu().numpy())
            users.append(user_input.cpu().numpy())
            items.append(item_input.cpu().numpy())
            gt_items.append(batch["gtItem"].cpu().numpy())

    if not scores:
        # np.concatenate([]) would fail with a message unrelated to the cause.
        raise ValueError("data_loader produced no batches; nothing to evaluate.")

    # One flat, aligned array per field across all batches.
    scores = np.concatenate(scores, axis=0).flatten()
    users = np.concatenate(users, axis=0).flatten()
    items = np.concatenate(items, axis=0).flatten()
    gt_items = np.concatenate(gt_items, axis=0).flatten()

    # user -> list of (item, score, ground-truth item) rows
    rows_by_user = defaultdict(list)
    for user, item, score, gt_item in zip(users, items, scores, gt_items):
        rows_by_user[user].append((item, score, gt_item))

    hrs, rrs, ndcgs = [], [], []
    for rows in rows_by_user.values():
        ranked = sorted(rows, key=lambda row: row[1], reverse=True)
        # The ground truth is read from the best-scored row *before* slicing so
        # that a non-positive topK cannot cause an IndexError.
        # NOTE(review): assumes every row of a user carries the same gtItem —
        # TODO confirm against the dataset class.
        gt_item = ranked[0][2]
        top_items = [item for item, _, _ in ranked[:topK]]

        hrs.append(getHR(top_items, [gt_item]))
        rrs.append(getRR(top_items, [gt_item]))
        # NOTE(review): the raw DCG is stored as "ndcg"; that is only a
        # normalised value if the ideal DCG for one relevant item is 1 —
        # confirm against getBinaryDCG / getBinaryIDCG.
        ndcgs.append(getBinaryDCG(top_items, [gt_item]))

    return np.mean(hrs), np.mean(rrs), np.mean(ndcgs)

In [43]:
# NOTE: despite its name, `user_predictions` receives the tuple of aggregate
# metrics returned by `evaluate_model` (means of getHR, getRR and getBinaryDCG
# at the default topK=5), not per-user predictions.
user_predictions = evaluate_model(model, test_loader)

In [44]:
# Display the (HR, RR, DCG) means returned by `evaluate_model`.
user_predictions

(0.434715821812596, 0.18850486431131594, 0.24867970564572153)

In [36]:
from src.utils.eval import getHR,  getRR, getBinaryIDCG, getBinaryDCG


# getHR(user_predictions[0], [384])

# NOTE(review): `c` is not assigned anywhere in the visible notebook, so the
# value displayed for this cell must come from leftover kernel state. On a
# fresh Restart & Run All this line raises NameError unless `c` is defined first.
c

0.5

In [None]:
import numpy as np
from typing import List

from torch import Tensor


def rmse(
    predictions: List[float | int] | Tensor, ground_truth: List[float | int] | Tensor
) -> float:
    """
    Function to calculate the RMSE from 2 lists or tensors

    Parameters:
        - predictions (List[float | int] or Tensor): Predictions made by the model
        - ground_truth (List[float | int] or Tensor): Ground truth values

    Returns:
        - RMSE (float): The root mean square error between predictions and ground truth
    """
    if len(predictions) != len(ground_truth):
        raise ValueError("Predictions and ground truth must be of the same length.")

    if isinstance(predictions, Tensor):
        if predictions.requires_grad:
            predictions = predictions.detach()
        if predictions.is_cuda:
            predictions = predictions.cpu()
        predictions = predictions.numpy()

    if isinstance(ground_truth, Tensor):
        if ground_truth.requires_grad:
            ground_truth = ground_truth.detach()
        if ground_truth.is_cuda:
            ground_truth = ground_truth.cpu()
        ground_truth = ground_truth.numpy()

    error = np.array(predictions) - np.array(ground_truth)
    squared_error = np.square(error)
    mean_squared_error = np.mean(squared_error)
    root_mean_squared_error = np.sqrt(mean_squared_error)

    return root_mean_squared_error


# Keyword arguments keep each tensor in the role named by the signature of
# `rmse`: the model output is the prediction, the batch ratings (viewed as a
# column) are the ground truth.
rmse(predictions=output, ground_truth=ratings.view(-1, 1))

3.851875

In [None]:
# Collect ten RMSE readings for the current `ratings` / `output` pair.
# NOTE(review): neither tensor changes between iterations, so all ten
# readings are the same value.
rsme_all = [rmse(ratings.view(-1, 1), output) for _ in range(10)]

In [None]:
# Average of the RMSE readings collected in `rsme_all`.
np.mean(rsme_all)

3.8518748

In [None]:
from torch import Tensor

# Two ways of checking that `output` is a torch Tensor. Only the last
# expression of a cell is displayed, so the result of the `type(...) ==`
# comparison is discarded and the value shown comes from `isinstance`.
type(output) == Tensor
isinstance(output, Tensor)

True

In [None]:
# Scratch check of a "tensor on CUDA" guard. The tensor assignment is
# overwritten straight away, so the guard is exercised with a plain list and
# nothing is printed.
predictions = torch.tensor([1, 2, 3])
predictions = [1, 2]

if isinstance(predictions, Tensor):
    if predictions.is_cuda:
        print('To CP')

In [None]:

# Scratch check: 2 % 1 is 0, so the branch is taken and 'aa' is printed.
if 2 % 1 == 0:
    print('aa')

aa
