# Import necessary libraries

In [1]:
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd
from sklearn.metrics import ndcg_score

2025-05-14 15:58:42.513755: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Prepare environment

In [2]:
FEATURES_PATH = "data/features/"

# Load Data

In [3]:
model = load_model(FEATURES_PATH + "ai_model.keras", safe_mode=False)
small_matrix_final = pd.read_parquet(FEATURES_PATH + "small_matrix_final.parquet")

# Recommender system

### Define function to get recomendations of a given user

In [4]:
def get_user_recommendation_data(user: int):
    preds = model.predict(small_matrix_final[small_matrix_final["user_id"] == user][["watch_ratio_mean", "watch_ratio_prior_mean"]], batch_size=2048, verbose=None).flatten()
    user_mat = small_matrix_final[small_matrix_final["user_id"] == user].reset_index(drop=True)
    to_recommend_index = np.flip(preds.argsort())
    to_recommend = user_mat["video_id"][to_recommend_index].values

    ground_truth_index = np.flip(user_mat["watch_ratio"].values.argsort())
    ground_truth = user_mat["video_id"][ground_truth_index].values

    return to_recommend, ground_truth

In [9]:
sample_user = 6190
preds, ground_truth = get_user_recommendation_data(sample_user)
print(f"predictions: {preds[:10]}")
print(f"ground_truth: {ground_truth[:10]}")

predictions: [6222  600 9178 1305 2130 8366 4123 4040 5525 8524]
ground_truth: [ 2607  5222  7079  2436  1288  7181  4646 10091  9569  1182]


# Evaluating metrics

### Define metrics function

In [5]:
def precision_at_k(y_true, y_pred, k):
    y_true_k = y_true[:k]
    y_pred_k = y_pred[:k]
    return len(set(y_true_k) & set(y_pred_k)) / k

def recall_at_k(y_true, y_pred, k):
    y_pred_k = y_pred[:k]
    return len(set(y_true) & set(y_pred_k)) / len(y_true) if y_true is not None else 0

def average_precision_at_k_continuous(actual_scores, predicted_scores, k, top_n_relevant=5):
    """
    Compute AP@k based on ranking from predicted scores.
    Top-N videos with highest actual scores are considered relevant.
    """
    if len(actual_scores) == 0:
        return 0.0

    # Get the indices of top-N most relevant items based on actual scores
    actual_top_indices = np.argsort(actual_scores)[::-1][:top_n_relevant]
    relevant_set = set(actual_top_indices)

    # Get the indices of top-k predicted scores
    pred_top_indices = np.argsort(predicted_scores)[::-1][:k]

    hits = 0
    score = 0.0
    for i, idx in enumerate(pred_top_indices):
        if idx in relevant_set:
            hits += 1
            score += hits / (i + 1)

    return score / min(len(relevant_set), k)

In [6]:
user = 6190
recommendations, ground_truth = get_user_recommendation_data(user)
print(ndcg_score([ground_truth], [recommendations], k=5))
print(ndcg_score([ground_truth], [recommendations], k=10))
print(ndcg_score([ground_truth], [recommendations], k=50))
print(ndcg_score([ground_truth], [recommendations], k=100))
print(ndcg_score([ground_truth], [recommendations], k=200))

0.33616071351043364
0.34715504043386836
0.42774752969637797
0.4232367864280283
0.4534373990977657


### Evaluate mean NDCG metric on all users
We are using NDCG because it is a metrics that look into the ranking of the predictions

In [None]:
from tqdm import tqdm
ndcg_5_list = []
for user_id in tqdm(small_matrix_final["user_id"].unique()):
    user_df = small_matrix_final[small_matrix_final["user_id"] == user_id]
    recommendations, ground_truth = get_user_recommendation_data(user_id)
    
    if len(ground_truth) >= 5:
        ndcg_5_list.append(ndcg_score([ground_truth], [recommendations], k=5))

avg_ndcg_5 = np.mean(ndcg_5_list)
print(f"Average NDCG@5 over users: {avg_ndcg_5:.4f}")

  0%|          | 0/1354 [00:00<?, ?it/s]

100%|██████████| 1354/1354 [01:15<00:00, 17.94it/s]

Average NDCG@5 over users: 0.4714



