In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import random

In [34]:
# --- Experiment configuration ---
TEST_SIZE = 0.2   # passed as `test_size` to train_test_split on the ratings
K_RECS = 10 # recommendation item count
RANDOM_SEED = 42  # seed for Python's global `random` generator

RATINGS_DATA_PATH = './data/ratings.csv'
USER_DATA_PATH = './data/users.csv'
ITEM_DATA_PATH = './data/movies.csv'

# The recommender draws its items with the `random` module. Seeding it here
# makes the sampled recommendations, and the metrics computed from them,
# repeatable on Restart Kernel -> Run All. Re-running a later cell on its own
# continues the same random stream, so re-run this cell first to reset it.
random.seed(RANDOM_SEED)

In [35]:
# Load data: read the ratings, users and items CSVs (in that order) into
# one DataFrame each.
ratings_df, user_df, item_df = (
    pd.read_csv(csv_path)
    for csv_path in (RATINGS_DATA_PATH, USER_DATA_PATH, ITEM_DATA_PATH)
)

In [None]:
# train/test split: hold out TEST_SIZE of the ratings, user-wise stratified,
# with a fixed random_state so the split itself is repeatable.
train_df, test_df = train_test_split(
    ratings_df,
    test_size=TEST_SIZE,
    stratify=ratings_df['user_id'],
    random_state=42,
)


def _items_by_user(ratings):
    """Map each user_id in ``ratings`` to the set of item_ids on its rows."""
    return ratings.groupby('user_id')['item_id'].apply(set).to_dict()


# lookups
user_seen_movies_dict_train = _items_by_user(train_df)
user_seen_movies_dict_test = _items_by_user(test_df)  # later for evaluation
all_movies = set(item_df['movie_id'])  # candidate pool for recommendations

In [37]:
# random recommendation generator
def get_random_recommendations(user_id, k=K_RECS):
    """Return up to ``k`` randomly chosen movies the user has not seen in training.

    Args:
        user_id: Key into ``user_seen_movies_dict_train``. A user missing from
            that dict is treated as having seen nothing.
        k: Maximum number of movies to return.

    Returns:
        list: ``min(k, number of unseen movies)`` distinct movie ids, in
        random order.

    Raises:
        ValueError: Propagated from ``random.sample`` when ``k`` is negative.
    """
    seen_movies = user_seen_movies_dict_train.get(user_id, set())
    unseen_movies = list(all_movies - seen_movies)

    # Always sample, capping the sample size at the pool size. Returning the
    # raw list when the pool is smaller than k would hand the items back in
    # set-iteration order instead of a random order, which skews
    # rank-sensitive metrics such as NDCG.
    return random.sample(unseen_movies, min(k, len(unseen_movies)))

In [None]:
# evaluation
def evaluate_random_recommender(user_seen_movies_dict:dict, k=K_RECS):
    """Score ``get_random_recommendations`` against per-user ground truth.

    For every user in ``user_seen_movies_dict`` a fresh list of up to ``k``
    recommendations is drawn and compared with that user's ground-truth items.
    The per-user values are averaged over all evaluated users.

    Args:
        user_seen_movies_dict: Maps user_id to an iterable of the item ids
            considered relevant for that user.
        k: Recommendation list length; must be positive.

    Returns:
        dict: Keys ``'HitRate@k'``, ``'Precision@k'``, ``'Recall@k'`` and
        ``'NDCG@k'``, each a float. All values are ``0.0`` when no user could
        be evaluated (empty input, or every user has no ground-truth items).

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError(f"k must be a positive integer, got {k}")

    # Discount for 0-based position i is 1 / log2(i + 2). The ideal DCG depends
    # only on min(#relevant, k), so build its prefix sums once instead of
    # re-summing them for every user.
    discounts = [1 / np.log2(position + 2) for position in range(k)]
    ideal_dcg = [0.0]
    for discount in discounts:
        ideal_dcg.append(ideal_dcg[-1] + discount)

    hits, precision_sum, recall_sum, ndcg_sum = 0, 0.0, 0.0, 0.0
    total_users = 0

    for user_id, true_items in user_seen_movies_dict.items():
        true_set = set(true_items)
        if not true_set:
            # Recall and NDCG are undefined without any relevant item; skip
            # the user rather than dividing by zero.
            continue

        recs = get_random_recommendations(user_id, k=k)
        num_hits = len(true_set & set(recs))
        hits += int(num_hits > 0)
        precision_sum += num_hits / k  # always divided by k, even if fewer recs came back
        recall_sum += num_hits / len(true_set)

        # NDCG@k: discounted gain of the hits over the best achievable gain.
        dcg = sum(
            discount
            for movie, discount in zip(recs, discounts)
            if movie in true_set
        )
        idcg = ideal_dcg[min(len(true_set), k)]  # > 0 because true_set is non-empty
        ndcg_sum += dcg / idcg

        total_users += 1

    if total_users == 0:
        # Nothing to average over; report zeros instead of raising
        # ZeroDivisionError.
        return {
            'HitRate@k': 0.0,
            'Precision@k': 0.0,
            'Recall@k': 0.0,
            'NDCG@k': 0.0
        }

    return {
        'HitRate@k': hits / total_users,
        'Precision@k': precision_sum / total_users,
        'Recall@k': recall_sum / total_users,
        'NDCG@k': float(ndcg_sum / total_users)
    }

# evaluate on test: score the random recommender against the held-out items
metrics = evaluate_random_recommender(user_seen_movies_dict_test, k=K_RECS)

# Build the whole report first, then emit it with a single print call.
report_lines = [f"Random Recommender Evaluation (k={K_RECS}):"]
report_lines.extend(f"{metric}: {value:.4f}" for metric, value in metrics.items())
print("\n".join(report_lines))

Random Recommender Evaluation (k=10):
HitRate@k: 0.1262
Precision@k: 0.0143
Recall@k: 0.0057
NDCG@k: 0.0142
