# Libraries

In [1]:
#import recbole
#print(recbole.__version__)

import os
import torch
from recbole.quick_start import run_recbole
from recbole.model.general_recommender import NeuMF
from recbole.config import Config
from recbole.data.interaction import Interaction


  from .autonotebook import tqdm as notebook_tqdm
2025-05-31 11:05:16,638	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-05-31 11:05:16,923	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


# Read data

### Model configuration

In [2]:

config_dict = {
    'model': 'NeuMF',
    'dataset': 'movielens',
    'data_path': './dataset/',

  'field_separator': '\t',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'RATING_FIELD': 'rating',
    'TIME_FIELD': 'timestamp',

    'load_col': {
        'inter': ['user_id', 'item_id', 'rating', 'timestamp']
    },
    'LABEL_FIELD': 'rating', # Still refers to the 'rating' column
    'threshold': {'rating': 4.5}, # <-- NEW: Ratings >= 4.5 are positive interactions for ranking

    'eval_task': 'ranking', # <-- CHANGED: Problem now framed as a ranking task
    'normalize_field': {},
    'loss_type': 'BPR', # <-- CHANGED: BPRLoss for ranking problems

    'eval_args': {
        'split': {'RS': [0.9, 0.05, 0.05]},
        'order': 'TO',
        'group_by': 'user',
        'mode': {'valid': 'uni10', 'test': 'uni10'},
        'neg_sample_args': None, # No specific negative sampling for evaluation metrics, RecBole handles this.
        'topk': [10, 20, 50], # <-- NEW: K values for ranking metrics (e.g., Recall@10, NDCG@20)
    },
    # Metrics for Ranking task
    'metrics': ['Recall', 'NDCG', 'MRR'], # <-- CHANGED: Ranking metrics
    'valid_metric': 'NDCG@10', # <-- CHANGED: Use a ranking metric for validation
    'valid_metric_bigger': True, # For ranking metrics, bigger is better

    'train_neg_sample_args': {'distribution': 'uniform', 'sample_num': 1}, # <-- ESSENTIAL for BPRLoss
    # NeuMF parameters (same as before)
    'mf_embedding_size': 64,
    'mlp_embedding_size': 64,
    'layers': [128, 64, 32],
    'dropout_prob': 0.3,

    'learning_rate': 0.001,
    'train_batch_size': 1024,
    'epochs': 14,
    'eval_step': 5,

    'eval_batch_size': 512,
    'log_wandb': False,
    'show_progress': False,
    'log_file': 'recbole_ml25m_neumf_ranking_log.txt', # Changed log file name
    'checkpoint_dir': 'saved_models_ml25m_ranking', # Changed checkpoint dir
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
}

print(f"\nTraining {config_dict['model']} on {config_dict['dataset']}...")
print(f"Data Split: Train={config_dict['eval_args']['split']['RS'][0]*100}%, Valid={config_dict['eval_args']['split']['RS'][1]*100}%, Test={config_dict['eval_args']['split']['RS'][2]*100}%")
print(f"Task: {config_dict['eval_task']}, Loss: {config_dict['loss_type']}")
print(f"Device: {config_dict['device']}")



Training NeuMF on movielens...
Data Split: Train=90.0%, Valid=5.0%, Test=5.0%
Task: ranking, Loss: BPR
Device: cpu


### Execute model

In [3]:


trainer, dataset, dataloaders = run_recbole(config_dict=config_dict)

print("\nTraining complete. Logs saved to:")
print(f"  {config_dict['log_file']}")
print(f"Best model saved in: {config_dict['checkpoint_dir']}")



31 May 11:06    INFO  ['/opt/miniconda3/envs/recbole_new_env/lib/python3.11/site-packages/ipykernel_launcher.py', '--f=/Users/vitalii/Library/Jupyter/runtime/kernel-v3e40b363fe3b61142012798ea81be6c665db35280.json']
31 May 11:06    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = ./dataset/movielens
checkpoint_dir = saved_models_ml25m_ranking
show_progress = False
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 14
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 5
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.9, 0.05, 0.05]}, 'order': 'TO', 'group_by': 'user', 'mode': {'val

UnpicklingError: Weights only load failed. In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
Please file an issue with the following so that we can make `weights_only=True` compatible with your use case: WeightsUnpickler error: Unsupported operand 149

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.

In [None]:
# --- Example Prediction for one user ---
print("\n--- Example Prediction for One User ---")

if trainer.best_model_path and os.path.exists(trainer.best_model_path):
    print(f"Loading best model from: {trainer.best_model_path}")
    
    config_loaded = Config(model=config_dict['model'], dataset=config_dict['dataset'], config_dict=config_dict)
    model_loaded = NeuMF(config_loaded, dataset).to(config_loaded.device)
    model_loaded.load_state_dict(torch.load(trainer.best_model_path)['state_dict'])
    model_loaded.eval()

    uid_field = dataset.uid_field
    iid_field = dataset.iid_field
    
    user_id_internal = None
    if dataloaders and len(dataloaders) > 2 and dataloaders[2].dataset:
            for interaction_batch in dataloaders[2]:
                if interaction_batch[uid_field].numel() > 0:
                    user_id_internal = interaction_batch[uid_field][0].item()
                    if user_id_internal != 0:
                        break
            if user_id_internal == 0:
                user_id_internal = None
                for interaction_batch in dataloaders[2]:
                    for uid_tensor in interaction_batch[uid_field]:
                        if uid_tensor.item() != 0:
                            user_id_internal = uid_tensor.item()
                            break
                    if user_id_internal is not None:
                        break
    
    if user_id_internal is None:
        print("Could not find a valid user for example prediction in the test set.")
    else:
        original_user_id = dataset.id2token(uid_field)[user_id_internal]
        print(f"\nExample Predictions for User (Original ID): {original_user_id}")
        print(f"Example Predictions for User (RecBole Internal ID): {user_id_internal}")

        all_item_ids_internal = dataset.token2id(iid_field).values()
        all_item_ids_internal = [item_id for item_id in all_item_ids_internal if item_id != 0]

        user_interacted_items_internal = set()
        for loader in dataloaders:
            for item_id_tensor in loader.dataset.get_user_item_feedback(user_id_internal).keys():
                user_interacted_items_internal.add(item_id_tensor)

        candidate_items_internal = [item_id for item_id in all_item_ids_internal if item_id not in user_interacted_items_internal]
        
        import random
        if len(candidate_items_internal) > 20:
            candidate_items_internal = random.sample(candidate_items_internal, 20)
        elif not candidate_items_internal:
            print("No candidate movies found for prediction (user might have rated all movies).")
            candidate_items_internal = random.sample(all_item_ids_internal, min(len(all_item_ids_internal), 10))
            print("Predicting for random movies, even if the user might have interacted with them.")


        if not candidate_items_internal:
            print("Cannot perform predictions. No available candidate movies.")
        else:
            inter_dict = {
                uid_field: torch.tensor([user_id_internal] * len(candidate_items_internal), dtype=torch.long, device=config_loaded.device),
                iid_field: torch.tensor(candidate_items_internal, dtype=torch.long, device=config_loaded.device),
            }
            predict_interaction = Interaction(inter_dict)

            with torch.no_grad():
                # For ranking, predict() returns a score (higher is better)
                scores = model_loaded.predict(predict_interaction)
                # Sort items by predicted score in descending order
                ranked_indices = torch.argsort(scores, descending=True)
                
                print("\nTop 5 Recommended Movies (by predicted score):")
                for i in range(min(5, len(ranked_indices))):
                    idx = ranked_indices[i].item()
                    item_id_internal = candidate_items_internal[idx]
                    original_item_id = dataset.id2token(iid_field)[item_id_internal]
                    predicted_score = scores[idx].item()
                    print(f"  Movie {original_item_id}: Score {predicted_score:.4f}")
else:
    print("Could not load best model for example prediction. Training might not have completed successfully or model was not saved.")



--- Example Prediction for One User ---


NameError: name 'trainer' is not defined

: 