# Libraries

In [None]:
# Python 3.11 required (code will not work with Python 3.12 or 3.13 because of a bug in the ReCBOLE library)
# Rebole version 1.2.1 is required ------- available for Python 3.11

import torch
from recbole.quick_start import load_data_and_model
from recbole.data.interaction import Interaction
from recbole.data.utils import create_dataset
from recbole.config import Config # Import Config
import pandas as pd
import os 
import importlib

  from .autonotebook import tqdm as notebook_tqdm
2025-06-03 00:05:40,176	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-06-03 00:05:40,559	INFO util.py:159 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [2]:
torch.cuda.is_available()

False

In [13]:
# Define a USER ID for predictions
user_raw_id_to_predict = '345'

## 1) Model version 5: predictions

The model training lasts 50 epochs with 'uni100' evaluation mode (but the model saved after 40th epoch to avoid overfitting)

(the duration of training & evaluation was about **18 hours**)

### 1.1 Model configuration

In [11]:
prediction_config_dict = {
    'model': 'NeuMF',
    'dataset': 'movielens',
    'data_path': '../movies-database/',

  'field_separator': '\t',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'RATING_FIELD': 'rating',
    'TIME_FIELD': 'timestamp',

    'load_col': {
        'inter': ['user_id', 'item_id', 'rating', 'timestamp']
    },
    'LABEL_FIELD': 'rating', 
    'threshold': {'rating': 4.5}, 

    'eval_task': 'ranking', 
    'normalize_field': {},
    'loss_type': 'BPR', 

    'eval_args': {
        'split': {'RS': [0.9, 0.05, 0.05]},
        'order': 'TO',
        'group_by': 'user',
        'mode': {'valid': 'uni100', 'test': 'uni100'},
        'neg_sample_args': None, 
        'topk': [3, 10, 20], 
    },
    'metrics': ['Recall', 'NDCG', 'MRR'], 
    'valid_metric': 'NDCG@10',
    'valid_metric_bigger': True,

    'train_neg_sample_args': {'distribution': 'uniform', 'sample_num': 1}, 
    'mf_embedding_size': 64,
    'mlp_embedding_size': 64,
    'layers': [128, 64, 32],
    'dropout_prob': 0.3,

    'learning_rate': 0.001,
    'train_batch_size': 1024,
    'epochs': 50,
    'eval_step': 5,
    'stopping_step': 4,

    'eval_batch_size': 512,
    'log_wandb': False,
    'show_progress': False,
    'log_file': 'main_NeuMF.txt',
    'checkpoint_dir': 'main_NeuMF',
}

### 1.2 Execute model

In [12]:

saved_model_path = './NeuMF-May-31-2025_23-14-35.pth' 

config = Config(model=prediction_config_dict['model'], dataset=prediction_config_dict['dataset'], config_dict=prediction_config_dict)
dataset = create_dataset(config)
print("Dataset loaded successfully.")

model_name = config['model']
model_module = importlib.import_module(f"recbole.model.general_recommender")
model_class = getattr(model_module, model_name)

model = model_class(config, dataset)
print(f"Model '{model_name}' initialized.")

checkpoint = torch.load(saved_model_path, weights_only=False)
model.load_state_dict(checkpoint['state_dict'])
print("Model state dictionary loaded successfully.")

# small paragraph about model weights ---
print(f"MF User Embedding Weight Mean: {model.user_mf_embedding.weight.mean().item():.4f}")
print(f"MLP Item Embedding Weight Mean: {model.item_mlp_embedding.weight.mean().item():.4f}")
print(f"Prediction Layer Weight Mean: {model.predict_layer.weight.mean().item():.4f}")
# --- end of small paragraph ---

model.eval()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)


Dataset loaded successfully.
Model 'NeuMF' initialized.
Model state dictionary loaded successfully.
MF User Embedding Weight Mean: -0.1944
MLP Item Embedding Weight Mean: -0.0596
Prediction Layer Weight Mean: -0.0093


NeuMF(
  (user_mf_embedding): Embedding(162542, 64)
  (item_mf_embedding): Embedding(59048, 64)
  (user_mlp_embedding): Embedding(162542, 64)
  (item_mlp_embedding): Embedding(59048, 64)
  (mlp_layers): MLPLayers(
    (mlp_layers): Sequential(
      (0): Dropout(p=0.3, inplace=False)
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): ReLU()
      (3): Dropout(p=0.3, inplace=False)
      (4): Linear(in_features=128, out_features=64, bias=True)
      (5): ReLU()
    )
  )
  (predict_layer): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (loss): BCEWithLogitsLoss()
)

### 1.3 Predictions


In [14]:

# Define a USER ID
#user_raw_id_to_predict = '345'

print(f"\n--- Preparing predictions for user: '{user_raw_id_to_predict}' ---")

user_internal_id = dataset.token2id(dataset.uid_field, user_raw_id_to_predict)

all_item_internal_ids = torch.arange(dataset.item_num, dtype=torch.long).tolist()

all_interactions = dataset.inter_feat
user_ids_tensor = all_interactions[dataset.uid_field]
item_ids_tensor = all_interactions[dataset.iid_field]
user_mask = (user_ids_tensor == user_internal_id)
interacted_items_tensor = item_ids_tensor[user_mask]
user_interacted_items_internal_ids = set(interacted_items_tensor.tolist())
print(f"User {user_raw_id_to_predict} has interacted with {len(user_interacted_items_internal_ids)} items via manual filtering.")

# Filter to get only the unrated items
unrated_item_internal_ids = [
    item_id for item_id in all_item_internal_ids
    if item_id not in user_interacted_items_internal_ids
]





--- Preparing predictions for user: '345' ---
User 345 has interacted with 340 items via manual filtering.


In [21]:

# Prepare tensors for prediction
user_tensor_for_prediction = torch.full(
    (len(unrated_item_internal_ids),),
    user_internal_id,
    dtype=torch.long
)
item_tensor_for_prediction = torch.tensor(
    unrated_item_internal_ids,
    dtype=torch.long
)

# Create an Interaction object - this is the STANDARD input for RecBole models
interaction_for_prediction = Interaction({
    # Use the field names directly as keys for the Interaction object
    config.USER_ID_FIELD: user_tensor_for_prediction,
    config.ITEM_ID_FIELD: item_tensor_for_prediction 
})

## Make predictions
with torch.no_grad(): 
    predicted_scores = model.predict(interaction_for_prediction)

#  Map internal item IDs back to raw IDs and display results
predictions = []
for i, score in enumerate(predicted_scores):
    item_internal_id = unrated_item_internal_ids[i]
    item_raw_id = dataset.id2token(dataset.iid_field, item_internal_id)
    predictions.append({'item_id': item_raw_id, 'predicted_rating': score.item()})

predictions_df = pd.DataFrame(predictions).sort_values(by='predicted_rating', ascending=False)

print(f"\n--- Top 3 Predicted Ratings for User '{user_raw_id_to_predict}' ---")
print(predictions_df.head(3))

print(f"\n--- Bottom 3 Predicted Ratings for User '{user_raw_id_to_predict}' ---")
print(predictions_df.tail(3))

print(f"\n--- Number of Highly recommended films (>0.99) for User '{user_raw_id_to_predict}' ---")
print(predictions_df[predictions_df['predicted_rating'] > 0.99].count())

print(f"\n--- Number of not recommended films (<0.01) for User '{user_raw_id_to_predict}' ---")
print(predictions_df[predictions_df['predicted_rating'] < 0.01].count())



--- Top 3 Predicted Ratings for User '345' ---
    item_id  predicted_rating
232    2959          0.999935
228    2329          0.999676
158    4993          0.999579

--- Bottom 3 Predicted Ratings for User '345' ---
      item_id  predicted_rating
44186  178911      6.871721e-10
31696  181895      5.355051e-10
19628  158942      4.342869e-10

--- Number of Highly recommended films (>0.99) for User '345' ---
item_id             171
predicted_rating    171
dtype: int64

--- Number of not recommended films (<0.01) for User '345' ---
item_id             51950
predicted_rating    51950
dtype: int64


In [12]:
print(f"MF User Embedding Weight Mean: {model.user_mf_embedding.weight.mean().item():.4f}")
print(f"MLP Item Embedding Weight Mean: {model.item_mlp_embedding.weight.mean().item():.4f}")
print(f"Prediction Layer Weight Mean: {model.predict_layer.weight.mean().item():.4f}")

MF User Embedding Weight Mean: -0.1944
MLP Item Embedding Weight Mean: -0.0596
Prediction Layer Weight Mean: -0.0093


## 2) Model version 4: predictions 

The model training lasts 14 epochs with 'uni10' evaluation mode 

(the duration of training & evaluation was about 6 hours)

### 2.1 Model configuration and eval

In [22]:
prediction_config_dict_2 = {
    'model': 'NeuMF',
    'dataset': 'movielens',
    'data_path': '../movies-database/',

  'field_separator': '\t',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'RATING_FIELD': 'rating',
    'TIME_FIELD': 'timestamp',

    'load_col': {
        'inter': ['user_id', 'item_id', 'rating', 'timestamp']
    },
    'LABEL_FIELD': 'rating', 
    'threshold': {'rating': 4.5}, #  Ratings >= 4.5 are positive interactions for ranking

    'eval_task': 'ranking', 
    'normalize_field': {},
    'loss_type': 'BPR', 

    'eval_args': {
        'split': {'RS': [0.9, 0.05, 0.05]},
        'order': 'TO',
        'group_by': 'user',
        'mode': {'valid': 'uni10', 'test': 'uni10'},
        'neg_sample_args': None, 
        'topk': [10, 20, 50], 
    },

    'metrics': ['Recall', 'NDCG', 'MRR'], 
    'valid_metric': 'NDCG@10', 
    'valid_metric_bigger': True,

    'train_neg_sample_args': {'distribution': 'uniform', 'sample_num': 1}, 
    
    'mf_embedding_size': 64,
    'mlp_embedding_size': 64,
    'layers': [128, 64, 32],
    'dropout_prob': 0.3,

    'learning_rate': 0.001,
    'train_batch_size': 1024,
    'epochs': 14,
    'eval_step': 5,

    'eval_batch_size': 512,
    'log_wandb': False,
    'show_progress': False,
    'log_file': 'recbole_ml25m_neumf_ranking_log.txt', 
    'checkpoint_dir': 'saved_models_ml25m_ranking',
}

saved_model_path_2 = './NeuMF-May-31-2025_11-08-41.pth'  ## <---- model path

config = Config(model=prediction_config_dict_2['model'], dataset=prediction_config_dict_2['dataset'], config_dict=prediction_config_dict_2)
dataset = create_dataset(config)
print("Dataset loaded successfully.")

model_name = config['model']
model_module = importlib.import_module(f"recbole.model.general_recommender")
model_class = getattr(model_module, model_name)

model_2 = model_class(config, dataset)
print(f"Model '{model_name}' initialized.")

checkpoint = torch.load(saved_model_path_2, weights_only=False)
model_2.load_state_dict(checkpoint['state_dict'])
print("Model state dictionary loaded successfully.")

# small paragraph about model weights ---
print(f"MF User Embedding Weight Mean: {model_2.user_mf_embedding.weight.mean().item():.4f}")
print(f"MLP Item Embedding Weight Mean: {model_2.item_mlp_embedding.weight.mean().item():.4f}")
print(f"Prediction Layer Weight Mean: {model_2.predict_layer.weight.mean().item():.4f}")
# --- end of small paragraph ---

model_2.eval()



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)


Dataset loaded successfully.
Model 'NeuMF' initialized.
Model state dictionary loaded successfully.
MF User Embedding Weight Mean: 0.0037
MLP Item Embedding Weight Mean: 0.0544
Prediction Layer Weight Mean: -0.0137


NeuMF(
  (user_mf_embedding): Embedding(162542, 64)
  (item_mf_embedding): Embedding(59048, 64)
  (user_mlp_embedding): Embedding(162542, 64)
  (item_mlp_embedding): Embedding(59048, 64)
  (mlp_layers): MLPLayers(
    (mlp_layers): Sequential(
      (0): Dropout(p=0.3, inplace=False)
      (1): Linear(in_features=128, out_features=128, bias=True)
      (2): ReLU()
      (3): Dropout(p=0.3, inplace=False)
      (4): Linear(in_features=128, out_features=64, bias=True)
      (5): ReLU()
    )
  )
  (predict_layer): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (loss): BCEWithLogitsLoss()
)

### 2.2 Predictions

In [23]:

# Define a USER ID
#user_raw_id_to_predict = '345'

print(f"\n--- Preparing predictions for user: '{user_raw_id_to_predict}' ---")

user_internal_id = dataset.token2id(dataset.uid_field, user_raw_id_to_predict)

all_item_internal_ids = torch.arange(dataset.item_num, dtype=torch.long).tolist()

all_interactions = dataset.inter_feat
user_ids_tensor = all_interactions[dataset.uid_field]
item_ids_tensor = all_interactions[dataset.iid_field]
user_mask = (user_ids_tensor == user_internal_id)
interacted_items_tensor = item_ids_tensor[user_mask]
user_interacted_items_internal_ids = set(interacted_items_tensor.tolist())
print(f"User {user_raw_id_to_predict} has interacted with {len(user_interacted_items_internal_ids)} items via manual filtering.")

# Filter to get only the unrated items
unrated_item_internal_ids = [
    item_id for item_id in all_item_internal_ids
    if item_id not in user_interacted_items_internal_ids
]



# Prepare tensors for prediction
user_tensor_for_prediction = torch.full(
    (len(unrated_item_internal_ids),),
    user_internal_id,
    dtype=torch.long
)
item_tensor_for_prediction = torch.tensor(
    unrated_item_internal_ids,
    dtype=torch.long
)

# Create an Interaction object - this is the STANDARD input for RecBole models
interaction_for_prediction = Interaction({
    # Use the field names directly as keys for the Interaction object
    config.USER_ID_FIELD: user_tensor_for_prediction,
    config.ITEM_ID_FIELD: item_tensor_for_prediction 
})

## Make predictions
with torch.no_grad(): 
    predicted_scores = model_2.predict(interaction_for_prediction)

#  Map internal item IDs back to raw IDs and display results
predictions = []
for i, score in enumerate(predicted_scores):
    item_internal_id = unrated_item_internal_ids[i]
    item_raw_id = dataset.id2token(dataset.iid_field, item_internal_id)
    predictions.append({'item_id': item_raw_id, 'predicted_rating': score.item()})

predictions_df = pd.DataFrame(predictions).sort_values(by='predicted_rating', ascending=False)

print(f"\n--- Top 3 Predicted Ratings for User '{user_raw_id_to_predict}' ---")
print(predictions_df.head(3))

print(f"\n--- Bottom 3 Predicted Ratings for User '{user_raw_id_to_predict}' ---")
print(predictions_df.tail(3))

print(f"\n--- Number of Highly recommended films (>0.99) for User '{user_raw_id_to_predict}' ---")
print(predictions_df[predictions_df['predicted_rating'] > 0.99].count())

print(f"\n--- Number of not recommended films (<0.01) for User '{user_raw_id_to_predict}' ---")
print(predictions_df[predictions_df['predicted_rating'] < 0.01].count())



--- Preparing predictions for user: '345' ---
User 345 has interacted with 340 items via manual filtering.

--- Top 3 Predicted Ratings for User '345' ---
     item_id  predicted_rating
232     2959          0.999828
1382    3897          0.999680
228     2329          0.999637

--- Bottom 3 Predicted Ratings for User '345' ---
      item_id  predicted_rating
56697  169558      8.658309e-08
33754  140104      8.468910e-08
37328  180583      8.282374e-08

--- Number of Highly recommended films (>0.99) for User '345' ---
item_id             250
predicted_rating    250
dtype: int64

--- Number of not recommended films (<0.01) for User '345' ---
item_id             51041
predicted_rating    51041
dtype: int64


Movies with ID 2959 and 2329 were recomended by both variations of the NeuMF model. 

Overall, **the 5th version of the model** (whoose predictions observed first) is slightly better since it strongly recommend (>0.99) lower number of movies then **the 4th version of the model** (171 against 250 items respectivelly).

The recall@10 is better for **the 4th version of the model**, but this metric **for the 5th version of the model** was calculated much more appropriatelly (based on sample of 100 instead of 10 movies), so we cannot compare models based on this metric.