**Collaborative Filtering Approaches**
1. Memory-based:    ItemKNN
2. Model-based:     BPR, LightGCN
3. Context-based:   FM, DeepFM, WideDeep

**Content-based Approaches:** TFIDF (Cornac Models)

**Knowledge-based Approaches:** KGCN, KGAT, KGIN

**Hybrid Systems:** NeuMF

In [14]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation

from recbole.model.knowledge_aware_recommender import KGCN, KGIN, KGAT
from recbole.model.general_recommender import BPR, LightGCN, ItemKNN, NeuMF


from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_trainer, get_model

# configurations initialization
config = Config(model='BPR', dataset='ml-100k')

# init random seed
init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()

# write config info into log
logger.info(config)

# dataset creating and filtering
dataset = create_dataset(config)
logger.info(dataset)

# dataset splitting
train_data, valid_data, test_data = data_preparation(config, dataset)

# model loading and initialization
model = BPR(config, train_data._dataset).to(config['device'])
logger.info(model)

# trainer loading and initialization
trainer_class = get_trainer(config["MODEL_TYPE"], config["model"])
trainer = trainer_class(config, model)

# model training
best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

# model evaluation
test_result = trainer.evaluate(test_data)
print(test_result)

21 Jul 01:39    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = C:\Users\Sharjeel Mustafa\miniconda3\envs\recbole\Lib\site-packages\recbole\config\../dataset_example/ml-100k
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_met

OrderedDict([('recall@10', 0.2388), ('mrr@10', 0.482), ('ndcg@10', 0.2862), ('hit@10', 0.772), ('precision@10', 0.1914)])


In [7]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation


from recbole.model.general_recommender import BPR, LightGCN, ItemKNN, NeuMF
from recbole.model.context_aware_recommender import FM, DeepFM, WideDeep
from recbole.model.knowledge_aware_recommender import KGCN, KGIN, KGAT

from recbole.trainer import HyperTuning
from recbole.quick_start import objective_function

from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_trainer

models = {
    'BPR': BPR,
    # 'LightGCN': LightGCN,
    # 'ItemKNN': ItemKNN, 
    # 'FM': FM,
    # 'DeepFM': DeepFM,
    # 'WideDeep': WideDeep,
    # 'KGCN': KGCN,
    # 'KGIN': KGIN,
    # 'KGAT': KGAT,
    # 'NeuMF': NeuMF
    }

for model_name, model_class in models.items():
    print(f"\033[93mRunning model: {model_name}\033[0m")

    

    config = Config(model=model_name, dataset='ml-100k')

    hp = HyperTuning(objective_function=objective_function, algo='exhaustive', early_stop=10,
                max_evals=100, params_file='params.hyper', fixed_config_file_list=config)

    # run
    hp.run()
    # export result to the file
    hp.export_result(output_file='hyper_example.result')
    # print best parameters
    print('best params: ', hp.best_params)
    # print best result
    print('best result: ')
    print(hp.params2result[hp.params2str(hp.best_params)])


    # init random seed
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    init_logger(config)
    logger = getLogger()

    # write config info into log
    logger.info(config)

    # dataset creating and filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # model loading and initialization
    model = model_class(config, train_data._dataset).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer_class = get_trainer(config["MODEL_TYPE"], config["model"])
    trainer = trainer_class(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

    # model evaluation
    test_result = trainer.evaluate(test_data)
    print(test_result)

Running model: BPR
ERROR in rec_eval                                    
EXCEPTION                                            
<class 'AttributeError'>                             
'numpy.random.mtrand.RandomState' object has no attribute 'integers'
NODE                                                 
0 randint                                            
1   Literal{2}
2  size =
3   len
4     array_union
5       array_union
6         array_union
7           Literal{new_ids}
8  rng =
9   Literal{rng-placeholder}
  0%|          | 0/6 [00:00<?, ?trial/s, best loss=?]


AttributeError: 'numpy.random.mtrand.RandomState' object has no attribute 'integers'

In [None]:
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation

from recbole.model.general_recommender import BPR, LightGCN, ItemKNN, NeuMF
from recbole.model.context_aware_recommender import FM, DeepFM, WideDeep
from recbole.model.knowledge_aware_recommender import KGCN, KGIN, KGAT

from recbole.trainer import HyperTuning
from recbole.quick_start import objective_function

from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_trainer

models = {
    'BPR': BPR,
    # other models commented out
}

for model_name, model_class in models.items():
    print(f"\033[93mRunning model: {model_name}\033[0m")

    config = Config(model=model_name, dataset='ml-100k')
    config_dict = {'model': model_name, 'dataset': 'ml-100k'}
    # Initialize hyperparameter tuning for the model
    hp = HyperTuning(
        objective_function=objective_function,
        algo='exhaustive',
        early_stop=10,
        max_evals=100,
        params_file='params.hyper',
        fixed_config_file_list=['config.yaml']  # Pass config object here
        # fixed_config_file_list=[config]  # Pass config object here
    )

    # Run the hyperparameter tuning
    hp.run()
    hp.export_result(output_file='hyper_example.result')

    print('best params: ', hp.best_params)
    print('best result: ')
    print(hp.params2result[hp.params2str(hp.best_params)])

    # Now run training with best hyperparameters
    

    # Seed and logger
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config)
    logger = getLogger()
    logger.info(config)

    # Dataset loading and preparation
    dataset = create_dataset(config)
    logger.info(dataset)
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # Model initialization
    model = model_class(config, train_data._dataset).to(config['device'])
    logger.info(model)

    # Trainer initialization and training
    trainer_class = get_trainer(config["MODEL_TYPE"], config["model"])
    trainer = trainer_class(config, model)
    best_valid_score, best_valid_result = trainer.fit(train_data, valid_data)

    # Evaluation
    test_result = trainer.evaluate(test_data)
    print(test_result)


[93mRunning model: BPR[0m
running parameters:                                  
{'embedding_size': 64, 'learning_rate': 0.0009845219514902307, 'mlp_hidden_size': '[128,128]'}
  0%|          | 0/6 [00:00<?, ?trial/s, best loss=?]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:27    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

current best valid score: 0.3896                     
current best valid result:                           
OrderedDict([('recall@10', 0.2078), ('mrr@10', 0.3896), ('ndcg@10', 0.2294), ('hit@10', 0.7391), ('precision@10', 0.1572)])
current test result:                                 
OrderedDict([('recall@10', 0.2447), ('mrr@10', 0.4889), ('ndcg@10', 0.2917), ('hit@10', 0.7805), ('precision@10', 0.1949)])
running parameters:                                                 
{'embedding_size': 128, 'learning_rate': 0.08179343529093307, 'mlp_hidden_size': '[64,64,64]'}
 17%|█▋        | 1/6 [00:31<02:38, 31.78s/trial, best loss: -0.3896]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:27    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

running parameters:                                                 
{'embedding_size': 64, 'learning_rate': 0.8723162187044113, 'mlp_hidden_size': '[64,64,64]'}
 33%|███▎      | 2/6 [00:55<01:47, 26.75s/trial, best loss: -0.3896]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:28    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

running parameters:                                                 
{'embedding_size': 64, 'learning_rate': 0.05665634889695432, 'mlp_hidden_size': '[128,128]'}
 50%|█████     | 3/6 [01:04<00:56, 18.96s/trial, best loss: -0.3896]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:28    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

running parameters:                                                 
{'embedding_size': 128, 'learning_rate': 0.6768339182018485, 'mlp_hidden_size': '[64,64,64]'}
 67%|██████▋   | 4/6 [01:23<00:37, 18.84s/trial, best loss: -0.3896]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:28    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

running parameters:                                                 
{'embedding_size': 128, 'learning_rate': 0.5301673241370076, 'mlp_hidden_size': '[128,128]'}
 83%|████████▎ | 5/6 [01:31<00:15, 15.03s/trial, best loss: -0.3896]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)

21 Jul 02:28    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [

100%|██████████| 6/6 [01:43<00:00, 17.24s/trial, best loss: -0.3896]

21 Jul 02:29    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m C:\Users\Sharjeel Mustafa\miniconda3\envs\recbole\Lib\site-packages\recbole\config\../dataset_example/ml-100k[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 300[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0m
[1;36mlearning_rate[0m =[1;33m 0.001[0m
[1;36mtrain_neg_sample_args[0m =[1;33m {'distribution': 'uniform', 'sample_num': 1, 'alpha':


best params:  {'embedding_size': 64, 'learning_rate': 0.0009845219514902307, 'mlp_hidden_size': '[128,128]'}
best result: 
{'model': 'BPR', 'best_valid_score': 0.3896, 'valid_score_bigger': True, 'best_valid_result': OrderedDict([('recall@10', 0.2078), ('mrr@10', 0.3896), ('ndcg@10', 0.2294), ('hit@10', 0.7391), ('precision@10', 0.1572)]), 'test_result': OrderedDict([('recall@10', 0.2447), ('mrr@10', 0.4889), ('ndcg@10', 0.2917), ('hit@10', 0.7805), ('precision@10', 0.1949)])}


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
21 Jul 02:29    INFO  [1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 106.0445387062566

OrderedDict([('recall@10', 0.2388), ('mrr@10', 0.482), ('ndcg@10', 0.2862), ('hit@10', 0.772), ('precision@10', 0.1914)])


In [17]:
import numpy as np
import scipy.sparse as sp
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics.pairwise import cosine_similarity
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.utils import init_seed, init_logger, get_model
from recbole.evaluator import Evaluator
from recbole.data.interaction import Interaction
import logging # Import logging module for logger


def build_item_content_matrix(dataset):
    """
    Builds a sparse item-content matrix from the dataset's item features.
    Assumes item genres are in a 'genre' column, pipe-separated.

    Args:
        dataset: RecBole Dataset object.

    Returns:
        scipy.sparse.csr_matrix: A sparse matrix where rows are items and columns are genres (multi-hot encoded).
    """
    item_df = dataset.item_feat.to_df()

    # Extract genres, handling potential missing values
    raw_genres = item_df['genre'].fillna('')

    # Split genres by "|"
    genres_list = [g.split('|') for g in raw_genres]

    # MultiLabelBinarizer for efficient multi-hot encoding
    mlb = MultiLabelBinarizer(sparse_output=True)
    item_content_matrix = mlb.fit_transform(genres_list)  # sparse matrix, shape: (num_items, num_genres)

    return item_content_matrix


def generate_user_recommendations(train_interaction, similarity_matrix, user_ids, top_k=10):
    """
    Generates top-K item recommendations for specified users based on item-item similarity.

    Args:
        train_interaction (recbole.data.interaction.Interaction): RecBole Interaction object for training data.
        similarity_matrix (scipy.sparse.csr_matrix): Sparse item-item cosine similarity matrix.
        user_ids (list or np.ndarray): List or array of user indices for whom to recommend.
        top_k (int): The number of top recommendations to generate for each user.

    Returns:
        dict: A dictionary where keys are user IDs and values are lists of recommended item IDs.
    """
    recommendations = dict()
    item_num = similarity_matrix.shape[0]

    # For sparse matrix indexing efficiency, convert train_interaction to CSR format
    # The `user_id_field` and `item_id_field` are important for correctly accessing user-item interactions.
    # RecBole internally uses remap_id, so direct integer user_id and item_id are usually 0-indexed.
    user_field = train_interaction.user_id_field
    item_field = train_interaction.item_id_field

    # Create a sparse matrix representation of user-item interactions from train_interaction
    # Ensure to use the correct RecBole internal IDs for user and item.
    rows = train_interaction[user_field].numpy()
    cols = train_interaction[item_field].numpy()
    data = np.ones_like(rows, dtype=int) # We just need to mark interaction, value isn't critical
    train_coo = sp.coo_matrix((data, (rows, cols)), shape=(len(user_ids), item_num)).tocsr()


    for user_id in user_ids:
        # Items the user interacted with (row = user)
        # Ensure user_id corresponds to the row index in the train_coo matrix.
        # If RecBole remaps user IDs, you might need to convert them back or work with remapped IDs.
        # Assuming user_ids here are the remapped 0-indexed IDs used internally by RecBole.
        if user_id >= train_coo.shape[0]: # Handle cases where a user_id might not be in training data
            recommendations[user_id] = []
            continue

        user_interactions = train_coo[user_id].indices

        if len(user_interactions) == 0:
            recommendations[user_id] = []
            continue

        # Sum the similarity scores for these items (sparse vector)
        # similarity_matrix[user_interactions] will give a sub-matrix of similarities for interacted items.
        # .sum(axis=0) sums these similarities column-wise to get a score for each potential recommendation.
        scores = similarity_matrix[user_interactions].sum(axis=0).A1  # .A1 flattens to 1D array

        # Remove already interacted items by setting score to -inf
        scores[user_interactions] = -np.inf

        # Get top-K item indices
        # np.argpartition is efficient for finding the k-th smallest/largest element and partitioning.
        # Then, sort only those top-k elements.
        top_items = np.argpartition(scores, -top_k)[-top_k:]
        top_items = top_items[np.argsort(scores[top_items])[::-1]]  # sort top-k descending

        recommendations[user_id] = top_items.tolist()

    return recommendations


def evaluate_recommendations(recommendations, test_interaction, k=10):
    """
    Evaluates recommendations using hit rate, precision, and recall.

    Args:
        recommendations (dict): A dictionary of user_id to list of recommended item_ids.
        test_interaction (recbole.data.interaction.Interaction): RecBole Interaction object for test data.
        k (int): The number of recommendations considered for evaluation (e.g., Hit Rate@k).
    """
    # RecBole's Evaluator expects a specific format for input (e.g., scores or ranked lists).
    # Since we have direct recommendation lists, we'll manually calculate common metrics.
    # If you want to use RecBole's Evaluator fully, you'd need to convert `recommendations`
    # into a `Prediction` object or similar format that `Evaluator` understands.

    hit_rates = []
    precisions = []
    recalls = []

    # Map RecBole's internal user/item IDs to your recommendation dictionary's keys
    # if they are different (they should be consistent if you used dataset.num('user') for user_ids)
    test_user_ids = test_interaction.user_id.numpy()
    test_item_ids = test_interaction.item_id.numpy()

    # Group true items by user from test_interaction
    true_items_by_user = {}
    for u, i in zip(test_user_ids, test_item_ids):
        true_items_by_user.setdefault(u, []).append(i)

    # Iterate through users for whom we have recommendations
    for user_id, rec_items in recommendations.items():
        if user_id not in true_items_by_user:
            # If the user is not in the test set, we skip them for evaluation
            # or treat them as having no relevant items for recall (depends on desired behavior).
            # For simplicity, we'll skip if no true items are in the test set.
            continue

        true_items = set(true_items_by_user[user_id])
        rec_items_at_k = set(rec_items[:k]) # Consider only top-k recommendations for metrics

        hits = len(rec_items_at_k.intersection(true_items))

        hit_rate = 1.0 if hits > 0 else 0.0
        precision = hits / k if k > 0 else 0.0
        recall = hits / len(true_items) if len(true_items) > 0 else 0.0

        hit_rates.append(hit_rate)
        precisions.append(precision)
        recalls.append(recall)

    if len(hit_rates) > 0:
        print(f"Hit Rate@{k}: {np.mean(hit_rates):.4f}")
        print(f"Precision@{k}: {np.mean(precisions):.4f}")
        print(f"Recall@{k}: {np.mean(recalls):.4f}")
    else:
        print("No users with recommendations found in the test set for evaluation.")


def main():
    """
    Main function to run the content-based recommendation system.
    """
    # Configure logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Config and seed
    # You might want to adjust the 'metrics' in config if you're using RecBole's
    # built-in evaluation methods, but for manual calculation, it's not strictly needed here.
    config = Config(model='BPR', dataset='ml-100k', config_file_list=[]) # Add config_file_list if you have a config file
    init_seed(config['seed'], config['reproducibility'])
    init_logger(config) # This sets up RecBole's internal logger, which can be different from Python's default logging

    # Load and prepare dataset
    dataset = create_dataset(config)
    train_data, valid_data, test_data = data_preparation(config, dataset)

    # Build item content matrix (sparse)
    item_content_matrix = build_item_content_matrix(dataset)

    logger.info("Computing cosine similarity matrix on item content features...")
    # Compute cosine similarity (sparse matrix) efficiently
    # The output is a sparse matrix, which is memory efficient for large datasets.
    similarity_matrix = cosine_similarity(item_content_matrix, dense_output=False)

    # Extract user indices (assuming continuous user_id from 0 to n-1)
    # RecBole typically remaps user and item IDs to be contiguous 0-indexed integers.
    # dataset.num('user') gives the total number of unique users.
    user_ids = range(dataset.num('user'))

    # Generate recommendations for users
    logger.info("Generating recommendations...")
    # Get the raw interaction feature data from the training dataset.
    # train_data is a DataLoader, train_data._dataset is the Dataset, and .inter_feat is the Interaction object.
    train_interaction = train_data._dataset.inter_feat

    recommendations = generate_user_recommendations(train_interaction, similarity_matrix, user_ids, top_k=10)

    # Evaluate
    # Get the raw interaction feature data from the test dataset for evaluation.
    test_interaction = test_data._dataset.inter_feat
    logger.info("Evaluating recommendations...")
    evaluate_recommendations(recommendations, test_interaction, k=10)


if __name__ == "__main__":
    main()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
21 Jul 02:41    INFO  [1;35m[Training]: [0m[1;36mtrain_batch_size[0m = [1;33m[2048][0m[1;36m train_neg_sample_args[0m: [1;

AttributeError: 'NoneType' object has no attribute 'to_df'