In [24]:
import logging
from logging import getLogger
import recbole
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4Rec
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger, get_model, get_trainer

In [25]:
parameter_dict = {
    # seq_separator: ","
    'data_path':'',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'RATING_FIELD': 'rating',

    'load_col': {'inter': ['user_id', 'item_id', 'rating']},
    'device' : 'GPU',

    # model config
    'embedding_size': '64',
    'hidden_size': '128',
    'num_layers': '1',
    'dropout_prob': '0.3',
    'loss_type': 'CE',
    'threshold':{'rating': 0},
    

    # 'eval_setting': TO_LS, full,
    'train_neg_sample_args': None,
    'group_by_user': True,
    'metrics': ["Recall", "MRR", "NDCG", "Hit", "Precision"],
    'topk': 10,
    'metric_decimal_place': 5,

    'learning_rate': 0.001,
    'epochs': 20,
    'train_batch_size': '512',
    'eval_batch_size': '512',
    'valid_metric': 'MRR@10',
}


In [27]:
config = Config(model='NFM', dataset='joke', config_dict = parameter_dict)

init_seed(config['seed'], config['reproducibility'])

# logger initialization
init_logger(config)
logger = getLogger()
# Create handlers
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)
logger.addHandler(c_handler)

# write config info into log
logger.info(config)

21 Jun 15:18    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = joke


checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 20
train_batch_size = 512
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': None, 'mode': {'valid': 'labeled', 'test': 'labeled'}}
repeatable = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 512
metric_decimal_place = 5

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIELD = user_id
ITEM_ID_FIELD = item_id
RATING_FIELD = rating
TIME_FIELD = timestamp
seq_len = None


In [28]:
dataset = create_dataset(config)
logger.info(dataset)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=0, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  feat[field].fillna(value=feat[field].mean(), inplace=True)
21 Jun 15:18    INFO  joke
The number of users: 237
Average actions of users: 42.3728813559322
The number of items: 141
Average act

In [29]:
train_data, valid_data, test_data = data_preparation(config, dataset)

21 Jun 15:18    INFO  [Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
[Training]: train_batch_size = [512] train_neg_sample_args: [{'distribution': 'none', 'sample_num': 'none', 'alpha': 'none', 'dynamic': False, 'candidate_num': 0}]
21 Jun 15:18    INFO  [Evaluation]: eval_batch_size = [512] eval_args: [{'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': None, 'mode': {'valid': 

In [30]:
for i in train_data:
    print(i)
    break

The batch_size of interaction: 512
    user_id, torch.Size([512]), cpu, torch.int64
    item_id, torch.Size([512]), cpu, torch.int64
    label, torch.Size([512]), cpu, torch.float32




In [31]:
nfm_model = get_model(config["model"])
nfm_model, config["device"]

(recbole.model.context_aware_recommender.nfm.NFM, device(type='cpu'))

In [32]:
model = nfm_model(config, train_data.dataset).to(config['device'])
logger.info(model)
print(model)

21 Jun 15:18    INFO  NFM(
  (token_embedding_table): FMEmbedding(
    (embedding): Embedding(378, 64)
  )
  (first_order_linear): FMFirstOrderLinear(
    (token_embedding_table): FMEmbedding(
      (embedding): Embedding(378, 1)
    )
  )
  (fm): BaseFactorizationMachine()
  (bn): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mlp_layers): MLPLayers(
    (mlp_layers): Sequential(
      (0): Dropout(p=0.3, inplace=False)
      (1): Linear(in_features=64, out_features=64, bias=True)
      (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Sigmoid()
      (4): Dropout(p=0.3, inplace=False)
      (5): Linear(in_features=64, out_features=64, bias=True)
      (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (7): Sigmoid()
      (8): Dropout(p=0.3, inplace=False)
      (9): Linear(in_features=64, out_features=64, bias=True)
      (10): BatchNorm1d(64, eps=1e-05, momentum

NFM(
  (token_embedding_table): FMEmbedding(
    (embedding): Embedding(378, 64)
  )
  (first_order_linear): FMFirstOrderLinear(
    (token_embedding_table): FMEmbedding(
      (embedding): Embedding(378, 1)
    )
  )
  (fm): BaseFactorizationMachine()
  (bn): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (mlp_layers): MLPLayers(
    (mlp_layers): Sequential(
      (0): Dropout(p=0.3, inplace=False)
      (1): Linear(in_features=64, out_features=64, bias=True)
      (2): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): Sigmoid()
      (4): Dropout(p=0.3, inplace=False)
      (5): Linear(in_features=64, out_features=64, bias=True)
      (6): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (7): Sigmoid()
      (8): Dropout(p=0.3, inplace=False)
      (9): Linear(in_features=64, out_features=64, bias=True)
      (10): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, tra

In [33]:
config["model"], config["MODEL_TYPE"], config["device"]

('NFM', <ModelType.CONTEXT: 3>, device(type='cpu'))

In [34]:
#  trainer loading and initialization
trainer = Trainer(config, model)
trainer

<recbole.trainer.trainer.Trainer at 0x10f0c6867d0>

In [36]:
# model training
best_valid_score, best_valid_result = trainer.fit(train_data)

21 Jun 15:19    INFO  epoch 0 training [time: 1.02s, train loss: 10.4746]
epoch 0 training [time: 1.02s, train loss: 10.4746]
epoch 0 training [time: 1.02s, train loss: 10.4746]
epoch 0 training [time: 1.02s, train loss: 10.4746]
epoch 0 training [time: 1.02s, train loss: 10.4746]
21 Jun 15:19    INFO  Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
21 Jun 15:19    INFO  epoch 1 training [time: 1.03s, train loss: 10.4021]
epoch 1 training [time: 1.03s, train loss: 10.4021]
epoch 1 training [time: 1.03s, train loss: 10.4021]
epoch 1 training [time: 1.03s, train loss: 10.4021]
epoch 1 training [time: 1.03s, train loss: 10.4021]
21 Jun 15:19    INFO  Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-2024_15-18-19.pth
Saving current: saved\NFM-Jun-21-20

In [42]:
import torch
from recbole.quick_start import load_data_and_model

# Load the trained model and data
config, model, dataset, train_data, valid_data, test_data = load_data_and_model(model_file='saved\\NFM-Jun-21-2024_15-18-19.pth')

ValueError: I/O operation on closed file

In [None]:

def get_next_item_recommendations(user_id, seq_len=5, top_k=5):
    # Get the user's interaction sequence
    user_interactions = train_data.dataset.inter_feat[train_data.dataset.inter_feat['user_id'] == user_id]
    user_interactions = user_interactions['item_id'].tolist()
    
    if len(user_interactions) < seq_len:
        seq = [0] * (seq_len - len(user_interactions)) + user_interactions
    else:
        seq = user_interactions[-seq_len:]
    
    seq = torch.tensor(seq).unsqueeze(0).to(config['device'])
    
    # Get top-k next item recommendations
    scores = model.full_sort_predict(seq)
    topk_items = scores.topk(top_k).indices.squeeze().cpu().numpy()
    
    return topk_items

def print_joke(joke_id):
    # Print joke text given a joke ID
    joke_text = jokes_df[jokes_df['jokeId'] == joke_id]['jokeText'].values[0]
    print(joke_text)

# Interactive recommendation loop
import random

user_id = random.choice(ratings_df['user_id'].unique())
while True:
    recommended_jokes = get_next_item_recommendations(user_id)
    
    print("Recommended Jokes:")
    for i, joke_id in enumerate(recommended_jokes, 1):
        print(f"{i}. Joke ID {joke_id}")
    
    choice = input("Enter the number of the joke you like (or 'exit' to stop): ")
    
    if choice.lower() == 'exit':
        break
    
    try:
        selected_index = int(choice) - 1
        selected_joke_id = recommended_jokes[selected_index]
        
        print("\nSelected Joke:")
        print_joke(selected_joke_id)
        
        # Update recommendations based on the chosen joke
        recommended_jokes = get_next_item_recommendations(user_id, seq_len=5, top_k=5)
    except (ValueError, IndexError):
        print("Invalid choice. Please try again.")
