# Install dependencies (~10-30 minutes)

In [1]:
# Install the pinned dependencies with the %pip magic so they land in the environment of the
# running kernel; `!pip` runs whichever `pip` is first on the shell PATH, which may differ.
# NOTE(review): gdown is left unpinned, as in the original command — pin it once the version
# used for the recorded runs is known.
%pip install "torch==2.5.1" "triton==3.1.0" "recbole==1.2.0" "causal-conv1d==1.5.3.post1" "einops==0.8.1" "numpy==1.26.4" gdown

Collecting torch==2.5.1
  Downloading torch-2.5.1-cp312-cp312-manylinux1_x86_64.whl.metadata (28 kB)
Collecting triton==3.1.0
  Downloading triton-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.3 kB)
Collecting recbole==1.2.0
  Downloading recbole-1.2.0-py3-none-any.whl.metadata (1.4 kB)
Collecting causal-conv1d==1.5.3.post1
  Downloading causal_conv1d-1.5.3.post1.tar.gz (24 kB)
  Installing build dependencies ... [?25l- \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ |

# Download datasets and source code

In [2]:
import gdown

# Shared Google Drive folder that holds the datasets and the experiment scripts.
DRIVE_FOLDER_URL = "https://drive.google.com/drive/folders/1WDDhDgDlIz-uEVknbDakwhirytwQ1upS?usp=sharing"

# No output location is passed, so gdown chooses the destination itself; the run cells further
# down expect the files under /root/UPDATED_STRUCTURE (the paths shown in this cell's output).
# The call is kept as the last expression so its return value is displayed by the notebook.
gdown.download_folder(DRIVE_FOLDER_URL)

Retrieving folder contents


Retrieving folder 1z43FDT5Kg4lK7AKG4CPoly1axJJsKg9b dataset
Retrieving folder 1yGe8bCqgiXnncFgaEvabBhoHkpD_KibN amazon-beauty
Processing file 1_ZFZuLEBnmoxpYw0niYRS0Q9ozLcMC6Y amazon-beauty.inter
Retrieving folder 18U3LdjIDWOSb-V5JMAUiYBKMqT-IsMys ml-1m
Processing file 16QYqHl42FV4gFMVHHWmavqRjKFbSU2Wz ml-1m.inter
Retrieving folder 1rGWRpgEJQYNDLWNvWfmZUXhtfQJVMW-l yelp
Processing file 1r-F7HDKxycURAOaOpyF7OfppyPGkNyNe yelp.inter
Processing file 1zOp4lXf9e6uZUu7RBxa6iL3-vAdXnFC3 bert4rec.py
Processing file 13RQJuGZH7FCbQVoYDluznjQwPNcAHoew compare_plots.py
Processing file 1fUySZCs7y5uNcw6_IfsSkOUCdUOnteFf config.yaml
Processing file 1r3bcYZhiO78xX2P1yqU0rc4uNgzUUM5J parallel_scan.py
Processing file 1n2lJpZWX1eORMEbmfdkjbe4QYPXwcF-V plot_utils.py
Processing file 1wRJapKgN2LOtq2ZlcF2RSIaesY_3z7ZD README.md
Processing file 1OHZJ-ZLZrZYTSgkNa1qshZ0DVD6NyXln RecBLR.py
Processing file 1gHfSmNn5nVdUUFz6Byzv1I-HqUVNoeyo run.py
Processing file 1Lmerziu73569I3JlqYA8c8eiMt8z2GgS sasrec.py


Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From: https://drive.google.com/uc?id=1_ZFZuLEBnmoxpYw0niYRS0Q9ozLcMC6Y
To: /root/UPDATED_STRUCTURE/dataset/amazon-beauty/amazon-beauty.inter
  0%|                                                                              | 0.00/82.4M [00:00<?, ?B/s]  4%|██▋                                                                  | 3.15M/82.4M [00:00<00:02, 29.2MB/s]  8%|█████▎                                                               | 6.29M/82.4M [00:00<00:03, 24.4MB/s] 11%|███████▉                                                             | 9.44M/82.4M [00:00<00:02, 26.7MB/s] 15%|██████████▌                                                          | 12.6M/82.4M [00:00<00:02, 26.4MB/s] 21%|██████████████▍                                                      | 17.3M/82.4M [00:00<00:02, 32.0MB/s] 27%|██████████████████▊                                          

['/root/UPDATED_STRUCTURE/dataset/amazon-beauty/amazon-beauty.inter',
 '/root/UPDATED_STRUCTURE/dataset/ml-1m/ml-1m.inter',
 '/root/UPDATED_STRUCTURE/dataset/yelp/yelp.inter',
 '/root/UPDATED_STRUCTURE/bert4rec.py',
 '/root/UPDATED_STRUCTURE/compare_plots.py',
 '/root/UPDATED_STRUCTURE/config.yaml',
 '/root/UPDATED_STRUCTURE/parallel_scan.py',
 '/root/UPDATED_STRUCTURE/plot_utils.py',
 '/root/UPDATED_STRUCTURE/README.md',
 '/root/UPDATED_STRUCTURE/RecBLR.py',
 '/root/UPDATED_STRUCTURE/run.py',
 '/root/UPDATED_STRUCTURE/sasrec.py']

# Guide
- model=R -> RecBLR
- model=B -> BERT4Rec
- model=S -> SASRec
- Note: BERT4Rec needs more VRAM to run and may hit an out-of-memory (OOM) error on a T4 with 16 GB of VRAM
- Time: SASRec > RecBLR > BERT4Rec

- Choose the dataset by setting the `dataset` variable in the RecBLR section below; `gen_config` writes it into the YAML file. Available datasets: amazon-beauty, yelp, ml-1m

# Utility function

In [3]:
import os

def gen_config(
    bd_lru_only: bool = False,
    disable_conv1d: bool = False,
    disable_ffn: bool = False,
    num_layers: int = 2,
    num_epochs: int = 100,
    file_path: str = "/root/UPDATED_STRUCTURE/config.yaml",
    dataset: str = "amazon-beauty"
) -> None:
    """
    Generates the configuration content dynamically based on input parameters
    and writes it to the specified YAML file path.

    Any existing file at ``file_path`` is overwritten. Every setting that is not
    listed below is fixed in the template inside this function. The arguments are
    interpolated into the template as-is, so booleans are written as ``True`` /
    ``False``.

    Args:
        bd_lru_only: Whether to use BD-LRU only mode. (bool)
        disable_conv1d: Whether to disable the 1D convolution block. (bool)
        disable_ffn: Whether to disable the FFN block. (bool)
        num_layers: Number of Recurrent layers. (int)
        num_epochs: Value written to the ``epochs`` training setting. (int)
        file_path: The path to which the configuration file should be written.
            May be a bare filename, in which case the file is created in the
            current working directory. (str)
        dataset: Value written to the ``dataset`` setting. (str)

    Raises:
        OSError: If the target directory cannot be created or the file cannot be
            written. The error is printed and then re-raised so that a following
            training command does not silently run with a stale configuration.
    """

    yaml_content = f"""
gpu_id: '0'

# RecBLR architecture flags
bd_lru_only: {bd_lru_only}
disable_conv1d: {disable_conv1d}
disable_ffn: {disable_ffn}

# RecBLR settings
hidden_size: 64         # (int) Number of features in the hidden state.
num_layers: {num_layers}  # (int) Number of Recurrent layers.
dropout_prob: 0.2       # (float) Dropout rate.
loss_type: 'CE'         # (str) Type of loss function. Range in ['BPR', 'CE'].
expand: 2               # (int) Block expansion factor
d_conv: 4               # (int) Local convolution width

# dataset settings
dataset: {dataset}
MAX_ITEM_LIST_LENGTH: 200    

USER_ID_FIELD: user_id
ITEM_ID_FIELD: item_id
load_col:
    inter: [user_id, item_id, timestamp]

user_inter_num_interval: "[5,inf)"
item_inter_num_interval: "[5,inf)"

# training settings
epochs: {num_epochs}
train_batch_size: 2048
learner: adam
learning_rate: 0.001
eval_step: 1
stopping_step: 10
train_neg_sample_args: ~

# evalution settings
metrics: ['Hit', 'NDCG', 'MRR']
valid_metric: NDCG@10
eval_batch_size: 4096
weight_decay: 0.0
topk: [10, 20]
"""

    # --- Writing the file ---
    # Ensure the directory exists. A bare filename has an empty dirname and
    # os.makedirs("") raises, so only create a directory when there is one.
    target_dir = os.path.dirname(file_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        # Explicit encoding so the result does not depend on the platform locale.
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(yaml_content)
    except OSError as e:
        # Keep the original diagnostic, but do not hide the failure from the caller.
        print(f"Error writing file {file_path}: {e}")
        raise
    else:
        print(f"Successfully wrote dynamically generated configuration to {file_path}")

# RecBLR

In [4]:
# Experiment selection shared by the run cells below.
MODEL = "R"        # passed to the scripts as `--model $MODEL`; the Guide maps R to RecBLR
EPOCHS = 100       # forwarded to gen_config(num_epochs=...)
dataset = "yelp"   # forwarded to gen_config(dataset=...); alternative used before: "ml-1m"

## Default

In [5]:
# Baseline run: rewrite config.yaml with the default architecture flags, then launch run.py.
# `$MODEL` is expanded by IPython from the notebook variable before the shell command runs.
# Presumably run.py picks up config.yaml from its working directory, hence the `cd` — confirm in run.py.
# NOTE(review): the saved output of this cell reports `epochs = 10` and
# `data_path = dataset/amazon-beauty`, which does not match EPOCHS=100 / dataset="yelp" set above;
# the stored outputs look stale and should be regenerated before they are relied on.
gen_config(num_epochs=EPOCHS, dataset=dataset)        # gen_config's own num_epochs default is also 100
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model R] will not be used in RecBole
09 Nov 05:59    INFO  ['run.py', '--model', 'R'][0m
[0m09 Nov 05:59    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

# Unseen Item Handling Experiments

The following sections test RecBLR with preprocessing and/or postprocessing to handle unseen items:

1. **Preprocessing Only**: Maps unseen items in input sequences to their most similar known items based on content similarity. This ensures RecBLR processes clean, in-vocabulary sequences.

2. **Postprocessing Only**: Extends prediction scores from known items to unseen items using weighted similarity. This allows recommendations to cover the entire catalog.

3. **Both (Full Pipeline)**: Applies both preprocessing and postprocessing for comprehensive unseen item handling.

**Note**: These experiments require item features. For datasets without item metadata, synthetic features will be created based on interaction patterns.

In [None]:
# Unseen-item experiment with `--mode both` (the "Both (Full Pipeline)" variant described above).
# The config is regenerated with default architecture flags before the run.
# NOTE(review): run_with_unseen.py is not among the files listed in the download cell's output —
# confirm it exists in /root/UPDATED_STRUCTURE before running this cell.
gen_config(num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run_with_unseen.py --model $MODEL --mode both

## Default + Preprocessing + Postprocessing (Full Unseen Handling)

In [None]:
# Unseen-item experiment with `--mode postprocessing` (the "Postprocessing Only" variant described above).
# The config is regenerated with default architecture flags before the run.
# NOTE(review): run_with_unseen.py is not among the files listed in the download cell's output —
# confirm it exists in /root/UPDATED_STRUCTURE before running this cell.
gen_config(num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run_with_unseen.py --model $MODEL --mode postprocessing

## Default + Postprocessing Only (Extend Scores to Unseen Items)

In [None]:
# Unseen-item experiment with `--mode preprocessing` (the "Preprocessing Only" variant described above).
# The config is regenerated with default architecture flags before the run.
# NOTE(review): run_with_unseen.py is not among the files listed in the download cell's output —
# confirm it exists in /root/UPDATED_STRUCTURE before running this cell.
gen_config(num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run_with_unseen.py --model $MODEL --mode preprocessing

## Default + Preprocessing Only (Handle Unseen Items in Input)

## Single Recurrent Layer

In [6]:
# Ablation: write config.yaml with num_layers=1 (all other flags at their defaults), then train.
# Each run overwrites the same config.yaml, so the file always reflects the most recent cell executed.
gen_config(num_layers=1, num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model R] will not be used in RecBole
09 Nov 06:10    INFO  ['run.py', '--model', 'R'][0m
[0m09 Nov 06:10    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

## BD-LRU only

In [7]:
# Ablation: write config.yaml with bd_lru_only=True (all other flags at their defaults), then train.
# Each run overwrites the same config.yaml, so the file always reflects the most recent cell executed.
gen_config(bd_lru_only=True, num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model R] will not be used in RecBole
09 Nov 06:16    INFO  ['run.py', '--model', 'R'][0m
[0m09 Nov 06:16    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

## No Conv1D

In [8]:
# Ablation: write config.yaml with disable_conv1d=True (all other flags at their defaults), then train.
# Each run overwrites the same config.yaml, so the file always reflects the most recent cell executed.
gen_config(disable_conv1d=True, num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model R] will not be used in RecBole
09 Nov 06:23    INFO  ['run.py', '--model', 'R'][0m
[0m09 Nov 06:23    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

## No FeedForward

In [9]:
# Ablation: write config.yaml with disable_ffn=True (all other flags at their defaults), then train.
# Each run overwrites the same config.yaml, so the file always reflects the most recent cell executed.
gen_config(disable_ffn=True, num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model R] will not be used in RecBole
09 Nov 06:33    INFO  ['run.py', '--model', 'R'][0m
[0m09 Nov 06:33    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

In [10]:
# Housekeeping between the RecBLR runs and the baselines: run a garbage collection in the kernel.
# gc.collect() is the last expression, so the notebook displays the number it returns.
# NOTE(review): the training runs above are started with `!... python run.py`, i.e. as separate
# processes, so this presumably has no effect on memory used by those runs — confirm it is needed.
import torch
import gc

gc.collect()

82

In [11]:
# Release cached CUDA memory held by this kernel's torch allocator, when a GPU is available.
# Relies on `torch` having been imported by the previous cell.
# NOTE(review): the training runs are separate processes, so this presumably only affects
# allocations made inside the notebook kernel itself — confirm it is needed.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("CUDA cache cleared.")

CUDA cache cleared.


# BERT4Rec

In [12]:
MODEL = 'B'
gen_config(num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model B] will not be used in RecBole
09 Nov 06:41    INFO  ['run.py', '--model', 'B'][0m
[0m09 Nov 06:41    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0

# SASRec

In [13]:
MODEL = 'S'
gen_config(num_epochs=EPOCHS, dataset=dataset)
!cd /root/UPDATED_STRUCTURE && python run.py --model $MODEL

Successfully wrote dynamically generated configuration to /root/UPDATED_STRUCTURE/config.yaml
command line args [--model S] will not be used in RecBole
09 Nov 06:42    INFO  ['run.py', '--model', 'S'][0m
[0m09 Nov 06:42    INFO  
[1;35mGeneral Hyper Parameters:
[0m[1;36mgpu_id[0m =[1;33m 0[0m
[1;36muse_gpu[0m =[1;33m True[0m
[1;36mseed[0m =[1;33m 2020[0m
[1;36mstate[0m =[1;33m INFO[0m
[1;36mreproducibility[0m =[1;33m True[0m
[1;36mdata_path[0m =[1;33m dataset/amazon-beauty[0m
[1;36mcheckpoint_dir[0m =[1;33m saved[0m
[1;36mshow_progress[0m =[1;33m True[0m
[1;36msave_dataset[0m =[1;33m False[0m
[1;36mdataset_save_path[0m =[1;33m None[0m
[1;36msave_dataloaders[0m =[1;33m False[0m
[1;36mdataloaders_save_path[0m =[1;33m None[0m
[1;36mlog_wandb[0m =[1;33m False[0m

[1;35mTraining Hyper Parameters:
[0m[1;36mepochs[0m =[1;33m 10[0m
[1;36mtrain_batch_size[0m =[1;33m 2048[0m
[1;36mlearner[0m =[1;33m adam[0