diff --git a/model/model_training/custom_datasets/oasst_dataset.py b/model/model_training/custom_datasets/oasst_dataset.py index 37bf2a71ca..eec0ce8849 100644 --- a/model/model_training/custom_datasets/oasst_dataset.py +++ b/model/model_training/custom_datasets/oasst_dataset.py @@ -98,6 +98,8 @@ def leaf_filter(thread: list[ExportMessageNode]) -> bool: elif mode == "rm": # for reward models we use thread-fragments ending on prompter messages as prefix and # their (ranked) replies as possible continuations. + if thread[-1].replies is None: + return False return ( thread[-1].role == "prompter" and len([r for r in thread[-1].replies if r.rank is not None]) > 1