Skip to content

Commit

Permalink
fix masking error
Browse files: browse the repository at this point in the history
  • Loading branch information
haoning.wu committed Jan 21, 2024
1 parent f79ed74 commit 4007d89
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions q_align/train/train_mem.py
Original file line number Diff line number Diff line change
@@ -370,7 +370,7 @@ def preprocess_v1(

if has_image:
round_len = len(tokenizer_image_token(rou, tokenizer))
- instruction_len = len(tokenizer_image_token(parts[0], tokenizer)) - 2
+ instruction_len = len(tokenizer_image_token(parts[0], tokenizer)) - 3
else:
round_len = len(tokenizer(rou).input_ids)
instruction_len = len(tokenizer(parts[0]).input_ids) - 2
@@ -387,7 +387,6 @@ def preprocess_v1(
f"WARNING: tokenization mismatch: {cur_len} vs. {total_len}."
f" (ignored)"
)

return dict(
input_ids=input_ids,
labels=targets,
Expand Down

0 comments on commit 4007d89

Please sign in to comment.