Describe the bug
Running ./script/run_finetune.sh raises a ValueError:
Traceback (most recent call last):
File "/home/shy/LMFlow/examples/finetune.py", line 61, in
main()
File "/home/shy/LMFlow/examples/finetune.py", line 57, in main
tuned_model = finetuner.tune(model=model, dataset=dataset)
File "/home/shy/LMFlow/src/lmflow/pipeline/finetuner.py", line 274, in tune
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 1662, in train
return inner_training_loop(
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 2699, in training_step
loss = self.compute_loss(model, inputs)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 2745, in compute_loss
raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.
Script used (run_finetune.sh):
# Launch script: fine-tunes the `gpt2` checkpoint through examples/finetune.py
# using the DeepSpeed launcher, writing logs under <project_dir>/log/<exp_id>.
#
# Usage: run_finetune.sh [DEEPSPEED_ARGS]
#   DEEPSPEED_ARGS  optional single string forwarded to `deepspeed`;
#                   replaces the default below when supplied.
deepspeed_args="--master_port=11000" # Default argument
# A first positional argument, if present, overrides the default launcher args.
if [ $# -ge 1 ]; then
deepspeed_args="$1"
fi
# Experiment id; used as the leaf directory name for outputs and logs.
exp_id=finetune
# Absolute path of the parent of the directory that contains this script.
project_dir=$(cd "$(dirname $0)"/..; pwd)
output_dir=${project_dir}/output_models/${exp_id}
log_dir=${project_dir}/log/${exp_id}
dataset_path=${project_dir}/data/alpaca/train
# Create the output and log directories up front (no error if they exist).
mkdir -p ${output_dir} ${log_dir}
# Run the fine-tuning entry point. `examples/finetune.py` and
# `configs/ds_config_zero3.json` are relative paths, so they resolve against
# the current working directory rather than ${project_dir}.
# stdout is piped through `tee` into ${log_dir}/train.log.
# NOTE(review): the trailing `2>` is attached to `tee` (the last command of the
# pipeline), not to `deepspeed`, so train.err receives tee's stderr only and
# the trainer's stderr is not captured there -- confirm whether that is intended.
deepspeed ${deepspeed_args} \
examples/finetune.py \
--arch_type decoder_only \
--model_name_or_path gpt2 \
--dataset_path ${dataset_path} \
--output_dir ${output_dir} --overwrite_output_dir \
--num_train_epochs 0.01 \
--learning_rate 2e-5 \
--block_size 512 \
--per_device_train_batch_size 1 \
--deepspeed configs/ds_config_zero3.json \
--bf16 \
--run_name finetune \
--validation_split_percentage 0 \
--logging_steps 20 \
--do_train \
--ddp_timeout 72000 \
--save_steps 5000 \
--dataloader_num_workers 1 \
| tee ${log_dir}/train.log \
2> ${log_dir}/train.err
Package Version Editable project location
absl-py 1.4.0
accelerate 0.18.0
aiohttp 3.8.4
aiosignal 1.3.1
antlr4-python3-runtime 4.9.3
appdirs 1.4.4
async-timeout 4.0.2
attrs 23.1.0
blinker 1.6.2
certifi 2023.5.7
chardet 5.1.0
charset-normalizer 3.1.0
click 8.1.3
cmake 3.26.3
colorama 0.4.6
cpm-kernels 1.0.11
DataProperty 0.55.1
datasets 2.10.1
deepspeed 0.8.3
dill 0.3.4
docker-pycreds 0.4.0
evaluate 0.4.0
filelock 3.12.0
Flask 2.3.2
Flask-Cors 3.0.10
frozenlist 1.3.3
fsspec 2023.5.0
gitdb 4.0.10
GitPython 3.1.31
hjson 3.1.0
huggingface-hub 0.14.1
icetk 0.0.7
idna 3.4
importlib-metadata 6.6.0
itsdangerous 2.1.2
Jinja2 3.1.2
joblib 1.2.0
jsonlines 3.1.0
lit 16.0.3
lm-eval 0.3.0
lmflow 0.0.1 /home/shy/LMFlow/src
MarkupSafe 2.1.2
mbstrdecoder 1.1.2
mpi4py 3.1.4
mpmath 1.3.0
multidict 6.0.4
multiprocess 0.70.12.2
networkx 3.1
ninja 1.11.1
nltk 3.8.1
numexpr 2.8.4
numpy 1.24.2
nvidia-cublas-cu11 11.10.3.66
nvidia-cuda-cupti-cu11 11.7.101
nvidia-cuda-nvrtc-cu11 11.7.99
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cudnn-cu11 8.5.0.96
nvidia-cufft-cu11 10.9.0.58
nvidia-curand-cu11 10.2.10.91
nvidia-cusolver-cu11 11.4.0.1
nvidia-cusparse-cu11 11.7.4.91
nvidia-nccl-cu11 2.14.3
nvidia-nvtx-cu11 11.7.91
omegaconf 2.3.0
openai 0.27.6
packaging 23.1
pandas 2.0.1
pathtools 0.1.2
pathvalidate 2.5.2
peft 0.3.0.dev0
Pillow 9.5.0
pip 23.0.1
portalocker 2.7.0
protobuf 3.18.3
psutil 5.9.5
py-cpuinfo 9.0.0
pyarrow 12.0.0
pybind11 2.10.4
pycountry 22.3.5
pydantic 1.10.7
pytablewriter 0.64.2
python-dateutil 2.8.2
pytz 2023.3
PyYAML 6.0
regex 2023.5.5
requests 2.30.0
responses 0.18.0
rouge-score 0.1.2
sacrebleu 1.5.0
scikit-learn 1.2.2
scipy 1.10.1
sentencepiece 0.1.99
sentry-sdk 1.22.1
setproctitle 1.3.2
setuptools 66.0.0
six 1.16.0
smmap 5.0.0
sqlitedict 2.1.0
sympy 1.11.1
tabledata 1.3.1
tcolorpy 0.1.3
threadpoolctl 3.1.0
tokenizers 0.13.3
torch 2.0.0
torchvision 0.15.1
tqdm 4.65.0
tqdm-multiprocess 0.0.11
transformers 4.28.0.dev0
triton 2.0.0
trl 0.4.2.dev0
typepy 1.3.0
typing_extensions 4.5.0
tzdata 2023.3
urllib3 1.26.15
wandb 0.14.0
Werkzeug 2.3.3
wheel 0.38.4
xxhash 3.2.0
yarl 1.9.2
zipp 3.15.0
zstandard 0.21.0
Describe the bug
Running ./script/run_finetune.sh raises a ValueError:
Traceback (most recent call last):
File "/home/shy/LMFlow/examples/finetune.py", line 61, in
main()
File "/home/shy/LMFlow/examples/finetune.py", line 57, in main
tuned_model = finetuner.tune(model=model, dataset=dataset)
File "/home/shy/LMFlow/src/lmflow/pipeline/finetuner.py", line 274, in tune
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 1662, in train
return inner_training_loop(
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 1929, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 2699, in training_step
loss = self.compute_loss(model, inputs)
File "/opt/anaconda3/envs/lmflow/lib/python3.9/site-packages/transformers/trainer.py", line 2745, in compute_loss
raise ValueError(
ValueError: The model did not return a loss from the inputs, only the following keys: logits,past_key_values. For reference, the inputs it received are input_ids,attention_mask.
script
Package Version Editable project location
absl-py 1.4.0
accelerate 0.18.0
aiohttp 3.8.4
aiosignal 1.3.1
antlr4-python3-runtime 4.9.3
appdirs 1.4.4
async-timeout 4.0.2
attrs 23.1.0
blinker 1.6.2
certifi 2023.5.7
chardet 5.1.0
charset-normalizer 3.1.0
click 8.1.3
cmake 3.26.3
colorama 0.4.6
cpm-kernels 1.0.11
DataProperty 0.55.1
datasets 2.10.1
deepspeed 0.8.3
dill 0.3.4
docker-pycreds 0.4.0
evaluate 0.4.0
filelock 3.12.0
Flask 2.3.2
Flask-Cors 3.0.10
frozenlist 1.3.3
fsspec 2023.5.0
gitdb 4.0.10
GitPython 3.1.31
hjson 3.1.0
huggingface-hub 0.14.1
icetk 0.0.7
idna 3.4
importlib-metadata 6.6.0
itsdangerous 2.1.2
Jinja2 3.1.2
joblib 1.2.0
jsonlines 3.1.0
lit 16.0.3
lm-eval 0.3.0
lmflow 0.0.1 /home/shy/LMFlow/src
MarkupSafe 2.1.2
mbstrdecoder 1.1.2
mpi4py 3.1.4
mpmath 1.3.0
multidict 6.0.4
multiprocess 0.70.12.2
networkx 3.1
ninja 1.11.1
nltk 3.8.1
numexpr 2.8.4
numpy 1.24.2
nvidia-cublas-cu11 11.10.3.66
nvidia-cuda-cupti-cu11 11.7.101
nvidia-cuda-nvrtc-cu11 11.7.99
nvidia-cuda-runtime-cu11 11.7.99
nvidia-cudnn-cu11 8.5.0.96
nvidia-cufft-cu11 10.9.0.58
nvidia-curand-cu11 10.2.10.91
nvidia-cusolver-cu11 11.4.0.1
nvidia-cusparse-cu11 11.7.4.91
nvidia-nccl-cu11 2.14.3
nvidia-nvtx-cu11 11.7.91
omegaconf 2.3.0
openai 0.27.6
packaging 23.1
pandas 2.0.1
pathtools 0.1.2
pathvalidate 2.5.2
peft 0.3.0.dev0
Pillow 9.5.0
pip 23.0.1
portalocker 2.7.0
protobuf 3.18.3
psutil 5.9.5
py-cpuinfo 9.0.0
pyarrow 12.0.0
pybind11 2.10.4
pycountry 22.3.5
pydantic 1.10.7
pytablewriter 0.64.2
python-dateutil 2.8.2
pytz 2023.3
PyYAML 6.0
regex 2023.5.5
requests 2.30.0
responses 0.18.0
rouge-score 0.1.2
sacrebleu 1.5.0
scikit-learn 1.2.2
scipy 1.10.1
sentencepiece 0.1.99
sentry-sdk 1.22.1
setproctitle 1.3.2
setuptools 66.0.0
six 1.16.0
smmap 5.0.0
sqlitedict 2.1.0
sympy 1.11.1
tabledata 1.3.1
tcolorpy 0.1.3
threadpoolctl 3.1.0
tokenizers 0.13.3
torch 2.0.0
torchvision 0.15.1
tqdm 4.65.0
tqdm-multiprocess 0.0.11
transformers 4.28.0.dev0
triton 2.0.0
trl 0.4.2.dev0
typepy 1.3.0
typing_extensions 4.5.0
tzdata 2023.3
urllib3 1.26.15
wandb 0.14.0
Werkzeug 2.3.3
wheel 0.38.4
xxhash 3.2.0
yarl 1.9.2
zipp 3.15.0
zstandard 0.21.0