```
Traceback (most recent call last):
  File "/running_package/code_package/./muffin/train/train_llava15.py", line 338, in <module>
    train(attn_implementation="flash_attention_2")
  File "/running_package/code_package/./muffin/train/train_llava15.py", line 313, in train
    model, data_module, tokenizer = init_model(
  File "/running_package/code_package/./muffin/train/train_llava15.py", line 279, in init_model
    data_module = make_dpo_data_module(tokenizer, data_args=data_args, reference_model=copy.deepcopy(model).cuda())
  File "/running_package/code_package/./muffin/train/train_llava15.py", line 149, in make_dpo_data_module
    train_dataset = DPODataset(tokenizer=tokenizer,
  File "/running_package/code_package/./muffin/train/train_llava15.py", line 133, in __init__
    self.list_data_dict = RLAIFVDataset(data_dir, reference_model, tokenizer, multimodal_cfg['image_token_len'], multimodal_cfg['image_processor'], multimodal_cfg['use_im_start_end'], is_llava15=True)
  File "/running_package/code_package/muffin/data/datasets.py", line 45, in __init__
    inference_logp(reference_model, tokenizer, hf_data, self.data_path,
  File "/running_package/code_package/muffin/eval/muffin_inference_logp.py", line 326, in inference_logp
    outputs = get_multimodal_sample_logps(model, dataloader, tokenizer, is_llava15=is_llava15)  # win_logp_list, win_avg_logp_list, win_per_token_logp_list, rej_logp_list, rej_avg_logp_list, rej_per_token_logp_list
  File "/running_package/code_package/muffin/eval/muffin_inference_logp.py", line 243, in get_multimodal_sample_logps
    ) = model.prepare_inputs_labels_for_multimodal(
  File "/running_package/code_package/llava/model/llava_arch.py", line 207, in prepare_inputs_labels_for_multimodal
    image_features = self.encode_images(images)
  File "/running_package/code_package/llava/model/llava_arch.py", line 146, in encode_images
    image_features = self.get_model().get_vision_tower()(images)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/running_package/code_package/llava/model/multimodal_encoder/clip_encoder.py", line 57, in forward
    image_forward_outs = self.vision_tower(images.to(device=self.device, dtype=self.dtype), output_hidden_states=True)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py", line 917, in forward
    return self.vision_model(
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py", line 841, in forward
    hidden_states = self.embeddings(pixel_values)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/transformers/models/clip/modeling_clip.py", line 182, in forward
    patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 460, in forward
    return self._conv_forward(input, self.weight, self.bias)
  File "/miniconda3/envs/llava_cu122/lib/python3.10/site-packages/torch/nn/modules/conv.py", line 456, in _conv_forward
    return F.conv2d(input, weight, bias, self.stride,
RuntimeError: weight should have at least three dimensions
```
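For context: `F.conv2d` raises this exact error whenever its weight tensor has fewer than three dimensions, i.e. is not a full `[out_channels, in_channels, kH, kW]` kernel. Under DeepSpeed ZeRO-3 every parameter is partitioned into a flat 1-D shard, so the `copy.deepcopy(model).cuda()` used to build the reference model copies those placeholders instead of the real CLIP patch-embedding weight. A minimal sketch of the failure mode (the shapes are CLIP-ViT-L/14-336's, chosen here for illustration; this is not the repo's code):

```python
import torch
import torch.nn.functional as F

pixel_values = torch.randn(1, 3, 336, 336)      # one 336x336 RGB image, as CLIP-ViT-L/14-336 expects
kernel = torch.randn(1024, 3, 14, 14)           # full 4-D patch-embedding weight: [out, in, kH, kW]

ok = F.conv2d(pixel_values, kernel, stride=14)  # works: output shape [1, 1024, 24, 24]

sharded = kernel.flatten()                      # roughly what a ZeRO-3-partitioned parameter looks like locally
F.conv2d(pixel_values, sharded, stride=14)      # RuntimeError: weight should have at least three dimensions
```

That would explain why the error surfaces only under the ZeRO-3 config and disappears when switching away from it, as the reply below confirms.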
Thank you for your reply. I have resolved this issue: I replaced the ZeRO-3 DeepSpeed config with the ZeRO-2 one. However, I would like to reproduce your experimental results. Could you please share the config you used for training? Thank you!
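(For reference, the ZeRO-2 swap described above amounts to a DeepSpeed JSON along these lines; this is a generic sketch with `"auto"` placeholders deferring to the HF Trainer arguments, not the authors' actual training config:)

```json
{
  "zero_optimization": {
    "stage": 2,
    "overlap_comm": true,
    "contiguous_gradients": true
  },
  "bf16": { "enabled": "auto" },
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto"
}
```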