Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add automatic image resizing to prevent memory explosion #1946

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Merge branch 'main' into vlm_image_resize
  • Loading branch information
issamarabi authored Mar 16, 2025
commit c97b0ed215fa8c88df5fb6b6e58675e9398bc568
39 changes: 39 additions & 0 deletions unsloth/models/vision.py
Original file line number Diff line number Diff line change
@@ -42,9 +42,29 @@
from transformers.models.llama.modeling_llama import logger
from transformers import __version__ as transformers_version
from triton import __version__ as triton_version

from PIL import Image
import json

from unsloth_zoo.utils import _get_dtype
from unsloth_zoo.patching_utils import patch_model_and_tokenizer
from unsloth_zoo.training_utils import prepare_model_for_training
import types
import functools
import os
import gc
import math
import functools
from typing import Optional, Tuple, List, Union
import re, inspect, sys
import types
# Import `get_token` from its public location, falling back to the private
# module path used by older huggingface_hub releases.
try:
    from huggingface_hub.utils import get_token
except ImportError:
    # Old HF Hub versions <= 0.0.25
    # Only an import failure should trigger the fallback; a bare `except`
    # would also swallow KeyboardInterrupt / SystemExit and unrelated errors.
    from huggingface_hub.utils._token import get_token
pass


__all__ = [
"FastBaseModel",
@@ -152,6 +172,8 @@ def from_pretrained(
max_image_width = None,
max_image_height = None,
maintain_image_aspect_ratio = True,
auto_model = AutoModelForVision2Seq,
use_gradient_checkpointing = "unsloth",
**kwargs,
):
os.environ["UNSLOTH_USE_NEW_MODEL"] = "1"
@@ -296,6 +318,7 @@ def from_pretrained(
padding_side = "right",
token = token,
)

# Add padding side as well
tokenizer.tokenizer.padding_side = "right"

@@ -346,6 +369,22 @@ def from_pretrained(
aspect_ratio = "maintaining aspect ratio" if maintain_image_aspect_ratio else "ignoring aspect ratio"
logger.warning_once(f"Unsloth: Image resizing enabled with max dimensions {width_info}x{height_info}, {aspect_ratio}")

if hasattr(tokenizer, "tokenizer"):
__tokenizer = tokenizer.tokenizer
# Add padding side as well
__tokenizer.padding_side = "right"
# Check bos, eos, pad tokens
if hasattr(__tokenizer, "bos_token"):
tokenizer.bos_token = __tokenizer.bos_token
tokenizer.bos_token_id = __tokenizer.bos_token_id
if hasattr(__tokenizer, "eos_token"):
tokenizer.eos_token = __tokenizer.eos_token
tokenizer.eos_token_id = __tokenizer.eos_token_id
if hasattr(__tokenizer, "pad_token"):
tokenizer.pad_token = __tokenizer.pad_token
tokenizer.pad_token_id = __tokenizer.pad_token_id
pass

model, tokenizer = patch_tokenizer(model, tokenizer)
model = post_patch_loss_function(model)
# Fix other stuff like BnB compute data types
You are viewing a condensed version of this merge commit. You can view the full changes here.