## Load Model

In [None]:
from unsloth import FastModel
import torch

# Load the model and its tokenizer from a local checkpoint directory.
# NOTE(review): the path below is absolute and machine-specific — adjust it
# for your environment.
model, tokenizer = FastModel.from_pretrained(
    model_name = "/root/autodl-tmp/model/gemma3n_E2B", # local checkpoint dir; author's size note: gemma3n-e2b ~5B, gemma3n-e4b ~8B
    max_seq_length = 1024, # maximum sequence length handed to the loader; raise it for longer contexts
    load_in_4bit = False,  # set to True to enable 4-bit quantization and reduce memory
    attn_implementation = "eager", # marked as required by the author; the reason is not shown here — TODO confirm
)

## Inference

**Inputs and Outputs**

**Inputs**  
- Text string (question, prompt, document to summarize)  
- Images, normalized to 256×256, 512×512, or 768×768 resolution and encoded to 256 tokens each  
- Audio data, single-channel, encoded to 6.25 tokens per second  
- Total input context: 32K tokens  

**Outputs**  
- Generated text (answer, image analysis, document summary, etc.)  
- Total output length: up to 32K tokens, minus the tokens consumed by the request input

In [None]:
from transformers import TextStreamer
import gc

# Helper function for inference
def gemma3n_inference(model, messages, max_new_tokens = 128):
    """
    Generate a reply to a chat conversation and stream it as it is produced.

    The conversation is rendered with the chat template of the notebook-level
    ``tokenizer``, moved to the CUDA device, and passed to ``model.generate``.
    The generated text is emitted through a ``TextStreamer`` (the prompt is
    skipped); the function itself returns nothing.

    Args:
        model: Model object exposing ``generate``.
        messages: Chat messages in the format accepted by
            ``tokenizer.apply_chat_template``.
        max_new_tokens (int): Upper bound on the number of generated tokens.

    Returns:
        None. The reply is only streamed, not returned.
    """
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt = True, # Must add for generation
        tokenize = True,
        return_dict = True,
        return_tensors = "pt",
    ).to("cuda")
    try:
        # The result is intentionally not bound to a name: the text is already
        # streamed, and dropping the returned tensor right away lets its
        # memory be reclaimed by the cleanup below.
        model.generate(
            **inputs,
            max_new_tokens = max_new_tokens,
            temperature = 1.0, top_p = 0.95, top_k = 64,
            streamer = TextStreamer(tokenizer, skip_prompt = True),
        )
    finally:
        # Cleanup to reduce VRAM usage. Runs even if generation fails, so an
        # interrupted or failing cell does not leave the inputs allocated.
        del inputs
        # Collect garbage first so tensors kept alive only by reference cycles
        # are freed before the CUDA caching allocator releases unused blocks.
        gc.collect()
        torch.cuda.empty_cache()

### Image-Text-to-Text Task

In [None]:
from PIL import Image


def resize_image_with_aspect_ratio(image: "Image.Image", max_size: int) -> "Image.Image":
    """
    Shrink an image so that its longer side is at most ``max_size`` pixels.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the very same object, not a copy); images are never enlarged.

    Args:
        image (Image.Image): The PIL Image object.
        max_size (int): The maximum dimension (width or height), at least 1.

    Returns:
        Image.Image: The resized image, or ``image`` itself when no resize
        is needed.

    Raises:
        ValueError: If ``max_size`` is smaller than 1.
    """
    if max_size < 1:
        raise ValueError(f"max_size must be a positive integer, got {max_size!r}")

    width, height = image.size
    longest_side = max(width, height)
    if longest_side <= max_size:
        return image

    # Exact integer arithmetic: the longer side becomes exactly max_size and
    # the shorter side is floored without floating-point error (for example
    # 49 * (2 / 98) evaluates to 0.999... and would truncate to 0).
    # Clamp to 1 so extreme aspect ratios never yield a zero-sized side.
    new_width = max(1, width * max_size // longest_side)
    new_height = max(1, height * max_size // longest_side)
    return image.resize((new_width, new_height))

# Open the sample picture, cap its longer side at 512 px, then show it along
# with its final size and Python type.
image = resize_image_with_aspect_ratio(
    Image.open("/root/autodl-tmp/kaggle408/resources/question_goose.png"),
    512,
)
display(image)
print(f"Image Size:{image.size}, Type:{type(image)}")

In [None]:
# Multimodal prompt: one user turn carrying the image followed by a text
# instruction.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "Describe this image in detail."},
        ],
    },
]
# The first call may take about a minute while Unsloth's auto compiler runs.
gemma3n_inference(model, messages, max_new_tokens=256)

### Text Generation Task

In [None]:
# Text-only prompt: one user turn with a single text part.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Who are you?"},
        ],
    },
]
gemma3n_inference(model, messages, max_new_tokens=128)