In [1]:
%%capture
# Install Unsloth and its dependencies. The %%capture cell magic above suppresses
# the pip output for this cell.
import os
# The Colab branch is chosen when the concatenated environment variable names
# contain the substring "COLAB_".
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # The first and last installs use --no-deps, so pip does not pull in their
    # dependencies; xformers and trl are pinned to exact versions.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [9]:
import torch
from transformers import TrainingArguments, Trainer
from unsloth import FastVisionModel
from datasets import load_dataset, Dataset
import pandas as pd
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
import json
import os
from io import BytesIO
import re
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from transformers import TextStreamer
import string
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction

In [3]:

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# Reference list only: nothing in the visible notebook reads `fourbit_models`.
fourbit_models = [
    "unsloth/Llama-3.2-11B-Vision-Instruct-bnb-4bit", # Llama 3.2 vision support
    "unsloth/Llama-3.2-11B-Vision-bnb-4bit",
    "unsloth/Llama-3.2-90B-Vision-Instruct-bnb-4bit", # Can fit in a 80GB card!
    "unsloth/Llama-3.2-90B-Vision-bnb-4bit",

    "unsloth/Pixtral-12B-2409-bnb-4bit",              # Pixtral fits in 16GB!
    "unsloth/Pixtral-12B-Base-2409-bnb-4bit",         # Pixtral base model

    "unsloth/Qwen2-VL-2B-Instruct-bnb-4bit",          # Qwen2 VL support
    "unsloth/Qwen2-VL-7B-Instruct-bnb-4bit",
    "unsloth/Qwen2-VL-72B-Instruct-bnb-4bit",

    "unsloth/llava-v1.6-mistral-7b-hf-bnb-4bit",      # Any Llava variant works!
    "unsloth/llava-1.5-7b-hf-bnb-4bit",
] # More models at https://huggingface.co/unsloth

# Load the base vision-language model. The second return value is named
# `tokenizer`, but later cells call it with an image and text together, so it
# also handles image preprocessing.
model, tokenizer = FastVisionModel.from_pretrained(
    "unsloth/Llama-3.2-11B-Vision-Instruct",
    load_in_4bit = True, # Use 4bit to reduce memory use. False for 16bit LoRA.
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for long context
)

==((====))==  Unsloth 2025.4.7: Fast Mllama patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors.index.json:   0%|          | 0.00/375k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/210 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/477 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/55.9k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/5.15k [00:00<?, ?B/s]

In [4]:
# Apply LoRA fine-tuning setup
# `model` is rebound to the PEFT-wrapped model returned by get_peft_model.
# All four finetune_* switches are enabled; judging by their names they select
# the vision layers, language layers, attention modules and MLP modules.
model = FastVisionModel.get_peft_model(
    model,
    finetune_vision_layers=True,
    finetune_language_layers=True,
    finetune_attention_modules=True,
    finetune_mlp_modules=True,
    r=32,               # LoRA rank
    lora_alpha=32,      # LoRA scaling numerator (equal to the rank here)
    lora_dropout=0.05,
    bias="none",
    random_state=42,
    use_rslora=True,    # presumably rank-stabilized LoRA scaling — confirm against PEFT docs
)

Unsloth: Making `model.base_model.model.vision_model.transformer` require gradients


In [5]:
# 1. Read the parquet files into pandas DataFrames (the image column stays as bytes)
train_df = pd.read_parquet("train_data.parquet")
valid_df = pd.read_parquet("valid_data.parquet")

# 2. Convert to HuggingFace Datasets (images are still bytes at this point, so no error is raised)
train_dataset = Dataset.from_pandas(train_df)
valid_dataset = Dataset.from_pandas(valid_df)

# 3. Convert to the conversation format (the bytes -> PIL conversion happens here)
# This prompt is used as the user turn of every training/validation sample and
# is read again by the inference cells.
instruction = "You are an expert radiographer. Describe accurately what you see in this image."

def convert_bytes_to_pil(byte_data):
    """Decode raw encoded image bytes into an RGB PIL image.

    Args:
        byte_data: The encoded image file contents as a bytes object.

    Returns:
        The decoded image converted to RGB mode.
    """
    # The notebook imports `BytesIO` directly (`from io import BytesIO`) and never
    # imports the bare `io` module, so `io.BytesIO` raised NameError on a fresh kernel.
    return Image.open(BytesIO(byte_data)).convert("RGB")

def convert_to_conversation(sample):
    """Turn one dataset row into a two-turn chat sample.

    Reads the encoded image from sample["image"]["bytes"] and the target text
    from sample["caption"]. The user turn carries the notebook-level
    `instruction` followed by the decoded image; the assistant turn carries
    the caption.

    Returns:
        A dict with a single "messages" key holding [user_turn, assistant_turn].
    """
    user_turn = {
        "role": "user",
        "content": [
            {"type": "text", "text": instruction},
            {"type": "image", "image": convert_bytes_to_pil(sample["image"]["bytes"])},
        ],
    }
    assistant_turn = {
        "role": "assistant",
        "content": [{"type": "text", "text": sample["caption"]}],
    }
    return {"messages": [user_turn, assistant_turn]}

# 4. Convert every sample to the conversation format. Note: this uses plain list
# comprehensions rather than Dataset.map(), so both names are rebound from
# HuggingFace Datasets to Python lists of {"messages": [...]} dicts holding
# decoded PIL images.
train_dataset = [convert_to_conversation(sample) for sample in train_dataset]
valid_dataset = [convert_to_conversation(sample) for sample in valid_dataset]

In [6]:
from unsloth.trainer import UnslothVisionDataCollator
from trl import SFTTrainer, SFTConfig
from unsloth import is_bf16_supported

# Put the model into Unsloth's training mode before building the trainer. Without
# this, re-running the cell after any inference cell (which calls
# FastVisionModel.for_inference) would train a model left in inference mode.
FastVisionModel.for_training(model)

# Supervised fine-tuning on the conversation-format samples. The Unsloth vision
# collator receives both the model and the tokenizer/processor.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    data_collator = UnslothVisionDataCollator(model, tokenizer),
    train_dataset = train_dataset,
    # NOTE(review): no evaluation strategy is configured below, so this set is
    # presumably only used if trainer.evaluate() is called explicitly — confirm.
    eval_dataset = valid_dataset,
    args = SFTConfig(
        output_dir = "outputs",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 8,   # 2 x 8 = 16 samples per optimizer step on one GPU
        max_steps = 30,
        warmup_steps = 10,
        learning_rate = 2e-4,
        weight_decay = 0.01,
        logging_steps = 1,
        seed = 3407,
        # Exactly one of fp16/bf16 is enabled, chosen by hardware support.
        fp16 = not is_bf16_supported(),
        bf16 = is_bf16_supported(),
        optim = "adamw_8bit",
        lr_scheduler_type = "cosine",
        report_to = "none",

        # Required settings for vision fine-tuning
        remove_unused_columns = False,
        dataset_text_field = "",  # Leave empty so that unsloth uses "messages" automatically
        dataset_kwargs = {"skip_prepare_dataset": True},
        dataset_num_proc = 4,
        max_seq_length = 2048,
    )
)


In [8]:
# Run fine-tuning. As the last expression of the cell, the returned TrainOutput
# is displayed as the cell result.
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 200 | Num Epochs = 3 | Total steps = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 134,348,800/11,000,000,000 (1.22% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,4.6108
2,4.5344
3,4.432
4,3.9677
5,3.3761
6,2.9083
7,2.5462
8,2.1322
9,1.5821
10,1.3806


TrainOutput(global_step=30, training_loss=1.66600807706515, metrics={'train_runtime': 4747.0441, 'train_samples_per_second': 0.101, 'train_steps_per_second': 0.006, 'total_flos': 1427440484381472.0, 'train_loss': 1.66600807706515})

**INFERENCE**

In [None]:

# Accumulators filled by the validation loop: one sentence-level BLEU score per
# example, plus tokenized references and hypotheses for the corpus-level score.
bleu_scores, references, hypotheses = [], [], []

# NLTK smoothing method 1, passed to both the sentence-level and corpus-level BLEU calls.
smooth = SmoothingFunction().method1

def extract_assistant_response(text):
    """Return only the generated caption from a decoded model output.

    The decoded sequence contains the chat-template preamble (role label
    "user", the fixed prompt, role label "assistant") ahead of the caption.
    Only that leading preamble is removed. The previous implementation deleted
    every occurrence of "user" and "assistant", which also destroyed those
    words inside genuine captions (e.g. "Computer user" became "Computer").

    Args:
        text: Decoded model output, with or without `<|...|>` special tokens.

    Returns:
        The caption with special tokens, the preamble and any echoed prompt
        removed, newlines collapsed to single spaces, and outer whitespace
        stripped.
    """
    prompt = "You are an expert radiographer. Describe accurately what you see in this image."

    # Keep only what follows the last <|assistant|> marker, when that marker is present.
    if "<|assistant|>" in text:
        text = text.split("<|assistant|>")[-1]

    # Drop every <|...|> special-token marker.
    text = re.sub(r"<\|.*?\|>", "", text)

    # With the markers gone, a full decoded sequence starts with
    # "user <prompt> assistant". Strip that once, anchored at the start, so role
    # words appearing later in the caption are left alone.
    preamble = r"^\s*user\s*" + re.escape(prompt) + r"\s*assistant\b"
    text = re.sub(preamble, "", text, count=1)

    # Remove the prompt itself in case the model echoed it back.
    text = text.replace(prompt, "")
    text = re.sub(r"\n+", " ", text)
    return text.strip()

def normalize_text(text):
    """Lower-case `text`, delete ASCII punctuation, and strip outer whitespace."""
    # Mapping each punctuation code point to None makes str.translate delete it.
    drop_punctuation = dict.fromkeys(map(ord, string.punctuation))
    return text.lower().translate(drop_punctuation).strip()

# Switch the model to Unsloth's inference mode before generating. Previously this
# was only done in a later cell, so this cell depended on out-of-order execution.
FastVisionModel.for_inference(model)

# Number of examples skipped because of an exception; reported after the loop so
# that swallowed errors do not silently shrink the evaluated set.
failed_examples = 0

for i, example in enumerate(tqdm(valid_dataset, desc="Validating")):
    try:
        # Ground truth: the assistant turn of the stored conversation.
        conversation = example["messages"]
        user_content = conversation[0]["content"]
        gt_response = conversation[1]["content"][0]["text"].strip()
        gt_normalized = normalize_text(gt_response)

        # Extract the image and the prompt from the user turn. Loop-local names
        # are used so the notebook-level `instruction` and `messages` (read by
        # later cells) are not overwritten.
        image = None
        prompt_text = ""
        for item in user_content:
            if item["type"] == "image":
                image = item["image"]
            elif item["type"] == "text":
                prompt_text = item["text"]

        assert image is not None, f"[{i}] ‚ùå ÂúñÁâáÈÅ∫Â§±"
        assert prompt_text.strip() != "", f"[{i}] ‚ùå Êåá‰ª§ÁÇ∫Á©∫"

        # Build the single-turn chat prompt and encode it together with the image.
        chat = [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": prompt_text}]}]
        input_text = tokenizer.apply_chat_template(chat, add_generation_prompt=True)
        inputs = tokenizer(image, input_text, add_special_tokens=False, return_tensors="pt").to("cuda")
        text_streamer = TextStreamer(tokenizer, skip_prompt=True)

        # Inference.
        # NOTE(review): temperature/top_p only matter when sampling is enabled; if it
        # is, BLEU varies from run to run — consider do_sample=False for a
        # reproducible evaluation.
        with torch.no_grad():
            output = model.generate(
                **inputs,
                streamer=text_streamer,
                max_new_tokens=128,
                use_cache=True,
                temperature=1.5,
                top_p=0.9,
                pad_token_id=tokenizer.eos_token_id,
            )

        decoded = tokenizer.decode(output[0], skip_special_tokens=False).strip()
        gen_text = extract_assistant_response(decoded)
        gen_normalized = normalize_text(gen_text)

        print(f"\nüìå GT: {gt_response}")
        print(f"ü§ñ OUT: {gen_text}")

        # Sentence-level BLEU for this example.
        score = sentence_bleu([gt_normalized.split()], gen_normalized.split(), smoothing_function=smooth)
        print(f"üéØ BLEU: {score:.4f}")
        bleu_scores.append(score)

        # Collect tokens for the corpus-level BLEU computed after the loop.
        references.append([gt_normalized.split()])
        hypotheses.append(gen_normalized.split())

    except Exception as e:
        # Deliberate best-effort: report the failure, count it, and move on.
        failed_examples += 1
        print(f"\n‚ö†Ô∏è [Example {i+1}] ÁôºÁîüÈåØË™§Ôºö{repr(e)}\n")

if failed_examples:
    print(f"Skipped {failed_examples} of {len(valid_dataset)} examples due to errors")

# Show the average sentence-level BLEU score.
if bleu_scores:
    avg_bleu = sum(bleu_scores) / len(bleu_scores)
    print(f"\n‚úÖ Âπ≥Âùá BLEU ÂàÜÊï∏: {avg_bleu:.4f}")
else:
    print("‚ùå ÁÑ°ÂèØË©ï‰º∞È†ÖÁõÆ")

# Additionally compute the overall corpus-level BLEU.
if references and hypotheses:
    corpus_bleu_score = corpus_bleu(references, hypotheses, smoothing_function=smooth)
    print(f"üìä Corpus BLEU ÂàÜÊï∏ÔºàÊï¥È´îÔºâ: {corpus_bleu_score:.4f}")
else:
    print("‚ùå ÁÑ°Ê≥ïË®àÁÆó Corpus BLEU")

Validating:   0%|          | 0/100 [00:00<?, ?it/s]

Christmast Cat Art - Old Art for Christmas 1909, vintage greeting card of cat orchestra - Christmas vintage design<|eot_id|>


Validating:   1%|          | 1/100 [00:06<11:20,  6.87s/it]


üìå GT: To Wish You A Jolly Christmas: To Wish You A Jolly Christmas Cat Cow Dog Fiddle Music Artist Unknown Year ca 1910 Public Domain
ü§ñ OUT: Christmast Cat Art - Old Art for Christmas 1909, vintage greeting card of cat orchestra - Christmas vintage design
üéØ BLEU: 0.0100
Knitting 12 - Knitting pattern on canvas<|eot_id|>


Validating:   2%|‚ñè         | 2/100 [00:11<08:38,  5.29s/it]


üìå GT: Textured Digital Paper 2020 - 1: Textured digital paper for scrapbooking and more
ü§ñ OUT: Knitting 12 - Knitting pattern on canvas
üéØ BLEU: 0.0000
Guaratinguet: Birds of Cuiaba. Birds in brazilian wetland. Nature wildlife<|eot_id|>


Validating:   3%|‚ñé         | 3/100 [00:17<09:18,  5.76s/it]


üìå GT: A Guira Cuckoo Bird From Brazil: A guira cuckoo bird from brazil in south africa
ü§ñ OUT: Guaratinguet: Birds of Cuiaba. Birds in brazilian wetland. Nature wildlife
üéØ BLEU: 0.0128
Victorian Christmas Child: Vintage image of antique Christmas vintage 2023 illustration<|eot_id|>


Validating:   4%|‚ñç         | 4/100 [00:22<08:42,  5.44s/it]


üìå GT: Vintage Child: Vintage paper with white space, copy space of a child holding a doll at christmas
ü§ñ OUT: Victorian Christmas Child: Vintage image of antique Christmas vintage 2023 illustration
üéØ BLEU: 0.0291
Shop: Shop Worded Christmas Card 2012 - 2 of 4: Close-up of part of a worded greeting card, words shop on a holiday patterned red background<|eot_id|>


Validating:   5%|‚ñå         | 5/100 [00:31<10:39,  6.74s/it]


üìå GT: Shop: Christmas design SHOP sign
ü§ñ OUT: Shop: Shop Worded Christmas Card 2012 - 2 of 4: Close-up of part of a worded greeting card, words shop on a holiday patterned red background
üéØ BLEU: 0.0100
Holiday Card With Happy New Year: Holiday card with Happy New Year image on rustic wood background. Cones green pine and gold glitter bow decoration border with pine cones. Words on the wood<|eot_id|>


Validating:   6%|‚ñå         | 6/100 [00:42<13:01,  8.32s/it]


üìå GT: Happy New Year Gold Bow On Wood: Happy New Year brown text on a wood background framed with cedar branches and a sparkling gold bow.
ü§ñ OUT: Holiday Card With Happy New Year: Holiday card with Happy New Year image on rustic wood background. Cones green pine and gold glitter bow decoration border with pine cones. Words on the wood
üéØ BLEU: 0.0625
Water Drops, Background of water droplet on a yellow background: Drops background: Drops on clear plastic, yellow, background in studio with lights<|eot_id|>


Validating:   7%|‚ñã         | 7/100 [00:49<12:20,  7.96s/it]


üìå GT: Golden Bubbles: Swirl of golden bubbles
ü§ñ OUT: Water Drops, Background of water droplet on a yellow background: Drops background: Drops on clear plastic, yellow, background in studio with lights
üéØ BLEU: 0.0087
Festive, patterned seamless, wrap. A decorative graphic paper seamless background in horizontal strip. Floral, geometric ornament design for paper: wrappers, wallpaper, print, textile<|eot_id|>


Validating:   8%|‚ñä         | 8/100 [00:58<12:26,  8.11s/it]


üìå GT: Vertical Striped Christmas Paper: Vertical red and green columns vertical filled with textured pattern for scrapbooking, paper, greeting cards, background
ü§ñ OUT: Festive, patterned seamless, wrap. A decorative graphic paper seamless background in horizontal strip. Floral, geometric ornament design for paper: wrappers, wallpaper, print, textile
üéØ BLEU: 0.0117
Fantasy Smoke: Smoke<|eot_id|>


Validating:   9%|‚ñâ         | 9/100 [01:01<10:02,  6.62s/it]


üìå GT: Smoke Background: Background motif
ü§ñ OUT: Fantasy Smoke: Smoke
üéØ BLEU: 0.0814
Lilac And Backgrounds Purple - background: Bokeh and glitter texture<|eot_id|>


Validating:  10%|‚ñà         | 10/100 [01:07<09:27,  6.31s/it]


üìå GT: Bokeh Background Lights Abstract: Bokeh blue violet purple colors colorful background lights abstract seamless tiles pattern glow glow spots dots festive solemn colors glitter sparkle paper christmas easter wedding jewels artful classic old
ü§ñ OUT: Lilac And Backgrounds Purple - background: Bokeh and glitter texture
üéØ BLEU: 0.0024
Card Text Happy New Year : 7.50 8<|eot_id|>


Validating:  11%|‚ñà         | 11/100 [01:11<08:32,  5.76s/it]


üìå GT: Happy New Year Fir Tree Background: The text Happy New Year centered on a fir tree background.
ü§ñ OUT: Card Text Happy New Year : 7.50 8
üéØ BLEU: 0.0985
Sea Green Blue Dock Rop<|eot_id|>


Validating:  12%|‚ñà‚ñè        | 12/100 [01:15<07:25,  5.06s/it]


üìå GT: Marine Rope: Sailor's knot
ü§ñ OUT: Sea Green Blue Dock Rop
üéØ BLEU: 0.0000
Abstract Retro Background In Warm Colors: Background with red-orange and tan radial stripes for Valentine's day, birthday, wedding or greeting card<|eot_id|>


Validating:  13%|‚ñà‚ñé        | 13/100 [01:21<08:02,  5.54s/it]


üìå GT: Christmas Starburst: Red starburst background image
ü§ñ OUT: Abstract Retro Background In Warm Colors: Background with red-orange and tan radial stripes for Valentine's day, birthday, wedding or greeting card
üéØ BLEU: 0.0091
Metal Foil Wallpaper 10: A scrapbooking item, seamless pattern with distressed effect texture on a metal sheet<|eot_id|>


Validating:  14%|‚ñà‚ñç        | 14/100 [01:28<08:14,  5.75s/it]


üìå GT: Background: Background motif
ü§ñ OUT: Metal Foil Wallpaper 10: A scrapbooking item, seamless pattern with distressed effect texture on a metal sheet
üéØ BLEU: 0.0000
Flamingo Flower: The exotic and tropical Anthurium andraeanum flower<|eot_id|>


Validating:  15%|‚ñà‚ñå        | 15/100 [01:33<07:54,  5.58s/it]


üìå GT: Light Pink Calla Lily: Light pink calla lily
ü§ñ OUT: Flamingo Flower: The exotic and tropical Anthurium andraeanum flower
üéØ BLEU: 0.0000
Grass: Grass 9<|eot_id|>


Validating:  16%|‚ñà‚ñå        | 16/100 [01:37<07:10,  5.13s/it]


üìå GT: Grass Background: Fresh green grass background macro image
ü§ñ OUT: Grass: Grass 9
üéØ BLEU: 0.0255
Cars On Paper: Various colors of the new black model.<|eot_id|>


Validating:  17%|‚ñà‚ñã        | 17/100 [01:42<07:03,  5.10s/it]


üìå GT: Porche 928: Porche car design
ü§ñ OUT: Cars On Paper: Various colors of the new black model.
üéØ BLEU: 0.0000
Violet Flowers: Close-up image of fabric with flower pattern<|eot_id|>


Validating:  18%|‚ñà‚ñä        | 18/100 [01:47<06:45,  4.94s/it]


üìå GT: Floral Pattern Background 1809: Floral pattern seamless
ü§ñ OUT: Violet Flowers: Close-up image of fabric with flower pattern
üéØ BLEU: 0.0240
You Light Up My Life: Beautiful picture of the side of a blue box with hearts for the door<|eot_id|>


Validating:  19%|‚ñà‚ñâ        | 19/100 [01:53<07:06,  5.27s/it]


üìå GT: You Light Up My Life: A heart lamp with rays and the words You Light Up My Life
ü§ñ OUT: You Light Up My Life: Beautiful picture of the side of a blue box with hearts for the door
üéØ BLEU: 0.2131
Swan: Swan swimming in the water in the sunshine<|eot_id|>


Validating:  20%|‚ñà‚ñà        | 20/100 [01:57<06:38,  4.98s/it]


üìå GT: Swan On Lake: Blue photo of swan on lake
ü§ñ OUT: Swan: Swan swimming in the water in the sunshine
üéØ BLEU: 0.0285
Angelfish Gold: Artistic golden goldfish swimming in clear water<|eot_id|>


Validating:  21%|‚ñà‚ñà        | 21/100 [02:02<06:37,  5.03s/it]


üìå GT: Fish Of Flowers: A pair of cute yellow fish made with flowers from the Rose Parade Floats
ü§ñ OUT: Angelfish Gold: Artistic golden goldfish swimming in clear water
üéØ BLEU: 0.0000
Funny, Fat Seal: A chubby seal isolated<|eot_id|>


Validating:  22%|‚ñà‚ñà‚ñè       | 22/100 [02:06<06:16,  4.82s/it]


üìå GT: Sea Lion Meme: Funny sea lion on blue starburst background
ü§ñ OUT: Funny, Fat Seal: A chubby seal isolated
üéØ BLEU: 0.0215
Columba Columba Tinea - Clique nature 14/02/2022 - Columba Columba Tinea bird Mourning Dove: Mourning Dove 15-15-30.<|eot_id|>


Validating:  23%|‚ñà‚ñà‚ñé       | 23/100 [02:15<07:47,  6.07s/it]


üìå GT: Mourning Dove Flies Toward Feeder: Mourning doves crowd a bird feeder and one flies in for a landing
ü§ñ OUT: Columba Columba Tinea - Clique nature 14/02/2022 - Columba Columba Tinea bird Mourning Dove: Mourning Dove 15-15-30.
üéØ BLEU: 0.0272
Floral Pattern - Texture floral design for scrapbooking & etc. Ornamental, vintage<|eot_id|>


Validating:  24%|‚ñà‚ñà‚ñç       | 24/100 [02:21<07:23,  5.83s/it]


üìå GT: Silver Glitter Bow: Silver sparkly bow on a blue gift
ü§ñ OUT: Floral Pattern - Texture floral design for scrapbooking & etc. Ornamental, vintage
üéØ BLEU: 0.0000
White haired yogi: Yoga and Wellness In the Yoga Mudra or Buddha, hand posture the white haired yogi feels his soul at peace<|eot_id|>


Validating:  25%|‚ñà‚ñà‚ñå       | 25/100 [02:29<08:05,  6.47s/it]


üìå GT: Gassho Meditation 2: A man with beard and white hairs is practicing a gassho meditation
ü§ñ OUT: White haired yogi: Yoga and Wellness In the Yoga Mudra or Buddha, hand posture the white haired yogi feels his soul at peace
üéØ BLEU: 0.0098
2 martini glasses : 2 vintage crystal glass cocktail mixing mugs<|eot_id|>


Validating:  26%|‚ñà‚ñà‚ñå       | 26/100 [02:33<07:22,  5.98s/it]


üìå GT: Blue And Gold Glasses: Champagne Glasses in blue and gold artistic
ü§ñ OUT: 2 martini glasses : 2 vintage crystal glass cocktail mixing mugs
üéØ BLEU: 0.0191
Grey Background<|eot_id|>


Validating:  27%|‚ñà‚ñà‚ñã       | 27/100 [02:36<06:07,  5.04s/it]


üìå GT: Decorative Alphabet Black: Alphabet capital letters for art projects
ü§ñ OUT: Grey Background
üéØ BLEU: 0.0000
Destruction To Renewal: Flowers after wildfire, trees that got burned in Alberta Canada, Canada, Fire, Flowers, Spring, Grass, Mountains<|eot_id|>


Validating:  28%|‚ñà‚ñà‚ñä       | 28/100 [02:44<07:04,  5.90s/it]


üìå GT: Wild Flowers In The Forest: Wild Flowers in the Forest
ü§ñ OUT: Destruction To Renewal: Flowers after wildfire, trees that got burned in Alberta Canada, Canada, Fire, Flowers, Spring, Grass, Mountains
üéØ BLEU: 0.0134
Inner Circle Words: Inner Circle: What Am I Feeling, Fear, Guilt, Sadness, Pain<|eot_id|>


Validating:  29%|‚ñà‚ñà‚ñâ       | 29/100 [02:50<06:58,  5.90s/it]


üìå GT: Mind Liberation: Liberate yourself from a mindset based in insecurity by adopting and practicing a new one based in being safe, mattering, and belonging
ü§ñ OUT: Inner Circle Words: Inner Circle: What Am I Feeling, Fear, Guilt, Sadness, Pain
üéØ BLEU: 0.0000
Star Trek Men And Woman Art Star Art Background Digital<|eot_id|>


Validating:  30%|‚ñà‚ñà‚ñà       | 30/100 [02:55<06:25,  5.51s/it]


üìå GT: Robot, Artificial Intelligence: Robot, artificial intelligence, woman, future, computer science, electrical engineering, technology, developer, technology, thinking, computer, man, intelligent, controlled, circuit board, circuit board, printed circuit, information, data, function, micro
ü§ñ OUT: Star Trek Men And Woman Art Star Art Background Digital
üéØ BLEU: 0.0029
Digital Vintage Christmas 1 - A digital graphic from original image to make a background graphic, or a card. 1 Image ID : 1080333 digital vintage: abstract: computer: computer: cute design: festive: graphic: graphics: illustrations: holidays: holiday<|eot_id|>


Validating:  31%|‚ñà‚ñà‚ñà       | 31/100 [03:06<08:26,  7.35s/it]


üìå GT: Christmas Reindeer Scenic: Christmas trees and reindeer artistic
ü§ñ OUT: Digital Vintage Christmas 1 - A digital graphic from original image to make a background graphic, or a card. 1 Image ID : 1080333 digital vintage: abstract: computer: computer: cute design: festive: graphic: graphics: illustrations: holidays: holiday
üéØ BLEU: 0.0053
Stonehouse in the French Alps: Stone building and wooden chalet cabins in the french alps in winter<|eot_id|>


Validating:  32%|‚ñà‚ñà‚ñà‚ñè      | 32/100 [03:12<07:52,  6.95s/it]


üìå GT: Alpine Village. France: Alpine village architecture, France Alp Chamonix
ü§ñ OUT: Stonehouse in the French Alps: Stone building and wooden chalet cabins in the french alps in winter
üéØ BLEU: 0.0000
Eastern Gray Squirrel: An eastern gray squirrel perched on a fence<|eot_id|>


Validating:  33%|‚ñà‚ñà‚ñà‚ñé      | 33/100 [03:17<07:07,  6.38s/it]


üìå GT: Bryant's Fox Squirrel Eating: Bryant's Fox Squirrel eating while up on a patio outcrop
ü§ñ OUT: Eastern Gray Squirrel: An eastern gray squirrel perched on a fence
üéØ BLEU: 0.0361
Aiguille du Midi, Chamonix Montane: Chamonix, Mont Blanc, French Alps, Tour of Mont Blanc, French Alps Snow, Mont Blanc, Snowed Mountain Peak, Ski Tour Of Mont Blanc<|eot_id|>


Validating:  34%|‚ñà‚ñà‚ñà‚ñç      | 34/100 [03:27<08:05,  7.36s/it]


üìå GT: Aiguille Du Midi, France: Aiguille du Midi is a mountain in the Mont Blanc massif within the French Alps. It is a popular tourist destination and can be directly accessed by cable car from Chamonix that takes visitors close to Mont Blanc.
ü§ñ OUT: Aiguille du Midi, Chamonix Montane: Chamonix, Mont Blanc, French Alps, Tour of Mont Blanc, French Alps Snow, Mont Blanc, Snowed Mountain Peak, Ski Tour Of Mont Blanc
üéØ BLEU: 0.0345
Snowflakes Seamless Patterned Background: Watercolor texture snowflakes background for scrapbooking decorative paper design card graphic, etc.<|eot_id|>


Validating:  35%|‚ñà‚ñà‚ñà‚ñå      | 35/100 [03:34<07:51,  7.25s/it]


üìå GT: Winter Background - 11: Digital paper patterned white snowflakes on red background for scrapbooking and others
ü§ñ OUT: Snowflakes Seamless Patterned Background: Watercolor texture snowflakes background for scrapbooking decorative paper design card graphic, etc.
üéØ BLEU: 0.0752
Wood Finish Flooring: Real wood textured background<|eot_id|>


Validating:  36%|‚ñà‚ñà‚ñà‚ñå      | 36/100 [03:38<06:39,  6.25s/it]


üìå GT: Wood Texture: A warm wood texture.
ü§ñ OUT: Wood Finish Flooring: Real wood textured background
üéØ BLEU: 0.0393
Rose Flowers Background: Bouquet of roses flowers, closeup, macro<|eot_id|>


Validating:  37%|‚ñà‚ñà‚ñà‚ñã      | 37/100 [03:43<06:16,  5.98s/it]


üìå GT: Rose: Profile of a reddish pink rose in full bloom
ü§ñ OUT: Rose Flowers Background: Bouquet of roses flowers, closeup, macro
üéØ BLEU: 0.0255
Man Watching The Sunset At Desert: man watching the sunset in the mountains in desert, dramatic and inspirational silhouette image of man thinking and feeling inspirational<|eot_id|>


Validating:  38%|‚ñà‚ñà‚ñà‚ñä      | 38/100 [03:50<06:29,  6.28s/it]


üìå GT: Silhouette: Silhouette of a Visitor to Joshua Tree National Park
ü§ñ OUT: Man Watching The Sunset At Desert: man watching the sunset in the mountains in desert, dramatic and inspirational silhouette image of man thinking and feeling inspirational
üéØ BLEU: 0.0086
A Tiny Crab: Little brown crab on tree with water drops<|eot_id|>


Validating:  39%|‚ñà‚ñà‚ñà‚ñâ      | 39/100 [03:55<06:00,  5.90s/it]


üìå GT: Fairy Hat On Plant: Tiny inverted plant part that looks like the hat for a fairy
ü§ñ OUT: A Tiny Crab: Little brown crab on tree with water drops
üéØ BLEU: 0.0157
White-bellied Whistling Duck: White-bellied whistling duck in closeup on dark background<|eot_id|>


Validating:  40%|‚ñà‚ñà‚ñà‚ñà      | 40/100 [04:01<05:58,  5.98s/it]


üìå GT: Keen Eye Of Spurwinged Goose: Close view of keen eye of spurwinged goose
ü§ñ OUT: White-bellied Whistling Duck: White-bellied whistling duck in closeup on dark background
üéØ BLEU: 0.0000
Gray Surface: Gray wall<|eot_id|>


Validating:  41%|‚ñà‚ñà‚ñà‚ñà      | 41/100 [04:05<05:07,  5.20s/it]


üìå GT: Gray Gradient Natural Background: Gray fog color gradient in natural occurring event
ü§ñ OUT: Gray Surface: Gray wall
üéØ BLEU: 0.0129
Stained Glass Wallpaper Seamless: Abstract seamless 

In [10]:
# Prepare the model for inference

FastVisionModel.for_inference(model) # Enable for inference!
model.eval().to("cuda")


# Load the test data
test_df = pd.read_parquet("test_data.parquet")
test_dataset = Dataset.from_pandas(test_df)

# Single-turn chat prompt reused for every test image: an image placeholder
# followed by the notebook-level `instruction` text.
messages = [
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": instruction}
    ]}
]

# bytes -> PIL image
def convert_bytes_to_pil(image_bytes):
    """Open the encoded image held in `image_bytes` and return it in RGB mode."""
    buffer = BytesIO(image_bytes)
    decoded_image = Image.open(buffer)
    return decoded_image.convert("RGB")

def clean_caption(text):
    """Strip chat-template residue from decoded output, leaving the caption.

    Only the leading preamble (role label "user", the fixed prompt, role label
    "assistant") is removed. The previous implementation deleted every
    case-insensitive occurrence of "user" and every "assistant", which also
    removed those words from inside genuine captions.

    Args:
        text: Decoded model output, with or without `<|...|>` special tokens.

    Returns:
        The caption with special tokens, the preamble and any echoed prompt
        removed, newlines collapsed to single spaces, and outer whitespace
        stripped.
    """
    prompt = "You are an expert radiographer. Describe accurately what you see in this image."

    # Remove special tokens.
    text = re.sub(r"<\|.*?\|>", "", text)

    # Remove the "user <prompt> assistant" preamble once, anchored at the start,
    # so role words that appear later in the caption are preserved.
    preamble = r"^\s*user\s*" + re.escape(prompt) + r"\s*assistant\b"
    text = re.sub(preamble, "", text, count=1)

    # Remove the prompt itself in case the model repeated it.
    text = text.replace(prompt, "")
    text = re.sub(r"\n+", " ", text)

    return text.strip()

# ‚úÖ Êé®ÁêÜÂáΩÊï∏ÔºàÂñÆÂºµÂúñÔºâ
def predict_caption(image_bytes, stream=True):
    """Generate a cleaned caption for one image.

    Args:
        image_bytes: Encoded image bytes (decoded with `convert_bytes_to_pil`).
        stream: When True (the default, matching the previous behaviour),
            generated text is also printed through a `TextStreamer`.
            Pass False to keep notebook output quiet.

    Returns:
        The generated caption after `clean_caption` post-processing.
    """
    image = convert_bytes_to_pil(image_bytes)
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to("cuda")

    text_streamer = TextStreamer(tokenizer, skip_prompt=True) if stream else None
    # NOTE(review): temperature/min_p only matter when sampling is enabled, and
    # sampled captions differ between runs -- confirm this is wanted for a
    # submission file.
    outputs = model.generate(
        **inputs,
        streamer=text_streamer,
        max_new_tokens=128,
        use_cache=True,
        temperature=1.5,
        min_p=0.1,
    )

    # `generate` returns prompt tokens followed by the continuation, so decode
    # only the new tokens instead of scrubbing the echoed prompt from a string.
    prompt_length = inputs["input_ids"].shape[1]
    generated_tokens = outputs[:, prompt_length:]
    decoded = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)[0]

    # Remove any leftover special tokens or line breaks.
    return clean_caption(decoded)


# Run inference on all test data
# Caption every test row, in dataset order.
predictions = [
    predict_caption(row["image"]["bytes"])
    for row in tqdm(test_dataset, total=len(test_dataset))
]

# Save the predictions: attach them to the test frame and write idx + caption.
test_df["caption"] = predictions
test_df.to_csv("test_predictions.csv", columns=["idx", "caption"], index=False)

  0%|          | 0/100 [00:00<?, ?it/s]

Christmas Bokeh Lights Background: Yellow Lights Bokeh Background for Christmas and New Year Celebration<|eot_id|>


  1%|          | 1/100 [00:30<49:55, 30.26s/it]

Zion Park - Zion National Park, Utah, United States<|eot_id|>


  2%|‚ñè         | 2/100 [00:35<25:22, 15.54s/it]

Palm And Monstera Frame: Background<|eot_id|>


  3%|‚ñé         | 3/100 [00:39<16:41, 10.32s/it]

Christmas Pattern Background: Background of modern Christmas pattern design<|eot_id|>


  4%|‚ñç         | 4/100 [00:43<12:40,  7.92s/it]

Christmas 2022: Christmas postage stamps<|eot_id|>


  5%|‚ñå         | 5/100 [00:48<10:24,  6.57s/it]

Vintage Style Ikat Watercolor Design Pattern 24: Vintage style watercolour design pattern 24 for scrapbooking and paper crafting<|eot_id|>


  6%|‚ñå         | 6/100 [00:54<10:23,  6.63s/it]

Close Up Silver Bead Work: Close up image of silver bead work<|eot_id|>


  7%|‚ñã         | 7/100 [01:00<09:39,  6.23s/it]

Dots Polka: Polka dots seamless pattern on a turquoise background<|eot_id|>


  8%|‚ñä         | 8/100 [01:05<08:57,  5.84s/it]

Pigeon And Swirls Christmas Card: Graphic of white dove and swirls on a blue background for Christmas and Winter seasons<|eot_id|>


  9%|‚ñâ         | 9/100 [01:12<09:34,  6.31s/it]

Cochem Castle: Cochem Castle (Burg Cochem)<|eot_id|>


 10%|‚ñà         | 10/100 [01:17<08:41,  5.80s/it]

Seagull On Moss Covered Rocks: Seagull On Rocks In Ocean<|eot_id|>


 11%|‚ñà         | 11/100 [01:22<08:19,  5.61s/it]

Kite Flying: Kite Flying In Blue Sky<|eot_id|>


 12%|‚ñà‚ñè        | 12/100 [01:26<07:40,  5.24s/it]

Young Elk Standing In The Snow: Young elk standing in the snow in Rocky Mountains<|eot_id|>


 13%|‚ñà‚ñé        | 13/100 [01:31<07:33,  5.22s/it]

Fitness Teen 02: Young fit woman posing with red dumbbells on a white background<|eot_id|>


 14%|‚ñà‚ñç        | 14/100 [01:37<07:46,  5.42s/it]

Fluffy Dog On Bed: Pomeranian in a dog bed, top-down view<|eot_id|>


 15%|‚ñà‚ñå        | 15/100 [01:43<07:39,  5.41s/it]

Purple Flowers 5: Purple Flowers 5<|eot_id|>


 16%|‚ñà‚ñå        | 16/100 [01:47<07:16,  5.20s/it]

Christmas Wallpaper 7: Christmas wallpaper 7 with white background<|eot_id|>


 17%|‚ñà‚ñã        | 17/100 [01:52<06:55,  5.00s/it]

Love Heart In The Sand: Heart drawn on sand beach<|eot_id|>


 18%|‚ñà‚ñä        | 18/100 [01:56<06:33,  4.80s/it]

Old Town Bridge At Night: Wooden bridge over water, between old warehouses<|eot_id|>


 19%|‚ñà‚ñâ        | 19/100 [02:01<06:35,  4.89s/it]

Alien Head Art: Closeup of the face of an alien in a colorful glow, artistic style and effects<|eot_id|>


 20%|‚ñà‚ñà        | 20/100 [02:08<07:03,  5.30s/it]

Close-up Of Easter Egg: Colorful dyed easter eggs in egg tray<|eot_id|>


 21%|‚ñà‚ñà        | 21/100 [02:13<07:05,  5.38s/it]

Velvet Fabric Textures 16: Velvety textiles textures background<|eot_id|>


 22%|‚ñà‚ñà‚ñè       | 22/100 [02:18<06:55,  5.32s/it]

Ti Plant Leaves: Close-up of ti plant leaves<|eot_id|>


 23%|‚ñà‚ñà‚ñé       | 23/100 [02:23<06:33,  5.11s/it]

Mountain Lion - Mountain Lion peering down from a tree.<|eot_id|>


 24%|‚ñà‚ñà‚ñç       | 24/100 [02:28<06:15,  4.94s/it]

Glitter Texture Paper: Background pattern design<|eot_id|>


 25%|‚ñà‚ñà‚ñå       | 25/100 [02:31<05:48,  4.64s/it]

Snow-Heavy Trees: Snow-covered winter tree branches on an overcast day<|eot_id|>


 26%|‚ñà‚ñà‚ñå       | 26/100 [02:37<06:02,  4.90s/it]

Paper Wasp Nest With Wasps, Vespula, Nest: Paper wasp nest with wasps Vespula<|eot_id|>


 27%|‚ñà‚ñà‚ñã       | 27/100 [02:44<06:37,  5.44s/it]

Valais Alps In Winter: Switzerland: Alps, snow, mountains, tourism, vacation, skiing, winter<|eot_id|>


 28%|‚ñà‚ñà‚ñä       | 28/100 [02:50<06:55,  5.77s/it]

Single Rose: One rose in close-up<|eot_id|>


 29%|‚ñà‚ñà‚ñâ       | 29/100 [02:54<06:11,  5.23s/it]

Christmas Victorian Antique Santa Claus: Victorian-style antique christmas card with santa claus on a penny farthing bike.<|eot_id|>


 30%|‚ñà‚ñà‚ñà       | 30/100 [03:01<06:38,  5.69s/it]

Watercolor Vintage Christmas Background: Watercolor vintage Christmas background with stained glass design. Vintage paper texture and effect. Holiday greeting card or scrapbooking design<|eot_id|>


 31%|‚ñà‚ñà‚ñà       | 31/100 [03:08<07:09,  6.22s/it]

Underwater Man: Underwater man<|eot_id|>


 32%|‚ñà‚ñà‚ñà‚ñè      | 32/100 [03:13<06:19,  5.58s/it]

Broken Building: A very old house ruin, nature grows in again.<|eot_id|>


 33%|‚ñà‚ñà‚ñà‚ñé      | 33/100 [03:17<05:58,  5.35s/it]

Yellow To Red Gradient Background: Gradient background: from yellow to green to red. Abstract 2<|eot_id|>


 34%|‚ñà‚ñà‚ñà‚ñç      | 34/100 [03:23<06:04,  5.53s/it]

Ocean Waves Texture: A close-up texture of ocean waves.<|eot_id|>


 35%|‚ñà‚ñà‚ñà‚ñå      | 35/100 [03:28<05:40,  5.23s/it]

Carpathian Mountains, Autumn: Autumn in the Carpathian mountains of Ukraine.<|eot_id|>


 36%|‚ñà‚ñà‚ñà‚ñå      | 36/100 [03:33<05:36,  5.26s/it]

Night Stars: Night stars in a forest, night time.<|eot_id|>


 37%|‚ñà‚ñà‚ñà‚ñã      | 37/100 [03:38<05:29,  5.23s/it]

Black Background Vintage Texture: Vintage black background<|eot_id|>


 38%|‚ñà‚ñà‚ñà‚ñä      | 38/100 [03:42<04:59,  4.83s/it]

Seamless Abstract Paper 24: Paper: Seamless abstract paper<|eot_id|>


 39%|‚ñà‚ñà‚ñà‚ñâ      | 39/100 [03:47<04:52,  4.79s/it]

Water Reflection In Water, Background<|eot_id|>


 40%|‚ñà‚ñà‚ñà‚ñà      | 40/100 [03:51<04:28,  4.47s/it]

New Year's Background 2018: A 2018 Happy New Year background<|eot_id|>


 41%|‚ñà‚ñà‚ñà‚ñà      | 41/100 [03:56<04:35,  4.67s/it]

Anna Hummingbird, Costa Rica: A hummingbird on a feeder<|eot_id|>


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 42/100 [04:01<04:47,  4.95s/it]

Happy New Year 2022: Two glasses, blue and yellow for the New Year<|eot_id|>


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 43/100 [04:07<04:56,  5.21s/it]

Flaming Starlight Snowflake: Flaming snowflake design art for backgrounds<|eot_id|>


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 44/100 [04:13<04:56,  5.29s/it]

Small Planes In Hangar: Three small aircraft in front of an aircraft hangar<|eot_id|>


 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 45/100 [04:18<04:55,  5.37s/it]

Tis The Season: Patterned paper: Merry Christmas<|eot_id|>


 46%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 46/100 [04:23<04:33,  5.06s/it]

Sailboat On The Ocean: Sailboat on the ocean.<|eot_id|>


 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 47/100 [04:27<04:25,  5.01s/it]

Digital art painting: 2 - Christmas art: 2, 2 Christmas boat<|eot_id|>


 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 48/100 [04:33<04:26,  5.12s/it]

Patterned: 3d tile texture and ornament tile art with the six six-pointed stars graphic and golden red colors design on the white background. Ornamental graphic tile ornamentation with the golden red six-pointed star pattern tile 3d pattern texture wallpaper 2020: 3d tile texture and ornament tile art with the six six-pointed stars graphic and golden red colors design on the white background<|eot_id|>


 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 49/100 [04:50<07:29,  8.82s/it]

Scenic Montana Riverbed: A river runs through the scenic mountains of Glacier National Park.<|eot_id|>


 50%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 50/100 [04:56<06:33,  7.88s/it]

Rudolph The Red-Nosed Reindeer Christmas Stamp: Rudolph the Red Nosed Reindeer Christmas Stamp<|eot_id|>


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 51/100 [05:03<06:14,  7.64s/it]

Vintage Floral Background Paper 23: Vintage background paper with flowers, swirls and leaves<|eot_id|>


 52%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè    | 52/100 [05:08<05:32,  6.93s/it]

Red Valentine: A festive, red and gold digital design for scrapbooking and card making, suitable for any type of holiday<|eot_id|>


 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 53/100 [05:16<05:34,  7.12s/it]

Waterfall In A Forest: Waterfall in tropical forest with bamboo on the left<|eot_id|>


 54%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 54/100 [05:21<05:00,  6.53s/it]

Joyeux Noel: Christmas holiday greeting card, red poinsettia flowers, Christmas ornaments, silver and gold, vintage color. Happy Holidays card, Christmas and new years greetings. Paper and digital watercolor art<|eot_id|>


 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 55/100 [05:31<05:41,  7.58s/it]

Background Art 11: Texture, color and design of red, orange and gold watercolor paint on dark background for scrapbooking and design<|eot_id|>


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 56/100 [05:39<05:38,  7.68s/it]

Snowy Street At Night: Snow covered tree and road illuminated by a street light at night.<|eot_id|>


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 57/100 [05:45<05:08,  7.18s/it]

Christmas Scrap Paper: Paper scrap<|eot_id|>


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 58/100 [05:49<04:16,  6.12s/it]

Love Valentine's Day 2019: Vintage romantic style scrapbook paper for scrapbooking and others<|eot_id|>


 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 59/100 [05:54<04:05,  5.98s/it]

Frog Peppermint Wallpaper Background: Peppermint frog wallpaper background<|eot_id|>


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 60/100 [05:59<03:46,  5.66s/it]

Moray Eel Close-Up: Close-up image of an eel.<|eot_id|>


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 61/100 [06:05<03:36,  5.56s/it]

Hot Air Balloon Ride: Colorful hot air balloons.<|eot_id|>


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 62/100 [06:09<03:21,  5.30s/it]

Cattle Grazing In Pasture: Cattle grazing in a pasture<|eot_id|>


 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 63/100 [06:14<03:15,  5.29s/it]

Floral Abstract Background Design 28: Digital artwork<|eot_id|>


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 64/100 [06:19<02:59,  4.98s/it]

The Red Ringed Commodore Butterfly: Red ringed commodore butterfly, India<|eot_id|>


 65%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 65/100 [06:24<02:57,  5.07s/it]

Abstract Black Plaid Texture Background: Seamless pattern with modern black plaid design<|eot_id|>


 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 66/100 [06:29<02:54,  5.13s/it]

Happy Holidays: Holiday Season Card with decorative bow and mistletoe on a dark background. Seasonal holiday card with decorations<|eot_id|>


 67%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã   | 67/100 [06:36<03:03,  5.57s/it]

Plaid Red And Green Background: Plaid red and green background<|eot_id|>


 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 68/100 [06:41<02:53,  5.41s/it]

Metal Honeycomb Background: Gold and silver honeycomb patterned metal texture<|eot_id|>


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 69/100 [06:46<02:43,  5.28s/it]

Droplet Rose Wallpaper: Water drops on a pink rose with yellow roses in background.<|eot_id|>


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 70/100 [06:52<02:45,  5.51s/it]

Easter Eggs: Easter eggs with blue background<|eot_id|>


 71%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà   | 71/100 [06:56<02:26,  5.07s/it]

Stairs to Mont Blanc: Stairs and observation deck at Mont Blanc, France<|eot_id|>


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 72/100 [07:01<02:22,  5.10s/it]

Old American Flag: Close-up of a crinkled american flag<|eot_id|>


 73%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 73/100 [07:06<02:15,  5.03s/it]

Watercolor: Abstract watercolor design pattern textured colorful background colorful textured modern graphic design vector vintage art pattern colorful illustration<|eot_id|>


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 74/100 [07:12<02:18,  5.34s/it]

Tree By The Pond: Trees, grass and a water hole<|eot_id|>


 75%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 75/100 [07:17<02:11,  5.25s/it]

Valentine's Day Card: Valentine's day card.<|eot_id|>


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 76/100 [07:21<01:59,  4.98s/it]

Happy Birthday Banner: Birthday Banner, colorful and colorful letters Happy Birthday on the flag. Paper art and design<|eot_id|>


 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 77/100 [07:28<02:06,  5.51s/it]

Christmas Artificial Flower Bouquet: Decorative christmas floral bouquet<|eot_id|>


 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 78/100 [07:33<01:54,  5.23s/it]

Christmas Ornament On Snow: Glass Christmas ornament isolated on a black background<|eot_id|>


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 79/100 [07:38<01:47,  5.11s/it]

Abstract 14: Artwork by Frank Merrifield<|eot_id|>


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 80/100 [07:42<01:38,  4.93s/it]

Snowman Doll: Christmas snowman doll<|eot_id|>


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 81/100 [07:46<01:27,  4.61s/it]

Steel Curtain - Ride of Steel Curtain Ride at Night, Sandusky, OH - The steel coaster 'Steel Curtain' at night<|eot_id|>


 82%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè | 82/100 [07:53<01:37,  5.41s/it]

Reindeer: Colorful christmas reindeer, hand-drawn line art graphic vector, rasterized illustration<|eot_id|>


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 83/100 [08:00<01:37,  5.72s/it]

Spring Seamless Floral Pattern: Floral seamless background with many spring flowers, leaves and buds: Leaf, grass, spring flowers: Vector Design For Scrapbooking, Wallpaper, Wrapping Paper<|eot_id|>


 84%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 84/100 [08:09<01:46,  6.65s/it]

Red Lens Closeup: Red glass background<|eot_id|>


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 85/100 [08:12<01:27,  5.82s/it]

Venice Canal And Bridge: Canal in Venice Italy with Rialto Bridge and gondola ride<|eot_id|>


 86%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå | 86/100 [08:19<01:23,  5.97s/it]

Digital Background Abstract Color: Art abstract modern texture colorful digital art decorative artistic design<|eot_id|>


 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 87/100 [08:24<01:14,  5.76s/it]

Abstract Watercolor Art: Abstract watercolor art background for scrapbooking, journaling, and others<|eot_id|>


 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 88/100 [08:30<01:11,  5.92s/it]

Christmas Pudding: Christmas Pudding - Vintage<|eot_id|>


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ | 89/100 [08:35<00:59,  5.45s/it]

Gold Sequin Background: Gold sequins background<|eot_id|>


 90%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 90/100 [08:39<00:50,  5.01s/it]

Squirrel Hiding in Wood: A gray squirrel with a brownish tint lays in wood shavings<|eot_id|>


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 91/100 [08:45<00:47,  5.33s/it]

Snow Field 2: Snowy winter landscape<|eot_id|>


 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 92/100 [08:49<00:39,  4.94s/it]

Background Metal Plaque: Colorful backgrounds with an ornate border<|eot_id|>


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 93/100 [08:54<00:35,  5.00s/it]

Bokeh Background Green: Background texture abstract bokeh effect<|eot_id|>


 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 94/100 [08:58<00:29,  4.87s/it]

Abstract Color Background: Abstract color texture background design graphics<|eot_id|>


 95%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 95/100 [09:03<00:23,  4.67s/it]

Petroleum Refinery Storage Tanks: A collection of storage tanks in an oil refinery<|eot_id|>


 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 96/100 [09:08<00:19,  4.90s/it]

Neon Lights: Close-up image of neon lights at a park in Atlanta, GA.<|eot_id|>


 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 97/100 [09:14<00:15,  5.07s/it]

Christmas Tree Lights: Christmas tree decoration with snowflake shaped lights on white door background<|eot_id|>


 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 98/100 [09:19<00:10,  5.24s/it]

Christmas Tree Window Art: Digital Art Illustration of Christmas Tree Window Art<|eot_id|>


 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 99/100 [09:24<00:05,  5.13s/it]

Holly Paper: Christmas wrapping paper: holly with berries and leaves on cream background<|eot_id|>


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 100/100 [09:30<00:00,  5.70s/it]


In [11]:
import pandas as pd
import json

# Read the predictions CSV written by the inference cell.
# keep_default_na=False with a string dtype keeps an empty caption (or one that
# reads "NA", "null", ...) as text. Otherwise pandas turns it into NaN and
# json.dump writes a bare NaN, which is not valid JSON.
df = pd.read_csv(
    "test_predictions.csv",
    dtype={"caption": str},
    keep_default_na=False,
)

# Convert the rows into the submission structure: one {"idx", "output"} object per row.
results = [
    {"idx": int(idx), "output": caption}
    for idx, caption in zip(df["idx"], df["caption"])
]

# Write the JSON file; ensure_ascii=False keeps non-ASCII caption text readable.
with open("submission.json", "w", encoding="utf-8") as f:
    json.dump(results, f, indent=4, ensure_ascii=False)

print("‚úÖ Â∑≤ÊàêÂäüËΩâÊèõÁÇ∫ submission.json Ê†ºÂºè")


‚úÖ Â∑≤ÊàêÂäüËΩâÊèõÁÇ∫ submission.json Ê†ºÂºè


In [12]:
# Offer submission.json as a browser download when running on Colab.
# Elsewhere the google.colab package is missing, so skip the download instead
# of failing the cell; the file is already in the working directory.
try:
    from google.colab import files
except ImportError:
    print("Not running on Colab; submission.json is in the current working directory.")
else:
    files.download("submission.json")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

模型設定：

Vision 層微調: finetune_vision_layers = True

語言層微調: finetune_language_layers = True

注意力層微調: finetune_attention_modules = True

MLP 層微調: finetune_mlp_modules = True

LoRA: r = 32, lora_alpha = 32, lora_dropout = 0.05

其他: use_rslora = True

訓練批次大小: per_device_train_batch_size = 2

梯度累積步數: gradient_accumulation_steps = 8

學習率: learning_rate = 2e-4

優化器: 使用 "adamw_8bit"

學習率調度: "cosine"

最大步數: max_steps = 30



我的 code 啟用了 finetune_vision_layers = True，不僅對語言層進行微調，還對視覺層進行微調。這對於多模態模型來說是重要的，尤其是在圖像和文本互動的任務中，視覺層的微調能幫助模型更好地理解和生成視覺內容。

LoRA 配置：

使用了更大的 r 和 lora_alpha，以及稍微提高的 lora_dropout。這些變更有助於增加模型的表達能力，可能會提升性能，但同時也增加了過擬合的風險。use_rslora = True（rank-stabilized LoRA，縮放係數由 lora_alpha / r 改為 lora_alpha / √r）這個設置有助於在較大的 r 下保持訓練穩定，也可能有助於降低過擬合，並提高模型在不同數據集上的穩定性。

訓練設定的變化：

增加了 gradient_accumulation_steps = 8，這意味著每次更新模型參數前會先累積 8 個小批次的梯度（等效批次大小為 2 × 8 = 16），從而在不增加顯存需求的情況下達到更大批次的效果。

調整了學習率調度方式，由原本的 linear 調度變為 cosine 調度。這樣的學習率衰減有助於在訓練後期更穩定地收斂，避免訓練過程中的劇烈變化。