In [1]:
!pip -q install -U "pillow<12" transformers peft accelerate bitsandbytes huggingface_hub

import os, gc, torch
os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"

gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

print("✅ installed & ready")

✅ installed & ready


In [2]:
from google.colab import files
import os, zipfile, glob

# upload zip
up = files.upload()  # выбери train_data.zip
zip_files = [fn for fn in up.keys() if fn.lower().endswith(".zip")]
assert zip_files, f"Нужен .zip. Загружено: {list(up.keys())[:5]}"
zip_name = zip_files[0]

# extract
DATA_ROOT = "dataset"
os.makedirs(DATA_ROOT, exist_ok=True)
with zipfile.ZipFile(zip_name, "r") as z:
    z.extractall(DATA_ROOT)

# auto-find metadata/images (на случай вложенных папок)
meta_cands = glob.glob(f"{DATA_ROOT}/**/metadata.jsonl", recursive=True)
img_dir_cands = [p for p in glob.glob(f"{DATA_ROOT}/**/images", recursive=True) if os.path.isdir(p)]

print("metadata candidates:", meta_cands[:5])
print("images dir candidates:", img_dir_cands[:5])

assert meta_cands, "metadata.jsonl не найден в распакованном архиве"
assert img_dir_cands, "папка images не найдена в распакованном архиве"

META_PATH = meta_cands[0]
IMG_DIR = img_dir_cands[0]

print("✅ Using META_PATH:", META_PATH)
print("✅ Using IMG_DIR  :", IMG_DIR)

# quick sanity
import json
with open(META_PATH, "r", encoding="utf-8") as f:
    first = json.loads(f.readline())
print("sample record:", first)
print("sample image exists:", os.path.exists(os.path.join(IMG_DIR, first["file_name"])))

Saving train_data.zip to train_data.zip
metadata candidates: ['dataset/metadata.jsonl']
images dir candidates: ['dataset/images']
✅ Using META_PATH: dataset/metadata.jsonl
✅ Using IMG_DIR  : dataset/images
sample record: {'file_name': '00001.jpg', 'text': 'Ровно по центру экрана игры "Ashes of Creation" стоит спиной персонаж. Нет ни надписи "Fishing", ни NPC c сиреневым ромбом над ним, ни значка взаимодействия "F"'}
sample image exists: True


In [3]:
import torch
from transformers import AutoProcessor, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers.models.lfm2_vl import Lfm2VlForConditionalGeneration

MODEL_ID = "LiquidAI/LFM2-VL-1.6B"

bnb = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)

model = Lfm2VlForConditionalGeneration.from_pretrained(
    MODEL_ID,
    quantization_config=bnb,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

model.config.use_cache = False

model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

lora = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora)
model.print_trainable_parameters()

print("✅ model loaded")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`torch_dtype` is deprecated! Use `dtype` instead!
You are using a model of type lfm2_vl to instantiate a model of type lfm2-vl. This is not supported for all configurations of models and can yield errors.


trainable params: 1,277,952 || all params: 1,586,081,952 || trainable%: 0.0806
✅ model loaded


In [4]:
import os, json
import torch
from PIL import Image
from torch.utils.data import Dataset
from transformers import TrainingArguments, Trainer

FIX_SIZE = 384

class MyDataset(Dataset):
    def __init__(self, jsonl_path, images_dir, processor):
        self.items = [json.loads(l) for l in open(jsonl_path, "r", encoding="utf-8") if l.strip()]
        self.images_dir = images_dir
        self.processor = processor

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        it = self.items[idx]
        img_path = os.path.join(self.images_dir, it["file_name"])
        img = Image.open(img_path).convert("RGB")

        # stability: avoid multi-crop issues
        img = img.resize((FIX_SIZE, FIX_SIZE))

        # Minimal stable prompt.
        # NOTE: For your project, you can replace "Describe the image." with your own instruction.
        prompt = f"<image>\nDescribe the image.\nAnswer: {it['text']}"

        enc = self.processor(text=prompt, images=img, return_tensors="pt")
        enc["labels"] = enc["input_ids"].clone()

        return {k: v.squeeze(0) for k, v in enc.items()}

def collate_fn(features):
    out = {}
    for k in features[0]:
        v0 = features[0][k]
        out[k] = torch.stack([f[k] for f in features]) if isinstance(v0, torch.Tensor) else [f[k] for f in features]
    return out

train_ds = MyDataset(META_PATH, IMG_DIR, processor)

args = TrainingArguments(
    output_dir="out",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_strategy="no",
    report_to="none",
    remove_unused_columns=False,
    dataloader_pin_memory=False,
)

trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_ds,
    data_collator=collate_fn,
)

trainer.train()
print("✅ training done")

Step,Training Loss
10,11.3053
20,10.5172
30,9.7921
40,9.3508


✅ training done


In [5]:
import os, zipfile

FINAL_DIR = "out/final_adapter"
os.makedirs(FINAL_DIR, exist_ok=True)
model.save_pretrained(FINAL_DIR)

zip_name = "lfm2_adapter.zip"
with zipfile.ZipFile(zip_name, "w", zipfile.ZIP_DEFLATED) as z:
    for root, _, files_ in os.walk(FINAL_DIR):
        for f in files_:
            full = os.path.join(root, f)
            z.write(full, os.path.relpath(full, FINAL_DIR))

print("✅ Saved:", zip_name)

try:
    from google.colab import files
    files.download(zip_name)
except Exception:
    pass

✅ Saved: lfm2_adapter.zip


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [6]:
from google.colab import files
import os, glob, shutil, zipfile

up = files.upload()  # загрузи либо probe.7z, либо набор картинок

# 1) если .7z — распакуем
seven = [fn for fn in up.keys() if fn.lower().endswith(".7z")]
if seven:
    arc = seven[0]
    print("✅ 7z uploaded:", arc)

    !apt-get -qq update
    !apt-get -qq install -y p7zip-full

    os.makedirs("probe", exist_ok=True)
    !7z x -y "{arc}" -oprobe > /content/probe_extract.log

    print("✅ extracted to ./probe (log: /content/probe_extract.log)")

else:
    # 2) иначе считаем, что загрузили картинки
    os.makedirs("probe", exist_ok=True)
    moved = 0
    for fn in list(up.keys()):
        if fn.lower().endswith((".jpg",".jpeg",".png",".webp",".bmp",".tif",".tiff")):
            shutil.move(fn, os.path.join("probe", fn))
            moved += 1
    print("✅ moved images to ./probe:", moved)

# list found images
exts = (".jpg",".jpeg",".png",".webp",".bmp",".tif",".tiff")
paths = sorted([p for p in glob.glob("probe/**/*", recursive=True) if p.lower().endswith(exts)])
print("probe images found:", len(paths))
print("examples:", paths[:20])

assert len(paths) > 0, "В ./probe нет картинок. Проверь, что загрузил правильный архив/файлы."

Saving 00056.jpg to 00056.jpg
Saving 00057.jpg to 00057.jpg
Saving 00058.jpg to 00058.jpg
Saving 00059.jpg to 00059.jpg
Saving 00060.jpg to 00060.jpg
Saving 00061.jpg to 00061.jpg
Saving 00062.jpg to 00062.jpg
Saving 00063.jpg to 00063.jpg
Saving 00064.jpg to 00064.jpg
Saving 00065.jpg to 00065.jpg
Saving 00066.jpg to 00066.jpg
Saving 00067.jpg to 00067.jpg
Saving 00068.jpg to 00068.jpg
Saving 00069.jpg to 00069.jpg
Saving 00070.jpg to 00070.jpg
Saving 00071.jpg to 00071.jpg
Saving 00072.jpg to 00072.jpg
Saving 00073.jpg to 00073.jpg
Saving 00074.jpg to 00074.jpg
Saving 00075.jpg to 00075.jpg
Saving 00076.jpg to 00076.jpg
Saving 00077.jpg to 00077.jpg
Saving 00078.jpg to 00078.jpg
Saving 00079.jpg to 00079.jpg
Saving 00080.jpg to 00080.jpg
Saving 00081.jpg to 00081.jpg
Saving 00082.jpg to 00082.jpg
Saving 00083.jpg to 00083.jpg
Saving 00084.jpg to 00084.jpg
Saving 00085.jpg to 00085.jpg
Saving 00086.jpg to 00086.jpg
Saving 00087.jpg to 00087.jpg
Saving 00088.jpg to 00088.jpg
Saving 000

In [7]:
import glob, os, json
import torch
from PIL import Image

FIX_SIZE = 384
exts = (".jpg",".jpeg",".png",".webp",".bmp",".tif",".tiff")
paths = sorted([p for p in glob.glob("probe/**/*", recursive=True) if p.lower().endswith(exts)])

print("found probe images:", len(paths))

@torch.no_grad()
def infer_one(image_path: str, question: str):
    img = Image.open(image_path).convert("RGB").resize((FIX_SIZE, FIX_SIZE))

    messages = [
        {"role": "system", "content": "Answer briefly."},
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": question}
        ]},
    ]

    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    enc = processor(text=prompt, images=img, return_tensors="pt")
    for k, v in enc.items():
        if isinstance(v, torch.Tensor):
            enc[k] = v.to(model.device)

    out = model.generate(**enc, max_new_tokens=128, do_sample=False)
    gen = out[0][enc["input_ids"].shape[-1]:]
    return processor.decode(gen, skip_special_tokens=True).strip()

QUESTION = "Describe the image in one sentence."

out_path = "preds.jsonl"
with open(out_path, "w", encoding="utf-8") as f:
    for p in paths:
        ans = infer_one(p, QUESTION)
        rec = {"file_name": os.path.relpath(p, "probe"), "answer": ans}
        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        print(rec["file_name"], "=>", ans[:150])

print("✅ saved:", out_path)

try:
    from google.colab import files
    files.download(out_path)
except Exception:
    pass

found probe images: 67
00056.jpg => A list of various topics and studies related to the use of social media in the field of biology.
00057.jpg => A list of various topics and studies related to the use of social media in the field of biology.
00058.jpg => A person is riding a horse.
00059.jpg => A list of various topics and studies related to the use of social media in the field of biology.
00060.jpg => A person is standing in a field with a large banner that reads "Stop the Spread of COVID-19" in the background.
00061.jpg => A person is standing in a field with a large banner that reads "Stop the Spread of COVID-19".
00062.jpg => A person is standing in a field with a large banner that reads "Stop the Spread of Misinformation".
00063.jpg => A list of various topics and studies related to the use of social media in the field of biology.
00064.jpg => A list of various topics and studies related to the use of social media in the field of biology.
00065.jpg => A list of various topics and

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>