In [None]:
from google.colab import files
# Prompt for a file upload into the runtime; kaggle.json is expected here.
files.upload()  # upload the kaggle.json file here

# Copy the uploaded credentials into ~/.kaggle (presumably where the kaggle CLI
# used in the next cell reads them) and restrict the file to owner read/write.
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json


Saving kaggle.json to kaggle.json


In [None]:
import zipfile

!kaggle competitions download -c obss-intern-competition-2025


zipfile.ZipFile("obss-intern-competition-2025.zip").extractall("caption_data")


Downloading obss-intern-competition-2025.zip to /content
 93% 924M/995M [00:00<00:00, 1.34GB/s]
100% 995M/995M [00:00<00:00, 1.35GB/s]


In [None]:
# Install the transformers package (unpinned: whatever version pip resolves at run time).
!pip install transformers



In [None]:
# To reset all memory: send signal 9 to this kernel's own process.
# Everything defined in earlier cells is lost once this runs.
import os
os.kill(os.getpid(), 9)


In [None]:
# Interactive Hugging Face login; the CLI prompts for an access token.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) y
Token is valid (permission: fineGrained).
The token `obss2` has been saved to /root/.cache/huggingface/stored_tokens
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authen

In [None]:
# ✅ Install the required libraries
!pip install -q transformers accelerate datasets

import os
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from torch.optim import AdamW


# Settings
model_id = "google/paligemma-3b-pt-224"

# Use the GPU with bfloat16 when CUDA is present, otherwise CPU with float32.
if torch.cuda.is_available():
    device, dtype = torch.device("cuda"), torch.bfloat16
else:
    device, dtype = torch.device("cpu"), torch.float32

# Optimisation hyper-parameters
batch_size, learning_rate, num_epochs = 2, 5e-6, 4

# Dataset class
class MyCaptionDataset(Dataset):
    """Image/caption pairs described by a CSV file.

    The CSV must provide an ``image_id`` column and a ``caption`` column.
    Each image is looked up as ``<image_dir>/<image_id>.jpg``.

    Args:
        csv_path: Path of the CSV file listing image ids and captions.
        image_dir: Directory holding the ``.jpg`` files. Defaults to the
            location the notebook extracts the training images to.
        image_size: ``(width, height)`` every image is resized to.
    """

    def __init__(self, csv_path, image_dir="/content/caption_data/train/train", image_size=(224, 224)):
        df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.image_size = image_size
        self.image_paths = df["image_id"].apply(lambda x: f"{image_dir}/{x}.jpg").tolist()
        self.captions = df["caption"].tolist()

    def __getitem__(self, idx):
        """Return ``{"image": <RGB PIL image>, "caption": <str>}`` for sample ``idx``."""
        # Open inside a context manager so the file handle is released right
        # away; convert()/resize() return new in-memory images that stay
        # valid after the file is closed.
        with Image.open(self.image_paths[idx]) as source_image:
            image = source_image.convert("RGB").resize(self.image_size)
        return {
            "image": image,
            "caption": self.captions[idx]
        }

    def __len__(self):
        """Number of samples (rows of the CSV)."""
        return len(self.image_paths)

# Load the dataset and split it 80/20 into training and validation subsets.
full_dataset = MyCaptionDataset("/content/caption_data/train.csv")
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
# Seed the split with a dedicated generator: without it every execution draws
# a different partition, so the validation subset is not a stable hold-out set
# (re-running the split would mix previously held-out images into training).
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Processor and model
processor = AutoProcessor.from_pretrained(model_id)
model = PaliGemmaForConditionalGeneration.from_pretrained(model_id).to(device=device, dtype=dtype)

def collate_fn(batch):
    """Encode a list of dataset samples into one processor batch on `device`.

    Each sample is a dict with an "image" and a "caption" entry. Every sample
    is paired with the same fixed prompt, and the captions are handed to the
    processor as `suffix` (presumably the training target - confirm against
    the processor documentation).
    """
    images, suffixes = [], []
    for sample in batch:
        images.append(sample["image"])
        suffixes.append(sample["caption"])

    # One identical prompt per sample (marked as an important fix by the author).
    prompts = ["<image> Caption the image."] * len(batch)

    encoded = processor(
        images=images,
        text=prompts,
        suffix=suffixes,
        return_tensors="pt",
        padding="longest",
    )
    return encoded.to(device)


# Data loaders: only the training split is shuffled.
loader_kwargs = dict(batch_size=batch_size, collate_fn=collate_fn)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

# Optimizer over all model parameters
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Training loop: one optimizer step per batch, mean batch loss printed per epoch.
model.train()
for epoch in range(num_epochs):
    total_loss = 0
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")
    for batch in progress:
        loss = model(**batch).loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss/len(train_loader):.4f}")

# Save the model and the processor side by side in the same folder.
for artifact in (model, processor):
    artifact.save_pretrained("paligemma-4e")
print("✅ Eğitim tamamlandı ve model kaydedildi.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Epoch 1/4: 100%|██████████| 8547/8547 [38:20<00:00,  3.72it/s]


Epoch 1 Loss: 1.4125


Epoch 2/4: 100%|██████████| 8547/8547 [38:18<00:00,  3.72it/s]


Epoch 2 Loss: 1.2966


Epoch 3/4: 100%|██████████| 8547/8547 [38:12<00:00,  3.73it/s]


Epoch 3 Loss: 1.2741


Epoch 4/4: 100%|██████████| 8547/8547 [38:00<00:00,  3.75it/s]


Epoch 4 Loss: 1.2635
✅ Eğitim tamamlandı ve model kaydedildi.


In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Copy the model folder to Drive
!cp -r /content/paligemma-4e /content/drive/MyDrive/paligemma-4e
# NOTE(review): the message below is printed unconditionally; nothing checks
# whether the cp command above succeeded.
print("✅ Model başarıyla Drive'a kopyalandı.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model başarıyla Drive'a kopyalandı.


2 epoch ekleme

In [None]:
# !pip install -q transformers accelerate datasets

import os
import pandas as pd
from PIL import Image
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader, random_split
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
from torch.optim import AdamW

# Environment settings: GPU with bfloat16 when CUDA is available, else CPU with float32.
_cuda_available = torch.cuda.is_available()
device = torch.device("cuda") if _cuda_available else torch.device("cpu")
dtype = torch.bfloat16 if _cuda_available else torch.float32
batch_size = 2
learning_rate = 5e-6
additional_epochs = 2  # extra epochs to run on top of the loaded checkpoint

# Dataset class
class MyCaptionDataset(Dataset):
    """Image/caption pairs described by a CSV file.

    The CSV must provide an ``image_id`` column and a ``caption`` column.
    Each image is looked up as ``<image_dir>/<image_id>.jpg``.

    Args:
        csv_path: Path of the CSV file listing image ids and captions.
        image_dir: Directory holding the ``.jpg`` files. Defaults to the
            location the notebook extracts the training images to.
        image_size: ``(width, height)`` every image is resized to.
    """

    def __init__(self, csv_path, image_dir="/content/caption_data/train/train", image_size=(224, 224)):
        df = pd.read_csv(csv_path)
        self.image_dir = image_dir
        self.image_size = image_size
        self.image_paths = df["image_id"].apply(lambda x: f"{image_dir}/{x}.jpg").tolist()
        self.captions = df["caption"].tolist()

    def __getitem__(self, idx):
        """Return ``{"image": <RGB PIL image>, "caption": <str>}`` for sample ``idx``."""
        # Open inside a context manager so the file handle is released right
        # away; convert()/resize() return new in-memory images that stay
        # valid after the file is closed.
        with Image.open(self.image_paths[idx]) as source_image:
            image = source_image.convert("RGB").resize(self.image_size)
        return {
            "image": image,
            "caption": self.captions[idx]
        }

    def __len__(self):
        """Number of samples (rows of the CSV)."""
        return len(self.image_paths)

# Load the dataset and split it 80/20 into training and validation subsets.
full_dataset = MyCaptionDataset("/content/caption_data/train.csv")
train_size = int(0.8 * len(full_dataset))
val_size = len(full_dataset) - train_size
# Seed the split with a dedicated generator. This cell resumes training from a
# saved checkpoint; an unseeded split would draw a new partition here, so
# images held out during the earlier run could end up in this training subset.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)

# Load the previously trained model and processor
model_path = "/content/drive/MyDrive/paligemma-10e"  # or "/content/drive/MyDrive/paligemma-4e"
processor = AutoProcessor.from_pretrained(model_path)
model = PaliGemmaForConditionalGeneration.from_pretrained(model_path).to(device=device, dtype=dtype)

# Collate function
def collate_fn(batch):
    """Encode a list of dataset samples into one processor batch on `device`.

    Each sample is a dict with an "image" and a "caption" entry. Every sample
    is paired with the same fixed prompt, and the captions are handed to the
    processor as `suffix` (presumably the training target - confirm against
    the processor documentation).
    """
    images, suffixes = [], []
    for sample in batch:
        images.append(sample["image"])
        suffixes.append(sample["caption"])

    # One identical prompt per sample.
    prompts = ["<image> Caption the image."] * len(batch)

    encoded = processor(
        images=images,
        text=prompts,
        suffix=suffixes,
        return_tensors="pt",
        padding="longest",
    )
    return encoded.to(device)

# DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# Optimizer (freshly created: no optimizer state is restored from the earlier run)
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Number of epochs the loaded checkpoint has already been trained for.
# NOTE(review): presumably matches the checkpoint being resumed - keep it in
# sync with the checkpoint path when switching checkpoints.
start_epoch = 10
total_epochs = start_epoch + additional_epochs
output_dir = f"paligemma-{total_epochs}e"

model.train()
for epoch in range(start_epoch, total_epochs):
    total_loss = 0
    # Show the real final epoch in the label (it used to read "Epoch 11/10").
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{total_epochs}"):
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()
    print(f"Epoch {epoch+1} Loss: {total_loss / len(train_loader):.4f}")

# Save the updated model
model.save_pretrained(output_dir)
processor.save_pretrained(output_dir)
# Report the actual epoch count and folder (the message used to claim "6 epoch").
print(f"✅ {total_epochs} epoch tamamlandı ve model '{output_dir}' olarak kaydedildi.")
# Not used for the submission; trained up to 12 epochs purely as an experiment.

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Epoch 11/10: 100%|██████████| 8547/8547 [37:42<00:00,  3.78it/s]


Epoch 11 Loss: 1.1509


Epoch 12/10: 100%|██████████| 8547/8547 [37:41<00:00,  3.78it/s]


Epoch 12 Loss: 1.1227
✅ 6 epoch tamamlandı ve model 'paligemma-12e' olarak kaydedildi.


In [None]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Copy the model folder to Drive
# NOTE(review): no visible cell saves a "paligemma-10e" folder (the cells shown
# save "paligemma-4e" and "paligemma-12e") - confirm where it is produced.
!cp -r /content/paligemma-10e /content/drive/MyDrive/paligemma-10e
# NOTE(review): printed unconditionally; the result of cp is not checked.
print("✅ Model başarıyla Drive'a kopyalandı.")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
✅ Model başarıyla Drive'a kopyalandı.


In [None]:
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
import os

# Load the fine-tuned model and its processor
# (another checkpoint path noted by the author: /content/drive/MyDrive/paligemma-ft2)
model_path = "/content/drive/MyDrive/paligemma-4e"  # for V1
model = PaliGemmaForConditionalGeneration.from_pretrained(model_path).to("cuda")
processor = AutoProcessor.from_pretrained(model_path)

# Load the test data
test_df = pd.read_csv("/content/caption_data/test.csv")  # test.csv must contain image_id
test_dir = "/content/caption_data/test/test"

# Prompt is loop-invariant, so define it once.
# Alternative prompt tried earlier:
#   "<image> Describe this image in detail, focusing on objects, setting, and visible actions:"
prompt = "<image> Caption the image."

# Generate the captions
captions = []

for image_id in tqdm(test_df["image_id"]):
    image_path = os.path.join(test_dir, f"{image_id}.jpg")
    # Close the file handle as soon as the in-memory RGB copy exists.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB").resize((224, 224))

    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda")
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=64)

    # generate() returns the input tokens followed by the continuation. Decode
    # only the continuation: removing the prompt afterwards with
    # str.replace(prompt, "") did not work, because the decoded text does not
    # contain the literal prompt string (captions kept a leading
    # "Caption the image." that had to be cleaned up in a later cell).
    generated_tokens = output[:, prompt_length:]
    caption = processor.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()

    captions.append(caption)

# Save as CSV
submission = pd.DataFrame({"image_id": test_df["image_id"], "caption": captions})
submission.to_csv("submission_4e.csv", index=False)
print("✅ Submission oluşturuldu: submission_4e.csv")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
100%|██████████| 3771/3771 [40:45<00:00,  1.54it/s]

✅ Submission oluşturuldu: submission_4e.csv





In [None]:
import pandas as pd
from tqdm import tqdm
from PIL import Image
import torch
from transformers import AutoProcessor, PaliGemmaForConditionalGeneration
import os

# Load the fine-tuned model and its processor
# (another checkpoint path noted by the author: /content/drive/MyDrive/paligemma-ft2)
model_path = "/content/drive/MyDrive/paligemma-10e"  # for V1
model = PaliGemmaForConditionalGeneration.from_pretrained(model_path).to("cuda")
processor = AutoProcessor.from_pretrained(model_path)

# Load the test data
test_df = pd.read_csv("/content/caption_data/test.csv")  # test.csv must contain image_id
test_dir = "/content/caption_data/test/test"

# Prompt is loop-invariant, so define it once.
# Alternative prompt tried earlier:
#   "<image> Describe this image in detail, focusing on objects, setting, and visible actions:"
prompt = "<image> Caption the image."

# Generate the captions
captions = []

for image_id in tqdm(test_df["image_id"]):
    image_path = os.path.join(test_dir, f"{image_id}.jpg")
    # Close the file handle as soon as the in-memory RGB copy exists.
    with Image.open(image_path) as source_image:
        image = source_image.convert("RGB").resize((224, 224))

    inputs = processor(images=image, text=prompt, return_tensors="pt").to("cuda")
    prompt_length = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=64)

    # generate() returns the input tokens followed by the continuation. Decode
    # only the continuation: removing the prompt afterwards with
    # str.replace(prompt, "") did not work, because the decoded text does not
    # contain the literal prompt string (captions kept a leading
    # "Caption the image." that had to be cleaned up in a later cell).
    generated_tokens = output[:, prompt_length:]
    caption = processor.batch_decode(generated_tokens, skip_special_tokens=True)[0].strip()

    captions.append(caption)

# Save as CSV
submission = pd.DataFrame({"image_id": test_df["image_id"], "caption": captions})
submission.to_csv("submission_10e.csv", index=False)
print("✅ Submission oluşturuldu: submission_10e.csv")


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 3771/3771 [40:40<00:00,  1.55it/s]

✅ Submission oluşturuldu: submission_10e.csv





In [None]:
import pandas as pd

# Load the faulty submission file (captions still begin with the prompt text)
df = pd.read_csv("/content/submission_10e.csv")

# Strip the leading prompt from each caption. The pattern is a raw string with
# the trailing dot escaped, so it matches the literal "Caption the image."
# (case-insensitively) rather than "Caption the image" plus any character.
# An optional ":" or "-" separator and surrounding whitespace are removed too.
prompt_prefix = r"(?i)^Caption the image\.\s*[:\-]?\s*"
df["caption"] = df["caption"].str.replace(prompt_prefix, "", regex=True).str.strip()

# Save as a new file
df.to_csv("submission_cleaned.csv", index=False)
# Name the file that was actually written (the message used to say submission.csv).
print("✅ Prompt temizlenmiş yeni submission_cleaned.csv hazırlandı.")


✅ Prompt temizlenmiş yeni submission.csv hazırlandı.


In [None]:
from google.colab import files
# Download the cleaned submission file from the Colab runtime.
files.download("submission_cleaned.csv")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>