In [None]:
import os
import random
import glob
import shutil


import json
import yaml

from collections import defaultdict
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

from lxml import etree as ET

import pandas as pd
import matplotlib.pyplot as plt

import cv2

from sklearn.model_selection import train_test_split
from transformers import AutoImageProcessor, AutoModelForObjectDetection
import torch
from PIL import Image, ImageDraw, ImageFont


2025-05-27 16:52:45.590832: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748364765.821328      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748364765.889183      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
from transformers import logging as hf_logging

# Raise the `transformers` logger threshold to ERROR so that messages below
# that level do not flood the cell outputs.
hf_logging.set_verbosity_error()


In [3]:
# The task has a single category, "logo", mapped to class index 0.
classname2idx = {"logo": 0}
# Reverse lookup (class index -> class name), derived from the forward map so
# the two dictionaries cannot drift apart.
idx2classname = {index: name for name, index in classname2idx.items()}


In [None]:
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

class LogoDataset(Dataset):
    """Map-style dataset that turns raw samples into image-processor outputs.

    Every element of ``dataset`` must be a mapping with a ``"file_path"`` entry
    (path of the image on disk) and an ``"annotation"`` entry, which is passed
    unchanged to ``image_processor``.

    Args:
        dataset: Indexable collection of samples that supports ``len()``.
        image_processor: Callable invoked as
            ``image_processor(images=..., annotations=..., return_tensors="pt")``
            and returning a mapping whose values carry a leading batch axis.
        transform: Optional callable kept on the instance. NOTE(review): it is
            stored but never applied in this class.
    """

    def __init__(self, dataset, image_processor, transform=None):
        self.dataset = dataset
        self.image_processor = image_processor
        self.transform = transform

    def __len__(self):
        """Return the number of samples in the wrapped collection."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Load sample ``idx`` from disk and run it through the image processor.

        Returns:
            dict: the processor output with the leading batch axis removed from
            every value.
        """
        sample = self.dataset[idx]

        # The context manager closes the underlying file handle as soon as the
        # pixels have been copied into the numpy array; previously the handle
        # was left open until garbage collection, once per sample.
        with Image.open(sample["file_path"]) as image_file:
            # Convert image to RGB numpy array
            image = np.array(image_file.convert("RGB"))

        result = self.image_processor(
            images=image, annotations=sample["annotation"], return_tensors="pt"
        )

        # The image processor adds a batch dimension; drop it again.
        return {k: v[0] for k, v in result.items()}


In [5]:
import pickle

train_path = "/kaggle/input/dataset-logo/train_dataset.pkl"
valid_path = "/kaggle/input/dataset-logo/validation_dataset.pkl"


def _read_pickle(path):
    """Deserialize and return the object stored in the pickle file at ``path``."""
    # SECURITY: unpickling can execute arbitrary code; only load files whose
    # origin is trusted.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Training and validation datasets used by the training cells.
train = _read_pickle(train_path)
valid = _read_pickle(valid_path)

  check_for_updates()


In [6]:
# Peek at the first training sample to check what the unpickled dataset yields.
train[0]

{'pixel_values': tensor([[[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],
 
         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]],
 
         [[1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          ...,
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.],
          [1., 1., 1.,  ..., 1., 1., 1.]]]),
 'labels': {'size': tensor([480, 480]), 'image_id': tensor([0]), 'class_labels': tensor([0]), 'boxes': tensor([[0.5041, 0.5519, 0.4888, 0.2154]]), 'area': tensor([24258.7656]

### Train the model on 5% of the data — this time on the 2nd subset

In [12]:
from transformers import TrainingArguments, Trainer
import torch

# Release cached GPU memory that is no longer in use before setting up the run.
torch.cuda.empty_cache()

# Baseline hyper-parameters. Later cells read the per-device batch sizes,
# num_train_epochs, learning_rate and logging_steps from this object.
_optimisation_kwargs = {
    "num_train_epochs": 5,
    "learning_rate": 5e-5,
    "warmup_steps": 300,
    "max_grad_norm": 0.1,
    "per_device_train_batch_size": 8,
    "fp16": True,
}

# Evaluate and checkpoint once per epoch, keeping the checkpoint with the best
# "eval_map" (higher is better).
_evaluation_kwargs = {
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    "save_total_limit": 2,
    "metric_for_best_model": "eval_map",
    "greater_is_better": True,
    "load_best_model_at_end": True,
    "eval_do_concat_batches": False,
}

# Data-loading behaviour.
_data_kwargs = {
    "dataloader_num_workers": 2,
    "remove_unused_columns": False,
}

training_args = TrainingArguments(
    output_dir="rtdetr-v2-finetune-on-10-percent",
    **_optimisation_kwargs,
    **_evaluation_kwargs,
    **_data_kwargs,
)


In [7]:
import torch

def collate_fn(batch):
    """Merge per-sample dicts into a single batch dict.

    Args:
        batch: sequence of mappings, each with a ``"pixel_values"`` tensor and
            a ``"labels"`` entry.

    Returns:
        dict: ``"pixel_values"`` stacked along a new leading axis, and
        ``"labels"`` as a plain list in sample order.
    """
    stacked_pixels = torch.stack([sample["pixel_values"] for sample in batch])
    label_list = [sample["labels"] for sample in batch]
    return {"pixel_values": stacked_pixels, "labels": label_list}

### Load the fine-tuned model that was trained on the first 5% of the data

In [8]:
import numpy as np
from transformers import AutoImageProcessor
# Checkpoint directory the image processor configuration is read from.
checkpoint = "/kaggle/input/rtdter/pytorch/default/1"
# Side length, in pixels, that every image is resized to (square output).
image_size = 480

image_processor = AutoImageProcessor.from_pretrained(
    checkpoint,
    use_fast=True,
    do_resize=True,
    size=dict(width=image_size, height=image_size),
)

In [9]:
import numpy as np
from dataclasses import dataclass
from transformers.image_transforms import center_to_corners_format
from torchmetrics.detection.mean_ap import MeanAveragePrecision


@dataclass
class ModelOutput:
    """Minimal container exposing ``logits`` and ``pred_boxes`` attributes.

    ``MAPEvaluator.collect_predictions`` builds one instance per evaluation
    batch and passes it to ``image_processor.post_process_object_detection``.
    """

    # Tensor built from element 1 of a prediction batch.
    logits: torch.Tensor
    # Tensor built from element 2 of the same prediction batch.
    pred_boxes: torch.Tensor


class MAPEvaluator:
    """Callable that computes mean-average-precision metrics for ``Trainer``.

    An instance is meant to be passed as ``compute_metrics``. It converts the
    collected targets and raw predictions into absolute ``xyxy`` boxes, feeds
    them to ``MeanAveragePrecision`` and returns a flat ``{name: float}`` dict.

    Args:
        image_processor: object providing ``post_process_object_detection``.
        threshold: score threshold forwarded to the post-processing step.
        id2label: optional mapping from class id to class name, used to name
            the per-class metrics (``map_<name>``, ``mar_100_<name>``).
    """

    def __init__(self, image_processor, threshold=0.00, id2label=None):
        self.image_processor = image_processor
        self.threshold = threshold
        self.id2label = id2label

    def collect_image_sizes(self, targets):
        """Collect image sizes across the dataset as list of tensors with shape [batch_size, 2]."""
        image_sizes = []
        for batch in targets:
            batch_image_sizes = torch.tensor(np.array([x["size"] for x in batch]))
            image_sizes.append(batch_image_sizes)
        return image_sizes

    def collect_targets(self, targets, image_sizes):
        """Convert every target to absolute ``xyxy`` boxes plus class labels.

        Returns:
            list[dict]: one ``{"boxes", "labels"}`` dict per image, flattened
            across batches.
        """
        post_processed_targets = []
        for target_batch, image_size_batch in zip(targets, image_sizes):
            for target, size in zip(target_batch, image_size_batch):

                # here we have "yolo" format (x_center, y_center, width, height) in relative coordinates 0..1
                # and we need to convert it to "pascal" format (x_min, y_min, x_max, y_max) in absolute coordinates
                height, width = size
                boxes = torch.tensor(target["boxes"])
                boxes = center_to_corners_format(boxes)
                boxes = boxes * torch.tensor([[width, height, width, height]])

                labels = torch.tensor(target["class_labels"])
                post_processed_targets.append({"boxes": boxes, "labels": labels})
        return post_processed_targets

    def collect_predictions(self, predictions, image_sizes):
        """Post-process raw model outputs into per-image detection dicts.

        Returns:
            list: the concatenated results of
            ``image_processor.post_process_object_detection`` for all batches.
        """
        post_processed_predictions = []
        for batch, target_sizes in zip(predictions, image_sizes):
            # Elements 1 and 2 of each prediction batch are used as logits and
            # boxes; element 0 is presumably the loss - TODO confirm.
            batch_logits, batch_boxes = batch[1], batch[2]
            output = ModelOutput(logits=torch.tensor(batch_logits), pred_boxes=torch.tensor(batch_boxes))
            post_processed_output = self.image_processor.post_process_object_detection(
                output, threshold=self.threshold, target_sizes=target_sizes
            )
            post_processed_predictions.extend(post_processed_output)
        return post_processed_predictions

    def _flatten_metrics(self, metrics):
        """Turn a raw metric dict into a flat dict of rounded Python floats.

        The ``classes`` / ``map_per_class`` / ``mar_100_per_class`` entries are
        replaced by one ``map_<name>`` and one ``mar_100_<name>`` entry per
        class, where ``<name>`` comes from ``id2label`` when available and is
        the class id otherwise. All values are rounded to 4 decimals.
        """
        metrics = dict(metrics)  # work on a copy; leave the caller's dict intact
        classes = metrics.pop("classes", None)
        map_per_class = metrics.pop("map_per_class", None)
        mar_100_per_class = metrics.pop("mar_100_per_class", None)

        if classes is not None and map_per_class is not None and mar_100_per_class is not None:
            # reshape(-1) makes 0-dim tensors (single-class case) iterable.
            class_ids = torch.as_tensor(classes).reshape(-1)
            per_class_map = torch.as_tensor(map_per_class).reshape(-1)
            per_class_mar = torch.as_tensor(mar_100_per_class).reshape(-1)
            for class_id, class_map, class_mar in zip(class_ids, per_class_map, per_class_mar):
                class_id = int(class_id.item())
                if self.id2label is not None:
                    class_name = self.id2label.get(class_id, class_id)
                else:
                    class_name = class_id
                metrics[f"map_{class_name}"] = class_map
                metrics[f"mar_100_{class_name}"] = class_mar

        return {name: round(float(value), 4) for name, value in metrics.items()}

    @torch.no_grad()
    def __call__(self, evaluation_results):
        """Compute the metrics for one evaluation run.

        Args:
            evaluation_results: object with ``predictions`` and ``label_ids``
                attributes, each a list of batches.

        Returns:
            dict: metric name -> float, see ``_flatten_metrics``.
        """
        # 1) Pre- and post-process predictions and targets
        predictions, targets = evaluation_results.predictions, evaluation_results.label_ids

        image_sizes = self.collect_image_sizes(targets)
        post_processed_targets = self.collect_targets(targets, image_sizes)
        post_processed_predictions = self.collect_predictions(predictions, image_sizes)

        # 2) Compute the metrics
        evaluator = MeanAveragePrecision(box_format="xyxy", class_metrics=True)
        evaluator.warn_on_many_detections = False
        evaluator.update(post_processed_predictions, post_processed_targets)

        # 3) Flatten per-class entries and convert tensors to plain floats
        return self._flatten_metrics(evaluator.compute())


# Metric callback handed to the Trainer instances via `compute_metrics`.
# Detections scoring below 0.01 are dropped during post-processing.
eval_compute_metrics_fn = MAPEvaluator(image_processor=image_processor, threshold=0.01, id2label=idx2classname)

In [None]:
import os
import warnings

# Weights & Biases takes its credential from the WANDB_API_KEY environment
# variable, which must be provided from outside the notebook (never hardcoded).
# Assigning the variable to itself was a no-op when it is set and raised
# TypeError when it is not (os.getenv returns None, os.environ only accepts
# str), so only its presence is checked here.
if os.getenv("WANDB_API_KEY") is None:
    warnings.warn(
        "WANDB_API_KEY is not set; Weights & Biases logging may prompt for "
        "credentials or fail."
    )

In [14]:
import os
import zipfile
import torch
from torch.utils.data import Subset
from transformers import (
    AutoModelForImageClassification,     # or your specific class
    AutoModelForObjectDetection,         # the class this cell actually instantiates
    TrainingArguments,
    Trainer,
)

# --- CONFIG ---
FINETUNED_CHECKPOINT = "/kaggle/input/rtdter/pytorch/default/1"
OUTPUT_BASE_DIR      = "./"
SUBSET_FRAC          = 0.05       # 5%
MAX_COVER_FRAC       = 0.50       # up to 50% total
SEED                 = 42

# --- Datasets (your existing train/valid) ---
# train = ...
# valid = ...

# compute sizes
train_size = len(train)
valid_size = len(valid)
subset_train_size = int(SUBSET_FRAC * train_size)
subset_valid_size = int(SUBSET_FRAC * valid_size)
# round() rather than int(): float division can land just below the intended
# integer (e.g. 0.3 / 0.1 == 2.9999999999999996) and int() would truncate it.
num_subsets = round(MAX_COVER_FRAC / SUBSET_FRAC)  # == 10 for 50%

for i in range(num_subsets):
    if i==0:
        # The checkpoint loaded below has already been trained on the first chunk.
        print("skip 1st subset")
        continue
    start_train = i * subset_train_size
    end_train   = start_train + subset_train_size
    start_val   = i * subset_valid_size
    end_val     = start_val + subset_valid_size

    # Contiguous, non-overlapping index windows into train / valid.
    train_subset = Subset(train, list(range(start_train, end_train)))
    valid_subset = Subset(valid, list(range(start_val, end_val)))

    # Every chunk starts again from the same fine-tuned checkpoint, so the
    # per-chunk models are independent of each other.
    # NOTE(review): chain the checkpoints if cumulative training is intended.
    model = AutoModelForObjectDetection.from_pretrained(FINETUNED_CHECKPOINT)
    model.to("cuda")

    # Per-chunk arguments get their own name so the base `training_args` is not
    # overwritten and the cell can be re-run safely.
    # NOTE(review): only the fields below are copied from `training_args`;
    # max_grad_norm, warmup_steps, fp16 etc. fall back to library defaults.
    subset_args = TrainingArguments(
        output_dir=os.path.join(OUTPUT_BASE_DIR, f"model_subset_{i+1}"),
        per_device_train_batch_size=training_args.per_device_train_batch_size,
        per_device_eval_batch_size=training_args.per_device_eval_batch_size,
        num_train_epochs=training_args.num_train_epochs,
        learning_rate=training_args.learning_rate,
        logging_steps=training_args.logging_steps,
        save_strategy="no",            # we'll save manually
        eval_strategy="no",            # no evaluation during these runs
        seed=SEED,
    )

    trainer = Trainer(
        model=model,
        args=subset_args,
        train_dataset=train_subset,
        eval_dataset=valid_subset,
        processing_class=image_processor,   # replaces the deprecated `tokenizer=` argument
        data_collator=collate_fn,
        compute_metrics=eval_compute_metrics_fn,
    )

    print(f"\n=== Training subset {i+1}/{num_subsets} "
          f"(train idx {start_train}:{end_train}, "
          f"valid idx {start_val}:{end_val}) ===")

    trainer.train()

    # --- save & zip ---
    subset_dir = subset_args.output_dir
    trainer.save_model(subset_dir)

    # The archive is written next to the model directory, with member paths
    # relative to that directory.
    zip_path = f"{subset_dir}.zip"
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(subset_dir):
            for fn in files:
                fullpath = os.path.join(root, fn)
                arcname  = os.path.relpath(fullpath, subset_dir)
                zf.write(fullpath, arcname)

    print(f"Saved & zipped to {zip_path}")

    # clear CUDA
    # NOTE(review): `model` and `trainer` are still referenced at this point,
    # so only cached blocks that are no longer in use can be released.
    torch.cuda.empty_cache()
    print("Cleared CUDA cache.")


skip 1st subset


  trainer = Trainer(



=== Training subset 2/10 (train idx 6346:12692, valid idx 1586:3172) ===
{'loss': 14.2229, 'grad_norm': 27.506196975708008, 'learning_rate': 4.380352644836273e-05, 'epoch': 0.6297229219143576}
{'loss': 13.8249, 'grad_norm': 42.66318893432617, 'learning_rate': 3.7506297229219146e-05, 'epoch': 1.2594458438287153}
{'loss': 13.1307, 'grad_norm': 45.53601837158203, 'learning_rate': 3.1221662468513854e-05, 'epoch': 1.8891687657430731}
{'loss': 12.6174, 'grad_norm': 24.074769973754883, 'learning_rate': 2.4924433249370276e-05, 'epoch': 2.5188916876574305}
{'loss': 12.2288, 'grad_norm': 36.361873626708984, 'learning_rate': 1.86272040302267e-05, 'epoch': 3.1486146095717884}
{'loss': 11.8171, 'grad_norm': 25.454195022583008, 'learning_rate': 1.2329974811083123e-05, 'epoch': 3.7783375314861463}
{'loss': 11.4877, 'grad_norm': 24.974241256713867, 'learning_rate': 6.045340050377834e-06, 'epoch': 4.408060453400504}
{'train_runtime': 2556.622, 'train_samples_per_second': 12.411, 'train_steps_per_secon

  trainer = Trainer(



=== Training subset 3/10 (train idx 12692:19038, valid idx 3172:4758) ===
{'loss': 14.1414, 'grad_norm': 50.41816711425781, 'learning_rate': 4.380352644836273e-05, 'epoch': 0.6297229219143576}


KeyboardInterrupt: 

We have fine-tuned the model on the 2nd 5% subset of the data and saved the resulting model (which has now been trained on 10% of the data).

Next, we continue training from the model that was fine-tuned on 10% of the data. This time we do not evaluate the model; we only train it on 25% of the data.

In [45]:
import os
import zipfile
import torch
from torch.utils.data import Subset, random_split, DataLoader
from transformers import (
    AutoModelForImageClassification,     # or your specific class
    AutoModelForObjectDetection,         # the class this cell actually instantiates
    TrainingArguments,
    Trainer,
)

# --- CONFIG ---
OUTPUT_BASE_DIR      = "./"
SUBSET_FRAC          = 0.1       # 10%  (not used in this cell)
MAX_COVER_FRAC       = 0.50       # up to 50% total  (not used in this cell)
SEED                 = 42

# --- Datasets (your existing train/valid) ---
# train = ...
# valid = ...


def _split_two_quarters(dataset, seed=42):
    """Return two disjoint random subsets, each holding 25% of ``dataset``.

    The remaining samples go into a third split that is discarded. The lengths
    always sum to ``len(dataset)``, so the split works for any dataset size.
    The previous hard-coded lengths ``[n, n, n + 2, n + 1]`` only summed
    correctly when the size was 3 modulo 4. ``random_split`` slices a single
    seeded permutation by cumulative length, so for such sizes the first two
    subsets are the same as before.
    """
    quarter = int(0.25 * len(dataset))
    generator = torch.Generator().manual_seed(seed)
    first, second, _ = random_split(
        dataset,
        [quarter, quarter, len(dataset) - 2 * quarter],
        generator=generator,
    )
    return first, second


# Two random 25% splits of the training data; training uses the second one.
# (`random_split` was previously called before it was imported, and an earlier
# two-way split of `train` was immediately overwritten; both are removed.)
total = len(train)
n_small = int(0.25 * total)
small_train_ds, set_25 = _split_two_quarters(train, seed=42)

# Same procedure and seed for the validation data.
total_valid = len(valid)
n_small_valid = int(0.25 * total_valid)
small_valid__ds, valid_set_25 = _split_two_quarters(valid, seed=42)

torch.cuda.empty_cache()

# Continue from the model saved by the per-subset run (subset 2).
FINETUNED_CHECKPOINT = "/kaggle/working/model_subset_2"
model = AutoModelForObjectDetection.from_pretrained(FINETUNED_CHECKPOINT)
model.to("cuda")

# Arguments for this run get their own name so the base `training_args` is not
# overwritten and the cell can be re-run safely.
# NOTE(review): only the fields below are copied from `training_args`;
# max_grad_norm, warmup_steps, fp16 etc. fall back to library defaults.
# NOTE(review): the directory is named "ON-WHOLE-dataset" although this run
# trains on a 25% split.
NEW_FINETUNED_CHECKPOINT = os.path.join(OUTPUT_BASE_DIR, "ON-WHOLE-dataset")
quarter_run_args = TrainingArguments(
    output_dir=NEW_FINETUNED_CHECKPOINT,
    per_device_train_batch_size=training_args.per_device_train_batch_size,
    per_device_eval_batch_size=training_args.per_device_eval_batch_size,
    num_train_epochs=training_args.num_train_epochs,
    learning_rate=training_args.learning_rate,
    logging_steps=training_args.logging_steps,
    save_strategy="no",            # we'll save manually
    eval_strategy="no",            # no evaluation during this run
    seed=SEED,
)

trainer = Trainer(
    model=model,
    args=quarter_run_args,
    train_dataset=set_25,
    eval_dataset=valid_set_25,
    processing_class=image_processor,   # replaces the deprecated `tokenizer=` argument
    data_collator=collate_fn,
    compute_metrics=eval_compute_metrics_fn,
)

trainer.train()

# --- save & zip ---
subset_dir = quarter_run_args.output_dir
trainer.save_model(subset_dir)

# The archive is written next to the model directory, with member paths
# relative to that directory.
zip_path = f"{subset_dir}.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
    for root, _, files in os.walk(subset_dir):
        for fn in files:
            fullpath = os.path.join(root, fn)
            arcname  = os.path.relpath(fullpath, subset_dir)
            zf.write(fullpath, arcname)

# Report only after the archive has been closed.
print(f"Saved & zipped to {zip_path}")

# clear CUDA
torch.cuda.empty_cache()
print("Cleared CUDA cache.")


  trainer = Trainer(


{'loss': 13.3178, 'grad_norm': 23.39399528503418, 'learning_rate': 4.876228888328712e-05, 'epoch': 0.12603982858583312}
{'loss': 13.5126, 'grad_norm': 98.26414489746094, 'learning_rate': 4.750189059742879e-05, 'epoch': 0.25207965717166625}
{'loss': 13.2577, 'grad_norm': 26.730506896972656, 'learning_rate': 4.6241492311570464e-05, 'epoch': 0.3781194857574994}
{'loss': 13.0447, 'grad_norm': 38.708953857421875, 'learning_rate': 4.498109402571213e-05, 'epoch': 0.5041593143433325}
{'loss': 13.0322, 'grad_norm': 386.195068359375, 'learning_rate': 4.372321653642552e-05, 'epoch': 0.6301991429291656}
{'loss': 12.7154, 'grad_norm': 50.50520706176758, 'learning_rate': 4.2462818250567184e-05, 'epoch': 0.7562389715149987}
{'loss': 12.71, 'grad_norm': 35.673072814941406, 'learning_rate': 4.120241996470885e-05, 'epoch': 0.8822788001008318}
{'loss': 12.5111, 'grad_norm': 29.194944381713867, 'learning_rate': 3.994202167885052e-05, 'epoch': 1.008318628686665}
{'loss': 12.1101, 'grad_norm': 274.854003906

Saved the final fine-tuned model, which has been trained on 35% of the whole dataset.