In [9]:
import json
import os
import sys
import pandas as pd

In [10]:
# Location of the COCO-format training annotations for the processed TACO dataset.
# NOTE(review): this is an absolute, machine-specific path — consider deriving it from a
# configurable dataset root so the notebook can run elsewhere.
metadata_path = r'D:\KEEP\FPTUni\DH_FPT\FPTU\Syllabuses\K√¨ 8\DSP391m\Dataset\processed_taco_coco\annotations\instances_train2017.json'

# Keep the path and the parsed content under separate names so re-running this cell is
# idempotent (`metadata` is always the parsed dict, never the path string).
# Read explicitly as UTF-8 instead of relying on the platform default encoding.
with open(metadata_path, 'r', encoding='utf-8') as f:
    metadata = json.load(f)

In [11]:
# List the top-level sections of the loaded annotation dictionary.
print(metadata.keys())

dict_keys(['info', 'licenses', 'categories', 'images', 'annotations'])


In [12]:
# Dump the category table (one dict per category with 'supercategory', 'id' and 'name').
print(metadata['categories'])

[{'supercategory': 'Aluminium foil', 'id': 0, 'name': 'Aluminium foil'}, {'supercategory': 'Battery', 'id': 1, 'name': 'Battery'}, {'supercategory': 'Blister pack', 'id': 2, 'name': 'Aluminium blister pack'}, {'supercategory': 'Blister pack', 'id': 3, 'name': 'Carded blister pack'}, {'supercategory': 'Bottle', 'id': 4, 'name': 'Other plastic bottle'}, {'supercategory': 'Bottle', 'id': 5, 'name': 'Clear plastic bottle'}, {'supercategory': 'Bottle', 'id': 6, 'name': 'Glass bottle'}, {'supercategory': 'Bottle cap', 'id': 7, 'name': 'Plastic bottle cap'}, {'supercategory': 'Bottle cap', 'id': 8, 'name': 'Metal bottle cap'}, {'supercategory': 'Broken glass', 'id': 9, 'name': 'Broken glass'}, {'supercategory': 'Can', 'id': 10, 'name': 'Food Can'}, {'supercategory': 'Can', 'id': 11, 'name': 'Aerosol'}, {'supercategory': 'Can', 'id': 12, 'name': 'Drink can'}, {'supercategory': 'Carton', 'id': 13, 'name': 'Toilet tube'}, {'supercategory': 'Carton', 'id': 14, 'name': 'Other carton'}, {'supercate

In [14]:
# Display name -> checkpoint file for each model referenced in this notebook.
# NOTE(review): the "YOLOv11l" key points at "yolo11n.pt" (label says "l", file name says "n")
# — confirm which variant is intended.
# NOTE(review): the entries mix last.pth and best.pth — confirm this is deliberate.
MODEL_PATHS = {
    "Rtdetrv2": "FINAL/FINETUNE_BASELINE/rtdetrv2_finetune_taco_BASELINE/last.pth",
    "Distill-Convnet": "FINAL/FINETUNE_DISTILLED/rtdetrv2_finetune_taco_convnext_teacher/last.pth",
    "Distill-Vit": "FINAL/FINETUNE_DISTILLED/rtdetrv2_finetune_taco_vit_teacher/best.pth",
    "YOLOv11l": "FINAL/YOLO/yolo11n.pt"
}

In [19]:
import torch

def inspect_checkpoint(path):
    """Print a short structural summary of a file saved with ``torch.save``.

    The file is loaded onto the CPU and classified with simple heuristics:

    * not a ``dict``            -> reported as a fully serialized model object;
    * ``dict`` with a "model" key -> reported as a training checkpoint;
    * non-empty ``dict`` whose keys are all strings containing a "."
      -> reported as a bare ``state_dict``;
    * anything else (including an empty dict or non-string keys)
      -> reported as an unknown structure.

    Args:
        path: Path of the checkpoint file to inspect.

    Returns:
        None. The summary is written to stdout.
    """
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
    # only point this helper at checkpoints from a trusted source.
    ckpt = torch.load(path, map_location='cpu')

    print(f"\nüîç Inspecting file: {path}")
    print(f"Type of object loaded: {type(ckpt)}")

    # Case 1: It's a full model object (serialized)
    if not isinstance(ckpt, dict):
        print("‚Üí This looks like a **full model object** (saved with torch.save(model, ...))")
        print("  ‚úÖ You can load directly using: model = torch.load(path)")
        return

    # Case 2: It's a dictionary (most common case)
    keys = list(ckpt.keys())
    print("‚Üí This is a **dictionary checkpoint**. Keys:")
    for key in keys:
        print("   ‚Ä¢", key)

    # Try to identify its type
    if "model" in ckpt:
        print("üß† Detected: checkpoint with 'model' weights (may also include optimizer, epoch info).")
    # `keys` must be non-empty (all() over nothing is True) and every key must be a string
    # before it can be tested for the dotted "layer.param" naming of a state_dict.
    # A "module."-prefixed key always contains a ".", so the single test covers both cases.
    elif keys and all(isinstance(k, str) and "." in k for k in keys):
        print("ü™∂ Detected: pure state_dict (weights only).")
    else:
        print("‚ö†Ô∏è Unknown structure ‚Äî open specific keys manually to verify contents.")

# Example usage: summarise the checkpoint registered under the 'Rtdetrv2' key of MODEL_PATHS.
inspect_checkpoint(MODEL_PATHS['Rtdetrv2'])



üîç Inspecting file: FINAL/FINETUNE_BASELINE/rtdetrv2_finetune_taco_BASELINE/last.pth
Type of object loaded: <class 'dict'>
‚Üí This is a **dictionary checkpoint**. Keys:
   ‚Ä¢ date
   ‚Ä¢ last_epoch
   ‚Ä¢ model
   ‚Ä¢ criterion
   ‚Ä¢ postprocessor
   ‚Ä¢ scaler
   ‚Ä¢ optimizer
   ‚Ä¢ lr_scheduler
üß† Detected: checkpoint with 'model' weights (may also include optimizer, epoch info).


In [None]:
%%writefile trainer_convnext.py
import os
import math
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import Tensor
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from transformers import AutoModel, AutoConfig
from PIL import Image
from pycocotools.coco import COCO
from torchvision import transforms as T
from tqdm import tqdm
import wandb
import datetime
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data.distributed import DistributedSampler

class HuggingFaceTeacherWrapper(nn.Module):
    """Expose a Hugging Face vision backbone as a module that returns one feature map.

    The model is treated as a ViT when its config ``model_type`` contains "vit";
    otherwise it is treated as a ConvNeXt-style model whose config lists
    ``hidden_sizes``.

    Args:
        model_id: Hugging Face model identifier passed to ``from_pretrained``.
        token: Optional access token forwarded to ``from_pretrained``.
    """

    def __init__(self, model_id: str, token: str = None):
        super().__init__()
        # Only the process with RANK 0 (or a non-distributed run) prints progress messages.
        is_rank_zero = int(os.environ.get("RANK", 0)) == 0
        if is_rank_zero:
            print(f"Loading teacher model '{model_id}' from Hugging Face...")
        config = AutoConfig.from_pretrained(model_id, token=token)
        self._model = AutoModel.from_pretrained(model_id, token=token)
        self.is_vit = "vit" in config.model_type.lower()
        self._feature_dim = (
            self._model.config.hidden_size
            if self.is_vit
            else self._model.config.hidden_sizes[-1]
        )
        if is_rank_zero:
            print(f"Detected {'ViT' if self.is_vit else 'ConvNeXT'} architecture. Feature dim: {self._feature_dim}")

    def feature_dim(self) -> int:
        """Return the channel dimension of the feature map produced by ``forward``."""
        return self._feature_dim

    def forward(self, x: Tensor) -> Tensor:
        """Run the teacher and return its last feature map.

        Args:
            x: Batch of images, forwarded to the wrapped model as ``pixel_values``.

        Returns:
            For the non-ViT branch, the last entry of ``hidden_states``. For the ViT
            branch, the patch tokens of ``last_hidden_state`` rearranged into a
            ``(batch, dim, side, side)`` grid.

        Raises:
            RuntimeError: If the number of patch tokens is not a perfect square, so the
                tokens cannot be arranged on a square grid.
        """
        outputs = self._model(pixel_values=x, output_hidden_states=True)
        if not self.is_vit:
            return outputs.hidden_states[-1]

        # Skip the CLS token plus any register tokens the config declares; models without
        # `num_register_tokens` keep the original behaviour of skipping one token.
        # NOTE(review): assumes registers sit directly after the CLS token — confirm for
        # the specific teacher architecture.
        model_config = getattr(self._model, "config", None)
        num_prefix_tokens = 1 + (getattr(model_config, "num_register_tokens", 0) or 0)
        patch_tokens = outputs.last_hidden_state[:, num_prefix_tokens:, :]

        b, s, d = patch_tokens.shape
        h = w = int(math.sqrt(s))
        if h * w != s:
            raise RuntimeError(
                f"Cannot arrange {s} patch tokens on a square grid "
                f"(skipped {num_prefix_tokens} prefix token(s))."
            )
        return patch_tokens.permute(0, 2, 1).reshape(b, d, h, w)

class CocoDetectionForDistill(torch.utils.data.Dataset):
    """Image-only view of a COCO-format dataset for feature distillation.

    Annotations are used only to enumerate image files; every sample is returned as
    ``(transformed_image, 0)`` with a constant placeholder label.

    Args:
        root: Directory that contains the image files.
        ann_file: Path to the COCO annotation JSON.
        transforms: Callable applied to each RGB PIL image.
    """

    def __init__(self, root, ann_file, transforms):
        self.root = root
        self.coco = COCO(ann_file)
        # Sorted image ids give a deterministic sample order.
        self.ids = sorted(self.coco.imgs.keys())
        self.transforms = transforms

    def __getitem__(self, index):
        """Return ``(transforms(image), 0)`` for the image at position ``index``."""
        img_id = self.ids[index]
        file_name = self.coco.loadImgs(img_id)[0]["file_name"]
        # Close the underlying file as soon as the pixels have been converted, so file
        # handles are not left to the garbage collector in long-lived loader workers.
        with Image.open(os.path.join(self.root, file_name)) as raw_image:
            img = raw_image.convert("RGB")
        return self.transforms(img), 0

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.ids)

def setup_ddp():
    """Create the default NCCL process group and select this process's GPU.

    The GPU index is read from the ``LOCAL_RANK`` environment variable, which must be set.
    """
    dist.init_process_group(backend="nccl")
    local_gpu_index = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_gpu_index)

def cleanup_ddp():
    """Tear down the default process group if one exists.

    Calling this when no group was initialised (or when the distributed package is
    unavailable) is a no-op instead of an error, so it is safe in cleanup paths.
    """
    if dist.is_available() and dist.is_initialized():
        dist.destroy_process_group()


class _StudentFeatureExtractor(nn.Module):
    """Student backbone followed by its encoder, returning only the last encoder output."""

    def __init__(self, backbone, encoder):
        super().__init__()
        self.backbone = backbone
        self.encoder = encoder

    def forward(self, images):
        # Only the last encoder output takes part in the distillation loss.
        return self.encoder(self.backbone(images))[-1]


def main_training_function(rank, world_size, config):
    """Distil teacher features into the RT-DETR student backbone and encoder under DDP.

    The student's last encoder feature map is projected with a 1x1 convolution to the
    teacher's channel count and regressed (MSE) onto the teacher's feature map, resized
    to the same spatial size. Validation loss drives ``ReduceLROnPlateau``, best-model
    saving and early stopping.

    Changes relative to the previous revision:
      * The student forward pass now runs through the DDP wrapper. It previously used
        ``.module`` directly, which skips DDP's forward and therefore its gradient
        synchronisation.
      * ``scheduler.step`` runs on every rank (the averaged validation loss is identical
        everywhere), so learning rates no longer diverge between processes.
      * The CUDA device index comes from ``LOCAL_RANK`` (falling back to ``rank``).
      * Saved weights use ``backbone.`` / ``encoder.`` key prefixes instead of two merged,
        un-prefixed state dicts.
        NOTE(review): confirm this is the key layout the downstream fine-tuning loader
        matches against.

    Args:
        rank: Global rank of this process; rank 0 logs and saves.
        world_size: Total number of processes.
        config: Dict with the keys read below (learning_rate, epochs, batch_size_per_gpu,
            num_workers, weight_decay, teacher_hf_id, dataset_dir, scheduler_patience,
            scheduler_factor, early_stopping_patience, best_weights_filename,
            final_weights_filename, wandb_project).

    Returns:
        None. Rank 0 writes the best and final weight files named in ``config``.
    """
    # `rank` is the global rank; the GPU index on this machine is the local rank.
    device = int(os.environ.get("LOCAL_RANK", rank))

    is_main_process = (rank == 0)

    hf_token = None
    if is_main_process:
        print(f"Running DDP on {world_size} GPUs.")
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
        run_name = f"run_ddp_{timestamp}_lr{config['learning_rate']}_bs{config['batch_size_per_gpu']}"
        try:
            from kaggle_secrets import UserSecretsClient
            from huggingface_hub import login
            secrets = UserSecretsClient()
            hf_token = secrets.get_secret("HUGGINGFACE_TOKEN")
            wandb_key = secrets.get_secret("WANDB_API_KEY")
            login(token=hf_token)
            wandb.login(key=wandb_key)
            wandb.init(project=config["wandb_project"], config=config, name=run_name)
        except Exception:
            # Best effort: training continues without remote logging.
            hf_token = None
            print("Could not log in, continuing without W&B.")

    # Let rank 0 finish logging in before any rank starts loading models.
    dist.barrier()

    teacher_model = HuggingFaceTeacherWrapper(config["teacher_hf_id"], token=hf_token).to(device)
    teacher_model.eval()

    # Rank 0 loads the hub model first; the other ranks load it after the barrier.
    if is_main_process:
        print("ƒêang t·∫£i student model tr√™n ti·∫øn tr√¨nh ch√≠nh...")
        torch.hub.load("lyuwenyu/RT-DETR", "rtdetrv2_l", pretrained=True, trust_repo=True)

    dist.barrier()

    student_hub_model = torch.hub.load("lyuwenyu/RT-DETR", "rtdetrv2_l", pretrained=True, trust_repo=True)
    student_model = student_hub_model.model.to(device)

    # Probe the student once to learn the channel count of its last encoder output.
    with torch.no_grad():
        x = torch.randn(1, 3, 640, 640).to(device)
        student_channels = student_model.encoder(student_model.backbone(x))[-1].shape[1]
    teacher_channels = teacher_model.feature_dim()
    projection_layer = nn.Conv2d(student_channels, teacher_channels, kernel_size=1).to(device)

    # Wrap exactly the sub-network that is trained, and call it through DDP, so gradients
    # are all-reduced. find_unused_parameters tolerates encoder parameters that do not
    # contribute to the last output level.
    student_features = DDP(
        _StudentFeatureExtractor(student_model.backbone, student_model.encoder),
        device_ids=[device],
        find_unused_parameters=True,
    )
    projection_layer = DDP(projection_layer, device_ids=[device])

    transforms = T.Compose([
        T.Resize((640, 640)), T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = CocoDetectionForDistill(
        root=os.path.join(config["dataset_dir"], "train2017"),
        ann_file=os.path.join(config["dataset_dir"], "annotations/instances_train2017.json"),
        transforms=transforms
    )
    val_dataset = CocoDetectionForDistill(
        root=os.path.join(config["dataset_dir"], "val2017"),
        ann_file=os.path.join(config["dataset_dir"], "annotations/instances_val2017.json"),
        transforms=transforms
    )
    if is_main_process:
        print(f"Data loaded: {len(train_dataset)} training images, {len(val_dataset)} validation images.")

    train_sampler = DistributedSampler(train_dataset, num_replicas=world_size, rank=rank, shuffle=True)
    val_sampler = DistributedSampler(val_dataset, num_replicas=world_size, rank=rank, shuffle=False)

    train_loader = DataLoader(
        train_dataset, batch_size=config["batch_size_per_gpu"],
        shuffle=False, num_workers=config["num_workers"], pin_memory=True, drop_last=True, sampler=train_sampler
    )
    val_loader = DataLoader(
        val_dataset, batch_size=config["batch_size_per_gpu"],
        shuffle=False, num_workers=config["num_workers"], pin_memory=True, drop_last=False, sampler=val_sampler
    )

    params = list(student_features.module.backbone.parameters()) + \
             list(student_features.module.encoder.parameters()) + \
             list(projection_layer.module.parameters())

    optimizer = torch.optim.AdamW(params, lr=config["learning_rate"], weight_decay=config["weight_decay"])
    criterion = nn.MSELoss()
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=config['scheduler_factor'], patience=config['scheduler_patience'], verbose=is_main_process)

    if is_main_process and wandb.run:
        wandb.watch((student_features, projection_layer), log="all", log_freq=100)

    def distillation_loss(images):
        """MSE between the projected student features and the resized teacher features."""
        with torch.no_grad():
            teacher_features = teacher_model(images)
        projected = projection_layer(student_features(images))
        teacher_resized = F.interpolate(teacher_features, size=projected.shape[-2:], mode="bilinear", align_corners=False)
        return criterion(projected, teacher_resized)

    best_val_loss = float('inf')
    early_stopping_counter = 0

    if is_main_process:
        print("Starting training...")

    for epoch in range(config["epochs"]):
        # Reseed the sampler so each epoch uses a different shuffle.
        train_sampler.set_epoch(epoch)

        start = time.time()
        student_features.train()
        projection_layer.train()
        total_train_loss = 0.0

        train_iterator = tqdm(train_loader, desc=f"Epoch {epoch+1}/{config['epochs']} [Train]") if is_main_process else train_loader

        for images, _ in train_iterator:
            images = images.to(device)
            loss = distillation_loss(images)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()

        # Sum the per-rank loss totals, then average over all batches of all ranks.
        train_loss_tensor = torch.tensor(total_train_loss).to(device)
        dist.all_reduce(train_loss_tensor, op=dist.ReduceOp.SUM)
        avg_train_loss = train_loss_tensor.item() / max(1, len(train_loader) * world_size)

        student_features.eval()
        projection_layer.eval()
        total_val_loss = 0.0

        val_iterator = tqdm(val_loader, desc=f"Epoch {epoch+1}/{config['epochs']} [Val]") if is_main_process else val_loader
        with torch.no_grad():
            for images, _ in val_iterator:
                images = images.to(device)
                total_val_loss += distillation_loss(images).item()

        val_loss_tensor = torch.tensor(total_val_loss).to(device)
        dist.all_reduce(val_loss_tensor, op=dist.ReduceOp.SUM)
        avg_val_loss = val_loss_tensor.item() / max(1, len(val_loader) * world_size)

        if is_main_process:
            duration = time.time() - start
            print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Val Loss: {avg_val_loss:.4f} | Duration: {duration:.2f}s")
            if wandb.run:
                wandb.log({"epoch": epoch + 1, "train/avg_loss": avg_train_loss, "val/avg_loss": avg_val_loss, "time/epoch_s": duration, "train/epoch_lr": optimizer.param_groups[0]['lr']})

        # Every rank owns its own optimizer, so every rank must apply the same LR schedule.
        scheduler.step(avg_val_loss)

        if is_main_process:
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                early_stopping_counter = 0
                print(f"Validation loss improved to {best_val_loss:.4f}. Saving best model...")
                # Keys carry "backbone." / "encoder." prefixes from the wrapper module.
                best_weights = student_features.module.state_dict()
                torch.save({'model': best_weights}, config["best_weights_filename"])

            else:
                early_stopping_counter += 1
                print(f"Validation loss did not improve. Early stopping counter: {early_stopping_counter}/{config['early_stopping_patience']}")

        # Only rank 0 tracks the counter; MAX-reduce broadcasts its stop decision to all ranks.
        stop_training = torch.tensor(1 if early_stopping_counter >= config['early_stopping_patience'] else 0, device=device)
        dist.all_reduce(stop_training, op=dist.ReduceOp.MAX)
        if stop_training.item() == 1:
            if is_main_process:
                print("Early stopping triggered. Training finished.")
            break

    if is_main_process:
        print("\nDistillation finished.")
        final_weights = student_features.module.state_dict()
        torch.save({'model': final_weights}, config["final_weights_filename"])
        print(f"Saved final epoch weights to '{config['final_weights_filename']}'")
        print(f"Best weights were saved to '{config['best_weights_filename']}' with val_loss: {best_val_loss:.4f}")
        if wandb.run:
            wandb.summary["best_val_loss"] = best_val_loss
            wandb.finish()


if __name__ == "__main__":
    # Script entry point: RANK and WORLD_SIZE must be present in the environment.
    setup_ddp()
    rank = int(os.environ["RANK"])
    world_size = int(os.environ["WORLD_SIZE"])

    DATASET_DIR = "/kaggle/input/dsp-pre-final/processed_taco_coco"

    # Hyper-parameters and file names for the ConvNeXt-teacher distillation run.
    config = {
        # Optimisation
        "learning_rate": 1e-4,
        "epochs": 50,
        "batch_size_per_gpu": 16,
        "num_workers": 2,
        "weight_decay": 1e-5,
        # Teacher and data
        "teacher_hf_id": "facebook/dinov3-convnext-base-pretrain-lvd1689m",
        "dataset_dir": DATASET_DIR,
        # LR schedule and early stopping
        "scheduler_patience": 3,
        "scheduler_factor": 0.1,
        "early_stopping_patience": 7,
        # Outputs and logging
        "best_weights_filename": "distilled_rtdetr_convnext_teacher_BEST.pth",
        "final_weights_filename": "distilled_rtdetr_convnext_teacher_FINAL.pth",
        "wandb_project": "Distill-RTDETR-ConvNeXt-Teacher",
    }

    main_training_function(rank, world_size, config)

    cleanup_ddp()

In [None]:
%%writefile trainer_vit.py
import os
import sys
import shutil
import torch
import wandb
import lightly_train
from lightly_train.model_wrappers import RTDETRModelWrapper
import datetime

def main_training_function(config):
    """Distil a ViT teacher into the RT-DETR student with ``lightly_train.train``.

    The process whose ``LOCAL_RANK`` is "0" (or unset) logs in to W&B when Kaggle secrets
    are available and deletes a pre-existing output directory. Every process then builds
    the student and hands it to ``lightly_train.train``.

    Args:
        config: Dict with the keys read below (learning_rate, batch_size_per_gpu,
            num_gpus, output_dir, teacher_name, teacher_url, train_dir, val_dir, epochs,
            num_workers, optimizer_name, weight_decay, wandb_project).
            Other keys, such as ``early_stopping_patience``, are not read here.

    Returns:
        None.
    """
    is_main_process = os.environ.get("LOCAL_RANK", "0") == "0"

    # Non-main processes keep an empty run name; only the main process builds a real one.
    run_name = ""
    if is_main_process:
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M")
        run_name = f"run_ddp_{timestamp}_lr{config['learning_rate']}_bs{config['batch_size_per_gpu']}"
        try:
            from kaggle_secrets import UserSecretsClient
            wandb_key = UserSecretsClient().get_secret("WANDB_API_KEY")
            wandb.login(key=wandb_key)
        except Exception as e:
            print(f"W&B secrets not available. Skipping login. Error: {e}")

        if os.path.exists(config['output_dir']):
            print(f"Output directory '{config['output_dir']}' already exists. Deleting it.")
            shutil.rmtree(config['output_dir'])

    # Only synchronises when a process group already exists at this point.
    if torch.distributed.is_initialized():
        torch.distributed.barrier()

    if is_main_process:
        print("Initializing Student Model (RT-DETR)...")
    student_hub_model = torch.hub.load('lyuwenyu/RT-DETR', 'rtdetrv2_l', pretrained=True, trust_repo=True)
    wrapped_student = RTDETRModelWrapper(student_hub_model.model)

    callbacks_config = {
        "model_checkpoint": {
            "dirpath": os.path.join(config['output_dir'], 'checkpoints'),
            # The filename template must name the monitored metric; it previously used
            # `validation_loss`, which does not match `monitor` below.
            # NOTE(review): presumably `val_loss` is the metric name that gets logged — confirm.
            "filename": 'best-model-{epoch}-{val_loss:.4f}',
            "monitor": 'val_loss',
            "mode": 'min',
            "save_top_k": 1,
        },
        "learning_rate_monitor": {}
    }

    global_batch_size = config['batch_size_per_gpu'] * config['num_gpus']

    if is_main_process:
        print("Starting distillation with lightly_train.train()...")
        print(f"Global batch size: {global_batch_size} ({config['batch_size_per_gpu']} per GPU)")

    lightly_train.train(
        model=wrapped_student,
        method="distillationv1",
        method_args={
            "teacher": config['teacher_name'],
            "teacher_url": config['teacher_url'],
        },
        data=[config['train_dir'], config['val_dir']],
        out=config['output_dir'],
        epochs=config['epochs'],
        batch_size=global_batch_size,
        num_workers=config['num_workers'],
        optim=config['optimizer_name'],
        optim_args={"lr": config['learning_rate'], "weight_decay": config['weight_decay']},
        callbacks=callbacks_config,
        loggers={
            "wandb": {
                "project": config['wandb_project'],
                "name": run_name,
            }
        },
        devices=config['num_gpus'],
        strategy='ddp_find_unused_parameters_true',
        accelerator='gpu'
    )
    if is_main_process:
        print("\nDistillation finished.")
        print(f"Best model checkpoint saved in directory: {os.path.join(config['output_dir'], 'checkpoints')}")

if __name__ == '__main__':
    # The teacher weights URL is read from the Kaggle secret "DINOV3_TEACHER_URL";
    # nothing is launched when it is missing or empty.
    DINOV3_VIT_TEACHER_URL = ""
    try:
        from kaggle_secrets import UserSecretsClient
        DINOV3_VIT_TEACHER_URL = UserSecretsClient().get_secret("DINOV3_TEACHER_URL")
    except Exception as e:
        # The message now names the secret that is actually requested above.
        print(f"Could not read secret 'DINOV3_TEACHER_URL'. Please set it manually. Error: {e}")

    if not DINOV3_VIT_TEACHER_URL:
        print("ERROR: Save your token key into kaggle secret")
    else:
        BASE_DIR = "/kaggle/input/dsp-pre-final/processed_taco_coco"
        TRAIN_DIR = os.path.join(BASE_DIR, "train2017")
        VAL_DIR = os.path.join(BASE_DIR, "val2017")

        config = {
            "num_gpus": 2,
            "epochs": 50, "batch_size_per_gpu": 8, "num_workers": 2,
            "optimizer_name": "adamw", "learning_rate": 1e-4, "weight_decay": 1e-5,
            "early_stopping_patience": 7,
            "teacher_name": "dinov3/vitb16",
            "teacher_url": DINOV3_VIT_TEACHER_URL,
            "train_dir": TRAIN_DIR,
            "val_dir": VAL_DIR,
            "output_dir": "out/distill_vit_lightly",
            "wandb_project": "Distill-RTDETR-Distill-VIT"
        }

        main_training_function(config)

In [None]:
%%writefile /kaggle/working/RT-DETR/rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_taco_finetune_convnext.yml
# RT-DETRv2 fine-tuning configuration for the processed TACO dataset (COCO layout).
# Values set here are layered on top of the files listed in `__include__`.
__include__: [
  '../dataset/coco_detection.yml',
  '../runtime.yml',
  './include/dataloader.yml',
  './include/rtdetrv2_r50vd.yml',
]

output_dir: ./output/rtdetrv2_finetune_taco_convnext_teacher

# NOTE(review): trainer_convnext.py distils the torch.hub `rtdetrv2_l` student. Confirm that
# model uses the same HGNetv2-L backbone selected here; otherwise the backbone weights in
# the `tuning` file below cannot match this model.
RTDETR:
  backbone: HGNetv2

HGNetv2:
  name: 'L'
  return_idx: [1, 2, 3]
  freeze_at: 0
  freeze_norm: True
  pretrained: True

task: detection
remap_mscoco_category: false
# Weights to start fine-tuning from (the best distilled weights written by trainer_convnext.py).
# NOTE(review): relative path — confirm it resolves from the directory training is launched in.
tuning: '../distilled_rtdetr_convnext_teacher_BEST.pth'
compile: true
epoches: 50

num_classes: 60

train_dataloader:
  num_workers: 4
  dataset:
    type: CocoDetection
    img_folder: /kaggle/input/dsp-pre-final/processed_taco_coco/train2017
    ann_file: /kaggle/input/dsp-pre-final/processed_taco_coco/annotations/instances_train2017.json

val_dataloader:
  num_workers: 4
  dataset:
    type: CocoDetection
    img_folder: /kaggle/input/dsp-pre-final/processed_taco_coco/val2017
    ann_file: /kaggle/input/dsp-pre-final/processed_taco_coco/annotations/instances_val2017.json

batch_size: 16

# Parameters whose name matches the `backbone` pattern get a 10x smaller learning rate
# (0.00001) than the default (0.0001).
optimizer:
  type: AdamW
  params:
    - params: '^(?=.*backbone)'
      lr: 0.00001
  lr: 0.0001
  weight_decay: 0.0001
  betas: [0.9, 0.999]

# NOTE(review): OneCycleLR raises an error when stepped more than `total_steps` times —
# confirm 4000 matches how often the solver steps the scheduler over 50 epochs.
lr_scheduler:
  type: OneCycleLR
  max_lr: 0.0001
  pct_start: 0.3
  total_steps: 4000

checkpoint_freq: 10

In [1]:
import cv2
from camera_utils import manual_roi_setup, camera_stream, draw_rois

def test_camera_utils(cam_index=0, fps_limit=30):
    """Interactive smoke test for the ``camera_utils`` helpers.

    First runs ``manual_roi_setup`` so ROIs can be drawn, then opens the camera and shows
    a live preview with the ROIs overlaid until 'q' or Esc is pressed or a frame cannot
    be read.

    Args:
        cam_index: Camera index used for both the ROI setup and the preview.
        fps_limit: Target preview frame rate; it sets the per-frame key-wait delay.

    Returns:
        None.
    """
    print("========== CAMERA UTILS TEST ==========")
    print("[1] Starting ROI setup...")

    # Step 1: Draw the ROIs manually
    rois = manual_roi_setup(cam_index=cam_index, fps_limit=fps_limit)

    if not rois:
        print("[WARN] No ROIs defined. Exiting test.")
        return

    print(f"[INFO] {len(rois)} ROIs finalized:")
    for roi in rois:
        print(f"   - {roi['label']}: ({roi['x']},{roi['y']},{roi['w']},{roi['h']})")

    # Step 2: Open camera stream to preview the ROIs
    print("[2] Starting camera stream with ROI overlay (press 'q' to exit)...")

    cap = cv2.VideoCapture(cam_index)
    if not cap.isOpened():
        print("‚ùå Cannot open webcam.")
        return

    # One wait per frame both throttles the loop and polls the keyboard. The previous
    # second waitKey call discarded any key pressed while it was waiting.
    # At least 1 ms, because waitKey(0) would block until a key is pressed.
    frame_delay_ms = max(1, int(1000 / fps_limit))

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Draw the predefined ROIs on the frame
            draw_rois(frame, rois)

            # Show the frame
            cv2.imshow("Camera ROI Test", frame)
            key = cv2.waitKey(frame_delay_ms) & 0xFF
            if key == ord('q') or key == 27:  # 27 is the Esc key
                break
    finally:
        # Release the camera and close the window even if drawing or display fails.
        cap.release()
        cv2.destroyAllWindows()
    print("[INFO] Test completed successfully.")

# Run the interactive camera test when this cell or script is executed directly.
if __name__ == "__main__":
    test_camera_utils()


ModuleNotFoundError: No module named 'camera_utils'

In [1]:
!nvidia-smi

Mon Nov  3 09:55:57 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 581.57                 Driver Version: 581.57         CUDA Version: 13.0     |
+-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4050 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   47C    P0             14W /   80W |       0MiB /   6141MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [5]:
import torch
# Report whether this PyTorch build can see a CUDA device (prints "True" or "False").
cuda_available = torch.cuda.is_available()
print(str(cuda_available))

False
