In [None]:
!pip install kaggle

In [None]:
from google.colab import files
files.upload()

In [None]:
# 3. Configure and secure the token
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
!kaggle datasets download alagappanramanathan3/til-cv

In [None]:
!unzip til-cv.zip /til-cv

Archive:  til-cv.zip
caution: filename not matched:  /til-cv


In [None]:
!ls ./cv/images -1 | wc -l

20000


In [None]:
# 6. Copy the dataset folder into Drive
# NOTE(review): the destination lives under /content/drive, but the cell that
# mounts Drive ("# 5. Mount Google Drive") comes after this one in the notebook.
# On a fresh runtime, run the mount cell first.
!cp -r ./cv /content/drive/MyDrive/cv

cp: cannot stat './cv': No such file or directory


In [None]:
# 5. Mount Google Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
# Copy the entire cv folder to local disk
!rsync -av --ignore-existing -r /content/drive/MyDrive/cv /content/cv

# List contents to verify the copy worked
!ls -la /content/cv

# Check disk usage
!df -h

In [None]:
# 2. Install PyTorch for A100
!pip install torch==2.0.1+cu118 torchvision==0.15.2+cu118 -f https://download.pytorch.org/whl/torch_stable.html

In [None]:
# Downgrade NumPy to 1.26 (latest 1.x version)
!pip install "numpy<2" --force-reinstall

# Or specifically install 1.26.3
!pip install numpy==1.26.3 --force-reinstall

In [None]:
!git clone https://github.com/IDEA-Research/DINO.git

Cloning into 'DINO'...
remote: Enumerating objects: 442, done.[K
remote: Counting objects: 100% (192/192), done.[K
remote: Compressing objects: 100% (98/98), done.[K
remote: Total 442 (delta 137), reused 94 (delta 94), pack-reused 250 (from 1)[K
Receiving objects: 100% (442/442), 13.43 MiB | 27.62 MiB/s, done.
Resolving deltas: 100% (191/191), done.


In [None]:
# 3. Install other requirements
!pip install -r DINO/requirements.txt

In [None]:
# Uninstall current PyTorch
!pip uninstall torch torchvision torchaudio -y

# Install PyTorch with CUDA 12.1 (closest stable version to 12.5)
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
# 4. Build custom CUDA ops
%cd DINO/models/dino/ops
!python setup.py build install
%cd /content

# Test import
!python -c "import MultiScaleDeformableAttention as MSDA; print('MSDA module loaded successfully')"

In [None]:
# 5. Test import
!python -c "import sys; sys.path.append('/content/DINO'); from models.dino.dino import build_dino; print('DINO imported successfully')"

DINO imported successfully


In [None]:
# Import torch BEFORE importing any PyTorch extensions
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")

# Now test the DINO import
import sys
sys.path.append('/content/DINO')
from models.dino.dino import build_dino
print('DINO imported successfully')

PyTorch version: 2.5.1+cu121
CUDA available: True




DINO imported successfully


In [None]:
# Check if the shared library was created
!find /content/DINO -name "*.so" | grep -i multiscale
!find ~/.cache/torch_extensions -name "*.so" 2>/dev/nul l | grep -i multiscale

/content/DINO/models/dino/ops/build/lib.linux-x86_64-cpython-311/MultiScaleDeformableAttention.cpython-311-x86_64-linux-gnu.so


In [None]:
%cd /content/DINO/models/dino/ops

# Copy the built module to the correct location
!cp build/lib.linux-x86_64-cpython-311/MultiScaleDeformableAttention.cpython-311-x86_64-linux-gnu.so ./

%cd /content

/content/DINO/models/dino/ops
/content


In [None]:
import sys
sys.path.append('/content/DINO/models/dino/ops/build/lib.linux-x86_64-cpython-311')

# Now test import
import MultiScaleDeformableAttention as MSDA
print('MSDA loaded successfully')

MSDA loaded successfully


In [None]:
import torch
import sys
sys.path.append('/content/DINO')

# Test the custom ops first
import MultiScaleDeformableAttention as MSDA
print('MSDA loaded successfully')

# Now test DINO
from models.dino.dino import build_dino
print('DINO imported successfully')

MSDA loaded successfully
DINO imported successfully


In [None]:
from pathlib import Path
BASE_PATH = Path("/content/drive/MyDrive/cv/cv")

In [None]:
import json
import os
from collections import Counter

# Parse annotations.json once and describe its top-level layout
with open(BASE_PATH / "annotations.json", 'r') as f:
    annotations = json.load(f)

print("=== ANNOTATIONS.JSON STRUCTURE ANALYSIS ===")
print(f"Type of root object: {type(annotations)}")

if isinstance(annotations, dict):
    print(f"Top-level keys: {list(annotations.keys())}")

    # Summarise every top-level entry: its type, then a sample element for
    # lists or the first few keys for nested dicts
    for key, entry in annotations.items():
        print(f"\n--- Key: '{key}' ---")
        print(f"Type: {type(entry)}")

        if isinstance(entry, list):
            print(f"Length: {len(entry)}")
            if entry:
                print(f"First item type: {type(entry[0])}")
                print(f"First item sample: {entry[0]}")

        elif isinstance(entry, dict):
            print(f"Dict keys: {list(entry)[:10]}")  # First 10 keys

elif isinstance(annotations, list):
    print(f"List length: {len(annotations)}")
    if annotations:
        print(f"First item: {annotations[0]}")

# Check if it follows any known annotation format patterns
def check_annotation_format(data):
    """Guess which annotation schema a parsed JSON object follows.

    Returns "COCO format" when `data` is a dict holding all of the 'images',
    'annotations' and 'categories' keys, "Possible YOLO format" when it is a
    dict with a 'labels' or 'bboxes' key, and "Custom format" for anything
    else (including every non-dict input).
    """
    # Only dicts can match one of the known layouts
    if not isinstance(data, dict):
        return "Custom format"

    # COCO takes precedence over the YOLO hints
    coco_keys = {'images', 'annotations', 'categories'}
    if coco_keys.issubset(data.keys()):
        return "COCO format"

    if any(hint in data for hint in ('labels', 'bboxes')):
        return "Possible YOLO format"

    return "Custom format"

format_type = check_annotation_format(annotations)
print(f"\nDetected format: {format_type}")

# Verify image files
print(f"\n=== IMAGE FILES VERIFICATION ===")
# List the images folder a single time and reuse the result for both the
# count and the filename sample (the folder holds thousands of files).
jpg_files = [f for f in os.listdir(BASE_PATH / "images") if f.endswith('.jpg')]
image_count = len(jpg_files)
print(f"Number of .jpg files in images folder: {image_count}")

# Check image naming pattern
image_files = jpg_files[:10]
print(f"Sample image filenames: {image_files}")

=== ANNOTATIONS.JSON STRUCTURE ANALYSIS ===
Type of root object: <class 'dict'>
Top-level keys: ['info', 'images', 'annotations', 'licenses', 'categories']

--- Key: 'info' ---
Type: <class 'dict'>
Dict keys: []

--- Key: 'images' ---
Type: <class 'list'>
Length: 20000
First item type: <class 'dict'>
First item sample: {'id': 9146, 'width': 1920, 'height': 1080, 'file_name': '9146.jpg'}

--- Key: 'annotations' ---
Type: <class 'list'>
Length: 72967
First item type: <class 'dict'>
First item sample: {'id': 0, 'image_id': 9146, 'category_id': 1, 'area': 3861.0041370397084, 'bbox': [2.9999542236328125, 315.9999918937683, 99.00009155273438, 39.0000057220459], 'iscrowd': 0}

--- Key: 'licenses' ---
Type: <class 'list'>
Length: 0

--- Key: 'categories' ---
Type: <class 'list'>
Length: 18
First item type: <class 'dict'>
First item sample: {'id': 0, 'name': 'cargo aircraft'}

Detected format: COCO format

=== IMAGE FILES VERIFICATION ===
Number of .jpg files in images folder: 20000
Sample imag

In [None]:
import json

# Load the annotations
with open(BASE_PATH / 'annotations.json', 'r') as f:
    data = json.load(f)

print("=== CATEGORIES ANALYSIS ===")
categories = data['categories']
for cat in categories:
    print(f"ID: {cat['id']}, Name: {cat['name']}")

print(f"\n=== DATA STATISTICS ===")
print(f"Total images: {len(data['images'])}")
print(f"Total annotations: {len(data['annotations'])}")

# Check category ID distribution
from collections import Counter
cat_counts = Counter(ann['category_id'] for ann in data['annotations'])

# id -> name lookup built once; setdefault keeps the first name seen for an id
category_names = {}
for cat in categories:
    category_names.setdefault(cat['id'], cat['name'])

print(f"\nAnnotations per category:")
for cat_id, count in sorted(cat_counts.items()):
    cat_name = category_names.get(cat_id, 'Unknown')
    print(f"  {cat_id}: {cat_name} ({count} annotations)")

# Check for missing images: image ids used by annotations that have no entry
# in data['images']
image_ids = set(img['id'] for img in data['images'])
annotation_image_ids = set(ann['image_id'] for ann in data['annotations'])
missing_images = annotation_image_ids - image_ids
if missing_images:
    # Report both numbers: missing_images holds distinct image ids, which is
    # not the same as the number of annotations pointing at them.
    orphan_annotations = sum(1 for ann in data['annotations'] if ann['image_id'] in missing_images)
    print(f"\nWARNING: {orphan_annotations} annotations reference {len(missing_images)} missing images")
else:
    print(f"\n✓ All annotation image_ids have corresponding images")

=== CATEGORIES ANALYSIS ===
ID: 0, Name: cargo aircraft
ID: 1, Name: commercial aircraft
ID: 2, Name: drone
ID: 3, Name: fighter jet
ID: 4, Name: fighter plane
ID: 5, Name: helicopter
ID: 6, Name: light aircraft
ID: 7, Name: missile
ID: 8, Name: truck
ID: 9, Name: car
ID: 10, Name: tank
ID: 11, Name: bus
ID: 12, Name: van
ID: 13, Name: cargo ship
ID: 14, Name: yacht
ID: 15, Name: cruise ship
ID: 16, Name: warship
ID: 17, Name: sailboat

=== DATA STATISTICS ===
Total images: 20000
Total annotations: 72967

Annotations per category:
  0: cargo aircraft (1189 annotations)
  1: commercial aircraft (6769 annotations)
  2: drone (3865 annotations)
  3: fighter jet (9792 annotations)
  4: fighter plane (4906 annotations)
  5: helicopter (6299 annotations)
  6: light aircraft (3663 annotations)
  7: missile (3200 annotations)
  8: truck (8456 annotations)
  9: car (3395 annotations)
  10: tank (8650 annotations)
  11: bus (4897 annotations)
  12: van (2502 annotations)
  13: cargo ship (1270 a

In [None]:
import json
import os
from sklearn.model_selection import train_test_split

!rm -rf coco_dataset/

def convert_coco_split(input_file, output_dir, train_split=0.8): # Renamed for clarity
    """
    Split a COCO-format annotation file into training and validation sets.

    Images are partitioned with ``train_test_split`` (fixed ``random_state=42``)
    and every annotation follows the image it belongs to. Category IDs are
    preserved as-is (expected to be 0-based).

    Args:
        input_file: Path to the source COCO JSON. It must contain the
            'images', 'annotations' and 'categories' keys; 'info' and
            'licenses' are copied when present and default to empty otherwise.
        output_dir: Directory under which
            ``annotations/instances_train2017.json`` and
            ``annotations/instances_val2017.json`` are written.
        train_split: Forwarded to ``train_test_split`` as ``train_size``.

    Returns:
        dict: Identity mapping ``{category_id: category_id}``, kept for any
        downstream step that still expects a 'category_mapping'.
    """

    with open(input_file, 'r') as f:
        data = json.load(f)

    # Category IDs are deliberately left untouched.
    print("Using original category IDs (expected to be 0-based).")
    category_mapping = {cat['id']: cat['id'] for cat in data['categories']}

    # Split at the image level, then route each annotation to its image's split
    train_images, val_images = train_test_split(
        data['images'], train_size=train_split, random_state=42
    )
    train_image_ids = {img['id'] for img in train_images}
    val_image_ids = {img['id'] for img in val_images}

    train_annotations = [ann for ann in data['annotations'] if ann['image_id'] in train_image_ids]
    val_annotations = [ann for ann in data['annotations'] if ann['image_id'] in val_image_ids]

    # Annotations whose image_id is in neither split end up in no output file;
    # surface that instead of dropping them silently.
    orphan_count = sum(
        1 for ann in data['annotations']
        if ann['image_id'] not in train_image_ids and ann['image_id'] not in val_image_ids
    )
    if orphan_count:
        print(f"WARNING: {orphan_count} annotations reference unknown image ids and were dropped")

    def _make_split(images, annotations):
        # Assemble one COCO dict; tolerate inputs without 'info' / 'licenses'
        return {
            'info': data.get('info', {}),
            'licenses': data.get('licenses', []),
            'categories': data['categories'],  # Categories are used as is
            'images': images,
            'annotations': annotations,  # Annotations are used as is
        }

    train_data = _make_split(train_images, train_annotations)
    val_data = _make_split(val_images, val_annotations)

    # Create output directory structure (makedirs also creates output_dir)
    annotations_dir = os.path.join(output_dir, "annotations")
    os.makedirs(annotations_dir, exist_ok=True)

    # Save datasets
    for file_name, split_data in (
        ("instances_train2017.json", train_data),
        ("instances_val2017.json", val_data),
    ):
        with open(os.path.join(annotations_dir, file_name), 'w') as f:
            json.dump(split_data, f)

    print(f"✓ Train split: {len(train_images)} images, {len(train_annotations)} annotations")
    print(f"✓ Val split: {len(val_images)} images, {len(val_annotations)} annotations")
    print(f"✓ Saved to {output_dir}/annotations/")

    return category_mapping

# Convert your data using the modified function
category_mapping = convert_coco_split(BASE_PATH / 'annotations.json', 'coco_dataset')
print(f"\nCategory ID mapping (no change applied): {category_mapping}")

Using original category IDs (expected to be 0-based).
✓ Train split: 16000 images, 58057 annotations
✓ Val split: 4000 images, 14910 annotations
✓ Saved to coco_dataset/annotations/

Category ID mapping (no change applied): {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17}


In [None]:
import concurrent.futures
import threading
import os
import json
from tqdm.auto import tqdm
import shutil
from pathlib import Path

def copy_file(src_dst):
    """Copy one file described by a ``(source, destination)`` pair.

    Takes a single tuple argument so it can be used directly with
    ``executor.map``. The destination's parent directory is created when
    missing, the copy is done with ``shutil.copy2``, and the destination
    path is returned.
    """
    source, destination = src_dst
    # Make sure the target folder is there before copying into it
    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, destination)
    return destination

def setup_tao_directory_with_split(base_path=None, annotations_dir='coco_dataset/annotations'):
    """Copy images into train/val folders according to the split annotation files.

    Reads ``instances_train2017.json`` and ``instances_val2017.json`` from
    `annotations_dir`, then copies every ``.jpg`` found in
    ``<base_path>/images`` whose name appears in one of them into
    ``coco_dataset/train2017`` or ``coco_dataset/val2017`` using a thread pool.

    Args:
        base_path: Dataset root containing an ``images`` folder. Falls back to
            the module-level ``BASE_PATH`` when None.
        annotations_dir: Folder holding the two split annotation files.

    Returns:
        bool: True when the copy ran; False when the dataset root, the images
        folder, or the annotation files are unavailable.
    """

    if base_path is None:
        base_path = BASE_PATH
        if base_path is None:
            return False

    base_path = Path(base_path)
    images_dir = base_path / 'images'

    if not images_dir.exists():
        print(f"Images directory not found at {images_dir}")
        return False

    # Create directories
    train_dir = Path('coco_dataset/train2017')
    val_dir = Path('coco_dataset/val2017')
    train_dir.mkdir(parents=True, exist_ok=True)
    val_dir.mkdir(parents=True, exist_ok=True)

    # Read the annotation files to get the correct image splits
    train_annotations_file = f"{annotations_dir}/instances_train2017.json"
    val_annotations_file = f"{annotations_dir}/instances_val2017.json"

    if not (os.path.exists(train_annotations_file) and os.path.exists(val_annotations_file)):
        # These files are produced by convert_coco_split in this notebook
        print("Error: Annotation files not found. Run convert_coco_split first.")
        return False

    # Load annotation files to get image lists
    with open(train_annotations_file, 'r') as f:
        train_data = json.load(f)

    with open(val_annotations_file, 'r') as f:
        val_data = json.load(f)

    # Extract image filenames from annotations
    train_images = {img['file_name'] for img in train_data['images']}
    val_images = {img['file_name'] for img in val_data['images']}

    print(f"Train images: {len(train_images)}")
    print(f"Val images: {len(val_images)}")

    # Get all image files
    all_image_files = [f for f in os.listdir(images_dir) if f.endswith('.jpg')]
    print(f"Found {len(all_image_files)} total image files")

    # An annotated image with no matching .jpg on disk produces no copy task;
    # report it so the split folders are not silently incomplete.
    missing_files = (train_images | val_images) - set(all_image_files)
    if missing_files:
        print(f"WARNING: {len(missing_files)} annotated images have no matching .jpg in {images_dir} and will not be copied")

    # Create copy tasks for train and val separately (train wins if a name is in both)
    train_tasks = []
    val_tasks = []

    for img_file in all_image_files:
        src_path = images_dir / img_file
        if img_file in train_images:
            train_tasks.append((src_path, train_dir / img_file))
        elif img_file in val_images:
            val_tasks.append((src_path, val_dir / img_file))

    all_tasks = train_tasks + val_tasks
    print(f"Copying {len(train_tasks)} train images and {len(val_tasks)} val images...")

    # Use ThreadPoolExecutor for I/O-bound tasks
    max_workers = min(32, (os.cpu_count() or 1) + 4)

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        # list() drains the iterator so every copy finishes (and any exception
        # is raised) before the pool is closed
        list(tqdm(
            executor.map(copy_file, all_tasks),
            total=len(all_tasks),
            desc='Copying images to train/val splits',
            unit='files'
        ))

    print(f"✓ Copied {len(train_tasks)} images to train2017/")
    print(f"✓ Copied {len(val_tasks)} images to val2017/")

    return True

# Run the corrected setup
setup_tao_directory_with_split(BASE_PATH)

Train images: 16000
Val images: 4000
Found 20000 total image files
Copying 16000 train images and 4000 val images...


Copying images to train/val splits:   0%|          | 0/20000 [00:00<?, ?files/s]

✓ Copied 16000 images to train2017/
✓ Copied 4000 images to val2017/


True

In [None]:
!pip install -U gdown
#!gdown --id 1CrzFP0RycSC24KKmF5k0libLRJgpX9x0 --output checkpoint0029_4scale_swin.pth
!gdown --id 14h4UCi-HsDL01ZQRbpV47dzMST_py_vM --output checkpoint0029_5scale_swin.pth
#https://drive.google.com/file/d/14h4UCi-HsDL01ZQRbpV47dzMST_py_vM/view?usp=drive_link
#https://drive.google.com/file/d/1CrzFP0RycSC24KKmF5k0libLRJgpX9x0/view?usp=drive_link
#https://drive.google.com/file/d/14h4UCi-HsDL01ZQRbpV47dzMST_py_vM/view?usp=drive_link

In [None]:
!mkdir configs

# Swin 5 scale

In [None]:
config_content = """data_aug_scales = [400, 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800, 832]
data_aug_max_size = 1333
data_aug_scales2_resize = [400, 500, 600]
data_aug_scales2_crop = [384, 600]

data_aug_scale_overlap = None

num_classes=18
dn_labelbook_size=19

lr = 0.0001
param_dict_type = 'default'
lr_backbone = 1e-05
lr_backbone_names = ['backbone.0']
lr_linear_proj_names = ['reference_points', 'sampling_offsets']
lr_linear_proj_mult = 0.1
ddetr_lr_param = False
batch_size = 2                    # Reduced for 5-scale
weight_decay = 0.0001
epochs = 12
lr_drop = 11
save_checkpoint_interval = 1
clip_max_norm = 0.1
onecyclelr = False
multi_step_lr = False
lr_drop_list = [33, 45]

modelname = 'dino'
frozen_weights = None
backbone = 'swin_L_384_22k'
use_checkpoint = True

dilation = False
position_embedding = 'sine'
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [0, 1, 2, 3]    # 5-scale indices
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
unic_layers = 0
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
pdetr3_bbox_embed_diff_each_layer = False
pdetr3_refHW = -1
random_refpoints_xy = False
fix_refpoints_hw = -1
dabdetr_yolo_like_anchor_update = False
dabdetr_deformable_encoder = False
dabdetr_deformable_decoder = False
use_deformable_box_attn = False
box_attn_type = 'roi_align'
dec_layer_number = None
num_feature_levels = 5              # Key change: 5 scales
enc_n_points = 4
dec_n_points = 4
decoder_layer_noise = False
dln_xy_noise = 0.2
dln_hw_noise = 0.2
add_channel_attention = False
add_pos_value = False
two_stage_type = 'standard'
two_stage_pat_embed = 0
two_stage_add_query_num = 0
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
two_stage_learn_wh = False
two_stage_default_hw = 0.05
two_stage_keep_all_tokens = False
num_select = 300
transformer_activation = 'relu'
batch_norm_type = 'FrozenBatchNorm2d'
masks = False
aux_loss = True
set_cost_class = 2.0
set_cost_bbox = 5.0
set_cost_giou = 2.0
cls_loss_coef = 1.0
mask_loss_coef = 1.0
dice_loss_coef = 1.0
bbox_loss_coef = 5.0
giou_loss_coef = 2.0
enc_loss_coef = 1.0
interm_loss_coef = 1.0
no_interm_box_loss = False
focal_alpha = 0.5                   # Improved for minority classes

decoder_sa_type = 'sa'
matcher_type = 'HungarianMatcher'
decoder_module_seq = ['sa', 'ca', 'ffn']
nms_iou_threshold = -1

dec_pred_bbox_embed_share = True
dec_pred_class_embed_share = True

# for dn
use_dn = True
dn_number = 100
dn_box_noise_scale = 0.4
dn_label_noise_ratio = 0.5
embed_init_tgt = True

match_unstable_error = True

# for ema
use_ema = False
ema_decay = 0.9997
ema_epoch = 0

use_detached_boxes_dec_out = False

data = dict(
    train=dict(
        ann_file='/content/coco_dataset/annotations/instances_train2017.json',
        img_prefix='/content/coco_dataset/train2017/',
    ),
    val=dict(
        ann_file='/content/coco_dataset/annotations/instances_val2017.json',
        img_prefix='/content/coco_dataset/val2017/',
    ),
)

optimizer = dict(
    type='AdamW',
    lr=1e-4,               # Slightly reduced for smaller batch
    weight_decay=1e-4,
    eps=1e-8,
    betas=(0.9, 0.999)
)

optimizer_config = dict(
    grad_clip=dict(max_norm=0.1, norm_type=2),
    accumulate_grad_batches=4      # Increased to compensate for smaller batch
)

lr_config = dict(
    policy='step',
    step=[8, 11],
    gamma=0.1,
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=0.0001
)

runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=2, max_keep_ckpts=3)
evaluation = dict(interval=2, metric='bbox', save_best='auto')

fp16 = dict(loss_scale='dynamic')
"""

# Write the 5-scale config to disk (surrounding whitespace stripped).
# NOTE(review): the "Swin 4 scale" cell further down writes to this same path,
# so whichever of the two config cells ran last decides what is on disk —
# confirm which config the training run is meant to use.
with open("configs/dino_swinl_ultra.py", "w") as f:
    f.write(config_content.strip())

# Swin 4 scale

In [None]:
# model = dict(
#     num_classes=18,
#     dn_labelbook_size=19,
#     num_feature_levels=5,
#     # Increase model capacity
#     # hidden_dim=512,         # Increase from default 256
#     # nheads=16,             # Increase attention heads
#     # num_queries=1200,      # More queries for better detection
# )

import os

config_content = """data_aug_scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
data_aug_max_size = 1333
data_aug_scales2_resize = [400, 500, 600]
data_aug_scales2_crop = [384, 600]

data_aug_scale_overlap = None

num_classes=18
dn_labelbook_size=19

lr = 0.0001
param_dict_type = 'default'
lr_backbone = 1e-05
lr_backbone_names = ['backbone.0']
lr_linear_proj_names = ['reference_points', 'sampling_offsets']
lr_linear_proj_mult = 0.1
ddetr_lr_param = False
batch_size = 8
weight_decay = 0.0001
epochs = 12
lr_drop = 11
save_checkpoint_interval = 1
clip_max_norm = 0.1
onecyclelr = False
multi_step_lr = False
lr_drop_list = [33, 45]


modelname = 'dino'
frozen_weights = None
backbone = 'swin_L_384_22k'
use_checkpoint = True

dilation = False
position_embedding = 'sine'
pe_temperatureH = 20
pe_temperatureW = 20
return_interm_indices = [1, 2, 3]
backbone_freeze_keywords = None
enc_layers = 6
dec_layers = 6
unic_layers = 0
pre_norm = False
dim_feedforward = 2048
hidden_dim = 256
dropout = 0.0
nheads = 8
num_queries = 900
query_dim = 4
num_patterns = 0
pdetr3_bbox_embed_diff_each_layer = False
pdetr3_refHW = -1
random_refpoints_xy = False
fix_refpoints_hw = -1
dabdetr_yolo_like_anchor_update = False
dabdetr_deformable_encoder = False
dabdetr_deformable_decoder = False
use_deformable_box_attn = False
box_attn_type = 'roi_align'
dec_layer_number = None
num_feature_levels = 4
enc_n_points = 4
dec_n_points = 4
decoder_layer_noise = False
dln_xy_noise = 0.2
dln_hw_noise = 0.2
add_channel_attention = False
add_pos_value = False
two_stage_type = 'standard'
two_stage_pat_embed = 0
two_stage_add_query_num = 0
two_stage_bbox_embed_share = False
two_stage_class_embed_share = False
two_stage_learn_wh = False
two_stage_default_hw = 0.05
two_stage_keep_all_tokens = False
num_select = 300
transformer_activation = 'relu'
batch_norm_type = 'FrozenBatchNorm2d'
masks = False
aux_loss = True
set_cost_class = 2.0
set_cost_bbox = 5.0
set_cost_giou = 2.0
cls_loss_coef = 1.0
mask_loss_coef = 1.0
dice_loss_coef = 1.0
bbox_loss_coef = 5.0
giou_loss_coef = 2.0
enc_loss_coef = 1.0
interm_loss_coef = 1.0
no_interm_box_loss = False
focal_alpha = 0.25

decoder_sa_type = 'sa' # ['sa', 'ca_label', 'ca_content']
matcher_type = 'HungarianMatcher' # or SimpleMinsumMatcher
decoder_module_seq = ['sa', 'ca', 'ffn']
nms_iou_threshold = -1

dec_pred_bbox_embed_share = True
dec_pred_class_embed_share = True

# for dn
use_dn = True
dn_number = 100
dn_box_noise_scale = 0.4
dn_label_noise_ratio = 0.5
embed_init_tgt = True

match_unstable_error = True

# for ema
use_ema = False
ema_decay = 0.9997
ema_epoch = 0

use_detached_boxes_dec_out = False

data = dict(
    train=dict(
        ann_file='/content/coco_dataset/annotations/instances_train2017.json',
        img_prefix='/content/coco_dataset/train2017/',
    ),
    val=dict(
        ann_file='/content/coco_dataset/annotations/instances_val2017.json',
        img_prefix='/content/coco_dataset/val2017/',
    ),
)

# Scale learning rate with massive batch size (32/4 = 8x original)
optimizer = dict(
    type='AdamW',
    lr=2e-4,               # Scale up significantly
    weight_decay=1e-4,
    eps=1e-8,
    betas=(0.9, 0.999)
)

optimizer_config = dict(
    grad_clip=dict(max_norm=0.1, norm_type=2),
    accumulate_grad_batches=2
)

lr_config = dict(
    policy='step',
    step=[8, 11],         # Adjust schedule for faster convergence
    gamma=0.1,
    warmup='linear',
    warmup_iters=1000,     # More warmup for high LR
    warmup_ratio=0.0001
)

runner = dict(type='EpochBasedRunner', max_epochs=12)
checkpoint_config = dict(interval=2, max_keep_ckpts=3)
evaluation = dict(interval=2, metric='bbox', save_best='auto')

# Mixed precision for efficiency
fp16 = dict(loss_scale='dynamic')
"""

# Write the 4-scale config to disk (surrounding whitespace stripped).
# NOTE(review): this is the same path the "Swin 5 scale" cell above writes to,
# so running this cell replaces the 5-scale config. The checkpoint downloaded
# earlier in the notebook is saved as checkpoint0029_5scale_swin.pth — confirm
# that a 4-scale config is really the one intended here.
with open("configs/dino_swinl_ultra.py", "w") as f:
    f.write(config_content.strip())

In [None]:
!pip install yapf==0.40.1

[33mDEPRECATION: Loading egg at /usr/local/lib/python3.11/dist-packages/MultiScaleDeformableAttention-1.0-py3.11-linux-x86_64.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330[0m[33m


In [None]:
import torch
from collections import OrderedDict

def inspect_checkpoint_layers(checkpoint_path):
    """
    Load a PyTorch checkpoint and print each state_dict entry with its shape.

    The state_dict is looked up, in order: under the 'model' key, under the
    'state_dict' key, or as the checkpoint itself when it is a non-empty dict
    mapping string keys to tensors.

    Args:
        checkpoint_path: Path of the checkpoint file, passed to ``torch.load``.

    Returns:
        None. All findings are printed; any exception raised while loading or
        inspecting is caught and printed together with its traceback.
    """
    try:
        print(f"Loading checkpoint from: {checkpoint_path}")
        # NOTE: torch.load unpickles the file — only open checkpoints that
        # come from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')

        # Check the container type first: `'model' in checkpoint` is only
        # meaningful for dicts and can raise for other objects (e.g. a tensor).
        if not isinstance(checkpoint, dict):
            print("Checkpoint format not recognized or does not contain model weights in expected keys ('model', 'state_dict').")
            print("Checkpoint type:", type(checkpoint))
            return

        model_state_dict = None

        if 'model' in checkpoint:
            print("Found 'model' key in checkpoint.")
            model_state_dict = checkpoint['model']
        elif 'state_dict' in checkpoint: # Another common key for model weights
            print("Found 'state_dict' key in checkpoint.")
            model_state_dict = checkpoint['state_dict']
        elif checkpoint and all(
            isinstance(k, str) and isinstance(v, torch.Tensor)
            for k, v in checkpoint.items()
        ):
            # Non-empty str -> Tensor mapping: the checkpoint is the state_dict
            print("Checkpoint appears to be a raw state_dict itself.")
            model_state_dict = checkpoint
        else:
            print("Could not automatically determine the model state_dict.")
            print("Available top-level keys in checkpoint:", list(checkpoint.keys()))
            return

        if model_state_dict:
            print(f"\nLayers and shapes found in the model's state_dict:")
            # Pad keys to a common width so the shapes line up in one column
            max_key_len = max(len(key) for key in model_state_dict.keys())

            for key, value in model_state_dict.items():
                # Entries without a .shape (non-tensor values) are listed by
                # type name instead of aborting the whole listing
                if hasattr(value, 'shape'):
                    described = list(value.shape)
                else:
                    described = type(value).__name__
                print(f"{key:<{max_key_len}} : {described}")
            print(f"\nTotal number of parameter tensors in state_dict: {len(model_state_dict.keys())}")
        else:
            print("No model state_dict could be extracted from the checkpoint.")

    except Exception as e:
        print(f"Error loading or inspecting checkpoint: {e}")
        import traceback
        traceback.print_exc()

if __name__ == '__main__':
    # Checkpoint to inspect — edit this path to point at your own file,
    # e.g. 'path/to/your/checkpoint.pth'
    checkpoint_file = '/content/checkpoint0029_5scale_swin.pth'

    # An empty path means nothing was configured: remind the user and skip
    if not checkpoint_file:
        print("Please set the 'checkpoint_file' variable in the script to your checkpoint's path.")
    else:
        inspect_checkpoint_layers(checkpoint_file)

In [None]:
# Copyright (c) 2022 IDEA. All Rights Reserved.
# ------------------------------------------------------------------------
main_2 = """import argparse
import datetime
import json
import random
import time
from pathlib import Path
import os, sys
import numpy as np
import torch
from torch.utils.data import DataLoader, DistributedSampler

from util.get_param_dicts import get_param_dict
from util.logger import setup_logger
from util.slconfig import DictAction, SLConfig
from util.utils import ModelEma, BestMetricHolder
import util.misc as utils
import datasets
from datasets import build_dataset, get_coco_api_from_dataset
from engine import evaluate, train_one_epoch, test


def get_args_parser():
    parser = argparse.ArgumentParser('Set transformer detector', add_help=False)
    parser.add_argument('--config_file', '-c', type=str, required=True)
    parser.add_argument('--options',
                        nargs='+',
                        action=DictAction,
                        help='override some settings in the used config, the key-value pair '
                             'in xxx=yyy format will be merged into config file.')

    # dataset parameters
    parser.add_argument('--dataset_file', default='coco')
    parser.add_argument('--coco_path', type=str, default='/comp_robot/cv_public_dataset/COCO2017/')
    parser.add_argument('--coco_panoptic_path', type=str)
    parser.add_argument('--remove_difficult', action='store_true')
    parser.add_argument('--fix_size', action='store_true')

    # training parameters
    parser.add_argument('--output_dir', default='',
                        help='path where to save, empty for no saving')
    parser.add_argument('--note', default='',
                        help='add some notes to the experiment')
    parser.add_argument('--device', default='cuda',
                        help='device to use for training / testing')
    parser.add_argument('--seed', default=42, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--pretrain_model_path', help='load from other checkpoint')
    parser.add_argument('--finetune_ignore', type=str, nargs='+')
    parser.add_argument('--start_epoch', default=0, type=int, metavar='N',
                        help='start epoch')
    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=10, type=int)
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--find_unused_params', action='store_true')
    parser.add_argument('--save_results', action='store_true')
    parser.add_argument('--save_log', action='store_true')

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
    parser.add_argument('--rank', default=0, type=int,
                        help='number of distributed processes')
    parser.add_argument("--local_rank", type=int, help='local rank for DistributedDataParallel')
    parser.add_argument('--amp', action='store_true',
                        help="Train with mixed precision")

    return parser


def build_model_main(args):
    # we use register to maintain models from catdet6 on.
    from models.registry import MODULE_BUILD_FUNCS
    assert args.modelname in MODULE_BUILD_FUNCS._module_dict, f"Model {args.modelname} not found in registry."
    build_func = MODULE_BUILD_FUNCS.get(args.modelname)
    model, criterion, postprocessors = build_func(args)
    return model, criterion, postprocessors


def main(args):
    '''
    Train or evaluate a detector as described by ``args``.

    Steps, in order: initialise distributed mode, merge the config file into
    ``args`` (values already present on ``args`` win), build the model,
    optimizer, datasets and LR scheduler, optionally resume from a checkpoint
    and/or load pretrained weights, then either run a single evaluation
    (``args.eval``) or the epoch loop with checkpointing, evaluation and
    logging to ``<output_dir>/log.txt``.

    :param args: argparse namespace from ``get_args_parser`` (mutated in place
                 with the keys of the config file).
    :return: None
    '''
    utils.init_distributed_mode(args)
    # load cfg file and update the args
    print("Loading config file from {}".format(args.config_file))
    time.sleep(args.rank * 0.02)
    cfg = SLConfig.fromfile(args.config_file)
    if args.options is not None:
        cfg.merge_from_dict(args.options)

    # Create the output directory before the first file is written into it, so
    # main() also works when it is not entered through the __main__ guard.
    os.makedirs(args.output_dir, exist_ok=True)

    if args.rank == 0:
        save_cfg_path = os.path.join(args.output_dir, "config_cfg.py")
        cfg.dump(save_cfg_path)
        save_json_path = os.path.join(args.output_dir, "config_args_raw.json")
        with open(save_json_path, 'w') as f:
            json.dump(vars(args), f, indent=2)

    cfg_dict = cfg._cfg_dict.to_dict()
    args_vars = vars(args)
    for k, v in cfg_dict.items():
        if k not in args_vars:
            setattr(args, k, v)
        elif getattr(args, k) != v and args.rank == 0:
            # Upstream raised ValueError("Key {} can used by args only") here.
            # This version keeps the value already on args and only reports
            # the conflict, once, on the main process.
            print(f"INFO: Argument '{k}' from command line ({getattr(args, k)}) is being kept, value from config file ({v}) is ignored.")

    # update some new args temporally
    if not getattr(args, 'use_ema', None):
        args.use_ema = False
    if not getattr(args, 'debug', None):
        args.debug = False

    # setup logger
    logger = setup_logger(output=os.path.join(args.output_dir, 'info.txt'), distributed_rank=args.rank, color=False,
                          name="DINO") # Changed name for clarity
    logger.info("git:\\n  {}\\n".format(utils.get_sha()))
    logger.info("Command: " + ' '.join(sys.argv))
    if args.rank == 0:
        save_json_path = os.path.join(args.output_dir, "config_args_all.json")
        with open(save_json_path, 'w') as f:
            json.dump(vars(args), f, indent=2)
        logger.info("Full config saved to {}".format(save_json_path))

    logger.info('world size: {}'.format(args.world_size))
    logger.info('rank: {}'.format(args.rank))
    logger.info('local_rank: {}'.format(args.local_rank))
    logger.info("args: " + str(args) + '\\n')

    if args.frozen_weights is not None:
        assert args.masks, "Frozen training is meant for segmentation only"

    print(args) # This is already printing args, good for quick check

    device = torch.device(args.device)

    # fix the seed for reproducibility (offset by rank so processes differ)
    seed = args.seed + utils.get_rank()
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # --- DEBUG PRINTS FOR MODEL BUILDING ---
    logger.info(f"DEBUG main.py: Args before build_model_main: num_classes={args.num_classes}, "
                f"dn_labelbook_size={args.dn_labelbook_size}, use_dn={getattr(args, 'use_dn', 'Not SetInArgs')}, "
                f"dn_number={getattr(args, 'dn_number', 'Not SetInArgs')}")
    # --- END DEBUG PRINTS ---

    model, criterion, postprocessors = build_model_main(args)
    # Forwarded unchanged to train_one_epoch / evaluate.
    wo_class_error = False
    model.to(device)

    # ema
    if args.use_ema:
        ema_m = ModelEma(model, args.ema_decay)
    else:
        ema_m = None

    model_without_ddp = model
    if args.distributed:
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu],
                                                          find_unused_parameters=args.find_unused_params)
        model_without_ddp = model.module

    # --- DEBUG PRINTS FOR MODEL AND CRITERION ---
    logger.info("DEBUG main.py: Model and Criterion built.")
    if hasattr(criterion, 'num_classes'):
        logger.info(f"DEBUG main.py: Criterion num_classes: {criterion.num_classes}")
    else:
        logger.info("DEBUG main.py: Criterion does not have a direct 'num_classes' attribute to inspect here.")

    # Inspecting model's class-specific layers
    m_to_inspect = model_without_ddp
    if hasattr(m_to_inspect, 'class_embed') and m_to_inspect.class_embed is not None:
        if hasattr(m_to_inspect.class_embed, 'weight'): # Single class_embed layer
             logger.info(f"DEBUG main.py: Model class_embed weight shape: {m_to_inspect.class_embed.weight.shape}")
        elif isinstance(m_to_inspect.class_embed, torch.nn.ModuleList) and len(m_to_inspect.class_embed) > 0: # DINO uses ModuleList
            if hasattr(m_to_inspect.class_embed[-1], 'out_proj') and hasattr(m_to_inspect.class_embed[-1].out_proj, 'weight'): # Common DINO structure
                 logger.info(f"DEBUG main.py: Model final class_embed layer (out_proj) weight shape: {m_to_inspect.class_embed[-1].out_proj.weight.shape}")
            elif hasattr(m_to_inspect.class_embed[-1], 'weight'):
                 logger.info(f"DEBUG main.py: Model final class_embed layer weight shape: {m_to_inspect.class_embed[-1].weight.shape}")
            else:
                 logger.info(f"DEBUG main.py: Model final class_embed layer structure not immediately parsable for weight shape.")
        else:
            logger.info(f"DEBUG main.py: Model class_embed found but structure not immediately parsable for weight shape.")

    if hasattr(m_to_inspect, 'label_enc') and m_to_inspect.label_enc is not None and hasattr(m_to_inspect.label_enc, 'weight'): # For DN label encoder
        logger.info(f"DEBUG main.py: Model label_enc weight shape: {m_to_inspect.label_enc.weight.shape}")
    else:
        logger.info(f"DEBUG main.py: Model label_enc (for DN) not found or has no weight attribute.")
    # --- END DEBUG PRINTS ---


    n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
    logger.info('number of params:' + str(n_parameters))
    # logger.info("params:\\n"+json.dumps({n: p.numel() for n, p in model.named_parameters() if p.requires_grad}, indent=2)) # Can be very verbose

    param_dicts = get_param_dict(args, model_without_ddp)
    optimizer = torch.optim.AdamW(param_dicts, lr=args.lr,
                                  weight_decay=args.weight_decay)

    dataset_train = build_dataset(image_set='train', args=args)
    dataset_val = build_dataset(image_set='val', args=args)

    if args.distributed:
        sampler_train = DistributedSampler(dataset_train)
        sampler_val = DistributedSampler(dataset_val, shuffle=False)
    else:
        sampler_train = torch.utils.data.RandomSampler(dataset_train)
        sampler_val = torch.utils.data.SequentialSampler(dataset_val)

    batch_sampler_train = torch.utils.data.BatchSampler(
        sampler_train, args.batch_size, drop_last=True)

    data_loader_train = DataLoader(dataset_train, batch_sampler=batch_sampler_train,
                                   collate_fn=utils.collate_fn, num_workers=args.num_workers, pin_memory=True) # Added pin_memory
    data_loader_val = DataLoader(dataset_val, 1, sampler=sampler_val, # Val batch_size often 1
                                 drop_last=False, collate_fn=utils.collate_fn, num_workers=args.num_workers, pin_memory=True) # Added pin_memory


    if args.onecyclelr:
        lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=args.lr, steps_per_epoch=len(data_loader_train), epochs=args.epochs, pct_start=0.2)
    elif args.multi_step_lr:
        lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=args.lr_drop_list)
    else: # Default step LR
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.lr_drop if hasattr(args, 'lr_drop') else args.epochs // 2 ) # Safer default for lr_drop
        # Ensure lr_drop is less than total epochs
        if hasattr(args, 'lr_drop') and args.lr_drop >= args.epochs and args.rank == 0:
            logger.warning(f"lr_drop ({args.lr_drop}) is >= total epochs ({args.epochs}). LR will not drop with StepLR.")


    if args.dataset_file == "coco_panoptic":
        # We also evaluate AP during panoptic training, on original coco DS
        coco_val = datasets.coco.build("val", args)
        base_ds = get_coco_api_from_dataset(coco_val)
    else:
        base_ds = get_coco_api_from_dataset(dataset_val)

    if args.frozen_weights is not None:
        checkpoint = torch.load(args.frozen_weights, map_location='cpu')
        model_without_ddp.detr.load_state_dict(checkpoint['model'], strict=False)

    output_dir = Path(args.output_dir)
    if os.path.exists(os.path.join(args.output_dir, 'checkpoint.pth')): # Resume from output_dir's checkpoint if it exists
        args.resume = os.path.join(args.output_dir, 'checkpoint.pth')
        logger.info(f"Found checkpoint.pth in output_dir, setting resume to: {args.resume}")


    if args.resume:
        if args.resume.startswith('https'):
            checkpoint = torch.hub.load_state_dict_from_url(
                args.resume, map_location='cpu', check_hash=True)
        else:
            checkpoint = torch.load(args.resume, map_location='cpu')
        model_without_ddp.load_state_dict(checkpoint['model'], strict=False)
        if args.use_ema:
            if 'ema_model' in checkpoint:
                logger.info("Loading EMA model from resume checkpoint")
                ema_m.module.load_state_dict(utils.clean_state_dict(checkpoint['ema_model']))
            else:
                logger.warning("No EMA model in resume checkpoint, reinitializing EMA.")
                del ema_m
                ema_m = ModelEma(model, args.ema_decay) # Reinitialize if not in checkpoint

        # Optimizer / scheduler / epoch are only restored when continuing training.
        if not args.eval and 'optimizer' in checkpoint and 'lr_scheduler' in checkpoint and 'epoch' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])
            lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
            args.start_epoch = checkpoint['epoch'] + 1
            logger.info(f"Resuming training from epoch {args.start_epoch}")


    # NOTE(review): with --eval this branch also runs after a resume, so the
    # pretrain weights are loaded on top of the resumed ones - confirm that
    # this override is intended.
    if (not args.resume or args.eval) and args.pretrain_model_path: # Load pretrain if not resuming for training or if evaluating
        logger.info(f"Loading pretrain model from {args.pretrain_model_path}")
        checkpoint = torch.load(args.pretrain_model_path, map_location='cpu')

        # Handle cases where checkpoint might be just the model state_dict or a dict containing 'model'
        if 'model' in checkpoint:
            model_state_dict = checkpoint['model']
        elif 'state_dict' in checkpoint: # Another common key
            model_state_dict = checkpoint['state_dict']
        else: # Assume the checkpoint itself is the state_dict
            model_state_dict = checkpoint
            logger.info("Pretrain checkpoint does not have 'model' or 'state_dict' key, assuming it's the state_dict itself.")

        from collections import OrderedDict
        _ignorekeywordlist = args.finetune_ignore if args.finetune_ignore else []
        ignorelist = [] # Stores actually ignored keys

        def check_keep(keyname, ignorekeywordlist):
            # A key is dropped as soon as any ignore keyword is a substring of it.
            for keyword in ignorekeywordlist:
                if keyword in keyname:
                    ignorelist.append(keyname) # Log the key that was ignored
                    return False
            return True

        logger.info(f"Attempting to load weights, ignoring keywords: {_ignorekeywordlist}")

        # Clean state dict (e.g., remove 'module.' prefix)
        cleaned_state_dict = utils.clean_state_dict(model_state_dict)

        # Filter state dict
        _tmp_st = OrderedDict({k: v for k, v in cleaned_state_dict.items() if check_keep(k, _ignorekeywordlist)})

        #############################################################################################################
        # Resize the query embedding from the checkpoint to the number of
        # queries the current run uses, keeping as many pretrained rows as fit.
        if 'transformer.tgt_embed.weight' in _tmp_st:
            old_embed = _tmp_st['transformer.tgt_embed.weight']

            # Try multiple ways to get num_queries dynamically; each branch
            # logs the source that was actually used.
            current_num_queries = None

            # Method 1: From global variable (if available in main.py)
            if 'num_queries' in globals():
                current_num_queries = num_queries
                logger.info(f"num_queries taken from module-level global variable: {current_num_queries}")

            # Method 2: From model's current state
            elif hasattr(model_without_ddp, 'transformer') and hasattr(model_without_ddp.transformer, 'tgt_embed'):
                current_num_queries = model_without_ddp.transformer.tgt_embed.weight.shape[0]
                logger.info(f"num_queries taken from the model's tgt_embed: {current_num_queries}")

            # Method 3: From args
            elif hasattr(args, 'num_queries'):
                current_num_queries = args.num_queries
                logger.info(f"num_queries taken from args: {current_num_queries}")

            # Fallback: Use old size (no change)
            else:
                current_num_queries = old_embed.shape[0]
                logger.warning(f"Could not determine num_queries, using checkpoint size: {current_num_queries}")

            embed_dim = old_embed.shape[1]
            # new_zeros keeps the dtype and device of the checkpoint tensor.
            new_embed = old_embed.new_zeros((current_num_queries, embed_dim))

            # Keep original embeddings and initialize new ones
            min_queries = min(old_embed.shape[0], current_num_queries)
            new_embed[:min_queries] = old_embed[:min_queries]

            if current_num_queries > old_embed.shape[0]:
                additional_queries = current_num_queries - old_embed.shape[0]
                new_embed[old_embed.shape[0]:] = torch.randn(additional_queries, embed_dim) * 0.02
                logger.info(f"Dynamically resized: Preserved {old_embed.shape[0]} embeddings, initialized {additional_queries} new ones")

            _tmp_st['transformer.tgt_embed.weight'] = new_embed
        #############################################################################################################

        logger.info("Keys ignored due to finetune_ignore: {}".format(json.dumps(sorted(list(set(ignorelist))), indent=2)))

        # Log missing keys in the model that are not in the filtered checkpoint
        model_keys = set(model_without_ddp.state_dict().keys())
        checkpoint_keys = set(_tmp_st.keys())
        missing_in_checkpoint = list(model_keys - checkpoint_keys)
        unexpected_in_checkpoint = list(checkpoint_keys - model_keys)

        logger.info(f"Keys in model but not in (filtered) checkpoint (will be randomly initialized if not ignored): {json.dumps(sorted(missing_in_checkpoint), indent=2)}")
        logger.info(f"Keys in (filtered) checkpoint but not in model (will be ignored by load_state_dict): {json.dumps(sorted(unexpected_in_checkpoint), indent=2)}")

        _load_output = model_without_ddp.load_state_dict(_tmp_st, strict=False)
        logger.info(f"load_state_dict output (missing_keys, unexpected_keys): {str(_load_output)}")

        # For EMA, if pretraining and EMA is used, should ideally also load EMA weights if available,
        # but typically pretrain_model_path refers to non-EMA weights.
        # Re-initializing EMA here is safer if not resuming EMA state.
        if args.use_ema and not args.resume : # If not resuming an EMA state
            logger.info("Re-initializing EMA after loading pretrain_model_path as EMA state was not loaded from pretrain.")
            if ema_m is not None:
                del ema_m
            ema_m = ModelEma(model, args.ema_decay)


    if args.eval:
        os.environ['EVAL_FLAG'] = 'TRUE'
        test_stats, coco_evaluator = evaluate(model, criterion, postprocessors,
                                              data_loader_val, base_ds, device, args.output_dir,
                                              wo_class_error=wo_class_error, args=args, logger=logger) # Pass logger
        if args.output_dir and coco_evaluator is not None and hasattr(coco_evaluator, 'coco_eval') and "bbox" in coco_evaluator.coco_eval:
            utils.save_on_master(coco_evaluator.coco_eval["bbox"].eval, output_dir / "eval.pth")

        log_stats = {**{f'test_{k}': v for k, v in test_stats.items()}}
        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\\n")
        return

    print("Start training")
    start_time = time.time()
    best_map_holder = BestMetricHolder(use_ema=args.use_ema)

    # --- DEBUG NOTE ---
    logger.info("DEBUG main.py: For detailed DN loss debugging (why they might be zero), "
                "consider adding print statements inside 'engine.py:train_one_epoch' to inspect: "
                "1. The 'targets' list/dict for DN components before passing to criterion. "
                "2. The 'outputs' from the model, especially parts relevant to DN. "
                "3. The individual DN loss values calculated inside the criterion's forward method.")
    # --- END DEBUG NOTE ---

    for epoch in range(args.start_epoch, args.epochs):
        epoch_start_time = time.time()
        if args.distributed:
            sampler_train.set_epoch(epoch)

        train_stats = train_one_epoch(
            model, criterion, data_loader_train, optimizer, device, epoch,
            args.clip_max_norm, wo_class_error=wo_class_error, lr_scheduler=lr_scheduler, args=args,
            logger=(logger if args.save_log else None), ema_m=ema_m)

        if args.output_dir:
            checkpoint_paths = [output_dir / 'checkpoint.pth'] # Always save the latest
            # extra checkpoint before LR drop and every N epochs
            if hasattr(args, 'lr_drop') and (epoch + 1) % args.lr_drop == 0 :
                 checkpoint_paths.append(output_dir / f'checkpoint_lr_drop_epoch{epoch:04}.pth')
            if hasattr(args, 'save_checkpoint_interval') and (epoch + 1) % args.save_checkpoint_interval == 0:
                checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')

            # De-duplicate paths if multiple conditions met
            checkpoint_paths = sorted(list(set(checkpoint_paths)))

            for checkpoint_path in checkpoint_paths:
                weights = {
                    'model': model_without_ddp.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch,
                    'args': args, # Save args for reproducibility
                }
                if args.use_ema:
                    weights.update({
                        'ema_model': ema_m.module.state_dict(),
                    })
                utils.save_on_master(weights, checkpoint_path)

        if not args.onecyclelr: # StepLR and MultiStepLR are stepped per epoch
            lr_scheduler.step()

        test_stats, coco_evaluator = evaluate(
            model, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir,
            wo_class_error=wo_class_error, args=args, logger=(logger if args.save_log else None)
        )

        # First entry of the bbox stats is used as the model-selection metric.
        map_regular = test_stats['coco_eval_bbox'][0] if 'coco_eval_bbox' in test_stats and test_stats['coco_eval_bbox'] else 0.0
        _isbest = best_map_holder.update(map_regular, epoch, is_ema=False)
        if _isbest:
            checkpoint_path = output_dir / 'checkpoint_best_regular.pth'
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                # also save optimizer and scheduler?
                'epoch': epoch,
                'args': args,
            }, checkpoint_path)


        log_stats = {
            **{f'train_{k}': v for k, v in train_stats.items()},
            **{f'test_{k}': v for k, v in test_stats.items()},
        }

        if args.use_ema:
            ema_test_stats, ema_coco_evaluator = evaluate(
                ema_m.module, criterion, postprocessors, data_loader_val, base_ds, device, args.output_dir,
                wo_class_error=wo_class_error, args=args, logger=(logger if args.save_log else None)
            )
            log_stats.update({f'ema_test_{k}': v for k, v in ema_test_stats.items()})
            map_ema = ema_test_stats['coco_eval_bbox'][0] if 'coco_eval_bbox' in ema_test_stats and ema_test_stats['coco_eval_bbox'] else 0.0
            _isbest_ema = best_map_holder.update(map_ema, epoch, is_ema=True)
            if _isbest_ema:
                checkpoint_path = output_dir / 'checkpoint_best_ema.pth'
                utils.save_on_master({
                    'model': ema_m.module.state_dict(),
                    'epoch': epoch,
                    'args': args,
                }, checkpoint_path)

        log_stats.update(best_map_holder.summary())

        ep_paras = {
            'epoch': epoch,
            'n_parameters': n_parameters
        }
        log_stats.update(ep_paras)
        # Taking a timestamp cannot sensibly fail, so no blanket except here.
        log_stats['now_time'] = str(datetime.datetime.now())

        epoch_time = time.time() - epoch_start_time
        epoch_time_str = str(datetime.timedelta(seconds=int(epoch_time)))
        log_stats['epoch_time'] = epoch_time_str

        if args.output_dir and utils.is_main_process():
            with (output_dir / "log.txt").open("a") as f:
                f.write(json.dumps(log_stats) + "\\n")

            if coco_evaluator is not None:
                eval_dir = output_dir / 'eval'
                eval_dir.mkdir(exist_ok=True)
                if "bbox" in coco_evaluator.coco_eval:
                    filenames = [eval_dir / 'latest.pth']
                    if epoch % 50 == 0: # Save historical eval results less frequently
                        filenames.append(eval_dir / f'{epoch:03}.pth')
                    for name in filenames:
                        torch.save(coco_evaluator.coco_eval["bbox"].eval, name)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print('Training time {}'.format(total_time_str))

    # remove the copied files.
    copyfilelist = vars(args).get('copyfilelist')
    if copyfilelist and args.local_rank == 0 :
        # Upstream data_util exposes a per-path `remove` helper; the previously
        # imported `remove_list_of_files` was only assumed to exist.
        from datasets.data_util import remove
        for filename in copyfilelist:
            print("Removing: {}".format(filename))
            remove(filename)

if __name__ == '__main__':
    # Parse the command line on top of the shared argument definitions.
    args = argparse.ArgumentParser(
        'DINO training and evaluation script',
        parents=[get_args_parser()],
    ).parse_args()
    # Make sure the output directory exists before main() writes into it.
    if args.output_dir:
        Path(args.output_dir).mkdir(parents=True, exist_ok=True)
    main(args)
"""

# Write the patched script text (whitespace-stripped) to main_2.py inside the
# cloned DINO checkout.
with open("/content/DINO/main_2.py", "w") as f:
    f.write(main_2.strip())

!ls /content/DINO

config				   main_2.py	     run_with_submitit.py
datasets			   main.py	     scripts
engine.py			   models	     tools
figs				   __pycache__	     util
inference_and_visualization.ipynb  README.md
LICENSE				   requirements.txt


In [None]:
# To fix this, you need to edit the file /usr/local/lib/python3.11/dist-packages/pycocotools/cocoeval.py at line 378 and
# change np.float to float or np.float64 as suggested by the error message.

new_f = """__author__ = 'tsungyi'

import numpy as np
import datetime
import time
from collections import defaultdict
from . import mask as maskUtils
import copy

class COCOeval:
    # Interface for evaluating detection on the Microsoft COCO dataset.
    #
    # The usage for CocoEval is as follows:
    #  cocoGt=..., cocoDt=...       # load dataset and results
    #  E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
    #  E.params.recThrs = ...;      # set parameters as desired
    #  E.evaluate();                # run per image evaluation
    #  E.accumulate();              # accumulate per image results
    #  E.summarize();               # display summary metrics of results
    # For example usage see evalDemo.m and http://mscoco.org/.
    #
    # The evaluation parameters are as follows (defaults in brackets):
    #  imgIds     - [all] N img ids to use for evaluation
    #  catIds     - [all] K cat ids to use for evaluation
    #  iouThrs    - [.5:.05:.95] T=10 IoU thresholds for evaluation
    #  recThrs    - [0:.01:1] R=101 recall thresholds for evaluation
    #  areaRng    - [...] A=4 object area ranges for evaluation
    #  maxDets    - [1 10 100] M=3 thresholds on max detections per image
    #  iouType    - ['segm'] set iouType to 'segm', 'bbox' or 'keypoints'
    #  iouType replaced the now DEPRECATED useSegm parameter.
    #  useCats    - [1] if true use category labels for evaluation
    # Note: if useCats=0 category labels are ignored as in proposal scoring.
    # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
    #
    # evaluate(): evaluates detections on every image and every category and
    # concats the results into the "evalImgs" with fields:
    #  dtIds      - [1xD] id for each of the D detections (dt)
    #  gtIds      - [1xG] id for each of the G ground truths (gt)
    #  dtMatches  - [TxD] matching gt id at each IoU or 0
    #  gtMatches  - [TxG] matching dt id at each IoU or 0
    #  dtScores   - [1xD] confidence of each dt
    #  gtIgnore   - [1xG] ignore flag for each gt
    #  dtIgnore   - [TxD] ignore flag for each dt at each IoU
    #
    # accumulate(): accumulates the per-image, per-category evaluation
    # results in "evalImgs" into the dictionary "eval" with fields:
    #  params     - parameters used for evaluation
    #  date       - date evaluation was performed
    #  counts     - [T,R,K,A,M] parameter dimensions (see above)
    #  precision  - [TxRxKxAxM] precision for every evaluation setting
    #  recall     - [TxKxAxM] max recall for every evaluation setting
    # Note: precision and recall==-1 for settings with no gt objects.
    #
    # See also coco, mask, pycocoDemo, pycocoEvalDemo
    #
    # Microsoft COCO Toolbox.      version 2.0
    # Data, paper, and tutorials available at:  http://mscoco.org/
    # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
    # Licensed under the Simplified BSD License [see coco/license.txt]
    def __init__(self, cocoGt=None, cocoDt=None, iouType='segm'):
        '''
        Set up an evaluator from the ground-truth and detection COCO APIs.
        :param cocoGt: coco object with ground truth annotations
        :param cocoDt: coco object with detection results
        :param iouType: evaluation type handed to Params
        :return: None
        '''
        if not iouType:
            print('iouType not specified. use default iouType segm')
        # COCO API handles: ground truth and detections
        self.cocoGt = cocoGt
        self.cocoDt = cocoDt
        # per-image per-category evaluation results [KxAxI] elements
        self.evalImgs = defaultdict(list)
        # accumulated evaluation results
        self.eval = {}
        # gt / dt annotations used for evaluation
        self._gts = defaultdict(list)
        self._dts = defaultdict(list)
        # evaluation parameters and the snapshot taken when evaluate() runs
        self.params = Params(iouType=iouType)
        self._paramsEval = {}
        # result summarization
        self.stats = []
        # ious between all gts and dts
        self.ious = {}
        if cocoGt is not None:
            # default to every image and category known to the ground truth
            self.params.imgIds = sorted(cocoGt.getImgIds())
            self.params.catIds = sorted(cocoGt.getCatIds())


    def _prepare(self):
        '''
        Prepare ._gts and ._dts for evaluation based on params
        :return: None
        '''
        def _toMask(anns, coco):
            # modify ann['segmentation'] by reference
            for ann in anns:
                rle = coco.annToRLE(ann)
                ann['segmentation'] = rle
        p = self.params
        if p.useCats:
            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
        else:
            gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
            dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))

        # convert ground truth to mask if iouType == 'segm'
        if p.iouType == 'segm':
            _toMask(gts, self.cocoGt)
            _toMask(dts, self.cocoDt)
        # set ignore flag
        for gt in gts:
            gt['ignore'] = gt['ignore'] if 'ignore' in gt else 0
            gt['ignore'] = 'iscrowd' in gt and gt['iscrowd']
            if p.iouType == 'keypoints':
                gt['ignore'] = (gt['num_keypoints'] == 0) or gt['ignore']
        self._gts = defaultdict(list)       # gt for evaluation
        self._dts = defaultdict(list)       # dt for evaluation
        for gt in gts:
            self._gts[gt['image_id'], gt['category_id']].append(gt)
        for dt in dts:
            self._dts[dt['image_id'], dt['category_id']].append(dt)
        self.evalImgs = defaultdict(list)   # per-image per-category evaluation results
        self.eval     = {}                  # accumulated evaluation results

    def evaluate(self):
        '''
        Evaluate every (image, category, area range) combination and keep the
        per-image results (a list of dict) in self.evalImgs
        :return: None
        '''
        started = time.time()
        print('Running per image evaluation...')
        params = self.params
        # backward compatibility: a non-None useSegm decides the iouType
        if params.useSegm is not None:
            params.iouType = 'segm' if params.useSegm == 1 else 'bbox'
            print('useSegm (deprecated) is not None. Running {} evaluation'.format(params.iouType))
        print('Evaluate annotation type *{}*'.format(params.iouType))
        # normalise the id lists: unique + sorted
        params.imgIds = list(np.unique(params.imgIds))
        if params.useCats:
            params.catIds = list(np.unique(params.catIds))
        params.maxDets = sorted(params.maxDets)
        self.params = params

        self._prepare()
        # a single pseudo category (-1) is used when categories are ignored
        catIds = params.catIds if params.useCats else [-1]

        if params.iouType in ('segm', 'bbox'):
            overlap_fn = self.computeIoU
        elif params.iouType == 'keypoints':
            overlap_fn = self.computeOks
        overlaps = {}
        for imgId in params.imgIds:
            for catId in catIds:
                overlaps[(imgId, catId)] = overlap_fn(imgId, catId)
        self.ious = overlaps

        per_image = self.evaluateImg
        top_k = params.maxDets[-1]
        # loop order: category, then area range, then image
        results = []
        for catId in catIds:
            for areaRng in params.areaRng:
                for imgId in params.imgIds:
                    results.append(per_image(imgId, catId, areaRng, top_k))
        self.evalImgs = results
        # snapshot of the parameters this evaluation was run with
        self._paramsEval = copy.deepcopy(self.params)
        finished = time.time()
        print('DONE (t={:0.2f}s).'.format(finished - started))

    def computeIoU(self, imgId, catId):
        '''
        IoU between the detections and ground truths of one image / category.
        :return: [] when there are neither gts nor dts, otherwise whatever
                 maskUtils.iou returns for the score-sorted detections
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId,catId]
            dt = self._dts[imgId,catId]
        else:
            # categories are ignored: pool the annotations of every category
            gt, dt = [], []
            for cId in p.catIds:
                gt.extend(self._gts[imgId,cId])
            for cId in p.catIds:
                dt.extend(self._dts[imgId,cId])
        if len(gt) == 0 and len(dt) == 0:
            return []
        # stable sort by descending score, then keep the top maxDets[-1]
        order = np.argsort([-det['score'] for det in dt], kind='mergesort')
        dt = [dt[idx] for idx in order][:p.maxDets[-1]]

        if p.iouType not in ('segm', 'bbox'):
            raise Exception('unknown iouType for iou computation')
        region_key = 'segmentation' if p.iouType == 'segm' else 'bbox'
        gt_regions = [ann[region_key] for ann in gt]
        dt_regions = [ann[region_key] for ann in dt]

        # compute iou between each dt and gt region
        crowd_flags = [int(ann['iscrowd']) for ann in gt]
        return maskUtils.iou(dt_regions, gt_regions, crowd_flags)

    def computeOks(self, imgId, catId):
        '''
        Compute the keypoint similarity between every detection and every ground
        truth stored for one (image, category) pair.
        :param imgId: image id used to look up self._gts / self._dts
        :param catId: category id used to look up self._gts / self._dts
        :return: [] when either side is empty, otherwise an array of shape
                 (num detections, num ground truths) whose rows follow the
                 detections sorted by descending score
        '''
        params = self.params
        gts = self._gts[imgId, catId]
        candidates = self._dts[imgId, catId]
        order = np.argsort([-c['score'] for c in candidates], kind='mergesort')
        dts = [candidates[idx] for idx in order][:params.maxDets[-1]]
        if len(gts) == 0 or len(dts) == 0:
            return []

        oks = np.zeros((len(dts), len(gts)))
        sigmas = params.kpt_oks_sigmas
        variances = (sigmas * 2)**2
        num_kpts = len(sigmas)
        # compute oks between each detection and ground truth object
        for col, gt in enumerate(gts):
            gt_kpts = np.array(gt['keypoints'])
            gt_x = gt_kpts[0::3]
            gt_y = gt_kpts[1::3]
            labelled = gt_kpts[2::3] > 0
            has_labelled = np.count_nonzero(labelled) > 0
            # bounds of the ignore region (the gt bbox doubled in each direction)
            box = gt['bbox']
            x_lo = box[0] - box[2]
            x_hi = box[0] + box[2] * 2
            y_lo = box[1] - box[3]
            y_hi = box[1] + box[3] * 2
            for row, dt in enumerate(dts):
                dt_kpts = np.array(dt['keypoints'])
                dt_x = dt_kpts[0::3]
                dt_y = dt_kpts[1::3]
                if has_labelled:
                    # per-keypoint offset to the annotated keypoints
                    off_x = dt_x - gt_x
                    off_y = dt_y - gt_y
                else:
                    # no labelled keypoint: distance to the (x_lo,y_lo)-(x_hi,y_hi) box
                    zeros = np.zeros((num_kpts))
                    off_x = np.max((zeros, x_lo-dt_x),axis=0)+np.max((zeros, dt_x-x_hi),axis=0)
                    off_y = np.max((zeros, y_lo-dt_y),axis=0)+np.max((zeros, dt_y-y_hi),axis=0)
                err = (off_x**2 + off_y**2) / variances / (gt['area']+np.spacing(1)) / 2
                if has_labelled:
                    err = err[labelled]
                oks[row, col] = np.sum(np.exp(-err)) / err.shape[0]
        return oks

    def evaluateImg(self, imgId, catId, aRng, maxDet):
        '''
        perform evaluation for single category and image

        Matches the detections of one image against its ground truths at every
        IoU threshold in params.iouThrs, using the IoUs cached in self.ious.
        Side effect: writes an '_ignore' flag into each ground-truth dict.

        :param imgId: image id used to look up self._gts / self._dts / self.ious
        :param catId: category id (when params.useCats is false the annotations
                      of all params.catIds are pooled instead)
        :param aRng: [min_area, max_area]; annotations outside it are ignored
        :param maxDet: maximum number of (best scoring) detections considered
        :return: dict (single image results), or None when the image has neither
                 ground truths nor detections
        '''
        p = self.params
        if p.useCats:
            gt = self._gts[imgId,catId]
            dt = self._dts[imgId,catId]
        else:
            gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
            dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
        if len(gt) == 0 and len(dt) ==0:
            return None

        # flag ground truths that are marked 'ignore' or fall outside the area range
        for g in gt:
            if g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
                g['_ignore'] = 1
            else:
                g['_ignore'] = 0

        # sort dt highest score first, sort gt ignore last
        # (mergesort is stable, so ties keep their original relative order)
        gtind = np.argsort([g['_ignore'] for g in gt], kind='mergesort')
        gt = [gt[i] for i in gtind]
        dtind = np.argsort([-d['score'] for d in dt], kind='mergesort')
        dt = [dt[i] for i in dtind[0:maxDet]]
        iscrowd = [int(o['iscrowd']) for o in gt]
        # load computed ious
        # (columns are permuted with gtind so they line up with the re-sorted gt list)
        ious = self.ious[imgId, catId][:, gtind] if len(self.ious[imgId, catId]) > 0 else self.ious[imgId, catId]

        T = len(p.iouThrs)
        G = len(gt)
        D = len(dt)
        # gtm / dtm hold the id of the matched partner per threshold; 0 means unmatched
        gtm  = np.zeros((T,G))
        dtm  = np.zeros((T,D))
        gtIg = np.array([g['_ignore'] for g in gt])
        dtIg = np.zeros((T,D))
        if not len(ious)==0:
            for tind, t in enumerate(p.iouThrs):
                for dind, d in enumerate(dt):
                    # information about best match so far (m=-1 -> unmatched)
                    iou = min([t,1-1e-10])
                    m   = -1
                    for gind, g in enumerate(gt):
                        # if this gt already matched, and not a crowd, continue
                        if gtm[tind,gind]>0 and not iscrowd[gind]:
                            continue
                        # if dt matched to reg gt, and on ignore gt, stop
                        # (gt is sorted with ignored entries last, so nothing regular follows)
                        if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
                            break
                        # continue to next gt unless better match made
                        if ious[dind,gind] < iou:
                            continue
                        # if match successful and best so far, store appropriately
                        iou=ious[dind,gind]
                        m=gind
                    # if match made store id of match for both dt and gt
                    if m ==-1:
                        continue
                    # a detection inherits the ignore flag of the gt it matched
                    dtIg[tind,dind] = gtIg[m]
                    dtm[tind,dind]  = gt[m]['id']
                    gtm[tind,m]     = d['id']
        # set unmatched detections outside of area range to ignore
        # NOTE: "unmatched" is tested as dtm==0, so a match against a ground truth
        # whose id is 0 is indistinguishable from no match here.
        a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
        dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
        # store results for given image and category
        return {
                'image_id':     imgId,
                'category_id':  catId,
                'aRng':         aRng,
                'maxDet':       maxDet,
                'dtIds':        [d['id'] for d in dt],
                'gtIds':        [g['id'] for g in gt],
                'dtMatches':    dtm,
                'gtMatches':    gtm,
                'dtScores':     [d['score'] for d in dt],
                'gtIgnore':     gtIg,
                'dtIgnore':     dtIg,
            }

    def accumulate(self, p = None):
        '''
        Accumulate per image evaluation results and store the result in self.eval

        Builds precision / recall / score tables from self.evalImgs for every
        (category, area range, max detections) combination that is present both
        in p and in the parameters captured by evaluate() (self._paramsEval).
        Table cells that are never filled keep the value -1.

        :param p: input params for evaluation; defaults to self.params. Note that
                  p.catIds is overwritten with [-1] when p.useCats is not 1.
        :return: None (results are stored in self.eval under the keys 'params',
                 'counts', 'date', 'precision', 'recall' and 'scores')
        '''
        print('Accumulating evaluation results...')
        tic = time.time()
        if not self.evalImgs:
            print('Please run evaluate() first')
        # allows input customized parameters
        if p is None:
            p = self.params
        p.catIds = p.catIds if p.useCats == 1 else [-1]
        T           = len(p.iouThrs)
        R           = len(p.recThrs)
        K           = len(p.catIds) if p.useCats else 1
        A           = len(p.areaRng)
        M           = len(p.maxDets)
        precision   = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
        recall      = -np.ones((T,K,A,M))
        scores      = -np.ones((T,R,K,A,M))

        # create dictionary for future indexing
        _pe = self._paramsEval
        catIds = _pe.catIds if _pe.useCats else [-1]
        setK = set(catIds)
        setA = set(map(tuple, _pe.areaRng))
        setM = set(_pe.maxDets)
        setI = set(_pe.imgIds)
        # get inds to evaluate
        k_list = [n for n, k in enumerate(p.catIds)  if k in setK]
        m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
        a_list = [n for n, a in enumerate(map(tuple, p.areaRng)) if a in setA]
        i_list = [n for n, i in enumerate(p.imgIds)  if i in setI]
        I0 = len(_pe.imgIds)
        A0 = len(_pe.areaRng)
        # retrieve E at each category, area range, and max number of detections
        # (self.evalImgs is laid out category-major, then area range, then image)
        for k, k0 in enumerate(k_list):
            Nk = k0*A0*I0
            for a, a0 in enumerate(a_list):
                Na = a0*I0
                for m, maxDet in enumerate(m_list):
                    E = [self.evalImgs[Nk + Na + i] for i in i_list]
                    E = [e for e in E if e is not None]
                    if len(E) == 0:
                        continue
                    dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])

                    # different sorting method generates slightly different results.
                    # mergesort is used to be consistent as Matlab implementation.
                    inds = np.argsort(-dtScores, kind='mergesort')
                    dtScoresSorted = dtScores[inds]

                    dtm  = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
                    dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet]  for e in E], axis=1)[:,inds]
                    gtIg = np.concatenate([e['gtIgnore'] for e in E])
                    # number of ground truths that are not ignored
                    npig = np.count_nonzero(gtIg==0 )
                    if npig == 0:
                        continue
                    tps = np.logical_and(               dtm,  np.logical_not(dtIg) )
                    fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )

                    tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float64)
                    fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float64)
                    for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
                        tp = np.array(tp)
                        fp = np.array(fp)
                        nd = len(tp)
                        rc = tp / npig
                        pr = tp / (fp+tp+np.spacing(1))
                        q  = np.zeros((R,))
                        ss = np.zeros((R,))

                        if nd:
                            recall[t,k,a,m] = rc[-1]
                        else:
                            recall[t,k,a,m] = 0

                        # numpy is slow without cython optimization for accessing elements
                        # use python array gets significant speed improvement
                        pr = pr.tolist(); q = q.tolist()

                        # make precision monotonically non-increasing (right to left sweep)
                        for i in range(nd-1, 0, -1):
                            if pr[i] > pr[i-1]:
                                pr[i-1] = pr[i]

                        rec_inds = np.searchsorted(rc, p.recThrs, side='left')
                        for ri, pi in enumerate(rec_inds):
                            # searchsorted returns nd for recall levels that are never
                            # reached; sampling stops there and the remaining entries
                            # of q / ss stay 0. This explicit check replaces a bare
                            # except that also hid unrelated errors.
                            if pi >= nd:
                                break
                            q[ri] = pr[pi]
                            ss[ri] = dtScoresSorted[pi]
                        precision[t,:,k,a,m] = np.array(q)
                        scores[t,:,k,a,m] = np.array(ss)
        self.eval = {
            'params': p,
            'counts': [T, R, K, A, M],
            'date': datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
            'precision': precision,
            'recall':   recall,
            'scores': scores,
        }
        toc = time.time()
        print('DONE (t={:0.2f}s).'.format( toc-tic))

    def summarize(self):
        '''
        Print the summary metrics of the accumulated evaluation and keep them in
        self.stats.
        Note this function can *only* be applied on the default parameter setting
        '''
        if not self.eval:
            raise Exception('Please run accumulate() first')

        def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
            # Average one slice of the precision (ap == 1) or recall table, print it
            # as a formatted line and return the mean (-1 when the slice has no
            # valid entries).
            params = self.params
            template = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
            if ap == 1:
                title, tag = 'Average Precision', '(AP)'
            else:
                title, tag = 'Average Recall', '(AR)'
            if iouThr is None:
                iou_label = '{:0.2f}:{:0.2f}'.format(params.iouThrs[0], params.iouThrs[-1])
            else:
                iou_label = '{:0.2f}'.format(iouThr)

            area_pos = [pos for pos, label in enumerate(params.areaRngLbl) if label == areaRng]
            det_pos = [pos for pos, limit in enumerate(params.maxDets) if limit == maxDets]
            # dimension of precision: [TxRxKxAxM], dimension of recall: [TxKxAxM]
            table = self.eval['precision'] if ap == 1 else self.eval['recall']
            if iouThr is not None:
                # restrict to the requested IoU threshold
                table = table[np.where(iouThr == params.iouThrs)[0]]
            if ap == 1:
                table = table[:,:,:,area_pos,det_pos]
            else:
                table = table[:,:,area_pos,det_pos]
            valid = table[table>-1]
            mean_s = -1 if len(valid)==0 else np.mean(valid)
            print(template.format(title, tag, iou_label, areaRng, maxDets, mean_s))
            return mean_s

        def _summarizeDets():
            # (ap, iouThr, areaRng, index into params.maxDets or None for the default)
            layout = [
                (1, None, 'all', None),
                (1, .5, 'all', 2),
                (1, .75, 'all', 2),
                (1, None, 'small', 2),
                (1, None, 'medium', 2),
                (1, None, 'large', 2),
                (0, None, 'all', 0),
                (0, None, 'all', 1),
                (0, None, 'all', 2),
                (0, None, 'small', 2),
                (0, None, 'medium', 2),
                (0, None, 'large', 2),
            ]
            stats = np.zeros((12,))
            for slot, (ap, thr, area, det_slot) in enumerate(layout):
                kwargs = {'iouThr': thr, 'areaRng': area}
                if det_slot is not None:
                    kwargs['maxDets'] = self.params.maxDets[det_slot]
                stats[slot] = _summarize(ap, **kwargs)
            return stats

        def _summarizeKps():
            # (ap, iouThr, areaRng); every entry is reported at maxDets=20
            layout = [
                (1, None, 'all'),
                (1, .5, 'all'),
                (1, .75, 'all'),
                (1, None, 'medium'),
                (1, None, 'large'),
                (0, None, 'all'),
                (0, .5, 'all'),
                (0, .75, 'all'),
                (0, None, 'medium'),
                (0, None, 'large'),
            ]
            stats = np.zeros((10,))
            for slot, (ap, thr, area) in enumerate(layout):
                stats[slot] = _summarize(ap, maxDets=20, iouThr=thr, areaRng=area)
            return stats

        iouType = self.params.iouType
        if iouType in ('segm', 'bbox'):
            summarize = _summarizeDets
        elif iouType == 'keypoints':
            summarize = _summarizeKps
        self.stats = summarize()

    def __str__(self):
        self.summarize()

class Params:
    '''
    Parameter container for the coco evaluation api.

    The constructor installs one of two default sets: detection defaults for
    'segm' / 'bbox', keypoint defaults for 'keypoints'.
    '''
    def __init__(self, iouType='segm'):
        '''
        :param iouType: 'segm', 'bbox' or 'keypoints'
        :raises Exception: for any other iouType
        '''
        if iouType in ('segm', 'bbox'):
            self.setDetParams()
        elif iouType == 'keypoints':
            self.setKpParams()
        else:
            raise Exception('iouType not supported')
        self.iouType = iouType
        # useSegm is deprecated
        self.useSegm = None

    def setDetParams(self):
        '''Install the defaults used for 'segm' and 'bbox' evaluation.'''
        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
        iou_steps = int(np.round((0.95 - .5) / .05)) + 1
        rec_steps = int(np.round((1.00 - .0) / .01)) + 1
        labelled_ranges = [
            ('all', [0 ** 2, 1e5 ** 2]),
            ('small', [0 ** 2, 32 ** 2]),
            ('medium', [32 ** 2, 96 ** 2]),
            ('large', [96 ** 2, 1e5 ** 2]),
        ]
        self.imgIds = []
        self.catIds = []
        self.iouThrs = np.linspace(.5, 0.95, iou_steps, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, rec_steps, endpoint=True)
        self.maxDets = [1, 10, 100]
        self.areaRng = [bounds for _, bounds in labelled_ranges]
        self.areaRngLbl = [label for label, _ in labelled_ranges]
        self.useCats = 1

    def setKpParams(self):
        '''Install the defaults used for 'keypoints' evaluation.'''
        # np.arange causes trouble.  the data point on arange is slightly larger than the true value
        iou_steps = int(np.round((0.95 - .5) / .05)) + 1
        rec_steps = int(np.round((1.00 - .0) / .01)) + 1
        labelled_ranges = [
            ('all', [0 ** 2, 1e5 ** 2]),
            ('medium', [32 ** 2, 96 ** 2]),
            ('large', [96 ** 2, 1e5 ** 2]),
        ]
        self.imgIds = []
        self.catIds = []
        self.iouThrs = np.linspace(.5, 0.95, iou_steps, endpoint=True)
        self.recThrs = np.linspace(.0, 1.00, rec_steps, endpoint=True)
        self.maxDets = [20]
        self.areaRng = [bounds for _, bounds in labelled_ranges]
        self.areaRngLbl = [label for label, _ in labelled_ranges]
        self.useCats = 1
        # one sigma per keypoint (17 entries), consumed by the OKS computation
        raw_sigmas = [.26, .25, .25, .35, .35, .79, .79, .72, .72, .62,.62, 1.07, 1.07, .87, .87, .89, .89]
        self.kpt_oks_sigmas = np.array(raw_sigmas)/10.0
"""
# Overwrite the installed pycocotools evaluator with the patched source held in `new_f`.
# The package directory is resolved from the running interpreter so the patch lands on
# the copy that will actually be imported, whatever the Python version of the runtime.
# The original hard-coded Python 3.11 location is kept as a fallback for the case where
# the package cannot be located.
import importlib.util
import os

_pycocotools_spec = importlib.util.find_spec("pycocotools")
if _pycocotools_spec is not None and _pycocotools_spec.origin:
    cocoeval_path = os.path.join(os.path.dirname(_pycocotools_spec.origin), "cocoeval.py")
else:
    cocoeval_path = "/usr/local/lib/python3.11/dist-packages/pycocotools/cocoeval.py"
with open(cocoeval_path, "w") as f:
    f.write(new_f.strip())


In [None]:
# Remove the outputs of earlier runs. -f makes every removal a no-op when the path is
# already gone, so the cell can be re-run without "No such file or directory" errors.
!rm -rf dino_swinl_ft/
!rm -rf dino_swinl_ultra/
!rm -f gpu_monitor.py
!rm -f nohup.out
!rm -f training.log

rm: cannot remove 'gpu_monitor.py': No such file or directory
rm: cannot remove 'nohup.out': No such file or directory
rm: cannot remove 'training.log': No such file or directory


In [None]:
# Launch /content/DINO/main_2.py (unbuffered output) with the dino_swinl_ultra config,
# the dataset under /content/coco_dataset, initial weights from
# checkpoint0029_5scale_swin.pth, and results written to /content/dino_swinl_ultra.
# NOTE(review): --finetune_ignore presumably keeps the listed weights (label_enc.weight,
# class_embed) from being loaded out of the checkpoint — confirm in main_2.py.
# NOTE(review): the worker count is given twice (--num_workers 10 and
# data.workers_per_gpu=10); verify which of the two main_2.py actually uses.
!python -u /content/DINO/main_2.py \
  --config_file /content/configs/dino_swinl_ultra.py \
  --coco_path /content/coco_dataset \
  --pretrain_model_path /content/checkpoint0029_5scale_swin.pth \
  --finetune_ignore label_enc.weight class_embed \
  --output_dir /content/dino_swinl_ultra \
  --num_workers 10 \
  --amp \
  --seed 42 \
  --options batch_size=2 data.workers_per_gpu=10

In [None]:
# Check that the two specific failing image files exist in train2017
# (the long listing also shows their size and permissions)
!ls -la /content/coco_dataset/train2017/13641.jpg
!ls -la /content/coco_dataset/train2017/13642.jpg

# Count the entries in train2017, then list the top-level entries of the dataset root
!ls /content/coco_dataset/train2017/ | wc -l
!ls /content/coco_dataset/


-rw------- 1 root root 437950 May 24 02:56 /content/coco_dataset/train2017/13641.jpg
-rw------- 1 root root 590885 May 24 02:56 /content/coco_dataset/train2017/13642.jpg
16000
annotations  train2017	val2017
