In [None]:
!git clone https://github.com/wujx2001/QwT.git

In [None]:
# Work from the detection sub-project: later cells use paths relative to this
# directory (`configs/...`, `tools/test.py`, `../pretrained_weights`, `work_dirs/`).
%cd /content/QwT/detection

In [None]:
import torch

# Report the CUDA devices visible to PyTorch, or say that none are available.
if not torch.cuda.is_available():
    print("✗ CUDA is not available on this system.")
else:
    num_gpus = torch.cuda.device_count()
    print(f"✓ Number of CUDA GPUs: {num_gpus}")
    # One line per device: index and device name.
    for i in range(num_gpus):
        device_name = torch.cuda.get_device_name(i)
        print(f"  GPU {i}: {device_name}")
    # Versions of the CUDA toolkit PyTorch was built with, and of PyTorch itself.
    print(f"  CUDA Version: {torch.version.cuda}")
    print(f"  PyTorch Version: {torch.__version__}")

In [None]:
# Install openmim and mmcv-full
!pip install -U openmim
!mim install mmcv-full

In [None]:
# Install mmdetection and additional dependencies
!pip install -v -e .
!pip install tqdm scipy

In [None]:
import os

# Directory that receives the downloaded checkpoints, one level above the
# current working directory. Creating it is a no-op when it already exists.
weights_dir = "../pretrained_weights"
os.makedirs(weights_dir, exist_ok=True)

In [None]:
# Download Cascade Mask R-CNN Swin-Tiny checkpoint
!wget -P ../pretrained_weights https://github.com/SwinTransformer/storage/releases/download/v1.0.2/cascade_mask_rcnn_swin_tiny_patch4_window7.pth

In [None]:
# Optional: Download other checkpoints
# Mask R-CNN Swin-Tiny
# !wget -P ../pretrained_weights https://github.com/SwinTransformer/storage/releases/download/v1.0.2/mask_rcnn_swin_tiny_patch4_window7.pth

# Cascade Mask R-CNN Swin-Small
# !wget -P ../pretrained_weights https://github.com/SwinTransformer/storage/releases/download/v1.0.2/cascade_mask_rcnn_swin_small_patch4_window7.pth

In [None]:
# Create data directory
!mkdir -p /content/coco

# Download COCO 2017 annotations
!wget -P /content/coco http://images.cocodataset.org/annotations/annotations_trainval2017.zip
!unzip -q /content/coco/annotations_trainval2017.zip -d /content/coco

In [None]:
# Download COCO 2017 validation images
!wget -P /content/coco http://images.cocodataset.org/zips/val2017.zip
!unzip -q /content/coco/val2017.zip -d /content/coco

In [None]:
# Optional: Download training images (large file ~19GB)
# !wget -P /content/coco http://images.cocodataset.org/zips/train2017.zip
# !unzip -q /content/coco/train2017.zip -d /content/coco

In [None]:
# Update the data_root in the config file
#
# Rewrites every `data_root = ...` assignment in the dataset config so that it
# points at the COCO download directory. Raises if no such assignment exists,
# instead of reporting success for a config that was not changed.
import os
import re

config_file = "configs/_base_/datasets/coco_instance.py"
new_data_root = "/content/coco/"

# Match only an assignment to `data_root` (optionally indented). Names that
# merely start with "data_root" and `==` comparisons are left alone.
data_root_assignment = re.compile(r"^([ \t]*)data_root\s*=(?!=)")

# Read the config
with open(config_file, 'r') as f:
    lines = f.readlines()

# Update data_root path
new_lines = []
num_updated = 0
for line in lines:
    match = data_root_assignment.match(line)
    if match:
        # Keep the original indentation so an indented assignment stays valid.
        new_lines.append(f"{match.group(1)}data_root = '{new_data_root}'\n")
        num_updated += 1
        print(f"✓ Updated data_root to: {new_data_root}")
    else:
        new_lines.append(line)

if num_updated == 0:
    # Nothing matched: leave the file untouched and fail loudly, because the
    # evaluation would otherwise run against the config's original data path.
    raise RuntimeError(
        f"No 'data_root = ...' assignment found in {config_file}; config was not modified"
    )

# Write back
with open(config_file, 'w') as f:
    f.writelines(new_lines)

print("✓ Config updated successfully")

In [None]:
# Optional
import os

# Set the PyTorch CUDA allocator option in this process's environment;
# commands started later with `!` inherit it.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})
print("✓ CUDA memory configuration set")

## Step 8: Run Quantization Evaluation

Run QwT quantization on the detection model.

**Expected Runtime:** ~35-40 minutes on a single GPU
- Loading & calibration: ~5 min
- Baseline evaluation: ~15 min
- QwT compensation: ~5 min
- Final evaluation: ~15 min

In [None]:
# W4/A4 Quantization - Cascade Mask R-CNN with Swin-Tiny
#
# Runs tools/test.py with, in order: the model config, the checkpoint
# downloaded into ../pretrained_weights, the metrics to evaluate (bbox, segm),
# and the weight / activation bit-widths (4 and 4).
# CUDA_VISIBLE_DEVICES=0 makes only GPU 0 visible to the process.
# NOTE(review): the flag spellings differ (`--w_bit` vs `--a_bits`) — confirm
# both against the argument definitions in tools/test.py.
!CUDA_VISIBLE_DEVICES=0 python tools/test.py \
    configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py \
    ../pretrained_weights/cascade_mask_rcnn_swin_tiny_patch4_window7.pth \
    --eval bbox segm \
    --w_bit 4 \
    --a_bits 4

### Expected Results for W4/A4:

**RepQ-ViT Baseline:**
- bbox AP: ~47.0
- segm AP: ~41.4

**RepQ-ViT + QwT:**
- bbox AP: ~47.6 (+0.6 AP over the baseline)
- segm AP: ~41.8 (+0.4 AP over the baseline)

In [None]:
# Optional: W6/A6 Quantization (better accuracy)
# !CUDA_VISIBLE_DEVICES=0 python tools/test.py \
#     configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py \
#     ../pretrained_weights/cascade_mask_rcnn_swin_tiny_patch4_window7.pth \
#     --eval bbox segm \
#     --w_bit 6 \
#     --a_bits 6

In [None]:
# Optional: W8/A8 Quantization (near full precision)
# !CUDA_VISIBLE_DEVICES=0 python tools/test.py \
#     configs/swin/cascade_mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py \
#     ../pretrained_weights/cascade_mask_rcnn_swin_tiny_patch4_window7.pth \
#     --eval bbox segm \
#     --w_bit 8 \
#     --a_bits 8

## Step 9: Archive Results (Optional)

Compress and save the work directory with logs and checkpoints.

In [None]:
# Archive work directory
#
# Zips `work_dirs/` (relative to the current directory) into
# /content/QwT_detection_results.zip. Success is reported only when an archive
# was actually written; a missing `work_dirs/` is reported instead.
import os
import shutil

results_dir = "work_dirs"
archive_base = "/content/QwT_detection_results"  # make_archive appends ".zip"

if os.path.isdir(results_dir):
    # base_dir keeps the "work_dirs/" prefix on every path inside the archive,
    # the same layout `zip -r <archive> work_dirs/` produces.
    archive_path = shutil.make_archive(
        archive_base, "zip", root_dir=".", base_dir=results_dir
    )
    print(f"✓ Results archived to {archive_path}")
else:
    print(f"✗ Nothing to archive: '{results_dir}' not found in {os.getcwd()}")

## Additional Experiments

You can try different models and precision levels:

### Different Models:
- Mask R-CNN + Swin-T (config: `mask_rcnn_swin_tiny_patch4_window7_mstrain_480-800_adamw_3x_coco.py`)
- Cascade Mask R-CNN + Swin-S (config: `cascade_mask_rcnn_swin_small_patch4_window7_mstrain_480-800_giou_4conv1f_adamw_3x_coco.py`)

### Different Bit Precisions:
- W4/A4: Aggressive quantization
- W6/A6: Balanced accuracy/efficiency
- W8/A8: Near full precision

### Multi-GPU Evaluation:
Use `tools/dist_test.sh` script for multi-GPU evaluation.