## Conda setup

In [1]:
!pip install -q condacolab
import condacolab
condacolab.install()
# Then kernel will be restarted automatically

⏬ Downloading https://github.com/jaimergp/miniforge/releases/latest/download/Mambaforge-colab-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:22
🔁 Restarting kernel...


# Clone official ViDT repo (NAVER AI)

In [1]:
# Fetch the ViDT sources; git creates a `vidt/` directory under the current working directory.
!git clone https://github.com/naver-ai/vidt.git

Cloning into 'vidt'...
remote: Enumerating objects: 119, done.[K
remote: Counting objects: 100% (119/119), done.[K
remote: Compressing objects: 100% (105/105), done.[K
remote: Total 119 (delta 48), reused 67 (delta 13), pack-reused 0[K
Receiving objects: 100% (119/119), 1.13 MiB | 18.39 MiB/s, done.
Resolving deltas: 100% (48/48), done.


## Conda env set-up
 - `condacolab` supports only the `base` environment, so the `deformable-detr` env described in the repo's `README.md` is not created; the packages are installed into `base` instead.

In [2]:
# Install Python 3.7 and pip first, then PyTorch 1.5.1, torchvision 0.6.1 and
# cudatoolkit 9.2 from the `pytorch` channel. No environment is named, so both
# commands act on the currently active conda environment.
!conda install python=3.7 pip
!conda install pytorch=1.5.1 torchvision=0.6.1 cudatoolkit=9.2 -c pytorch

Collecting package metadata (current_repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / done
Solving environment: \ | / - \ done

## Package Plan ##

  environment location: /usr/local

  added / updated specs:
    - pip
    - python=3.7


The following packages will be downloaded:

    package                    |            build
    ---------------------------|-----------------
    ca-certificates-2021.10.8  |       ha878542_0         139 KB  conda-forge
    certifi-2021.10.8          |   py37h89c1867_1         145 KB  conda-forge
    conda-4.12.0               |   py37h89c1867_0         1.0 MB  conda-forge
    pip-22.0.4                 |     pyhd8ed1ab_0         1.5 MB  conda-forge
    python_abi-3.7             |          2_cp37m           4 KB  conda-f

## Compile CUDA operators, install requirements

In [3]:
%cd vidt/ops
!sh ./make.sh
!python test.py
%cd ../
!pip install -r requirements.txt

/content/vidt/ops
running build
running build_py
creating build
creating build/lib.linux-x86_64-3.7
creating build/lib.linux-x86_64-3.7/modules
copying modules/ms_deform_attn.py -> build/lib.linux-x86_64-3.7/modules
copying modules/__init__.py -> build/lib.linux-x86_64-3.7/modules
creating build/lib.linux-x86_64-3.7/functions
copying functions/__init__.py -> build/lib.linux-x86_64-3.7/functions
copying functions/ms_deform_attn_func.py -> build/lib.linux-x86_64-3.7/functions
running build_ext
building 'MultiScaleDeformableAttention' extension
creating /content/vidt/ops/build/temp.linux-x86_64-3.7
creating /content/vidt/ops/build/temp.linux-x86_64-3.7/content
creating /content/vidt/ops/build/temp.linux-x86_64-3.7/content/vidt
creating /content/vidt/ops/build/temp.linux-x86_64-3.7/content/vidt/ops
creating /content/vidt/ops/build/temp.linux-x86_64-3.7/content/vidt/ops/src
creating /content/vidt/ops/build/temp.linux-x86_64-3.7/content/vidt/ops/src/cpu
creating /content/vidt/ops/build/temp.

# COCO dataset preparation
 - This example assumes your COCO dataset is stored in Google Drive as `coco_2017.zip`.
 - The same steps should also work for other storage locations if you adjust the paths.

In [4]:
# Switch to /content; the dataset cell below copies and extracts the archive into the current directory.
%cd /content

/content


In [5]:
# Attach Google Drive to the runtime at /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [6]:
!cp /content/drive/MyDrive/coco_2017.zip ./
!unzip coco_2017.zip
!rm coco_2017.zip

[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
  inflating: content/coco_2017/val2017/000000162415.jpg  
  inflating: content/coco_2017/val2017/000000411774.jpg  
  inflating: content/coco_2017/val2017/000000055299.jpg  
  inflating: content/coco_2017/val2017/000000484893.jpg  
  inflating: content/coco_2017/val2017/000000378139.jpg  
  inflating: content/coco_2017/val2017/000000269121.jpg  
  inflating: content/coco_2017/val2017/000000102820.jpg  
  inflating: content/coco_2017/val2017/000000304180.jpg  
  inflating: content/coco_2017/val2017/000000560011.jpg  
  inflating: content/coco_2017/val2017/000000278353.jpg  
  inflating: content/coco_2017/val2017/000000491613.jpg  
  inflating: content/coco_2017/val2017/000000463647.jpg  
  inflating: content/coco_2017/val2017/000000031050.jpg  
  inflating: content/coco_2017/val2017/000000418281.jpg  
  inflating: content/coco_2017/val2017/000000309484.jpg  
  inflating: content/coco_2017/val2017/000000317999.jpg  
  inflating: content/c

In [2]:
%cd vidt

/content/vidt


# Train

In [None]:
%mkdir /content/vidt_nano_outputs
!python -m torch.distributed.launch \
    --nproc_per_node=1 \
    --nnodes=1 \
    --use_env main.py \
    --method vidt \
    --backbone_name swin_nano \
    --epochs 50 \
    --lr 1e-4 \
    --min-lr 1e-7 \
    --batch_size 16 \
    --num_workers 2 \
    --aux_loss True \
    --with_box_refine True \
    --coco_path /content/content/coco_2017 \
    --output_dir /content/vidt_nano_outputs \
    --n_iter_to_acc 8

log /content/vidt_nano_outputs
Gradient Accumulation is applied.
The batch:  16 -> 2 but updated every  8 steps.
| distributed init (rank 0 / world 1): env://
git:
  sha: ce0df48a94758e41bce5945e1fb5dc84a1d13752, status: clean, branch: main

Namespace(aux_loss=True, backbone_name='swin_nano', batch_size=2, bbox_loss_coef=5, clip_max_norm=0.1, cls_loss_coef=2, coco_panoptic_path=None, coco_path='/content/content/coco_2017', cross_indices=[3], cross_scale_fusion=False, dataset_file='coco', dec_layers=6, dec_n_points=4, decay_rate=0.1, det_token_num=100, device='cuda', dice_loss_coef=1, dim_feedforward=1024, dist_backend='nccl', dist_url='env://', distil_loss_coef=4.0, distil_model=None, distil_model_path=None, distributed=True, dropout=0.1, eos_coef=0.1, epochs=50, eval=False, eval_size=800, focal_alpha=0.25, giou_loss_coef=2, gpu=0, iou_aware=False, iouaware_loss_coef=2, lr=0.0001, lr_backbone=1e-05, lr_drop=40, lr_linear_proj_mult=0.1, lr_linear_proj_names=['reference_points', 'samplin

# Evaluate

In [None]:
!python -m torch.distributed.launch \
    --nproc_per_node=1 \ 
    --nnodes=1 \
    --use_env main.py \
    --method vidt \
    --backbone_name swin_nano \
    --batch_size 16 \
    --num_workers 2 \
    --aux_loss True \
    --with_box_refine True \
    --coco_path /content/content/coco_2017 \
    --resume /content/vidt_nano_outputs/checkpoint.pth \
    --pre_trained none \
    --eval True