# ImageNet A/B/E Benchmark (Colab)

This notebook prepares ImageNet-1k (official route), then runs A/B/E param-matched benchmarks using paper-style training flags.

Prerequisites:
- You have access to ILSVRC2012 (ImageNet-1k) and have placed these files in the Google Drive folder `MyDrive/imagenet` (the `IMNET_SRC` path configured below):
  - `ILSVRC2012_img_train.tar`
  - `ILSVRC2012_img_val.tar`
  - `ILSVRC2012_devkit_t12.tar`



In [None]:
# Clone and install
!pip -q install -U pip
!if [ ! -d MoP ]; then git clone https://github.com/Eran-BA/MoP.git ; fi
%cd MoP/
!pip -q install -r requirements.txt


In [None]:
# Mount Google Drive, then define where ImageNet is read from and where it is unpacked
import os
from google.colab import drive

# force_remount=True re-attaches Drive even if it is already mounted in this session
drive.mount('/content/drive', force_remount=True)

# IMNET_SRC: Drive folder that holds the three tar files (read by the later cells)
IMNET_SRC = '/content/drive/MyDrive/imagenet'
# IMNET_ROOT: directory outside the Drive mount that receives the extracted dataset
IMNET_ROOT = '/content/imagenet'

print('Source:', IMNET_SRC, '\nTarget:', IMNET_ROOT)
os.makedirs(IMNET_ROOT, exist_ok=True)


In [None]:
# Kaggle download (optional)
# Installs Kaggle, configures credentials via kaggle.json upload, and downloads CLS-LOC files
!pip -q install kaggle
from google.colab import files
import os

print('Upload kaggle.json from your Kaggle Account page (https://www.kaggle.com/<you>/account)')
uploaded = files.upload()  # choose kaggle.json
if 'kaggle.json' in uploaded:
    os.makedirs('/root/.kaggle', exist_ok=True)
    with open('/root/.kaggle/kaggle.json','wb') as f:
        f.write(uploaded['kaggle.json'])
    os.chmod('/root/.kaggle/kaggle.json', 0o600)
    print('kaggle.json installed at /root/.kaggle/kaggle.json')
else:
    print('kaggle.json not uploaded; if you already configured Kaggle, ignore this message.')

# Download competition data to IMNET_SRC (defined above)
os.makedirs(IMNET_SRC, exist_ok=True)
!kaggle competitions download -c imagenet-object-localization-challenge -p "$IMNET_SRC"
!unzip -q "$IMNET_SRC"/*.zip -d "$IMNET_SRC"
print('Kaggle download complete at:', IMNET_SRC)

# Alternative (env vars) — uncomment and set if you prefer environment variables
# import os
# os.environ['KAGGLE_USERNAME'] = 'YOUR_KAGGLE_USERNAME'
# os.environ['KAGGLE_KEY'] = 'YOUR_KAGGLE_API_KEY'
# !kaggle competitions download -c imagenet-object-localization-challenge -p "$IMNET_SRC"
# !unzip -q "$IMNET_SRC"/*.zip -d "$IMNET_SRC"


In [None]:
# Verify ImageNet files (supports tarballs and Kaggle layout)
# Sets USE_KAGGLE_LAYOUT for the following cells; raises FileNotFoundError when neither
# a complete tarball set nor a Kaggle CLS-LOC tree is found under IMNET_SRC.
import os

print("Checking for ImageNet files...")

# Option A: Original tarball format.
# The devkit counts as present under either name: the extraction cell accepts both
# ILSVRC2012_devkit_t12.tar and ILSVRC2012_devkit_t12.tar.gz, so requiring only the
# .tar name here would reject input that the next step can actually use.
image_tarballs = ['ILSVRC2012_img_train.tar', 'ILSVRC2012_img_val.tar']
devkit_candidates = ['ILSVRC2012_devkit_t12.tar', 'ILSVRC2012_devkit_t12.tar.gz']
tarball_files = image_tarballs + devkit_candidates[:1]
tarball_missing = [f for f in image_tarballs if not os.path.exists(os.path.join(IMNET_SRC, f))]
if not any(os.path.exists(os.path.join(IMNET_SRC, f)) for f in devkit_candidates):
    tarball_missing.append(' or '.join(devkit_candidates))

# Option B: Kaggle layout
kaggle_train_dir = os.path.join(IMNET_SRC, "ILSVRC", "Data", "CLS-LOC", "train")
kaggle_val_dir = os.path.join(IMNET_SRC, "ILSVRC", "Data", "CLS-LOC", "val")
USE_KAGGLE_LAYOUT = os.path.isdir(kaggle_train_dir) and os.path.isdir(kaggle_val_dir)

if not tarball_missing:
    # A complete tarball set takes precedence over a Kaggle tree when both exist.
    print("✅ Found tarball format (original ImageNet files)")
    USE_KAGGLE_LAYOUT = False
elif USE_KAGGLE_LAYOUT:
    print("✅ Found Kaggle layout")
else:
    raise FileNotFoundError(
        f"Missing tarballs: {tarball_missing} and Kaggle layout not found under "
        f"{os.path.join(IMNET_SRC,'ILSVRC','Data','CLS-LOC')}"
    )


### Kaggle CLI reference
If you prefer running the raw command, use:

```bash
kaggle competitions download -c imagenet-object-localization-challenge -p "$IMNET_SRC"
unzip -q "$IMNET_SRC"/*.zip -d "$IMNET_SRC"
```

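Note: make sure your Kaggle credentials are configured (`~/.kaggle/kaggle.json`), or upload `kaggle.json` in the Kaggle download cell above. When running these commands in a terminal rather than the notebook, set the `IMNET_SRC` shell variable to your download folder first.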


In [None]:
# Prepare data: use Kaggle layout if available, else extract tarballs
# Populates IMNET_ROOT/train, IMNET_ROOT/val and IMNET_ROOT/devkit. Each step is skipped
# when its target directory already has content, so the cell can be re-run.
import glob, os, subprocess

kaggle_train_dir = os.path.join(IMNET_SRC, "ILSVRC", "Data", "CLS-LOC", "train")
kaggle_val_dir = os.path.join(IMNET_SRC, "ILSVRC", "Data", "CLS-LOC", "val")

train_out = os.path.join(IMNET_ROOT, 'train')
val_out = os.path.join(IMNET_ROOT, 'val')
devkit_out = os.path.join(IMNET_ROOT, 'devkit')
for out_dir in (train_out, val_out, devkit_out):
    os.makedirs(out_dir, exist_ok=True)


def _untar(archive, dest):
    """Extract `archive` into the existing directory `dest`.

    Arguments are passed as a list (no shell), so paths need no quoting.
    Raises subprocess.CalledProcessError if tar exits non-zero.
    """
    subprocess.run(['tar', '-xf', archive, '-C', dest], check=True)


def _copy_contents(src, dest):
    """Recursively copy everything inside `src` into the existing directory `dest`.

    Raises subprocess.CalledProcessError if cp exits non-zero.
    """
    # "src/." copies the directory's contents rather than the directory itself.
    subprocess.run(['cp', '-r', os.path.join(src, '.'), dest], check=True)


if globals().get('USE_KAGGLE_LAYOUT', False):
    print("Using Kaggle layout → copying to target...")
    if not os.listdir(train_out):
        _copy_contents(kaggle_train_dir, train_out)
    if not os.listdir(val_out):
        _copy_contents(kaggle_val_dir, val_out)
else:
    print("Using tarballs → extracting...")
    train_tar = os.path.join(IMNET_SRC, 'ILSVRC2012_img_train.tar')
    val_tar = os.path.join(IMNET_SRC, 'ILSVRC2012_img_val.tar')
    devkit_candidates = [
        os.path.join(IMNET_SRC, name)
        for name in ('ILSVRC2012_devkit_t12.tar', 'ILSVRC2012_devkit_t12.tar.gz')
    ]
    devkit_tar = next((p for p in devkit_candidates if os.path.exists(p)), None)

    if not os.listdir(train_out):
        _untar(train_tar, train_out)
    # The train archive unpacks to nested *.tar files; each one is expanded into a
    # directory named after it and then deleted. This runs even when train/ was
    # already populated, so archives left over from an interrupted run get finished.
    for class_tar in sorted(glob.glob(os.path.join(train_out, '*.tar'))):
        class_dir = os.path.splitext(class_tar)[0]
        os.makedirs(class_dir, exist_ok=True)
        _untar(class_tar, class_dir)
        os.remove(class_tar)

    if not os.listdir(val_out):
        _untar(val_tar, val_out)

    if devkit_tar is None:
        print("Warning: no devkit archive found in", IMNET_SRC, "- devkit/ left empty.")
    elif not os.listdir(devkit_out):
        _untar(devkit_tar, devkit_out)
print("Done preparing data.")


In [None]:
# Organize validation set into class folders
# Moves every loose image in <root>/val into <root>/val/<WNID>/. Labels come from the
# extracted devkit when it exists, otherwise from Kaggle's LOC_val_solution.csv.
# Re-running after the images have been moved is a no-op instead of an assertion error.
import csv, os, shutil
import scipy.io as sio

# Follow the paths configured earlier; fall back to the notebook defaults when this
# cell is run on its own.
_imnet_root = globals().get('IMNET_ROOT', '/content/imagenet')
_imnet_src = globals().get('IMNET_SRC', '/content/drive/MyDrive/imagenet')

devkit = os.path.join(_imnet_root, 'devkit', 'ILSVRC2012_devkit_t12')
val_dir = os.path.join(_imnet_root, 'val')
kaggle_val_csv = os.path.join(_imnet_src, 'LOC_val_solution.csv')


def _wnids_from_devkit(devkit_dir, images):
    """Return one WNID per entry of `images`, using the devkit's meta.mat and ground truth.

    Labels are matched to images by position, so `images` must be the full, sorted
    list of validation files (same assumption as pairing sorted names with the
    ground-truth file line by line). Raises AssertionError on a count mismatch.
    """
    # squeeze_me=True collapses MATLAB's singleton dimensions so each struct field
    # reads back as a plain scalar / string rather than a nested 1-element array.
    meta = sio.loadmat(os.path.join(devkit_dir, 'data', 'meta.mat'), squeeze_me=True)['synsets']
    # Build mapping: ILSVRC2012_ID -> WNID
    id2wnid = {}
    for entry in meta:
        synset_id = int(entry['ILSVRC2012_ID'])
        if synset_id > 0:
            id2wnid[synset_id] = str(entry['WNID'])

    gt_path = os.path.join(devkit_dir, 'data', 'ILSVRC2012_validation_ground_truth.txt')
    with open(gt_path, 'r') as f:
        gt = [int(x.strip()) for x in f if x.strip()]

    assert len(images) == len(gt), f'mismatch: {len(images)} images vs {len(gt)} labels'
    return [id2wnid[cls_id] for cls_id in gt]


def _wnids_from_kaggle_csv(csv_path, images):
    """Return one WNID per entry of `images`, looked up by file stem in the Kaggle CSV.

    Expects the columns `ImageId` and `PredictionString`, with the WNID as the first
    whitespace-separated token of PredictionString — NOTE(review): confirm against
    the downloaded file. Raises KeyError if an image has no row.
    """
    with open(csv_path, newline='') as f:
        stem2wnid = {
            row['ImageId']: row['PredictionString'].split()[0]
            for row in csv.DictReader(f)
        }
    unlabeled = [img for img in images if os.path.splitext(img)[0] not in stem2wnid]
    if unlabeled:
        raise KeyError(f'{len(unlabeled)} validation images have no label in {csv_path}, e.g. {unlabeled[0]}')
    return [stem2wnid[os.path.splitext(img)[0]] for img in images]


imgs = sorted(x for x in os.listdir(val_dir) if x.lower().endswith(('.jpeg', '.jpg')))

if not imgs:
    if any(os.path.isdir(os.path.join(val_dir, d)) for d in os.listdir(val_dir)):
        print('Validation images are already in class folders; nothing to do.')
    else:
        raise FileNotFoundError(f'No validation images found in {val_dir}')
else:
    if os.path.isfile(os.path.join(devkit, 'data', 'meta.mat')):
        wnids = _wnids_from_devkit(devkit, imgs)
    elif os.path.isfile(kaggle_val_csv):
        wnids = _wnids_from_kaggle_csv(kaggle_val_csv, imgs)
    else:
        raise FileNotFoundError(
            f'No validation labels found: neither {devkit} nor {kaggle_val_csv} exists'
        )

    for img, wnid in zip(imgs, wnids):
        dst = os.path.join(val_dir, wnid)
        os.makedirs(dst, exist_ok=True)
        shutil.move(os.path.join(val_dir, img), os.path.join(dst, img))
    print(f'Moved {len(imgs)} validation images into {len(set(wnids))} class folders.')


In [None]:
# Run A/B/E for ViT-B/16 (~86M). Reduce batch if OOM.
IMAGENET_ROOT = '/content/imagenet'
!python experiments/imagenet_ab_param_budgets.py \
  --data_root $IMAGENET_ROOT \
  --targets 86000000 \
  --models A B E \
  --img_size 224 --patch 16 \
  --steps 90000 --eval_every 1000 --batch 128 \
  --lr_large 0.001 --warmup_frac 0.1 --weight_decay 0.1 \
  --use_randaug --randaug_n 2 --randaug_m 9 --random_erasing 0.25 \
  --mixup_alpha 0.8 --cutmix_alpha 1.0 --mix_prob 0.5 \
  --drop_path 0.4 --grad_clip 1.0 --ema --ema_decay 0.9999 \
  --ew_views 5 --ew_use_k3 --ew_share_qkv --ew_mlp_ratio 4.0


In [None]:
# Optional: ViT-L/16 and ViT-H/14
!python experiments/imagenet_ab_param_budgets.py --data_root $IMAGENET_ROOT \
  --targets 307000000 --models A B E --img_size 224 --patch 16 --batch 128 \
  --lr_large 0.001 --warmup_frac 0.1 --weight_decay 0.1 --use_randaug --randaug_n 2 --randaug_m 9 \
  --random_erasing 0.25 --mixup_alpha 0.8 --cutmix_alpha 1.0 --mix_prob 0.5 \
  --drop_path 0.4 --grad_clip 1.0 --ema --ema_decay 0.9999 \
  --ew_views 5 --ew_use_k3 --ew_share_qkv --ew_mlp_ratio 4.0

!python experiments/imagenet_ab_param_budgets.py --data_root $IMAGENET_ROOT \
  --targets 632000000 --models A B E --img_size 224 --patch 14 --batch 128 \
  --lr_large 0.001 --warmup_frac 0.1 --weight_decay 0.1 --use_randaug --randaug_n 2 --randaug_m 9 \
  --random_erasing 0.25 --mixup_alpha 0.8 --cutmix_alpha 1.0 --mix_prob 0.5 \
  --drop_path 0.4 --grad_clip 1.0 --ema --ema_decay 0.9999 \
  --ew_views 5 --ew_use_k3 --ew_share_qkv --ew_mlp_ratio 4.0


In [None]:
# Generate paper tables
# Runs the repository's ab5_paper_benchmark.py script, then lists results/paper_benchmark
# (presumably the directory the script writes its tables to — confirm against the script).
!python experiments/ab5_paper_benchmark.py
!ls -la results/paper_benchmark
