<a href="https://colab.research.google.com/github/AlexRaudvee/MultiArchPDD-CV/blob/main/main_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup of environment

In [1]:
import os
import shutil
import zipfile
from pathlib import Path
from google.colab import drive

def mount_google_drive(mount_point: Path = Path('/content/drive')) -> Path:
    """Mount Google Drive at ``mount_point`` and return that same path.

    ``drive.mount`` is given the location as a plain string; the ``Path``
    that was passed in is handed back unchanged so callers can build
    further paths from it.
    """
    location = str(mount_point)
    drive.mount(location)
    return mount_point

def extract_zip(zip_path: Path, extract_to: Path) -> None:
    """Unpack the archive at ``zip_path`` into the directory ``extract_to``.

    Raises:
        FileNotFoundError: if ``zip_path`` is not an existing regular file.
    """
    if zip_path.is_file():
        with zipfile.ZipFile(zip_path, mode='r') as archive:
            archive.extractall(path=str(extract_to))
        return
    raise FileNotFoundError(f"Could not find zip file at {zip_path}")

def move_contents(src_dir: Path, dst_dir: Path) -> None:
    """
    Moves everything from src_dir into dst_dir.
    Overwrites any existing files or folders of the same name.
    Cleans up the now-empty src_dir at the end.

    dst_dir (and any missing parents) is created if it does not exist yet.

    Raises:
        FileNotFoundError: if src_dir is not an existing directory.
        ValueError: if src_dir and dst_dir resolve to the same directory;
            "overwriting" would then delete every entry before moving it.
    """
    if not src_dir.is_dir():
        raise FileNotFoundError(f"{src_dir} does not exist")
    if src_dir.resolve() == dst_dir.resolve():
        raise ValueError(f"{src_dir} and {dst_dir} are the same directory")

    # Without an existing destination, moving a plain file fails while moving
    # a directory happens to succeed, so the outcome depended on entry order.
    dst_dir.mkdir(parents=True, exist_ok=True)

    # Snapshot the listing: entries are removed from src_dir as we go.
    for item in list(src_dir.iterdir()):
        target = dst_dir / item.name
        # is_symlink() also catches dangling links, for which exists() is False.
        if target.is_symlink() or target.exists():
            print(f"Warning: {target} already exists, overwriting")
            if target.is_dir() and not target.is_symlink():
                shutil.rmtree(target)
            else:
                # Files and symlinks (rmtree refuses to operate on a symlink).
                target.unlink()
        shutil.move(str(item), str(target))
    src_dir.rmdir()

def setup_directories(*dirs: Path) -> None:
    """Create every directory in `dirs`, including missing parents.

    Directories that already exist are left as they are.
    """
    for directory in dirs:
        directory.mkdir(exist_ok=True, parents=True)

def zip_folder(folder_path: Path, output_path: Path) -> None:
    """
    Recursively zip the contents of folder_path into a .zip file at output_path.

    Both arguments may be given as ``Path`` objects or plain strings.
    Entries are stored relative to folder_path. If output_path lies inside
    folder_path, the archive itself is left out of the archive.

    Raises:
        FileNotFoundError: if folder_path is not an existing directory
            (previously this silently produced an empty archive).
    """
    folder = Path(folder_path)
    output = Path(output_path)
    if not folder.is_dir():
        raise FileNotFoundError(f"{folder} does not exist")

    # Resolved once so the archive can be recognised during the walk.
    archive_file = output.resolve()
    with zipfile.ZipFile(output, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for root, _, files in os.walk(folder):
            for fname in files:
                fpath = Path(root) / fname
                # The archive already exists on disk once opened for writing;
                # never add the half-written archive to itself.
                if fpath.resolve() == archive_file:
                    continue
                arcname = fpath.relative_to(folder)
                zipf.write(str(fpath), arcname)

In [2]:
# --- Paths used by this notebook ---
DRIVE_MOUNT_POINT = Path('/content/drive')
ZIP_PATH = DRIVE_MOUNT_POINT / 'MyDrive/.colab.zip'
EXTRACT_TO = Path('/content')
SRC_DIR = EXTRACT_TO / '.colab'
DST_DIR = EXTRACT_TO
DISTILLED_DIR = EXTRACT_TO / 'data' / 'Distilled'
MODEL_DIR = EXTRACT_TO / 'data' / 'checkpoints'
ASSETS_DIR = EXTRACT_TO / 'assets' / 'viz_synthetic'

# --- Environment setup ---
# Mount Drive, unpack the archive stored there, then lift the contents of the
# extracted '.colab' folder up into EXTRACT_TO.
mount_google_drive(DRIVE_MOUNT_POINT)
extract_zip(ZIP_PATH, EXTRACT_TO)
move_contents(SRC_DIR, DST_DIR)
# Make sure the output folders exist (same order as before, one call).
setup_directories(DISTILLED_DIR, ASSETS_DIR, MODEL_DIR)

Mounted at /content/drive


In [3]:
!pip install matplotlib



### Launch of Dataset Distillation

### Meta-model Matching

In [None]:
!python main.py meta-model-matching \
    --dataset mnist \
    --model convnet \
    --batch-size 32 \
    --ipc 1 \
    --P 5 \
    --K 50 \
    --T 1 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[Dataloader]:
     - Loading...
     - Done.
[Distillator]:
Stage 1/5:   0%|                                         | 0/50 [00:00<?, ?it/s]T Loss=2.3053462505340576
g_norm = tensor(0.1037, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
K Loss      =2303.702880859375
||∇_X meta|| = 4.426256055012345e-05
ΔX norm: 4.4262552023610624e-07
Saved synthetic image grid to assets/debug/synthetic.png
Stage 1/5:   2%|▋                                | 1/50 [00:00<00:24,  2.02it/s]T Loss=2.3036932945251465
g_norm = tensor(0.0792, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
K Loss      =2304.285400390625
||∇_X meta|| = 3.970310717704706e-05
ΔX norm: 3.9703087395537295e-07
Stage 1/5:   4%|█▎                               | 2/50 [00:00<00:21,  2.29it/s]T Loss=2.3041889667510986
g_norm = tensor(0.0935, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
K Loss      =2305.0849609375
||∇_X meta|| = 5.7964574807556346e-05
ΔX norm: 5.79645586640

In [None]:
!python main.py meta-model-matching \
    --dataset cifar10 \
    --model convnet \
    --batch-size 32 \
    --ipc 10 \
    --P 10 \
    --K 300 \
    --T 5 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[Dataloader]:
     - Loading...
     - Done.
[Distillator]:
Stage 1/10:   0%|                                       | 0/300 [00:00<?, ?it/s]T Loss=2.305534601211548
g_norm = tensor(0.1072, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
T Loss=2.3032116889953613
g_norm = tensor(0.0963, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
T Loss=2.3034181594848633
g_norm = tensor(0.0868, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
T Loss=2.3034520149230957
g_norm = tensor(0.0954, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
T Loss=2.303032636642456
g_norm = tensor(0.0990, grad_fn=<LinalgVectorNormBackward0>)
alpha_t= 0.0010000233305618167
K Loss      =229.1239471435547
||∇_X meta|| = 0.003849260974675417
ΔX norm: 3.849255881505087e-05
Saved synthetic image grid to assets/debug/synthetic.png
Stage 1/10:   0%|                               | 1/300 [00:02<11:19,  2.27s/it]T Loss=2.304659366607666
g_norm = ten

### Gradient Aggregation

In [None]:
!python main.py gradient-aggregation \
    --dataset mnist \
    --model convnet resnet10 \
    --batch-size 32 \
    --ipc 1 \
    --P 5 \
    --K 50 \
    --T 1 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1e-2 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[Dataloader]:
     - Loading...
     - Done.
[Distillator]:
Stage 1/5:   0% 0/50 [00:00<?, ?it/s]     - Model 1: T Loss      =[tensor(2.3029, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3672, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 1: g_norm      = 0.09846246987581253
     - Model 1: alpha_t     = 0.0010000233305618167
     - Model 2: T Loss      =[tensor(2.3029, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3672, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 2: g_norm      = 2.4604780673980713
     - Model 2: alpha_t     = 0.0010000233305618167
K Losses    =[tensor(2.3057, device='cuda:0', grad_fn=<AddBackward0>), tensor(2.3676, device='cuda:0', grad_fn=<AddBackward0>)]
||∇_X meta|| = 7.433712016791105e-05
ΔX norm:       7.433711743942695e-07
Saved synthetic image grid to assets/debug/synthetic.png
Stage 1/5:   2% 1/50 [00:01<01:03,  1.30s/it]     - Model 1: T Loss      =[tensor(2.3031, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3260, device

In [None]:
!python main.py gradient-aggregation \
    --dataset cifar10 \
    --model convnet resnet10\
    --batch-size 32 \
    --ipc 10 \
    --P 10 \
    --K 200 \
    --T 5 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
     - Model 1: g_norm      = 0.04541247710585594
     - Model 1: alpha_t     = 0.0010000233305618167
     - Model 2: T Loss      =[tensor(2.3031, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3553, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 2: g_norm      = 1.2733683586120605
     - Model 2: alpha_t     = 0.0010000233305618167
K Losses    =[tensor(2.4453, device='cuda:0', grad_fn=<AddBackward0>), tensor(2.4577, device='cuda:0', grad_fn=<AddBackward0>)]
||∇_X meta|| = 0.002634822390973568
ΔX norm:       2.6348223400418647e-05
Stage 10/10:  25% 50/200 [01:34<04:41,  1.88s/it]     - Model 1: T Loss      =[tensor(2.3036, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3551, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 1: g_norm      = 0.06099986657500267
     - Model 1: alpha_t     = 0.0010000233305618167
     - Model 2: T Loss      =[tensor(2.3036, device='cuda:0', grad_fn=<Div

In [None]:
zip_folder("/content/assets", "/content/assets_GA_CIFAR10.zip")

### Composite Loss

In [None]:
!python main.py composite-loss \
    --dataset mnist \
    --model convnet resnet10\
    --batch-size 32 \
    --ipc 1 \
    --P 5 \
    --K 50 \
    --T 1 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1e-2 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[Dataloader]:
     - Loading...
100% 9.91M/9.91M [00:00<00:00, 16.6MB/s]
100% 28.9k/28.9k [00:00<00:00, 482kB/s]
100% 1.65M/1.65M [00:00<00:00, 4.55MB/s]
100% 4.54k/4.54k [00:00<00:00, 9.61MB/s]
     - Done.
[Distillator]:
Stage 1/5:   0% 0/50 [00:00<?, ?it/s]     - Model 1: T Loss =        [tensor(2.3047, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3281, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 1: g_norm =         0.10291890799999237
     - Model 1: alpha_t =        0.0010000233305618167
     - Model 2: T Loss =        [tensor(2.3047, device='cuda:0', grad_fn=<DivBackward0>), tensor(2.3281, device='cuda:0', grad_fn=<DivBackward0>)]
     - Model 2: g_norm =         2.261667013168335
     - Model 2: alpha_t =        0.0010000233305618167
     - K Loss       =2.2971901893615723
     - ||∇_X meta||  = 7.194097270257771e-05
     - ΔX norm       = 7.194097406681976e-07
Saved synthetic image grid to assets/debug/synthetic.png
Stage 1/5:   2% 1/50 [00:01<00:56,  1.15s/it]

In [None]:
zip_folder("/content/assets", "/content/assets_CL_MNIST.zip")

In [None]:
!python main.py composite-loss \
    --dataset cifar10 \
    --model convnet resnet10\
    --batch-size 32 \
    --ipc 10 \
    --P 10 \
    --K 200 \
    --T 5 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

python3: can't open file '/content/main.py': [Errno 2] No such file or directory


In [None]:
zip_folder("/content/assets", "/content/drive/MyDrive/assets_CL_CIFAR10.zip")

### Multi-branch with consistency alignment

In [None]:
!python main.py multi-branch \
    --dataset mnist \
    --model convnet resnet10\
    --batch-size 32 \
    --ipc 1 \
    --P 5 \
    --K 50 \
    --T 1 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1e-2 \
    --syn-optimizer adam \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[Dataloader]:
     - Loading...
     - Done.
[Distillator]:
Stage 1/5:   0% 0/50 [00:00<?, ?it/s]     - Model 1: T Loss =2.3045589923858643
     - Model 1: g_norm = tensor(0.0854, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 1: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.331651449203491
     - Model 2: g_norm = tensor(2.3108, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 2: alpha_t = 0.0010000233305618167
     - K Loss      =228.7445068359375
     - Model 1: ||∇_X meta|| = 0.635129988193512
     - Model 1: ΔX norm  = 0.006351300049573183
     - Model 2: ||∇_X meta|| = 0.6356841325759888
     - Model 2: ΔX norm  = 0.006356840953230858
     - Saved synthetic image grid to assets/debug/synthetic.png
Stage 1/5:   2% 1/50 [00:01<00:55,  1.14s/it]     - Model 1: T Loss =2.302825689315796
     - Model 1: g_norm = tensor(0.0833, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 1: alpha_t = 0.0010000233305618167
     - M

In [None]:
zip_folder("/content/assets", "/content/drive/MyDrive/assets_MB_MNIST.zip")

In [None]:
zip_folder("/content/data/Distilled", "/content/drive/MyDrive/distilled_MB_MNIST.zip")

In [None]:
# Multi-branch distillation on cifar10 with the convnet + resnet10 pair (debug output on).
!python main.py multi-branch \
    --dataset cifar10 \
    --model convnet resnet10 \
    --batch-size 32 \
    --ipc 10 \
    --P 10 \
    --K 200 \
    --T 5 \
    --lr-model 1e-3 \
    --lr-syn-data 1e-2 \
    --regularisation 1 \
    --syn-optimizer momentum \
    --inner-optimizer momentum \
    --debug True \
    --out-dir data/Distilled \
    --ckpt-dir data/checkpoints

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m
     - Model 1: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.3410587310791016
     - Model 2: g_norm = tensor(1.2769, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 2: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.575721263885498
     - Model 2: g_norm = tensor(1.2694, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 2: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.6099514961242676
     - Model 2: g_norm = tensor(1.2618, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 2: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.603341817855835
     - Model 2: g_norm = tensor(1.2813, device='cuda:0', grad_fn=<LinalgVectorNormBackward0>)
     - Model 2: alpha_t = 0.0010000233305618167
     - Model 2: T Loss =2.6018316745758057
     - Model 2: g_norm = tensor(1.2259, device='cuda:0', grad_fn=<LinalgVectorNormBackwar

In [None]:
zip_folder("/content/assets", "/content/drive/MyDrive/assets_MB_CIFAR10.zip")

In [None]:
zip_folder("/content/data/Distilled", "/content/drive/MyDrive/distilled_MB_CIFAR10.zip")

### Benchmarking of Distilled Dataset (dev - accuracy performance)

In [None]:
!python main.py benchmark \
    --distilled-path data/Distilled/meta-model-matching_mnist_convnet.pt \
    --benchmark-mode synthetic \
    --model convnet \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 5 \
    --till-stage 5 \
    --real-size 1000

[Benchmarker]:
     - Using device: cpu
     - Loading distilled data from data/Distilled/meta-model-matching_mnist_convnet.pt
     - Total synthetic examples = 50; real subset size = 1000

     - [Syn] Stage 1/5: 10 examples
       - Epoch 1/5 → loss 2.3028
       - Epoch 2/5 → loss 2.2992
       - Epoch 3/5 → loss 2.2956
       - Epoch 4/5 → loss 2.2907
       - Epoch 5/5 → loss 2.2848

     - [Syn] Stage 2/5: 10 examples
       - Epoch 1/5 → loss 2.2840
       - Epoch 2/5 → loss 2.2735
       - Epoch 3/5 → loss 2.2591
       - Epoch 4/5 → loss 2.2403
       - Epoch 5/5 → loss 2.2182

     - [Syn] Stage 3/5: 10 examples
       - Epoch 1/5 → loss 2.2271
       - Epoch 2/5 → loss 2.2002
       - Epoch 3/5 → loss 2.1649
       - Epoch 4/5 → loss 2.1189
       - Epoch 5/5 → loss 2.0616

     - [Syn] Stage 4/5: 10 examples
       - Epoch 1/5 → loss 1.9607
       - Epoch 2/5 → loss 1.8696
       - Epoch 3/5 → loss 1.7591
       - Epoch 4/5 → loss 1.6334
       - Epoch 5/5 → loss 1.4954

  

In [None]:
!python main.py benchmark \
    --distilled-path data/Distilled/composite-loss_mnist_convnet_resnet10.pt \
    --benchmark-mode synthetic \
    --model vgg11 \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 7 \
    --till-stage 5 \
    --real-size 1000

[Benchmarker]:
     - Using device: cpu
     - Loading distilled data from data/Distilled/composite-loss_mnist_convnet_resnet10.pt
     - Total synthetic examples = 50; real subset size = 1000

     - [Syn] Stage 1/5: 10 examples
       - Epoch 1/7 → loss 2.3760
       - Epoch 2/7 → loss 2.0907
       - Epoch 3/7 → loss 1.9722
       - Epoch 4/7 → loss 2.0916
       - Epoch 5/7 → loss 1.8893
       - Epoch 6/7 → loss 1.6800
       - Epoch 7/7 → loss 1.5475

     - [Syn] Stage 2/5: 10 examples
       - Epoch 1/7 → loss 1.7490
       - Epoch 2/7 → loss 1.5962
       - Epoch 3/7 → loss 1.5965
       - Epoch 4/7 → loss 1.3779
       - Epoch 5/7 → loss 1.1781
       - Epoch 6/7 → loss 1.2387
       - Epoch 7/7 → loss 1.0771

     - [Syn] Stage 3/5: 10 examples
       - Epoch 1/7 → loss 1.2146
       - Epoch 2/7 → loss 1.0372
       - Epoch 3/7 → loss 0.7914
       - Epoch 4/7 → loss 0.9563
       - Epoch 5/7 → loss 0.7105
       - Epoch 6/7 → loss 0.7611
       - Epoch 7/7 → loss 0.6891

  

In [8]:
!python main.py benchmark \
    --distilled-path data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt \
    --benchmark-mode synthetic \
    --model resnet10 \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 5 \
    --till-stage 10 \
    --real-size 1000

[Benchmarker]:
     - Using device: cuda
     - Loading distilled data from data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt
     - Total synthetic examples = 1000; real subset size = 1000

     - [Syn] Stage 1/10: 100 examples
       - Epoch 1/5 → loss 1.9839
       - Epoch 2/5 → loss 0.7848
       - Epoch 3/5 → loss 0.4749
       - Epoch 4/5 → loss 0.3361
       - Epoch 5/5 → loss 0.1517

     - [Syn] Stage 2/10: 100 examples
       - Epoch 1/5 → loss 0.3822
       - Epoch 2/5 → loss 0.2674
       - Epoch 3/5 → loss 0.1500
       - Epoch 4/5 → loss 0.1254
       - Epoch 5/5 → loss 0.0659

     - [Syn] Stage 3/10: 100 examples
       - Epoch 1/5 → loss 0.5678
       - Epoch 2/5 → loss 0.3041
       - Epoch 3/5 → loss 0.2251
       - Epoch 4/5 → loss 0.1845
       - Epoch 5/5 → loss 0.0834

     - [Syn] Stage 4/10: 100 examples
       - Epoch 1/5 → loss 0.4209
       - Epoch 2/5 → loss 0.2408
       - Epoch 3/5 → loss 0.2066
       - Epoch 4/5 → loss 0.0989
       - Epoch 5/

In [None]:
!python main.py benchmark \
    --distilled-path data/Distilled/meta-model-matching_cifar10_convnet.pt \
    --benchmark-mode synthetic \
    --model convnet \
    --syn-batch-size 32 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 10 \
    --till-stage 10 \
    --real-size 1000

[Benchmarker]:
     - Using device: cpu
     - Loading distilled data from data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt
     - Total synthetic examples = 1000; real subset size = 1000

     - [Syn] Stage 1/10: 100 examples
       - Epoch 1/10 → loss 2.3085
       - Epoch 2/10 → loss 2.2988
       - Epoch 3/10 → loss 2.2883
       - Epoch 4/10 → loss 2.2752
       - Epoch 5/10 → loss 2.2457
       - Epoch 6/10 → loss 2.1768
       - Epoch 7/10 → loss 2.0744
       - Epoch 8/10 → loss 1.9924
       - Epoch 9/10 → loss 1.8618
       - Epoch 10/10 → loss 1.7042

     - [Syn] Stage 2/10: 100 examples
       - Epoch 1/10 → loss 1.6008
       - Epoch 2/10 → loss 1.4466
       - Epoch 3/10 → loss 1.4674
       - Epoch 4/10 → loss 1.2329
       - Epoch 5/10 → loss 1.1822
       - Epoch 6/10 → loss 1.0661
       - Epoch 7/10 → loss 0.9316
       - Epoch 8/10 → loss 0.9610
       - Epoch 9/10 → loss 0.8328
       - Epoch 10/10 → loss 0.8068

     - [Syn] Stage 3/10: 100 examples
  

In [10]:
!python main.py benchmark \
    --distilled-path data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt \
    --benchmark-mode synthetic \
    --model vgg11 \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 7 \
    --till-stage 10 \
    --real-size 1000

[Benchmarker]:
     - Using device: cuda
     - Loading distilled data from data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt
     - Total synthetic examples = 1000; real subset size = 1000

     - [Syn] Stage 1/10: 100 examples
       - Epoch 1/7 → loss 2.2443
       - Epoch 2/7 → loss 1.9141
       - Epoch 3/7 → loss 1.7641
       - Epoch 4/7 → loss 1.5361
       - Epoch 5/7 → loss 1.4534
       - Epoch 6/7 → loss 1.3282
       - Epoch 7/7 → loss 1.2283

     - [Syn] Stage 2/10: 100 examples
       - Epoch 1/7 → loss 1.1558
       - Epoch 2/7 → loss 1.0365
       - Epoch 3/7 → loss 0.9054
       - Epoch 4/7 → loss 0.7801
       - Epoch 5/7 → loss 0.6766
       - Epoch 6/7 → loss 0.5937
       - Epoch 7/7 → loss 0.6237

     - [Syn] Stage 3/10: 100 examples
       - Epoch 1/7 → loss 0.7477
       - Epoch 2/7 → loss 0.6761
       - Epoch 3/7 → loss 0.5677
       - Epoch 4/7 → loss 0.6180
       - Epoch 5/7 → loss 0.5591
       - Epoch 6/7 → loss 0.4457
       - Epoch 7/7 → lo

In [25]:
!python main.py benchmark \
    --distilled-path data/Distilled/mult-branch_cifar10_convnet_resnet10.pt \
    --benchmark-mode synthetic \
    --model resnet10 \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 7 \
    --till-stage 10 \
    --real-size 1000

[Benchmarker]:
     - Using device: cuda
     - Loading distilled data from data/Distilled/mult-branch_cifar10_convnet_resnet10.pt
     - Total synthetic examples = 1000; real subset size = 1000

     - [Syn] Stage 1/10: 100 examples
       - Epoch 1/7 → loss 2.1376
       - Epoch 2/7 → loss 0.8945
       - Epoch 3/7 → loss 0.4267
       - Epoch 4/7 → loss 0.2608
       - Epoch 5/7 → loss 0.1428
       - Epoch 6/7 → loss 0.1264
       - Epoch 7/7 → loss 0.0509

     - [Syn] Stage 2/10: 100 examples
       - Epoch 1/7 → loss 0.4110
       - Epoch 2/7 → loss 0.2667
       - Epoch 3/7 → loss 0.0823
       - Epoch 4/7 → loss 0.0784
       - Epoch 5/7 → loss 0.0341
       - Epoch 6/7 → loss 0.0286
       - Epoch 7/7 → loss 0.0216

     - [Syn] Stage 3/10: 100 examples
       - Epoch 1/7 → loss 0.3086
       - Epoch 2/7 → loss 0.1003
       - Epoch 3/7 → loss 0.0922
       - Epoch 4/7 → loss 0.0474
       - Epoch 5/7 → loss 0.0232
       - Epoch 6/7 → loss 0.0281
       - Epoch 7/7 → loss 0.

In [26]:
!python main.py benchmark \
    --distilled-path data/Distilled/mult-branch_cifar10_convnet_resnet10.pt \
    --benchmark-mode synthetic \
    --model vgg11 \
    --syn-batch-size 64 \
    --test-batch-size 64 \
    --lr 1e-3  \
    --epochs-per-stage 7 \
    --till-stage 10 \
    --real-size 1000

[Benchmarker]:
     - Using device: cuda
     - Loading distilled data from data/Distilled/mult-branch_cifar10_convnet_resnet10.pt
     - Total synthetic examples = 1000; real subset size = 1000

     - [Syn] Stage 1/10: 100 examples
       - Epoch 1/7 → loss 2.2673
       - Epoch 2/7 → loss 1.9050
       - Epoch 3/7 → loss 1.7667
       - Epoch 4/7 → loss 1.6206
       - Epoch 5/7 → loss 1.4621
       - Epoch 6/7 → loss 1.2777
       - Epoch 7/7 → loss 1.2935

     - [Syn] Stage 2/10: 100 examples
       - Epoch 1/7 → loss 1.1955
       - Epoch 2/7 → loss 1.1693
       - Epoch 3/7 → loss 1.0308
       - Epoch 4/7 → loss 0.8843
       - Epoch 5/7 → loss 0.8993
       - Epoch 6/7 → loss 0.7517
       - Epoch 7/7 → loss 0.6067

     - [Syn] Stage 3/10: 100 examples
       - Epoch 1/7 → loss 0.7726
       - Epoch 2/7 → loss 0.6576
       - Epoch 3/7 → loss 0.5876
       - Epoch 4/7 → loss 0.4533
       - Epoch 5/7 → loss 0.5456
       - Epoch 6/7 → loss 0.4005
       - Epoch 7/7 → loss 0.

In [8]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/meta-model-matching_mnist_convnet.pt \
    --dataset mnist \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 358.50, MMD: 0.4342
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 331.29, MMD: 0.3807
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 331.64, MMD: 0.3968
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 311.17, MMD: 0.3425
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 291.11, MMD: 0.3362

=== Summary ===
Stage 1:  FID = 358.50,  MMD = 0.4342
Stage 2:  FID = 331.29,  MMD = 0.3807
Stage 3:  FID = 331.64,  MMD = 0.3968
Stage 4:  FID = 311.17,  MMD = 0.3425
Stage 5:  FID = 291.11,  MMD = 0.3362


In [10]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/composite-loss_mnist_convnet_resnet10.pt \
    --dataset mnist \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 346.63, MMD: 0.4446
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 319.78, MMD: 0.4017
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 328.72, MMD: 0.3765
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 293.97, MMD: 0.3289
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 294.26, MMD: 0.3314

=== Summary ===
Stage 1:  FID = 346.63,  MMD = 0.4446
Stage 2:  FID = 319.78,  MMD = 0.4017
Stage 3:  FID = 328.72,  MMD = 0.3765
Stage 4:  FID = 293.97,  MMD = 0.3289
Stage 5:  FID = 294.26,  MMD = 0.3314


In [11]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/mult-branch_mnist_convnet_resnet10.pt \
    --dataset mnist \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 346.40, MMD: 0.4805
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 328.15, MMD: 0.3931
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 360.81, MMD: 0.4479
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 320.34, MMD: 0.3772
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 291.00, MMD: 0.3138

=== Summary ===
Stage 1:  FID = 346.40,  MMD = 0.4805
Stage 2:  FID = 328.15,  MMD = 0.3931
Stage 3:  FID = 360.81,  MMD = 0.4479
Stage 4:  FID = 320.34,  MMD = 0.3772
Stage 5:  FID = 291.00,  MMD = 0.3138


In [12]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/grad-aggregation_mnist_convnet_resnet10.pt \
    --dataset mnist \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 398.26, MMD: 0.6754
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 385.13, MMD: 0.6222
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 405.67, MMD: 0.6790
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 398.67, MMD: 0.6728
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 394.64, MMD: 0.6256

=== Summary ===
Stage 1:  FID = 398.26,  MMD = 0.6754
Stage 2:  FID = 385.13,  MMD = 0.6222
Stage 3:  FID = 405.67,  MMD = 0.6790
Stage 4:  FID = 398.67,  MMD = 0.6728
Stage 5:  FID = 394.64,  MMD = 0.6256


In [9]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/meta-model-matching_cifar10_convnet.pt \
    --dataset cifar10 \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 301.50, MMD: 0.2607
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 300.15, MMD: 0.2545
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 299.43, MMD: 0.2564
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 302.15, MMD: 0.2592
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 301.54, MMD: 0.2615
→ Stage 6: computing activations for synthetic…
   • Stage 6 → FID: 300.73, MMD: 0.2553
→ Stage 7: computing activations for synthetic…
   • Stage 7 → FID: 301.51, MMD: 0.2634
→ Stage 8: computing activations for synthetic…
   • Stage 8 → FID: 300.35, MMD: 0.2576
→ Stage 9: computing activations for synthetic…
   • Stage 9 → FID: 298.18, MMD: 0.2534
→ Stage 10: computing activations for synthetic…
   • Stage 10 → FID: 300.80, MMD: 0.2582

=== Summary ===
Stage 1:  FID = 301.50,  MMD = 0.2607
Stage 2:  FID = 300.15,

In [13]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/mult-branch_cifar10_convnet_resnet10.pt \
    --dataset cifar10 \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 333.83, MMD: 0.3016
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 327.63, MMD: 0.2936
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 331.70, MMD: 0.2997
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 331.74, MMD: 0.3006
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 331.24, MMD: 0.3047
→ Stage 6: computing activations for synthetic…
   • Stage 6 → FID: 330.17, MMD: 0.2964
→ Stage 7: computing activations for synthetic…
   • Stage 7 → FID: 337.31, MMD: 0.3094
→ Stage 8: computing activations for synthetic…
   • Stage 8 → FID: 329.19, MMD: 0.3012
→ Stage 9: computing activations for synthetic…
   • Stage 9 → FID: 329.44, MMD: 0.3008
→ Stage 10: computing activations for synthetic…
   • Stage 10 → FID: 331.72, MMD: 0.3003

=== Summary ===
Stage 1:  FID = 333.83,  MMD = 0.3016
Stage 2:  FID = 327.63,

In [14]:
!python Benchmark/evaluate_distilled.py \
    --distilled data/Distilled/grad-aggregation_cifar10_convnet_resnet10.pt \
    --dataset cifar10 \
    --batch-size 1024

→ Computing activations for real MNIST…
→ Stage 1: computing activations for synthetic…
   • Stage 1 → FID: 396.71, MMD: 0.3972
→ Stage 2: computing activations for synthetic…
   • Stage 2 → FID: 394.07, MMD: 0.3852
→ Stage 3: computing activations for synthetic…
   • Stage 3 → FID: 398.29, MMD: 0.3946
→ Stage 4: computing activations for synthetic…
   • Stage 4 → FID: 391.05, MMD: 0.3906
→ Stage 5: computing activations for synthetic…
   • Stage 5 → FID: 396.98, MMD: 0.3946
→ Stage 6: computing activations for synthetic…
   • Stage 6 → FID: 392.35, MMD: 0.3885
→ Stage 7: computing activations for synthetic…
   • Stage 7 → FID: 397.06, MMD: 0.3964
→ Stage 8: computing activations for synthetic…
   • Stage 8 → FID: 391.38, MMD: 0.3833
→ Stage 9: computing activations for synthetic…
   • Stage 9 → FID: 395.22, MMD: 0.3894
→ Stage 10: computing activations for synthetic…
   • Stage 10 → FID: 397.49, MMD: 0.3963

=== Summary ===
Stage 1:  FID = 396.71,  MMD = 0.3972
Stage 2:  FID = 394.07,