For ResNet18 and ResNet-34

| cut...          | CIFAR numel     | bits (bytes) to address output coordinates | | CORe50 output numel | bits (bytes) to address output coordinates |
|-----------------|-----------------|-------------------------------------------|-|---------------------|-------------------------------------------|
| full ResNet     | 32x32x3 =  3072 | 12 (2)                                    | | 128x128x3 = 49152   | 16 (2)                                    |
| after Block 1   | 8x8x64  =  4096 | 12 (2)                                    | | 32x32x64  = 65536   | 16 (2)                                    |
| after Block 2   | 4x4x128 =  2048 | 11 (2)                                    | | 16x16x128 = 32768   | 15 (2)                                    |
| after Block 3   | 2x2x256 =  1024 | 10 (2)                                    | | 8x8x256   = 16384   | 14 (2)                                    |

For ResNet 32

| cut...          | CIFAR numel      |
|-----------------|------------------|
| full ResNet     | 32x32x3  =  3072 |
| after Block 1   | 32x32x16 = 16384 |
| after Block 2   | 16x16x32 =  8192 |
| after Block 3   | 8x8x64   =  4096 |

In [None]:
import pandas as pd
from math import ceil, floor, log2

In [None]:
# --- Run settings forwarded to every generated command ------------------------
SEED = 0            # passed as --seed and appended to every experiment name
MAX_EPOCHS = 256    # passed as --num_passes
DATA_DIR = '/data/marwei/pytorch/'                              # passed as --data_dir
LOG_DIR = '/home/marwei/Code/encodedgdumb/logs/ae_setting_b'    # passed as --log_dir

# --- Storage cost model (all sizes in bytes) ----------------------------------
MEMORY_SIZE_BYTE = 1.536 * 1_000_000    # total budget the stored samples must fit into
UINT_SIZE_BYTE = 1          # cost per element when the encoding block is 0
FLOAT_SIZE_BYTE = 4         # cost per element for every other encoding block
COORDINATE_SIZE_BYTE = 2    # extra cost per kept element in the thinning cell

# --- Experiment grid ----------------------------------------------------------
DATASETS = ['CIFAR10']
ENCODING_BLOCKS = [3]
K_QUANTIZATION = [4]        # numbers of quantization states (--n_states)
K_THINNING = [0.95]         # compression factors (--compression_factor)
K_AE = [1, 2, 4, 8, 16]     # numbers of latent channels (--latent_channels)

# --- Quantization strategy (--strategy); exactly one line is active -----------
# QUANTIZATION_STRATEGY = 'tiny_imagenet_transfer'
QUANTIZATION_STRATEGY = 'cifar10_transfer'
# QUANTIZATION_STRATEGY = 'cifar100_transfer'

# --- Autoencoder pretraining parameters (--pretraining_params) ----------------
# CONVAE_PRETRAINING_PARAMS = 'TinyImagenet'
CONVAE_PRETRAINING_PARAMS = 'CIFAR10_01'


In [None]:

import pandas as pd
# Metadata table indexed by (dataset, encoding block). Later cells read its
# columns 'output_numel', 'model', 'encoder', 'n_tasks', 'n_classes_per_task'
# and 'total_dataset_size'.
# NOTE: unpickling can execute arbitrary code -- only load a file you trust.
ds_info = pd.read_pickle('ds_info.pickle')
# ds_info = pd.read_pickle('ds_info_resnet32.pickle')

# Boolean row selectors built from the first two index levels.
dataset_level = ds_info.index.get_level_values(0)
block_level = ds_info.index.get_level_values(1)
is_cifar10 = dataset_level == 'CIFAR10'
is_cifar10_nonzero_block = is_cifar10 & (block_level != 0)

# Override the backbone name for all CIFAR10 rows, and the encoder name for
# the CIFAR10 rows whose encoding block is not 0.
# ds_info.loc[is_cifar10, 'model'] = 'resnet'
ds_info.loc[is_cifar10, 'model'] = 'resnet18_cifar'
# ds_info.loc[is_cifar10_nonzero_block, 'encoder'] = 'cutr'
ds_info.loc[is_cifar10_nonzero_block, 'encoder'] = 'cutr_cifar'
ds_info

In [None]:
from torchvision.models import ResNet34_Weights

In [None]:
# Accumulators filled by the cells below: one shell command line per
# experiment, and the matching experiment name at the same position.
exps, names = [], []

Basecase

In [None]:
# Base case: samples are stored without compression ("--compressor none").
for this_dataset in DATASETS:
    for this_encoding_block in ENCODING_BLOCKS:
        info_key = (this_dataset, this_encoding_block)
        this_model = ds_info.loc[info_key, 'model']
        output_numel = ds_info.loc[info_key, 'output_numel']

        # Encoding block 0 is costed at UINT_SIZE_BYTE per element and gets no
        # "--encoding_block" flag (only an empty placeholder token); every
        # other block is costed at FLOAT_SIZE_BYTE per element.
        is_block_zero = this_encoding_block == 0
        bytes_per_element = UINT_SIZE_BYTE if is_block_zero else FLOAT_SIZE_BYTE
        encoder_options = [""] if is_block_zero else ["--encoding_block", str(this_encoding_block)]
        n_memory_samples = floor(MEMORY_SIZE_BYTE / (output_numel * bytes_per_element))

        # The memory must hold fewer samples than the dataset contains.
        assert n_memory_samples < ds_info.loc[info_key, 'total_dataset_size']

        exp_name = f"{this_dataset}_m{n_memory_samples}_{this_model}_c{this_encoding_block}_s{SEED}"

        command = [
            "python3 src/main.py",
            "--dataset", this_dataset,
            "--num_classes_per_task", str(ds_info.loc[info_key, 'n_classes_per_task']),
            "--num_tasks", str(ds_info.loc[info_key, 'n_tasks']),
            "--seed", str(SEED),
            "--memory_size", str(n_memory_samples),
            "--num_passes", str(MAX_EPOCHS),
            "--sampler", "greedy_sampler",
            "--encoder", ds_info.loc[info_key, 'encoder'],
            *encoder_options,
            "--compressor", "none",
            "--backbone", this_model,
            "--backbone_block", str(this_encoding_block),
            "--data_dir", DATA_DIR,
            "--log_dir", LOG_DIR,
            "--exp_name", exp_name,
        ]
        exps.append(" ".join(command))
        names.append(exp_name)

Quantization

In [None]:
# Quantization: every element is stored as an index into n_quantization_states
# levels, so it costs ceil(log2(n_states)) bits instead of a full value.
for this_dataset in DATASETS:
    for this_encoding_block in ENCODING_BLOCKS:
        info_key = (this_dataset, this_encoding_block)
        this_model = ds_info.loc[info_key, 'model']
        output_numel = ds_info.loc[info_key, 'output_numel']

        for n_quantization_states in K_QUANTIZATION:
            # Subtract the space for the quantile centers (one float per state).
            available_mem = MEMORY_SIZE_BYTE - n_quantization_states * FLOAT_SIZE_BYTE
            bits_per_element = ceil(log2(n_quantization_states))
            # Bits of one sample, rounded up to whole bytes.
            sample_size_byte = ceil(output_numel * bits_per_element / 8)
            n_memory_samples = floor(available_mem / sample_size_byte)

            # The memory must hold fewer samples than the dataset contains.
            assert n_memory_samples < ds_info.loc[info_key, 'total_dataset_size']

            # Block 0 gets no "--encoding_block" flag, only an empty placeholder token.
            encoder_options = (
                [""] if this_encoding_block == 0
                else ["--encoding_block", str(this_encoding_block)]
            )

            exp_name = f"{this_dataset}_m{n_memory_samples}_{this_model}_c{this_encoding_block}_quantizationTransfer{n_quantization_states}_s{SEED}"

            command = [
                "python3 src/main.py",
                "--dataset", this_dataset,
                "--num_classes_per_task", str(ds_info.loc[info_key, 'n_classes_per_task']),
                "--num_tasks", str(ds_info.loc[info_key, 'n_tasks']),
                "--seed", str(SEED),
                "--memory_size", str(n_memory_samples),
                "--num_passes", str(MAX_EPOCHS),
                "--sampler", "greedy_sampler",
                "--encoder", ds_info.loc[info_key, 'encoder'],
                *encoder_options,
                "--compressor", "quantization",
                "--n_states", str(n_quantization_states),
                "--strategy", QUANTIZATION_STRATEGY,
                "--backbone", this_model,
                "--backbone_block", str(this_encoding_block),
                "--data_dir", DATA_DIR,
                "--log_dir", LOG_DIR,
                "--exp_name", exp_name,
            ]
            exps.append(" ".join(command))
            names.append(exp_name)

Thinning

In [None]:
# Thinning: only a fraction (1 - compression factor) of the elements of each
# sample is kept; every kept element costs its value plus
# COORDINATE_SIZE_BYTE for its coordinate.
for this_dataset in DATASETS:
    for this_encoding_block in ENCODING_BLOCKS:
        info_key = (this_dataset, this_encoding_block)
        output_numel = ds_info.loc[info_key, 'output_numel']
        this_model = ds_info.loc[info_key, 'model']
        for this_compression_factor in K_THINNING:
            n_elements_per_sample = floor(output_numel * (1-this_compression_factor))
            if this_encoding_block == 0:
                # Block 0 values are costed at UINT_SIZE_BYTE; no "--encoding_block"
                # flag is emitted, only an empty placeholder token.
                sample_size_byte = n_elements_per_sample * UINT_SIZE_BYTE + n_elements_per_sample * COORDINATE_SIZE_BYTE
                encoder_options = [""]
            else:
                # Every other block is costed at FLOAT_SIZE_BYTE per value.
                sample_size_byte = n_elements_per_sample * FLOAT_SIZE_BYTE + n_elements_per_sample * COORDINATE_SIZE_BYTE
                encoder_options = ["--encoding_block", str(this_encoding_block)]

            n_memory_samples = floor(MEMORY_SIZE_BYTE / sample_size_byte)

            # The memory must hold fewer samples than the dataset contains.
            assert n_memory_samples < ds_info.loc[info_key, 'total_dataset_size']

            # round() rather than int(): the float product can land just below
            # the intended integer (0.58 * 100 == 57.99999999999999), and
            # truncation would then mislabel the run and may produce duplicate
            # experiment names.
            thinning_percent = round(this_compression_factor*100)
            n = f"{this_dataset}_m{n_memory_samples}_{this_model}_c{this_encoding_block}_thinning{thinning_percent}_s{SEED}"

            l =  ["python3 src/main.py",
                "--dataset", this_dataset,
                "--num_classes_per_task", str(ds_info.loc[info_key, 'n_classes_per_task']),
                "--num_tasks", str(ds_info.loc[info_key, 'n_tasks']),
                "--seed", str(SEED),
                "--memory_size", str(n_memory_samples),
                "--num_passes", str(MAX_EPOCHS),
                "--sampler", "greedy_sampler",
                "--encoder", ds_info.loc[info_key, 'encoder']] + \
                encoder_options + \
               ["--compressor", "thinning",
                "--compression_factor", str(this_compression_factor),
                "--backbone", this_model,
                "--backbone_block", str(this_encoding_block),
                "--data_dir", DATA_DIR,
                "--log_dir", LOG_DIR,
                "--exp_name", n]
            exps.append(" ".join(l))
            names.append(n)

Convolutional Autoencoder

In [None]:
# Number of spatial positions in the autoencoder bottleneck, keyed by encoding
# block. A value of None makes the loop below skip that block.
latent_spatial_sizes = {
    0: 8 * 8,
    # 1: 2*2, #this works but is not trained, size is 2x2
    # 2: 1*1, #this works but is not trained, size is 1x1
    2.5: None,
    3: None,
    3.5: None,
}

# Storage reserved for the autoencoder itself, keyed by the number of latent
# channels.
# NOTE(review): despite the `_mb` suffix, these values are subtracted directly
# from MEMORY_SIZE_BYTE below, i.e. they are treated as bytes -- confirm the unit.
ae_memory_mb = {
    1: 4524,
    2: 5630,
    4: 7843,
    8: 12268,
    16: 21118,
}


for this_dataset in DATASETS:
    for this_encoding_block in ENCODING_BLOCKS:
        info_key = (this_dataset, this_encoding_block)
        this_model = ds_info.loc[info_key, 'model']
        this_latent_spatial_size = latent_spatial_sizes[this_encoding_block]
        if this_latent_spatial_size is None:
            print('Skipping because the spatial size in the bottleneck would be below 0')
            continue

        for this_latent_size in K_AE:
            # One stored sample is the latent code: spatial positions x latent
            # channels, costed at FLOAT_SIZE_BYTE per element.
            n_elements_per_sample = this_latent_spatial_size * this_latent_size
            sample_size_byte = n_elements_per_sample * FLOAT_SIZE_BYTE
            # The autoencoder's own footprint is taken out of the budget first.
            free_memory_for_samples = MEMORY_SIZE_BYTE - ae_memory_mb[this_latent_size]
            n_memory_samples = floor(free_memory_for_samples / sample_size_byte)

            # Block 0 gets no "--encoding_block" flag, only an empty placeholder token.
            encoder_options = (
                [""] if this_encoding_block == 0
                else ["--encoding_block", str(this_encoding_block)]
            )

            # The memory must hold fewer samples than the dataset contains.
            assert n_memory_samples < ds_info.loc[info_key, 'total_dataset_size']

            exp_name = f"{this_dataset}_m{n_memory_samples}_{this_model}_c{this_encoding_block}_convae{this_latent_size}_s{SEED}"

            command = [
                "python3 src/main.py",
                "--dataset", this_dataset,
                "--num_classes_per_task", str(ds_info.loc[info_key, 'n_classes_per_task']),
                "--num_tasks", str(ds_info.loc[info_key, 'n_tasks']),
                "--seed", str(SEED),
                "--memory_size", str(n_memory_samples),
                "--num_passes", str(MAX_EPOCHS),
                "--sampler", "greedy_sampler",
                "--encoder", ds_info.loc[info_key, 'encoder'],
                *encoder_options,
                "--compressor", "convae",
                "--latent_channels", str(this_latent_size),
                "--pretraining_params", CONVAE_PRETRAINING_PARAMS,
                "--backbone", this_model,
                "--backbone_block", str(this_encoding_block),
                "--data_dir", DATA_DIR,
                "--log_dir", LOG_DIR,
                "--exp_name", exp_name,
            ]
            exps.append(" ".join(command))
            names.append(exp_name)

In [None]:
# Display the generated command lines for a visual check.
exps

In [None]:
# Every experiment name (also passed to the run as --exp_name) must be unique.
assert len(set(names)) == len(names)

In [None]:
# Write one command per line to the script file; print() supplies the
# trailing newline after the last command.
with open('../scripts/fixb.sh', 'w') as fp:
    print("\n".join(exps), file=fp)