# CIFAR100 fixed memory slots

Produces plots of compression factor vs. accuracy for a fixed number of memory slots.

In [None]:
from pathlib import Path

# Settings shared by every generated command line in this notebook.
N_SLOTS = 30000      # passed to main.py as --memory_size
BACKBONE_BLOCK = 3   # passed as --encoding_block / --backbone_block for cutr34 runs
SEED = 1             # passed as --seed

# Locations handed to main.py via --data_dir / --log_dir.
DATA_DIR = '/home/marwei/pytorch'
LOG_DIR = '/home/marwei/code/encodedgdumb/logs/cifar100_fixed_slots'

# Shell script that receives one generated command per line.
outfile = (Path('..') / 'scripts' / 'cifar100_fixed_slots.sh').resolve()

In [None]:
# Accumulates one complete shell command string per experiment; the cells
# below append to it and the "Write output file" cell writes it to `outfile`.
experiments = []

# Base Case

In [None]:
# Encoder settings for the uncompressed baseline runs ('none' = no encoder).
encoders = ['none', 'cutr34']

In [None]:
# Baseline runs: no compressor, one command per encoder setting.
for encoder in encoders:
    # Arguments common to both baseline variants.
    command = [
        "python3 src/main.py",
        "--dataset", "CIFAR100",
        "--num_classes_per_task", "5",
        "--num_tasks", "20",
        "--seed", str(SEED),
        "--memory_size", str(N_SLOTS),
        "--num_passes", "128",
        "--sampler", "greedy_sampler",
        "--compressor", "none",
        "--backbone", "resnet34",
        "--data_dir", DATA_DIR,
        "--log_dir", LOG_DIR,
    ]

    if encoder != 'cutr34':
        # Plain run without an encoder.
        exp_name = f"cifar100_m{N_SLOTS}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
        command += ["--encoder", "none"]
    else:
        # CutR34 run: encoder and backbone are split at the same block.
        exp_name = f"cifar100_m{N_SLOTS}_cutr34_{BACKBONE_BLOCK}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
        command += [
            "--encoder", "cutr34",
            "--encoding_block", str(BACKBONE_BLOCK),
            "--backbone_block", str(BACKBONE_BLOCK),
        ]

    command += ["--exp_name", exp_name]
    experiments.append(" ".join(command))


## Quantization Compression

In [None]:
# Sweep definition for the quantization compressor.
n_states_list = [2, 4, 8, 16, 32]  # values passed as --n_states
encoders = ['cutr34', 'none']  # each setting is run with and without the CutR34 encoder
strategies = ['local', 'tiny_imagenet_transfer']  # values passed as --strategy

In [None]:
# Quantization runs: one command per (n_states, encoder, strategy) combination.
for n_states in n_states_list:
    for encoder in encoders:
        for strategy in strategies:
            # Arguments common to both encoder variants.
            command = [
                "python3 src/main.py",
                "--dataset", "CIFAR100",
                "--num_classes_per_task", "5",
                "--num_tasks", "20",
                "--seed", str(SEED),
                "--memory_size", str(N_SLOTS),
                "--num_passes", "128",
                "--sampler", "greedy_sampler",
                "--compressor", "quantization",
                "--strategy", strategy,
                "--n_states", str(n_states),
                "--backbone", "resnet34",
                "--data_dir", DATA_DIR,
                "--log_dir", LOG_DIR,
            ]

            if encoder != 'cutr34':
                # NOTE(review): this name has a double underscore after the
                # memory size, unlike the other no-encoder names in this
                # notebook -- confirm whether that is intentional.
                exp_name = f"cifar100_m{N_SLOTS}__quantization_{strategy}_{n_states}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
                command += ["--encoder", "none"]
            else:
                exp_name = f"cifar100_m{N_SLOTS}_cutr34_{BACKBONE_BLOCK}_quantization_{strategy}_{n_states}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
                command += [
                    "--encoder", "cutr34",
                    "--encoding_block", str(BACKBONE_BLOCK),
                    "--backbone_block", str(BACKBONE_BLOCK),
                ]

            command += ["--exp_name", exp_name]
            experiments.append(" ".join(command))

# Thinning Compression

In [None]:
# Sweep definition for the thinning compressor.
compression_factors = [0.5, 0.8, 0.9, 0.95]  # values passed as --compression_factor
encoders = ['cutr34', 'none']  # each factor is run with and without the CutR34 encoder

In [None]:
# Thinning runs: one command per (compression_factor, encoder) combination.
for compression_factor in compression_factors:
    for encoder in encoders:
        # Arguments common to both encoder variants.
        command = [
            "python3 src/main.py",
            "--dataset", "CIFAR100",
            "--num_classes_per_task", "5",
            "--num_tasks", "20",
            "--seed", str(SEED),
            "--memory_size", str(N_SLOTS),
            "--num_passes", "128",
            "--sampler", "greedy_sampler",
            "--compressor", "thinning",
            "--compression_factor", str(compression_factor),
            "--backbone", "resnet34",
            "--data_dir", DATA_DIR,
            "--log_dir", LOG_DIR,
        ]

        if encoder != 'cutr34':
            exp_name = f"cifar100_m{N_SLOTS}_thinning{compression_factor}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
            command += ["--encoder", "none"]
        else:
            exp_name = f"cifar100_m{N_SLOTS}_cutr34_{BACKBONE_BLOCK}_thinning{compression_factor}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
            command += [
                "--encoder", "cutr34",
                "--encoding_block", str(BACKBONE_BLOCK),
                "--backbone_block", str(BACKBONE_BLOCK),
            ]

        command += ["--exp_name", exp_name]
        experiments.append(" ".join(command))

# Autoencoder Compression (conv Autoencoder)

Because we cannot apply pooling after CutR compression, we don't use the encoder in this case.

In [None]:
# Sweep definition for the convolutional autoencoder compressor.
latent_sizes = [1, 2, 4, 8, 16]  # values passed as --latent_channels
# NOTE(review): the conv-autoencoder loop in this notebook always passes
# "--encoder none" and does not read this list.
encoders = ['cutr34', 'none']

In [None]:
# Convolutional-autoencoder runs: one command per latent size, never with an encoder.
for latent_size in latent_sizes:
    exp_name = f"cifar100_m{N_SLOTS}_convae{latent_size}_splitr34_{BACKBONE_BLOCK}__s{SEED}"
    command = " ".join([
        "python3 src/main.py",
        "--dataset", "CIFAR100",
        "--num_classes_per_task", "5",
        "--num_tasks", "20",
        "--seed", str(SEED),
        "--memory_size", str(N_SLOTS),
        "--num_passes", "128",
        "--sampler", "greedy_sampler",
        "--compressor", "convae",
        "--latent_channels", str(latent_size),
        "--backbone", "resnet34",
        "--data_dir", DATA_DIR,
        "--log_dir", LOG_DIR,
        "--encoder", "none",
        "--exp_name", exp_name,
    ])
    experiments.append(command)

# Fully Connected Autoencoder

In [None]:
# Sweep definition for the fully connected autoencoder compressor.
bottleneck_sizes = [2, 4, 8, 16, 32, 64]  # values passed as --bottleneck_neurons
# NOTE(review): the fully-connected-autoencoder loop in this notebook builds the
# encoder / no-encoder variants explicitly and does not iterate over this list.
encoders = ['cutr34', 'none']

In [None]:
# Fully-connected-autoencoder runs: for every bottleneck size, emit the
# no-encoder command first and the CutR34 command second.
for bottleneck_size in bottleneck_sizes:
    # Arguments common to both variants.
    shared_args = [
        "python3 src/main.py",
        "--dataset", "CIFAR100",
        "--num_classes_per_task", "5",
        "--num_tasks", "20",
        "--seed", str(SEED),
        "--memory_size", str(N_SLOTS),
        "--num_passes", "128",
        "--sampler", "greedy_sampler",
        "--compressor", "fcae",
        "--bottleneck_neurons", str(bottleneck_size),
        "--data_dir", DATA_DIR,
        "--log_dir", LOG_DIR,
    ]

    variant_args = (
        [
            "--encoder", "none",
            "--backbone", "resnet34",
            "--exp_name", f"cifar100_m{N_SLOTS}_fcae{bottleneck_size}_resnet34__s{SEED}",
        ],
        [
            "--encoder", "cutr34",
            "--encoding_block", str(BACKBONE_BLOCK),
            "--backbone", "resnet34",
            "--backbone_block", str(BACKBONE_BLOCK),
            "--exp_name", f"cifar100_m{N_SLOTS}_cutr{BACKBONE_BLOCK}_fcae{bottleneck_size}_resnet34__s{SEED}",
        ],
    )

    for extra_args in variant_args:
        experiments.append(' '.join(shared_args + extra_args))

# Write output file

In [None]:
# Write all generated commands to `outfile`, one command per line.
# Raises FileExistsError if the file already exists and the user does not
# confirm overwriting it.
if outfile.exists():
    inp = input(f"output file {outfile} already exists. Overwrite [y/ N]?")

    # Only an explicit "y"/"Y" overwrites; surrounding whitespace is ignored
    # so an answer like "y " is not mistaken for a refusal.
    if inp.strip().lower() != 'y':
        raise FileExistsError(f'{outfile} exists, nothing has been overwritten')

# Explicit encoding keeps the script identical across platforms.
with open(outfile, 'w', encoding='utf-8') as f:
    # Terminate every line, including the last one: line-oriented consumers
    # (e.g. a shell `while read` loop) drop a final line without a newline.
    f.writelines(f'{command}\n' for command in experiments)

# Report success only after the file has been flushed and closed.
print('saved!')

# Results

In [None]:
import re
import pandas as pd
from pathlib import Path

def _first_match(pattern, text):
    """Return the first capture of *pattern* in *text*.

    Raises IndexError when the pattern does not occur, which the caller
    treats as "this log is incomplete or has an unexpected format".
    """
    return re.findall(pattern, text)[0]


def _parse_checkpoint_log(loglines):
    """Turn the lines of one checkpoint.log into a result record.

    The first line is searched for the run arguments (encoder, compressor and
    the compressor-specific parameter); the last line is searched for the
    final accuracy.

    Returns a dict with the keys 'final_acc', 'encoder', 'compressor', 'param'.
    Raises IndexError if the log is empty or an expected field is missing, and
    ValueError for an unknown compressor or quantization strategy.
    """
    header, footer = loglines[0], loglines[-1]

    final_acc = float(_first_match(r"Acc: \[(.*?)\]", footer))
    encoder = _first_match(r"encoder=\'(.*?)\'", header)
    compressor = _first_match(r"compressor=\'(.*?)\'", header)

    if compressor == 'none':
        compressor_name = 'none'
        compressor_param = None
    elif compressor == 'thinning':
        compressor_name = 'Thinning'
        compressor_param = float(_first_match(r"compression_factor=(.*?)\,", header))
    elif compressor == 'autoencoder' or compressor == 'convae':
        compressor_name = 'Convolutional Autoencoder'
        compressor_param = int(_first_match(r"latent_channels=(\d+)", header))
    elif compressor == 'fcae':
        compressor_name = 'Fully Connected Autoencoder'
        compressor_param = int(_first_match(r"bottleneck_neurons=(\d+)", header))
    elif compressor == 'quantization':
        strategy = _first_match(r"strategy=\'(.*?)\'", header)
        if strategy == 'tiny_imagenet_transfer':
            compressor_name = 'Quantization (transfer)'
        elif strategy == 'local':
            compressor_name = 'Quantization (local)'
        else:
            raise ValueError(f'Unknown Quantization strategy: {strategy}')
        compressor_param = int(_first_match(r"n_states=(\d+)", header))
    else:
        raise ValueError(f'Unknown Compressor: {compressor}')

    return {
        'final_acc': final_acc,
        'encoder': encoder,
        'compressor': compressor_name,
        'param': compressor_param
    }


logs_dir = Path(LOG_DIR)

# One record per run directory whose checkpoint.log could be parsed.
dd = []
for this_dir in logs_dir.glob('*'):
    try:
        with open(this_dir / 'checkpoint.log') as f:
            loglines = f.readlines()
    except (FileNotFoundError, NotADirectoryError):
        # Stray files in the log directory and runs that have not written a
        # checkpoint.log are reported and skipped instead of aborting the
        # whole aggregation.
        print(f'{this_dir.name} (no checkpoint.log)')
        continue

    try:
        dd.append(_parse_checkpoint_log(loglines))
    except IndexError:
        # Empty or incomplete log. Use .name rather than .stem: run names such
        # as "...thinning0.5_..." contain a dot and .stem would cut them off.
        print(this_dir.name)

In [None]:
# Tabulate the parsed runs, ordered by compressor and then by parameter value.
df = pd.DataFrame.from_records(dd)
df = df.sort_values(by=['compressor', 'param'])

In [None]:
import plotly.express as px
import plotly.graph_objects as go
from plot_utils import science_template

# Legend label per row: known encoder ids get a display name, anything else
# keeps its raw value.
legend_names = {'none': 'No Encoding', 'cutr34': 'CutR34(3)'}
df['legend'] = df['encoder'].map(lambda enc: legend_names.get(enc, enc))

# Only compressed runs are drawn as curves; uncompressed runs become baselines.
view = df[df['compressor'] != 'none']

axis_labels = {
    'final_acc': 'Accuracy',
    'param': 'k',
    'encoder': 'Encoder',
}
facet_order = {
    'compressor': ['Thinning', 'Quantization (local)', 'Quantization (transfer)', 'Convolutional Autoencoder', 'Fully Connected Autoencoder']
}

fig = px.line(
    view,
    x='param',
    y='final_acc',
    color='legend',
    facet_col='compressor',
    template=science_template,
    markers=True,
    labels=axis_labels,
    category_orders=facet_order,
)


def _strip_facet_prefix(annotation):
    """Keep only the text after the last "=" of a facet title."""
    return annotation.update(text=annotation.text.split("=")[-1])


fig.for_each_annotation(_strip_facet_prefix)
fig.update_xaxes(matches=None, rangemode="tozero")
fig.update_yaxes(rangemode="tozero")
fig.update_layout(legend={'title_text': ''})

# Baseline accuracies: the uncompressed run of each encoder setting.
# .item() requires exactly one matching row.
is_uncompressed = df['compressor'] == 'none'
y_gdumb = df.loc[(df['encoder'] == 'none') & is_uncompressed, 'final_acc'].item()
y_cutr3 = df.loc[(df['encoder'] == 'cutr34') & is_uncompressed, 'final_acc'].item()

# (legend name, dash style, y value, annotation position) per reference line.
reference_lines = [
    ('GDumb', 'solid', y_gdumb, 'top right'),
    ('CutR34(3) without Compression', 'dot', y_cutr3, 'bottom right'),
    ('random guessing', 'dash', 0.01, 'bottom right'),
]

for ref_name, ref_dash, ref_y, ref_position in reference_lines:
    fig.add_hline(
        y=ref_y,
        line_dash=ref_dash,
        line_width=1,
        annotation_text="",
        annotation_position=ref_position,
    )

# The horizontal lines do not show up in the legend, so add one single-point
# 'lines' trace per reference line purely to create a matching legend entry.
anchor_x = fig.data[0].x[0]
anchor_y = fig.data[0].y[0]
fig.add_traces(
    [
        go.Scatter(
            x=[anchor_x],
            y=[anchor_y],
            showlegend=True,
            name=ref_name,
            mode='lines',
            line_dash=ref_dash,
            line_color='black',
            line_width=1,
        )
        for ref_name, ref_dash, ref_y, ref_position in reference_lines
    ]
)

In [None]:
# Settings for the "download plot as image" button in the plotly mode bar.
image_export_options = {
    'format': 'svg',  # one of png, svg, jpeg, webp
    'filename': 'plot',
    'height': 300,
    'width': 1300,
    'scale': 1,  # Multiply title/legend/axis/canvas sizes by this factor
}

config = {
    'displaylogo': False,
    'toImageButtonOptions': image_export_options,
}

# Render the figure in the browser.
fig.show(renderer='browser', config=config)