| cut...        | output shape | output size | bits (bytes) to address output coordinates |
|---------------|--------------|-------------|--------------------------------------------|
| after Block 1 | 8x8x64       | 4096        | 12 (2)                                     |
| after Block 2 | 4x4x128      | 2048        | 11 (2)                                     |
| after Block 3 | 2x2x256      | 1024        | 10 (2)                                     |
| after Block 4 | 1x1x512      |  512        |  9 (2)                                     |

We assume

* one float takes up 4 bytes of memory
* unsigned integers (uints) are used for output coordinates

In [None]:
from pathlib import Path

# Constants for the storage accounting done in the cells below.
MODEL_SIZE_MB = 0
OUTPUT_SIZE_ENCODED = 1024  # values per sample when the 'cutr' encoder is used
OUTPUT_SIZE_UNENCODED = 32 * 32 * 3  # values per sample without an encoder
FLOAT_SIZE_BYTE = 4
UINT_SIZE_BYTE = 1
COORDINATE_SIZE_BYTE = 2  # bytes per stored output coordinate

# One checkpoint.log per run directory: CIFAR-10 runs first, then CIFAR-100.
# The 'cifar10*' pattern also matches names that start with 'cifar100'.
log_paths = [
    run_dir / 'checkpoint.log'
    for log_root in (
        Path('/Users/marwei/AFS/home/urz/m/marwei/Logs/IDEAL/cifar10_rescon'),
        Path('/Users/marwei/AFS/home/urz/m/marwei/Logs/IDEAL/cifar100_rescon'),
    )
    for run_dir in log_root.glob('cifar10*')
]

# Parse logs into a pandas DataFrame

In [None]:
import re
import pandas as pd

def _first_group(pattern, text):
    """Return the first capture of *pattern* in *text* (IndexError if absent)."""
    return re.findall(pattern, text)[0]


def _parse_compressor(header):
    """Return ``(label, parameter)`` for the compressor described in *header*.

    The label equals the logged compressor name, except for quantization,
    which is split into 'quantization local' and 'quantization transfer'
    depending on the logged strategy.

    Raises:
        ValueError: for an unknown compressor or quantization strategy.
    """
    compressor = _first_group(r"compressor=\'(.*?)\'", header)

    if compressor == 'thinning':
        return compressor, float(_first_group(r"compression_factor=(.*?),", header))
    if compressor == 'quantization':
        n_states = int(_first_group(r"n_states=(\d+)", header))
        try:
            strategy = _first_group(r"strategy=\'(.*?)\'", header)
        except IndexError:
            # Headers without a strategy entry are treated as 'local'.
            strategy = 'local'
        labels = {
            'tiny_imagenet_transfer': 'quantization transfer',
            'local': 'quantization local',
        }
        if strategy not in labels:
            raise ValueError('Unknown Quantization Strategy: ' + strategy)
        return labels[strategy], n_states
    if compressor == 'convae':
        return compressor, int(_first_group(r"latent_channels=(\d+)", header))
    if compressor == 'fcae':
        return compressor, int(_first_group(r"bottleneck_neurons=(\d+)", header))
    if compressor == 'none':
        return compressor, 0
    raise ValueError(f'Unknown Compressor: {compressor}')


def _parse_encoder(header):
    """Return ``(encoder, output_size)`` for the encoder described in *header*.

    'cutr34' is normalised to 'cutr'.

    Raises:
        ValueError: for an unknown encoder.
    """
    encoder = _first_group(r"encoder=\'(.*?)\'", header)
    encoder = 'cutr' if encoder == 'cutr34' else encoder

    if encoder == 'cutr':
        # Parsed only so that a header without encoding_block fails loudly.
        # NOTE(review): the value is not used; OUTPUT_SIZE_ENCODED is applied
        # regardless of the block -- confirm all runs share one cut position.
        int(_first_group(r"encoding_block=(\d+)", header))
        return encoder, OUTPUT_SIZE_ENCODED
    if encoder == 'none':
        return encoder, OUTPUT_SIZE_UNENCODED
    raise ValueError('Unknown Encoder')


def _parse_log(loglines):
    """Build one result record from the lines of a checkpoint log.

    The run configuration is read from the first line and the final accuracy
    from the last line.
    """
    header = loglines[0]
    compressor, compressor_param = _parse_compressor(header)
    encoder, output_size = _parse_encoder(header)
    return {
        'mem_size': int(_first_group(r"memory_size=(\d+)", header)),
        'final_acc': float(_first_group(r"Acc: \[(.*?)\]", loglines[-1])),
        'output_size': output_size,
        'encoder': encoder,
        'compressor': compressor,
        'compressor_param': compressor_param,
        'dataset': _first_group(r"dataset=\'(.*?)\'", header),
    }


c = []

for exp in log_paths:
    with open(exp) as infile:
        loglines = infile.read().splitlines()

    if not loglines:
        # Name the offending file instead of failing with a bare IndexError.
        raise ValueError(f'Empty log file: {exp}')

    c.append(_parse_log(loglines))

df = pd.DataFrame.from_records(c)

In [None]:
# Start as a float column: the cells below write fractional megabytes into it,
# and writing floats into an integer column is deprecated silent upcasting.
df['mem_size_mb'] = 0.0

# Bytes per stored value: UINT_SIZE_BYTE without an encoder,
# FLOAT_SIZE_BYTE for 'cutr'-encoded samples.
df.loc[df['encoder'] == 'none', 'bytes_for_datatype'] = UINT_SIZE_BYTE
df.loc[df['encoder'] == 'cutr', 'bytes_for_datatype'] = FLOAT_SIZE_BYTE

# Compute Storage Consumption

Baseline

In [None]:
# Uncompressed runs: every memory slot stores all output values of a sample.
is_baseline = df['compressor'] == 'none'
baseline_rows = df.loc[is_baseline]

df.loc[is_baseline, 'mem_size_mb'] = (
    baseline_rows['bytes_for_datatype']
    * baseline_rows['output_size']
    * baseline_rows['mem_size']
    / (1024 * 1024)
    + MODEL_SIZE_MB
)

Thinning

In [None]:
# Thinning: only a (1 - compression_factor) share of the values is kept, and
# each kept value is stored together with its output coordinate.
# Intermediate results live in local Series rather than integer scratch
# columns, so no float values are written into integer columns.
is_thinning = df.compressor == 'thinning'
thinned = df.loc[is_thinning]

kept_values_per_sample = thinned['output_size'] * (1 - thinned['compressor_param'])
sample_size_byte = (
    kept_values_per_sample * thinned['bytes_for_datatype']
    + kept_values_per_sample * COORDINATE_SIZE_BYTE
)

df.loc[is_thinning, 'mem_size_mb'] = (
    sample_size_byte * thinned['mem_size'] / (1024 * 1024)
    + MODEL_SIZE_MB
)


Quantization

In [None]:
import numpy as np
# Quantization: every value is stored as an index of ceil(log2(n_states)) bits,
# plus the float interval centers needed to decode the indices.
is_local = df['compressor'] == 'quantization local'
is_transfer = df['compressor'] == 'quantization transfer'
mask = is_local | is_transfer
quantized = df.loc[mask]

# Bytes for the packed indices: bits per sample are rounded up to whole bytes.
bits_per_value = np.ceil(np.log2(quantized['compressor_param']))
compressed_bytes = quantized['mem_size'] * np.ceil(
    bits_per_value * quantized['output_size'] / 8
)

# Bytes for the interval centers: one set per memory slot for the 'local'
# strategy, a single shared set for the 'transfer' strategy.
center_bytes = quantized['compressor_param'] * FLOAT_SIZE_BYTE
center_bytes = center_bytes.where(
    is_transfer[mask],
    center_bytes * quantized['mem_size'],
)

# Both terms are already in bytes, so the conversion to MB divides by
# 1024 * 1024 only; the previous extra factor of 8 converted bits to bytes a
# second time and under-reported the size eightfold.
df.loc[mask, 'mem_size_mb'] = (
    MODEL_SIZE_MB
    + (center_bytes + compressed_bytes) / (1024 * 1024)
)

Conv Autoencoder

In [None]:
# Size of the convolutional autoencoder in MB, keyed by latent channel count.
ae_size_mb = {
    1: 0.00452423095703125,
    2: 0.0056304931640625,
    4: 0.007843017578125,
    8: 0.01226806640625,
    16: 0.0211181640625,
}

is_convae = df['compressor'] == 'convae'
convae = df.loc[is_convae]

# .map yields NaN for a channel count missing from the table, whereas
# .replace would keep the channel count itself and treat it as megabytes.
autoencoder_mb = convae['compressor_param'].astype('int').map(ae_size_mb)

# Stored latents: 8 * 8 * latent_channels floats per memory slot.
# NOTE(review): the 8x8 spatial size is hard-coded -- confirm it holds for
# every conv-AE run.
latents_mb = (
    convae['mem_size']
    * 8 * 8 * convae['compressor_param']
    * FLOAT_SIZE_BYTE
    / (1024 * 1024)
)

df.loc[is_convae, 'mem_size_mb'] = MODEL_SIZE_MB + autoencoder_mb + latents_mb

FC Autoencoder

In [None]:
# Size of the fully connected autoencoder in MB, keyed by bottleneck width:
# one table for runs without an encoder, one for 'cutr'-encoded runs.
SIZE_FCAE_NONE_MB = {
    64: 52.08887481689453,
    32: 43.0804443359375,
    16: 36.295753479003906,
    8: 30.972145080566406,
    4: 26.713584899902344,
    2: 23.205177307128906,
}
SIZE_FCAE_CUTR_MB = {
    64: 8.331954956054688,
    32: 6.5640106201171875,
    16: 5.3397216796875,
    8: 4.450263977050781,
    4: 3.7687225341796875,
    2: 3.2361679077148438,
}

is_fcae = df['compressor'] == 'fcae'
fcae = df.loc[is_fcae]
bottleneck = fcae['compressor_param'].astype('int')

# .map yields NaN for a bottleneck width missing from the tables, whereas
# .replace would keep the width itself and treat it as megabytes.
autoencoder_mb = bottleneck.map(SIZE_FCAE_NONE_MB).where(
    fcae['encoder'] == 'none',
    bottleneck.map(SIZE_FCAE_CUTR_MB),
)

# Stored codes: one float per bottleneck neuron and memory slot.
codes_mb = (
    fcae['mem_size']
    * fcae['compressor_param']
    * FLOAT_SIZE_BYTE
    / (1024 * 1024)
)

# MODEL_SIZE_MB is included for consistency with the other compressors.
df.loc[is_fcae, 'mem_size_mb'] = MODEL_SIZE_MB + autoencoder_mb + codes_mb

# Plot

We only want to plot:

* GDumb (no compression, no encoding)
* Encoding only (Cut ResNet, no compression)
* Conv-AE + no encoding
* Thinning + encoding
* Quantization (transfer) + encoding


In [None]:
enc = df['encoder']
comp = df['compressor']
param = df['compressor_param']

# Configurations shown in the plot.
selected = (
    (comp.eq('none') & enc.isin(['none', 'cutr']))
    | (enc.eq('none') & comp.eq('convae') & param.eq(16))
    | (enc.eq('cutr') & comp.eq('thinning') & param.isin([0.5, 0.95]))
    | (enc.eq('cutr') & comp.eq('quantization transfer') & param.isin([2, 4, 32]))
)

# Work on an independent copy: columns are added below, and assigning into a
# slice of df triggers pandas' SettingWithCopy handling.
view = df.loc[selected].copy()


encoder_names = {
    'none': '',
    'cutr': 'Cut ResNet + '
}

compressor_names = {
    'none': '',
    'convae': 'Autoencoder',
    'thinning': 'Thinning',
    'quantization transfer': 'Quantization'
}

compressor_ordering = {
    'none': 3,
    'convae': 2,
    'thinning': 1,
    'quantization transfer': 0
}


def _legend_label(encoder, compressor, k):
    """Return the legend text for one row of the plot selection."""
    # Uncompressed runs get fixed names instead of a "(k=...)" suffix.
    if compressor == 'none' and encoder == 'none':
        return 'GDumb'
    if compressor == 'none' and encoder == 'cutr':
        return 'Cut ResNet + GDumb'
    # The thinning factor is fractional; all other parameters are integers.
    suffix = f" (k={k:.2f})" if compressor == 'thinning' else f" (k={k:.0f})"
    return encoder_names[encoder] + compressor_names[compressor] + suffix


view['legend'] = [
    _legend_label(encoder, compressor, k)
    for encoder, compressor, k in zip(
        view['encoder'], view['compressor'], view['compressor_param']
    )
]

# Secondary sort key used when the rows are ordered for plotting.
view['ordering'] = view['compressor'].map(compressor_ordering)

In [None]:
# Show the selected rows for inspection.
view

In [None]:
import plotly.express as px
from plot_utils import science_template

# Mode-bar / image-export options for the interactive figure.
config = {
    'displaylogo': False,
    'toImageButtonOptions': {
        'format': 'png', # one of png, svg, jpeg, webp
        'filename': 'rescon',
        'scale': 3 # Multiply title/legend/axis/canvas sizes by this factor
    }
}

# Accuracy (x) against memory consumption (y, log scale), one facet per dataset.
fig = px.line(
    view.sort_values(['mem_size_mb', 'ordering']),
    y='mem_size_mb',
    x='final_acc',
    color='legend',
    markers=True,
    facet_col='dataset',
    log_y=True,
    template=science_template,
    range_x=[0, None],
    hover_data={
        'mem_size': True,
    },
    category_orders={
        'dataset': ['CIFAR10', 'CIFAR100'],
    },
    labels={
        'mem_size': 'Number of Memory Slots',
        'final_acc': 'Accuracy',
        'mem_size_mb': 's<sub>Σ</sub>',
    },
    line_dash='compressor',
)

# Facet titles read "dataset=<name>"; keep only the name.
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))

# Let each facet use its own x range.
fig.update_xaxes(matches=None)

fig.update_layout(
    legend_title='',
    legend_font_size=12.5,
    xaxis1={
        'dtick': 0.1,
        'title_standoff': 3
    },
    xaxis2={
        'range': [0, 0.5],
        'dtick': 0.1,
        'title_standoff': 3
    },
    # `title_font` replaces the deprecated `titlefont` attribute.
    yaxis_title_font_size=15,
    yaxis_tickfont_size=10,
)

# (dash, colour, legend name) per plotted configuration.
_trace_styles = [
    ('solid', '#636EFA', 'FETCH with Quantization (k=2)'),
    ('dash', '#636EFA', 'FETCH with Quantization (k=4)'),
    ('dot', '#636EFA', 'FETCH with Quantization (k=32)'),
    ('solid', '#EF553B', 'FETCH with Thinning (k=0.95)'),
    ('dot', '#EF553B', 'FETCH with Thinning (k=0.50)'),
    ('solid', '#00CC96', 'GDumb'),
    ('solid', '#AB63FA', 'FETCH, no Compressor'),
    ('solid', '#FFa15A', 'Autoencoder (k=16)'),
]

# Every style is applied to two consecutive traces -- presumably one per
# dataset facet.
# NOTE(review): the mapping is positional and relies on the trace order that
# plotly derives from the sorted data; re-check it whenever the selection
# changes.
legend_names = [style for style in _trace_styles for _ in range(2)]

for i, trace in enumerate(fig.data):
    dash, color, name = legend_names[i]
    trace.line.dash = dash
    trace.line.color = color
    trace.name = name

fig.update_traces(
    line=dict(width=2),
    marker=dict(size=5),
)

# Pass the export/mode-bar options; they were previously built but never used.
fig.show(config=config)

In [None]:
# Export the figure to ../plots/rescon.pdf.
fig.write_image(
    file='../plots/rescon.pdf',
    width=800,
    height=320,
    scale=5,
)

In [None]:
# Crop the exported PDF into the publication's image folder, then run the
# publication's `make fast` target.
! pdfcrop ../plots/rescon.pdf /Users/marwei/Documents/publications/2023/IDEAL/images/rescon.pdf
! (cd /Users/marwei/Documents/publications/2023/IDEAL && make fast)