# Collect and convert results to Latex tables

In [2]:
import glob
import numpy as np

In [3]:
path = 'mobilenet_v3-cifar10.txt'.lower()
num_exams = "Number of examples in computing weights heuristically: ".lower()
error_msg = "A error occured".lower()
not_found = "not found from".lower()

model_version = "Model version:".lower()

def empty_res_obj():
  return {
    'subset_size': [],
  'h_train_loss': [],
  'h_train_acc': [],
  'h_test_loss': [],
  'h_test_acc': [],
  'gh_train_loss': [],
  'gh_train_acc': [],
  'gh_test_loss': [],
  'gh_test_acc': [],
  'g_train_loss': [],
  'g_train_acc': [],
  'g_test_loss': [],
  'g_test_acc': []}

def extract_values(line: str):
  arrs = line.split(' ')
  # [:-1] dropping ','
  return float(arrs[2][:-1]), float(arrs[4])

def collect_results(path):
  all_res = {}
  with open(path, 'r') as f:
    for line in f:
      line = line.lower()
      if model_version not in line: continue
      
      # get model version
      m_v = line[line.rfind(' ')+1:-1]
      res = empty_res_obj()
      all_res[m_v] = res
      for size in [64, 128, 256, 512, 1024, 2048]:
        # skip empty line
        f.readline()
        # get the subset size
        line = f.readline().lower()
        set_size = int(line[line.rfind(' ')+1:])
        res['subset_size'].append(set_size)
        # skips: shape
        f.readline()
        # heuristic info
        f.readline()

        line = f.readline().lower()
        # check is whether error or info
        if error_msg in line: continue
        line = f.readline().lower()
        if not_found in line: continue

        # get results: heuristics on train
        loss, acc = extract_values(line)
        res['h_train_acc'].append(acc)
        res['h_train_loss'].append(loss)

        # get results: heuristics on test
        loss, acc = extract_values(f.readline())
        res['h_test_acc'].append(acc)
        res['h_test_loss'].append(loss)

        # skip the next info line:
        # Training model on all heurstic weights with epochs 5
        f.readline()
        # get results: train on the heuristic weights
        line = f.readline()
        loss, acc = extract_values(line)
        res['gh_train_acc'].append(acc)
        res['gh_train_loss'].append(loss)

        # get results: train on the heuristic weights
        loss, acc = extract_values(f.readline())
        res['gh_test_acc'].append(acc)
        res['gh_test_loss'].append(loss)

        # skip the next info line:
        # Training model on all initial weights with epochs 5
        f.readline()
        # get results: train on the initial weights
        loss, acc = extract_values(f.readline())
        res['g_train_acc'].append(acc)
        res['g_train_loss'].append(loss)

        # get results: train on the initial weights
        loss, acc = extract_values(f.readline())
        res['g_test_acc'].append(acc)
        res['g_test_loss'].append(loss)
  
  return all_res

In [5]:
ds_name = 'cifar10'
all_files = glob.glob(f'./res-logs/*{ds_name}.txt')

for file in all_files:
  path = f'./{file}'
  all_res = collect_results(path)
  m_name = file.replace(f'{ds_name}.txt', '')[:-1]
  m_name = m_name.replace('_', '')
  for m_v, res in all_res.items():
    m_v = m_v.replace('classification', 'cl.')\
      .replace('feature-vector', 'fe.')\
      .replace('-fe', '-fe.')\
      .replace('..', '.')
    print(f'{m_name}({m_v})', end='')
    for tr_acc, te_acc in zip(res['h_train_acc'], res['h_test_acc']):
      print(f" & {tr_acc:.2f}/{te_acc:.2f}", end='')
    print(' \\\\')

./res-logs/visiontransformer-Adam-entropy-False-0.00(vit-b16-cl.) & 0.40/0.40 & 0.64/0.64 & 0.64/0.64 & 0.69/0.69 & 0.67/0.68 & 0.69/0.69 \\
./res-logs/visiontransformer-Adam-entropy-False-0.00(vit-b16-fe.) & 0.58/0.59 & 0.66/0.66 & 0.74/0.74 & 0.72/0.72 & 0.75/0.75 & 0.76/0.76 \\
./res-logs/convnext-Adam-entropy-False-0.00(base-1k-224) & 0.23/0.23 & 0.62/0.61 & 0.70/0.71 & 0.73/0.73 & 0.74/0.73 & 0.74/0.74 \\
./res-logs/efficientnet-Adam-entropy-False-0.00(b0-cl.) & 0.30/0.31 & 0.35/0.35 & 0.47/0.47 & 0.50/0.50 & 0.51/0.51 & 0.50/0.50 \\
./res-logs/efficientnet-Adam-entropy-False-0.00(b0-fe.) & 0.39/0.39 & 0.51/0.50 & 0.58/0.58 & 0.59/0.58 & 0.62/0.62 & 0.62/0.62 \\
./res-logs/efficientnet-Adam-entropy-False-0.00(b1-cl.) & 0.24/0.25 & 0.42/0.42 & 0.49/0.48 & 0.60/0.59 & 0.60/0.59 & 0.59/0.59 \\
./res-logs/efficientnet-Adam-entropy-False-0.00(b1-fe.) & 0.47/0.47 & 0.59/0.59 & 0.67/0.66 & 0.68/0.68 & 0.64/0.64 & 0.66/0.65 \\
./res-logs/efficientnet-Adam-entropy-False-0.00(b4-cl.) & 0.45

# Generating the synthetic dataset

In [4]:
import numpy as np

In [5]:
border_0 = 0.7
border_1 = 0.3

In [6]:
n_exp = 10000
# the target values
y = np.zeros(15)
y[9:] = 1

accs_1 = []
accs_2 = []

for i in range(n_exp):
  class_0 = np.random.uniform(0, border_0, size=(9, 3))
  class_1 = np.random.uniform(border_1, 1, size=(6, 3))
  X = np.concatenate([class_0, class_1], axis=0)
  X[:, 1] = np.concatenate([
    np.random.uniform(border_1, 1, size=(9, )), 
    np.random.uniform(0, border_0, size=(6, ))])
  
  # first case
  y_hat = np.sum(X, axis=1)
  # making the first 9 as the first class objects after sorting
  # other will be as the second class
  y_pred = np.ones(15)
  y_pred[np.argsort(y_hat)[:9]] = 0

  # accuracy
  acc = (y_pred == y).mean()
  accs_1.append(acc)

  # second case
  X_ = X.copy()
  X_[:, 1] *= -1
  y_hat = np.sum(X_, axis=1)
  # making the first 9 as the first class objects after sorting
  # other will be as the second class
  y_pred = np.ones(15)
  y_pred[np.argsort(y_hat)[:9]] = 0

  # accuracy
  acc = (y_pred == y).mean()
  accs_2.append(acc)

print(f"1st case: {np.mean(accs_1):.2f}, {np.std(accs_1):.2f}")
print(f"2nd case: {np.mean(accs_2):.2f}, {np.std(accs_2):.2f}")

1st case: 0.67, 0.12
2nd case: 0.91, 0.09


## Converting to latex

In [7]:
# this is one of the case since the process is randomness
for i in range(X.shape[1]):
  print(' & '.join([f'{val:.2f}' for val in X[:, i]]))

0.24 & 0.59 & 0.26 & 0.42 & 0.36 & 0.25 & 0.53 & 0.45 & 0.22 & 0.51 & 0.56 & 0.90 & 0.51 & 0.62 & 0.99
0.32 & 0.67 & 0.46 & 0.46 & 0.45 & 0.54 & 0.46 & 0.99 & 0.34 & 0.10 & 0.62 & 0.38 & 0.32 & 0.07 & 0.46
0.67 & 0.01 & 0.46 & 0.24 & 0.70 & 0.34 & 0.35 & 0.11 & 0.55 & 0.57 & 0.86 & 0.46 & 0.42 & 0.78 & 0.41


# The second figure

In [8]:
import matplotlib as mpl
mpl.use('PS')
mpl.rcParams['text.usetex'] = True
import matplotlib.pyplot as plt

In [9]:
w_types = ['nikolay',  'entropy', 'gini']
markers = {'nikolay': '*', 'entropy': 'x', 'gini': '1'}
colors = {'nikolay': 'red', 'entropy': 'black', 'gini': 'yellow'}
for w_type in w_types:
  all_files = glob.glob(f'./res-logs/*{w_type}*.txt')
  all_files = sorted(all_files)
  j = 0
  for file in all_files:
    j += 1
    if j < 6:
      continue
    path = f'./{file}'
    all_res = collect_results(path)
    # over inner dict
    i = 0
    for model_type in all_res:
      i += 1
      subset_size = all_res[model_type]['subset_size']
      subset_size = [str(size) for size in subset_size]
      h_test_acc = all_res[model_type]['h_test_acc']
      plt.plot(subset_size, h_test_acc, marker=markers[w_type])
      # if i > 1:
      break
    if j > 20:
      break
  break

plt.ylim((0, 1))
plt.xticks(('64', '128', '256', '512', '1024', '2048'))
plt.show()
plt.savefig('2th.eps', format='eps')

## The third figure

In [14]:
w_types = ['nikolay',  'entropy', 'gini']

res_files = {w_type: {file_name.replace(f'{w_type}-False-0.00-', '').replace('-Adam', ''): file_name for file_name in glob.glob(f'./res-logs/*{w_type}*.txt')} for w_type in w_types}

stats = {
  'nikolay': np.zeros(6),
  'entropy': np.zeros(6),
  'gini': np.zeros(6),
}
subset_sizes = [64, 128, 256, 512, 1024, 2048]
l = 0
for nik_key, ent_key, gini_key in zip(sorted(res_files['nikolay'].keys()),
                                      sorted(res_files['entropy'].keys()),
                                      sorted(res_files['gini'].keys())):
  path_nik = './' + res_files['nikolay'][nik_key]
  path_ent = './' + res_files['entropy'][ent_key]
  path_gini = './' + res_files['gini'][gini_key]

  res_nik = collect_results(path_nik)
  res_ent = collect_results(path_ent)
  res_gini = collect_results(path_gini)

  for item_nik, item_ent, item_gini in zip(res_nik.values(),
                                           res_ent.values(),
                                           res_gini.values()):
    for i in range(6):
      l += 1
      nik_acc = item_nik['h_test_acc'][i]
      ent_acc = item_ent['h_test_acc'][i]
      gini_acc = item_gini['h_test_acc'][i]
      array = np.array([nik_acc, ent_acc, gini_acc])
      stats[w_types[array.argmax()]][i] += 1

In [15]:
stats

{'nikolay': array([76., 66., 47., 57., 55., 56.]),
 'entropy': array([31., 35., 54., 50., 48., 41.]),
 'gini': array([38., 44., 44., 38., 42., 48.])}

In [36]:
x = np.arange(len(subset_sizes))  # the label locations
width = 0.25  # the width of the bars
multiplier = 0

fig, ax = plt.subplots(layout='constrained')

for attribute, measurement in stats.items():
    offset = width * multiplier
    rects = ax.bar(x + offset, measurement, width, label=attribute)
    ax.bar_label(rects, padding=3)
    multiplier += 1

# Add some text for labels, title and custom x-axis tick labels, etc.
ax.set_ylabel('Number of outperformes')
ax.set_xlabel('Subset sizes')
ax.set_xticks(x + width, subset_sizes)
ax.legend(loc='upper center', ncols=3)
ax.set_ylim(0, 80)

plt.savefig('3th.png')

# Variances of accuracies

In [26]:
w_types = ['nikolay',  'entropy', 'gini']

res_files = {w_type: {file_name.replace(f'{w_type}-False-0.00-', '').replace('-Adam', ''): file_name for file_name in glob.glob(f'./res-logs/*{w_type}*.txt')} for w_type in w_types}

vars = np.zeros((6, 3, 3))
subset_sizes = [64, 128, 256, 512, 1024, 2048]
for nik_key, ent_key, gini_key in zip(sorted(res_files['nikolay'].keys()),
                                      sorted(res_files['entropy'].keys()),
                                      sorted(res_files['gini'].keys())):
  path_nik = './' + res_files['nikolay'][nik_key]
  path_ent = './' + res_files['entropy'][ent_key]
  path_gini = './' + res_files['gini'][gini_key]

  res_nik = collect_results(path_nik)
  res_ent = collect_results(path_ent)
  res_gini = collect_results(path_gini)

  for item_nik, item_ent, item_gini in zip(res_nik.values(),
                                           res_ent.values(),
                                           res_gini.values()):
    for i in range(6):
      nik_acc = item_nik['h_test_acc'][i]
      ent_acc = item_ent['h_test_acc'][i]
      gini_acc = item_gini['h_test_acc'][i]
      vars[i, 0, 1] += abs(nik_acc - ent_acc)
      vars[i, 0, 2] += abs(nik_acc - gini_acc)
      vars[i, 1, 2] += abs(ent_acc - gini_acc)

In [37]:
np.mean(vars / 145, axis=0) * 100

array([[0.        , 4.86789655, 4.6172069 ],
       [0.        , 0.        , 2.68055172],
       [0.        , 0.        , 0.        ]])

# Pre-trained models details

In [38]:
from utils import img_model_links, text_model_links

2024-04-17 17:01:33.435467: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-17 17:01:33.461389: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [47]:
for key in text_model_links:
  for version, link in text_model_links[key].items():
    print(key + " & \href{" + link + "}{" + version + "} \\\\")

small_bert & \href{https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-128_A-2/1}{small_bert/bert_en_uncased_L-2_H-128_A-2} \\
small_bert & \href{https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-256_A-4/1}{small_bert/bert_en_uncased_L-2_H-256_A-4} \\
small_bert & \href{https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-2_H-512_A-8/1}{small_bert/bert_en_uncased_L-2_H-512_A-8} \\
small_bert & \href{https://tfhub.dev/tensorflow/small_bert/bert_en_uncased_L-4_H-512_A-8/1}{small_bert/bert_en_uncased_L-4_H-512_A-8} \\
bert & \href{https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3}{bert_en_uncased_L-12_H-768_A-12} \\
bert & \href{https://tfhub.dev/tensorflow/bert_en_cased_L-12_H-768_A-12/3}{bert_en_cased_L-12_H-768_A-12} \\
bert & \href{https://tfhub.dev/tensorflow/bert_multi_cased_L-12_H-768_A-12/3}{bert_multi_cased_L-12_H-768_A-12} \\
albert & \href{https://tfhub.dev/tensorflow/albert_en_base/2}{albert_en_base} \\
electra & \href{https://tfhub.dev/