
<H3 align='center'> An Attention-Based Architecture for
Hierarchical Classification with CNNs </H3>

<H5 align='center'> CIFAR-10 </H3>

<hr style="height:2px;border:none"/>


# Dependencies

In [39]:
import keras
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
#from keras.datasets import cifar10
#from keras.models import Model
#from keras.layers import Dense, Dropout, Activation, Flatten, Concatenate, Add, Softmax
#from keras.layers import Conv2D, MaxPooling2D, Input, BatchNormalization
#from keras.initializers import he_normal
#from keras import optimizers
#from keras.callbacks import LearningRateScheduler, TensorBoard, CSVLogger
from tensorflow.keras.utils import get_file
#from keras import backend as K
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import optimizers
from tensorflow.keras.utils import plot_model
import pickle
#from keras.models import load_model
#from keras.callbacks import CSVLogger
from tensorflow.keras.utils import set_random_seed
from scipy import stats

from tensorflow.keras.datasets import cifar10
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (
    Dense, Dropout, Activation, Flatten, Concatenate, Add, Softmax,
    Conv2D, MaxPooling2D, Input, BatchNormalization
)
from tensorflow.keras.initializers import he_normal
from tensorflow.keras import optimizers
from tensorflow.keras.callbacks import (
    LearningRateScheduler, TensorBoard, CSVLogger
)
from tensorflow.keras import backend as K

# Correct relations for CIFAR-10
relations = [[0,2,9],[0,3,17],[0,4,10],[0,4,18],[1,5,11],[1,6,15],[1,7,12],[1,7,14],[1,8,13],[1,8,16]]

# Computes hierarchical metrics
def hierarchical_metrics(true,pred):
  true_labels = []
  true_fine = true[2].argmax(axis=1)+9
  true_c2 = true[1].argmax(axis=1)+2
  true_c1 = true[0].argmax(axis=1)
  for i in range(len(true_fine)):
    true_labels.append([true_c1[i],true_c2[i],true_fine[i]])
  pred_labels = []
  pred_c1 = pred[0].argmax(axis = 1)
  pred_c2 = pred[1].argmax(axis = 1)+2
  pred_fine = pred[2].argmax(axis = 1)+9
  for i in range(len(pred_c1)):
    pred_labels.append([pred_c1[i],pred_c2[i],pred_fine[i]])
  preci = precision(true_labels,pred_labels)
  reca = recall(true_labels,pred_labels)
  f_1 = f1(true_labels,pred_labels)

  consistent_examples = 1
  correct_pred = 0
  test_set_size = len(true_labels)
  for i in range(test_set_size):
    if [pred_c1[i],pred_c2[i],pred_fine[i]] in relations:
        consistent_examples = consistent_examples + 1
    if [pred_c1[i],pred_c2[i],pred_fine[i]] == true_labels[i]:
        correct_pred = correct_pred +1
  h_accuracy = correct_pred/test_set_size
  h_consistency = (consistent_examples-1)/test_set_size

  return h_accuracy,h_consistency,f_1

# Hierarchical metrics, proposed by Kiritchenko et al (2005)
# Implementation
# https://gitlab.com/dacs-hpi/hiclass/-/blob/main/hiclass/metrics.py


def precision(y_true: np.ndarray, y_pred: np.ndarray):
    """
    Compute precision score for hierarchical classification.

    hP = sum(|S intersection T|) / sum(|S|),
    where S is the set consisting of the most specific class(es) predicted for a test example and all respective ancestors
    and T is the set consisting of the true most specific class(es) for a test example and all respective ancestors.

    Parameters
    ----------
    y_true : np.array of shape (n_samples, n_levels)
        Ground truth (correct) labels.
    y_pred : np.array of shape (n_samples, n_levels)
        Predicted labels, as returned by a classifier.
    Returns
    -------
    precision : float
        What proportion of positive identifications was actually correct?
    """
    assert len(y_true) == len(y_pred)
    sum_intersection = 0
    sum_prediction_and_ancestors = 0
    for ground_truth, prediction in zip(y_true, y_pred):
        sum_intersection = sum_intersection + len(
            set(ground_truth).intersection(set(prediction))
        )
        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
            set(prediction)
        )
    precision = sum_intersection / sum_prediction_and_ancestors
    return precision


def recall(y_true: np.ndarray, y_pred: np.ndarray):
    """
    Compute recall score for hierarchical classification.

    hR = sum(|S intersection T|) / sum(|T|),
    where S is the set consisting of the most specific class(es) predicted for a test example and all respective ancestors
    and T is the set consisting of the true most specific class(es) for a test example and all respective ancestors.

    Parameters
    ----------
    y_true : np.array of shape (n_samples, n_levels)
        Ground truth (correct) labels.
    y_pred : np.array of shape (n_samples, n_levels)
        Predicted labels, as returned by a classifier.
    Returns
    -------
    recall : float
        What proportion of actual positives was identified correctly?
    """
    assert len(y_true) == len(y_pred)
    sum_intersection = 0
    sum_prediction_and_ancestors = 0
    for ground_truth, prediction in zip(y_true, y_pred):
        sum_intersection = sum_intersection + len(
            set(ground_truth).intersection(set(prediction))
        )
        sum_prediction_and_ancestors = sum_prediction_and_ancestors + len(
            set(ground_truth)
        )
    recall = sum_intersection / sum_prediction_and_ancestors
    return recall


def f1(y_true: np.ndarray, y_pred: np.ndarray):
    """
    Compute f1 score for hierarchical classification.

    hF = 2 * hP * hR / (hP + hR),
    where hP is the hierarchical precision and hR is the hierarchical recall.

    Parameters
    ----------
    y_true : np.array of shape (n_samples, n_levels)
        Ground truth (correct) labels.
    y_pred : np.array of shape (n_samples, n_levels)
        Predicted labels, as returned by a classifier.
    Returns
    -------
    f1 : float
        Weighted average of the precision and recall
    """
    assert len(y_true) == len(y_pred)
    prec = precision(y_true, y_pred)
    rec = recall(y_true, y_pred)
    f1 = 2 * prec * rec / (prec + rec)
    return f1

# General Settings

In [40]:
#-------- dimensions ---------
img_rows, img_cols = 32, 32
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 3)
#-----------------------------

train_size = 50000

#--- coarse 1 classes ---
num_c_1 = 2
#--- coarse 2 classes ---
num_c_2 = 7
#--- fine classes ---
num_classes  = 10

batch_size   = 128
epochs       = 60

In [41]:
#-------------------- data loading ----------------------
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = to_categorical(y_train, num_classes)
y_cm = y_test
y_test = to_categorical(y_test, num_classes)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

#---------------- data preprocessing -------------------
x_train = (x_train-np.mean(x_train)) / np.std(x_train)
x_test = (x_test-np.mean(x_test)) / np.std(x_test)

In [42]:
#---------------------- make coarse 2 labels --------------------------
parent_f = {
  2:3, 3:5, 5:5,
  1:2, 7:6, 4:6,
  0:0, 6:4, 8:1, 9:2
}

y_c2_train = np.zeros((y_train.shape[0], num_c_2)).astype("float32")
y_c2_test = np.zeros((y_test.shape[0], num_c_2)).astype("float32")
for i in range(y_c2_train.shape[0]):
  y_c2_train[i][parent_f[np.argmax(y_train[i])]] = 1.0
for i in range(y_c2_test.shape[0]):
  y_c2_test[i][parent_f[np.argmax(y_test[i])]] = 1.0

#---------------------- make coarse 1 labels --------------------------
parent_c2 = {
  0:0, 1:0, 2:0,
  3:1, 4:1, 5:1, 6:1
}
y_c1_train = np.zeros((y_c2_train.shape[0], num_c_1)).astype("float32")
y_c1_test = np.zeros((y_c2_test.shape[0], num_c_1)).astype("float32")
for i in range(y_c1_train.shape[0]):
  y_c1_train[i][parent_c2[np.argmax(y_c2_train[i])]] = 1.0
for i in range(y_c1_test.shape[0]):
  y_c1_test[i][parent_c2[np.argmax(y_c2_test[i])]] = 1.0

In [43]:
# Learning rate scheduler
def scheduler(epoch):
  learning_rate_init = 0.003
  if epoch > 42:
    learning_rate_init = 0.0005
  if epoch > 52:
    learning_rate_init = 0.0001
  return learning_rate_init

In [44]:
# Loss Weights modifier, when BT-strategy is used
class LossWeightsModifier(keras.callbacks.Callback):
  def __init__(self, alpha, beta, gamma):
    self.alpha = alpha
    self.beta = beta
    self.gamma = gamma
  def on_epoch_end(self, epoch, logs={}):
    if epoch == 10:
      K.set_value(self.alpha, 0.1)
      K.set_value(self.beta, 0.8)
      K.set_value(self.gamma, 0.1)
    if epoch == 20:
      K.set_value(self.alpha, 0.1)
      K.set_value(self.beta, 0.2)
      K.set_value(self.gamma, 0.7)
    if epoch == 30:
      K.set_value(self.alpha, 0)
      K.set_value(self.beta, 0)
      K.set_value(self.gamma, 1)

# Flat CNN Base B

In [7]:
#----------------------- model definition ---------------------------
img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(1024, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, fine_pred, name='flat_cnn_base_b')

#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy'],
              )


In [None]:
change_lr = LearningRateScheduler(scheduler)# Training
history_base_b = model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=change_lr,
          validation_data=(x_test, y_test))

# Evaluation on test set
score_base_b = model.evaluate(x_test, y_test, verbose=0)
parameters_base_b = np.sum([K.count_params(w) for w in model.trainable_weights])

# Results
print("--- Flat CNN Base B ---")
print("Accuracy:",score_base_b[1])
print("Parameters:","{:,}".format(parameters_base_b))

In [9]:
# Save non-model variables
with open("experiment_data.pkl", "wb") as f:
    pickle.dump({
        "score_base_b": score_base_b,
        "parameters_base_b": parameters_base_b,
        "history_base_b": history_base_b.history,  # Only the history dict
    }, f)

# Save model
model.save("flat_cnn_base_b.keras")  # or "flat_cnn_base_b.h5"

In [27]:
import pickle
from tensorflow import keras

# Load non-model variables
with open("./CIFAR10_models/saved_flat_cnn_b/flat_cnn_experiment_data.pkl", "rb") as f:
    flat_cnn_b_data = pickle.load(f)

# Unpack the loaded data
flat_cnn_score_base_b = flat_cnn_b_data["score_base_b"]
flat_cnn_parameters_base_b = flat_cnn_b_data["parameters_base_b"]
flat_cnn_history_base_b = flat_cnn_b_data["history_base_b"]

# Load the Keras model
flat_cnn_b_model = keras.models.load_model("./CIFAR10_models/saved_flat_cnn_b/flat_cnn_base_b.keras")

# (Optional) If you want, you can reconstruct the history as a History-like object
# But usually, people just use the dictionary

# B-CNN Base B

In [15]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(512, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(512, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(1024, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='bcnn_base_b')

#----------------------- compile  ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
#ADD - lista de loss=[...] e metrics[...]
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy','accuracy','accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_bcnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_b_cnn_b = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_b_cnn_b = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_b_cnn_b,cons_b_cnn_b,f1_b_cnn_b= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- B-CNN Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_b_cnn_b[4])
print("Accuracy level 2:",score_b_cnn_b[5])
print("Accuracy level 3:",score_b_cnn_b[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_b_cnn_b)
print("Consistency:",cons_b_cnn_b)
print("f1:",f1_b_cnn_b)
print("Parameters:","{:,}".format(parameters_b_cnn_b))

In [18]:
# 1. Save the trained model (architecture + weights + optimizer state)
model.save("bcnn_b_final_model.keras")

# 2. Save the training history
with open("history_bcnn_b.pkl", "wb") as f:
    pickle.dump(history_bcnn_b.history, f)

# 3. Save hyperparameters and other relevant variables
meta = {
    'batch_size': batch_size,
    'epochs': epochs,
    'alpha_value': K.get_value(alpha),
    'beta_value': K.get_value(beta),
    'gamma_value': K.get_value(gamma),
    'bt_strategy': bt_strategy,
    'input_shape': input_shape,
    'num_classes': num_classes,
    'num_c_1': num_c_1,
    'num_c_2': num_c_2,
    'learning_rate': 0.003  # or however you're setting it
}

with open("bcnn_b_meta.pkl", "wb") as f:
    pickle.dump(meta, f)

In [26]:
import pickle
from tensorflow import keras

# 1. Load the trained model (architecture + weights + optimizer state)
b_cnn_b_model = keras.models.load_model("./CIFAR10_models/saved_bcnn_b/bcnn_b_final_model.keras")

# 2. Load the training history
with open("./CIFAR10_models/saved_bcnn_b/history_bcnn_b.pkl", "rb") as f:
    history_bcnn_b = pickle.load(f)

# 3. Load hyperparameters and other relevant variables
with open("./CIFAR10_models/saved_bcnn_b/bcnn_b_meta.pkl", "rb") as f:
    b_cnn_b_meta = pickle.load(f)

# Unpack meta if you want individual variables
b_cnn_b_batch_size = b_cnn_b_meta['batch_size']
b_cnn_b_epochs = b_cnn_b_meta['epochs']
b_cnn_b_alpha_value = b_cnn_b_meta['alpha_value']
b_cnn_b_beta_value = b_cnn_b_meta['beta_value']
b_cnn_b_gamma_value = b_cnn_b_meta['gamma_value']
b_cnn_b_bt_strategy = b_cnn_b_meta['bt_strategy']
b_cnn_b_input_shape = b_cnn_b_meta['input_shape']
b_cnn_b_num_classes = b_cnn_b_meta['num_classes']
b_cnn_b_num_c_1 = b_cnn_b_meta['num_c_1']
b_cnn_b_num_c_2 = b_cnn_b_meta['num_c_2']
b_cnn_b_learning_rate = b_cnn_b_meta['learning_rate']

In [48]:
# 4. Assuming you have your test data loaded, evaluate the model
# Replace 'x_test', 'y_c1_test', 'y_c2_test', and 'y_test' with your actual test data variables
score_b_cnn_b = b_cnn_b_model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)

# 5. Print the accuracy at each level (assuming this information is in the score)
print("--- B-CNN Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:", score_b_cnn_b[4])  # Accuracy level 1
print("Accuracy level 2:", score_b_cnn_b[5])  # Accuracy level 2
print("Accuracy level 3:", score_b_cnn_b[6])  # Accuracy level 3

# Calculate the number of trainable parameters
parameters_b_cnn_b = np.sum([K.count_params(w) for w in b_cnn_b_model.trainable_weights])

# You can also use hierarchical metrics if needed
acc_b_cnn_b, cons_b_cnn_b, f1_b_cnn_b = hierarchical_metrics([y_c1_test, y_c2_test, y_test], b_cnn_b_model.predict(x_test))

# Results
print("--- Hierarchical Metrics ---")
print("Accuracy:", acc_b_cnn_b)
print("Consistency:", cons_b_cnn_b)
print("f1:", f1_b_cnn_b)

--- B-CNN Base B ---
--- Accuracy per level ---
Accuracy level 1: 0.9589999914169312
Accuracy level 2: 0.8697999715805054
Accuracy level 3: 0.8424999713897705
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 104ms/step
--- Hierarchical Metrics ---
Accuracy: 0.7889
Consistency: 0.9028
f1: 0.8904333333333333


# BA-CNN Base B

In [19]:
# if True, the model uses BT-strategy for training
bt_strategy = True

# neurons of all dense layers on each branch
branch_neurons = 256

In [None]:
#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(branch_neurons, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(branch_neurons, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch_out = Dropout(0.5)(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(branch_neurons, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(branch_neurons, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch_out = Dropout(0.5)(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(branch_neurons, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(branch_neurons, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x_out = Dropout(0.5)(x)


#-- Att for coarse 1---
# Coarse 1
sfcn_1_1 = Dense(64, name='fc1_1')(c_1_bch_out)
sfcn_1_1 = Dense(1, name='fc1_2')(sfcn_1_1)
# Coarse 2
sfcn_1_2 = Dense(64, name='fc1_3')(c_2_bch_out)
sfcn_1_2 = Dense(1, name='fc1_4')(sfcn_1_2)
# Fine
sfcn_1_3 = Dense(64, name='fc1_5')(x_out)
sfcn_1_3 = Dense(1, name='fc1_6')(sfcn_1_3)

score_vector_1 = Concatenate()([sfcn_1_1,sfcn_1_2,sfcn_1_3]) # Score vector 1
att_weights_1 = Activation('softmax', name='attention_weights_1')(score_vector_1) # Attention weights 1
weightned_sum_1 = Add()([c_1_bch_out*att_weights_1[0][0],c_2_bch_out*att_weights_1[0][1],x_out*att_weights_1[0][2]]) # Weightned sum 1

# Concat and prediction
coarse_1_concat = Concatenate()([c_1_bch_out,weightned_sum_1])
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(coarse_1_concat)


#-- Att for coarse 2---

# Coarse 1
sfcn_2_1 = Dense(64, name='fc2_1')(c_1_bch_out)
sfcn_2_1 = Dense(1, name='fc2_2')(sfcn_2_1)
# Coarse 2
sfcn_2_2 = Dense(64, name='fc2_3')(c_2_bch_out)
sfcn_2_2 = Dense(1, name='fc2_4')(sfcn_2_2)
# Fine
sfcn_2_3 = Dense(64, name='fc2_5')(x_out)
sfcn_2_3 = Dense(1, name='fc2_6')(sfcn_2_3)

score_vector_2 = Concatenate()([sfcn_2_1,sfcn_2_2,sfcn_2_3]) # Score vector 1
att_weights_2 = Activation('softmax', name='attention_weights_2')(score_vector_2) # Attention weights 1
weightned_sum_2 = Add()([c_1_bch_out*att_weights_2[0][0],c_2_bch_out*att_weights_2[0][1],x_out*att_weights_2[0][2]]) # Weightned sum 1

# Concat and prediction
coarse_2_concat = Concatenate()([c_2_bch_out,weightned_sum_2])
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(coarse_2_concat)


#-- Att for fine---

# Coarse 1
sfcn_3_1 = Dense(64, name='fc3_1')(c_1_bch_out)
sfcn_3_1 = Dense(1, name='fc3_2')(sfcn_3_1)
# Coarse 2
sfcn_3_2 = Dense(64, name='fc3_3')(c_2_bch_out)
sfcn_3_2 = Dense(1, name='fc3_4')(sfcn_3_2)
# Fine
sfcn_3_3 = Dense(64, name='fc3_5')(x_out)
sfcn_3_3 = Dense(1, name='fc3_6')(sfcn_3_3)

score_vector_3 = Concatenate()([sfcn_3_1,sfcn_3_2,sfcn_3_3]) # Score vector 1
att_weights_3 = Activation('softmax', name='attention_weights_3')(score_vector_3) # Attention weights 1
weightned_sum_3 = Add()([c_1_bch_out*att_weights_3[0][0],c_2_bch_out*att_weights_3[0][1],x_out*att_weights_3[0][2]]) # Weightned sum 3

# Concat and prediction
fine_concat = Concatenate()([x_out,weightned_sum_3])
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(fine_concat)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='bacnn_base_b')

#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy','accuracy','accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_bacnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_ba_cnn_b = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_ba_cnn_b = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_ba_cnn_b,cons_ba_cnn_b,f1_ba_cnn_b= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- BA-CNN Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_ba_cnn_b[4])
print("Accuracy level 2:",score_ba_cnn_b[5])
print("Accuracy level 3:",score_ba_cnn_b[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_ba_cnn_b)
print("Consistency:",cons_ba_cnn_b)
print("f1:",f1_ba_cnn_b)
print("Parameters:","{:,}".format(parameters_ba_cnn_b))

In [24]:
# Create a save directory
save_dir = 'saved_bacnn_b'
os.makedirs(save_dir, exist_ok=True)

# Save Keras model
model.save(os.path.join(save_dir, 'bacnn_b_model.keras'))

# Save Keras history
with open(os.path.join(save_dir, 'bacnn_b_history.pkl'), 'wb') as f:
    pickle.dump(history_bacnn_b.history, f)

# Save weights (optional redundancy)
model.save_weights(os.path.join(save_dir, 'bacnn_b_weights.weights.h5'))

# Save relevant variables using pickle
important_vars = {
    'alpha': K.get_value(alpha),
    'beta': K.get_value(beta),
    'gamma': K.get_value(gamma),
    'bt_strategy': bt_strategy,
    'branch_neurons': branch_neurons,
    'score_ba_cnn_b': score_ba_cnn_b,
    'parameters_ba_cnn_b': parameters_ba_cnn_b,
    'acc_ba_cnn_b': acc_ba_cnn_b,
    'cons_ba_cnn_b': cons_ba_cnn_b,
    'f1_ba_cnn_b': f1_ba_cnn_b,
}

with open(os.path.join(save_dir, 'bacnn_b_variables.pkl'), 'wb') as f:
    pickle.dump(important_vars, f)

In [25]:
import os
import pickle
from tensorflow import keras

# Define the directory
load_dir = '.\CIFAR10_models\saved_bacnn_b'

# Load the Keras model
ba_cnn_b_model = keras.models.load_model(os.path.join(load_dir, 'bacnn_b_model.keras'))

# Load the training history
with open(os.path.join(load_dir, 'bacnn_b_history.pkl'), 'rb') as f:
    history_bacnn_b = pickle.load(f)

# (Optional) Load the weights separately if you want to
ba_cnn_b_model.load_weights(os.path.join(load_dir, 'bacnn_b_weights.weights.h5'))

# Load important variables
with open(os.path.join(load_dir, 'bacnn_b_variables.pkl'), 'rb') as f:
    ba_cnn_b_important_vars = pickle.load(f)

# Now unpack the important_vars dictionary
ba_cnn_b_alpha_value = ba_cnn_b_important_vars['alpha']
ba_cnn_b_beta_value = ba_cnn_b_important_vars['beta']
ba_cnn_b_gamma_value = ba_cnn_b_important_vars['gamma']
ba_cnn_b_bt_strategy = ba_cnn_b_important_vars['bt_strategy']
ba_cnn_b_branch_neurons = ba_cnn_b_important_vars['branch_neurons']
score_ba_cnn_b = ba_cnn_b_important_vars['score_ba_cnn_b']
parameters_ba_cnn_b = ba_cnn_b_important_vars['parameters_ba_cnn_b']
acc_ba_cnn_b = ba_cnn_b_important_vars['acc_ba_cnn_b']
cons_ba_cnn_b = ba_cnn_b_important_vars['cons_ba_cnn_b']
f1_ba_cnn_b = ba_cnn_b_important_vars['f1_ba_cnn_b']

# (Optional) If you want to recreate the Keras variables for alpha, beta, gamma:
from tensorflow.keras import backend as K
import tensorflow as tf

ba_cnn_b_alpha = K.variable(value=ba_cnn_b_alpha_value, dtype=tf.float32, name="alpha")
ba_cnn_b_beta = K.variable(value=ba_cnn_b_beta_value, dtype=tf.float32, name="beta")
ba_cnn_b_gamma = K.variable(value=ba_cnn_b_gamma_value, dtype=tf.float32, name="gamma")

# H-CNN Base B

In [30]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch_flatt = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch_flatt)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch_flatt = Flatten(name='c2_flatten')(x)
c_2_bch_concat = Concatenate()([c_1_bch_flatt,c_2_bch_flatt]) # Conectivity Pattern
c_2_bch = Dense(512, activation='relu', name='c2_fc_cifar100_1')(c_2_bch_concat)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(512, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x_flatt = Flatten(name='flatten')(x)
x = Concatenate()([c_2_bch_concat,x_flatt]) # Conectivity Pattern
x = Dense(1024, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='hcnn_base_b')

#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy','accuracy','accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_hcnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_h_cnn+_b_b = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_h_cnn_b = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_h_cnn_b,cons_h_cnn_b,f1_h_cnn_b= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- H-CNN Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_h_cnn_b[4])
print("Accuracy level 2:",score_h_cnn_b[5])
print("Accuracy level 3:",score_h_cnn_b[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_h_cnn_b)
print("Consistency:",cons_h_cnn_b)
print("f1:",f1_h_cnn_b)
print("Parameters:","{:,}".format(parameters_h_cnn_b))

In [None]:
# Directory to save the results
save_dir = 'results/hcnn_b'
os.makedirs(save_dir, exist_ok=True)

# Save model
model.save(os.path.join(save_dir, 'hcnn_b_model.keras'))

# Save weights
model.save_weights(os.path.join(save_dir, 'hcnn_b_weights.weights.h5'))

# Save training history
with open(os.path.join(save_dir, 'hcnn_b_history.pkl'), 'wb') as f:
    pickle.dump(history_hcnn_b.history, f)

# Save predictions
with open(os.path.join(save_dir, 'hcnn_b_predictions.pkl'), 'wb') as f:
    pickle.dump(predictions, f)

# Save evaluation scores
with open(os.path.join(save_dir, 'hcnn_b_eval.pkl'), 'wb') as f:
    pickle.dump(score_h_cnn_b, f)

# Save number of parameters
with open(os.path.join(save_dir, 'hcnn_b_params.pkl'), 'wb') as f:
    pickle.dump(parameters_h_cnn_b, f)

# Save hierarchical metrics
hierarchical_results = {
    'accuracy': acc_h_cnn_b,
    'consistency': cons_h_cnn_b,
    'f1': f1_h_cnn_b
}
with open(os.path.join(save_dir, 'hcnn_b_hierarchical_metrics.pkl'), 'wb') as f:
    pickle.dump(hierarchical_results, f)

# Save loss weights (alpha, beta, gamma)
loss_weights_dict = {
    'alpha': K.get_value(alpha),
    'beta': K.get_value(beta),
    'gamma': K.get_value(gamma)
}
with open(os.path.join(save_dir, 'hcnn_b_loss_weights.pkl'), 'wb') as f:
    pickle.dump(loss_weights_dict, f)

In [None]:
import shutil
from google.colab import files

# Create a zip of the results directory
shutil.make_archive('hcnn_b_results', 'zip', 'results/hcnn_b')

# Download the zip
files.download('hcnn_b_results.zip')

In [23]:
import os
import pickle
from tensorflow import keras

# Set the directory where everything is saved
save_dir = '.\CIFAR10_models\saved_hcnn_b'

# 1. Load the model (architecture + weights + optimizer state)
h_cnn_b_model = keras.models.load_model(os.path.join(save_dir, 'hcnn_b_model.keras'))

# 2. Load the model weights (optional — not needed if loading full model)
# model.load_weights(os.path.join(save_dir, 'hcnn_b_weights.weights.h5'))

# 3. Load the training history
with open(os.path.join(save_dir, 'hcnn_b_history.pkl'), 'rb') as f:
    history_hcnn_b = pickle.load(f)

# 4. Load the predictions
with open(os.path.join(save_dir, 'hcnn_b_predictions.pkl'), 'rb') as f:
    h_cnn_b_predictions = pickle.load(f)

# 5. Load evaluation scores
with open(os.path.join(save_dir, 'hcnn_b_eval.pkl'), 'rb') as f:
    score_h_cnn_b = pickle.load(f)

# 6. Load number of parameters
with open(os.path.join(save_dir, 'hcnn_b_params.pkl'), 'rb') as f:
    parameters_h_cnn_b = pickle.load(f)

# 7. Load hierarchical metrics
with open(os.path.join(save_dir, 'hcnn_b_hierarchical_metrics.pkl'), 'rb') as f:
    h_cnn_b_hierarchical_results = pickle.load(f)

# Unpack hierarchical metrics if needed
acc_h_cnn_b = h_cnn_b_hierarchical_results['accuracy']
cons_h_cnn_b = h_cnn_b_hierarchical_results['consistency']
f1_h_cnn_b = h_cnn_b_hierarchical_results['f1']

# 8. Load loss weights (alpha, beta, gamma)
with open(os.path.join(save_dir, 'hcnn_b_loss_weights.pkl'), 'rb') as f:
    h_cnn_b_loss_weights_dict = pickle.load(f)

# Unpack loss weights
h_cnn_b_alpha_value = h_cnn_b_loss_weights_dict['alpha']
h_cnn_b_beta_value = h_cnn_b_loss_weights_dict['beta']
h_cnn_b_gamma_value = h_cnn_b_loss_weights_dict['gamma']

# Add-net Base B

In [36]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch_out = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch_out)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(256, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(256, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch_out = Add()([c_1_bch_out,c_2_bch])
c_2_bch = BatchNormalization()(c_2_bch_out)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(256, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu', name='fc2')(x)
x = Add()([x,c_2_bch_out])
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='add_net_Base_B')

#----------------------- compile and fit ---------------------------
#ADD - lr para learning_rate
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
#ADD - Adiciona lista de loss e accuracy
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy', 'categorical_crossentropy'],
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy','accuracy','accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_addnet_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_addnet_b = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_addnet_b = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_addnet_b,cons_addnet_b,f1_addnet_b= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- Add-net Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_addnet_b[4])
print("Accuracy level 2:",score_addnet_b[5])
print("Accuracy level 3:",score_addnet_b[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_addnet_b)
print("Consistency:",cons_addnet_b)
print("f1:",f1_addnet_b)
print("Parameters:","{:,}".format(parameters_addnet_b))

In [None]:
# Updated directory path
save_dir = 'results/add_net'
os.makedirs(save_dir, exist_ok=True)

# Save model architecture and weights
model.save(os.path.join(save_dir, 'addnet_b_model.keras'))  # Corrected extension
model.save_weights(os.path.join(save_dir, 'addnet_b_weights.weights.h5'))  # Full .weights.h5

# Save training history
with open(os.path.join(save_dir, 'addnet_b_history.pkl'), 'wb') as f:
    pickle.dump(history_addnet_b.history, f)

# Save loss weights variables (alpha, beta, gamma)
with open(os.path.join(save_dir, 'addnet_b_loss_weights.pkl'), 'wb') as f:
    pickle.dump({'alpha': K.get_value(alpha), 'beta': K.get_value(beta), 'gamma': K.get_value(gamma)}, f)

# Save evaluation metrics
with open(os.path.join(save_dir, 'addnet_b_evaluation.pkl'), 'wb') as f:
    pickle.dump({
        'score': score_addnet_b,
        'parameters': parameters_addnet_b,
        'acc': acc_addnet_b,
        'cons': cons_addnet_b,
        'f1': f1_addnet_b
    }, f)

print(f"Model and training artifacts saved to {save_dir}")

In [None]:
import shutil
from google.colab import files

# Define folder and zip paths
folder_path = 'results/add_net'
zip_path = 'add_net_results.zip'

# Create a zip archive of the folder
shutil.make_archive('add_net_results', 'zip', folder_path)

# Download the zip file
files.download(zip_path)

In [20]:
import os
import pickle
from tensorflow import keras

# Define the directory where the files are saved
save_dir = '.\CIFAR10_models\saved_add_net_b'

# 1. Load the full model (architecture + weights + optimizer state)
add_net_b_model = keras.models.load_model(os.path.join(save_dir, 'addnet_b_model.keras'))

# 2. Load model weights (optional redundancy)
# model.load_weights(os.path.join(save_dir, 'addnet_b_weights.weights.h5'))

# 3. Load the training history
with open(os.path.join(save_dir, 'addnet_b_history.pkl'), 'rb') as f:
    history_addnet_b = pickle.load(f)

# 4. Load the loss weights (alpha, beta, gamma)
with open(os.path.join(save_dir, 'addnet_b_loss_weights.pkl'), 'rb') as f:
    add_net_b_loss_weights = pickle.load(f)

# Unpack alpha, beta, gamma values
add_net_b_alpha_value = add_net_b_loss_weights['alpha']
add_net_b_beta_value = add_net_b_loss_weights['beta']
add_net_b_gamma_value = add_net_b_loss_weights['gamma']

# 5. Load the evaluation metrics
with open(os.path.join(save_dir, 'addnet_b_evaluation.pkl'), 'rb') as f:
    add_net_b_evaluation_metrics = pickle.load(f)

# Unpack evaluation metrics
score_addnet_b = add_net_b_evaluation_metrics['score']
parameters_addnet_b = add_net_b_evaluation_metrics['parameters']
acc_addnet_b = add_net_b_evaluation_metrics['acc']
cons_addnet_b = add_net_b_evaluation_metrics['cons']
f1_addnet_b = add_net_b_evaluation_metrics['f1']

print("✅ All model artifacts and important variables loaded successfully!")

✅ All model artifacts and important variables loaded successfully!


# Concat-net Base B

In [None]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper

img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch_out = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch_out)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(512, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(512, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch_out = Concatenate()([c_1_bch_out,c_2_bch])
c_2_bch = BatchNormalization()(c_2_bch_out)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(1024, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(1024, activation='relu', name='fc2')(x)
x = Concatenate()([x,c_2_bch_out])
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='Concatnet_Base_B')

#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(lr=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_concatnet_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_concatnet_b = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_concatnet_b = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_concatnet_b,cons_concatnet_b,f1_concatnet_b= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- Concat-net Base B ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_concatnet_b[4])
print("Accuracy level 2:",score_concatnet_b[5])
print("Accuracy level 3:",score_concatnet_b[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_concatnet_b)
print("Consistency:",cons_concatnet_b)
print("f1:",f1_concatnet_b)
print("Parameters:","{:,}".format(parameters_concatnet_b))

In [None]:
# Create folder if it doesn't exist
save_dir = 'results/concat_net'
os.makedirs(save_dir, exist_ok=True)

# Save model architecture + weights
model.save(os.path.join(save_dir, 'concat_net_base_b.keras'))  # Full model
model.save_weights(os.path.join(save_dir, 'concat_net_base_b.weights.h5'))  # Just weights

# Save training history
with open(os.path.join(save_dir, 'history_concatnet_b.pkl'), 'wb') as f:
    pickle.dump(history_concatnet_b.history, f)

# Save results and metadata
results_concatnet_b = {
    "bt_strategy": bt_strategy,
    "alpha": float(K.get_value(alpha)),
    "beta": float(K.get_value(beta)),
    "gamma": float(K.get_value(gamma)),
    "score": score_concatnet_b,
    "parameters": int(parameters_concatnet_b),
    "acc": float(acc_concatnet_b),
    "cons": float(cons_concatnet_b),
    "f1": float(f1_concatnet_b),
}

with open(os.path.join(save_dir, 'results_concatnet_b.pkl'), 'wb') as f:
    pickle.dump(results_concatnet_b, f)

print("✅ All components saved to 'results/concat_net'")

In [None]:
import shutil
from google.colab import files

# Zip the folder
shutil.make_archive("concat_net", 'zip', "results/concat_net")

# Download the zip file
files.download("concat_net.zip")

In [21]:
import os
import pickle
from tensorflow import keras

# Define the directory where the files are saved
save_dir = '.\CIFAR10_models\saved_concat_net_b'

# 1. Load the full model (architecture + weights + optimizer state)
concat_net_b_model = keras.models.load_model(os.path.join(save_dir, 'concat_net_base_b.keras'))

# 2. (Optional) Load model weights separately if needed (not necessary if you loaded the full model)
concat_net_b_model.load_weights(os.path.join(save_dir, 'concat_net_base_b.weights.h5'))

# 3. Load the training history
with open(os.path.join(save_dir, 'history_concatnet_b.pkl'), 'rb') as f:
    history_concatnet_b = pickle.load(f)

# 4. Load the results and metadata
with open(os.path.join(save_dir, 'results_concatnet_b.pkl'), 'rb') as f:
    results_concatnet_b = pickle.load(f)

# Unpack the important variables
concat_net_b_bt_strategy = results_concatnet_b["bt_strategy"]
concat_net_b_alpha_value = results_concatnet_b["alpha"]
concat_net_b_beta_value = results_concatnet_b["beta"]
concat_net_b_gamma_value = results_concatnet_b["gamma"]
score_concatnet_b = results_concatnet_b["score"]
parameters_concatnet_b = results_concatnet_b["parameters"]
acc_concatnet_b = results_concatnet_b["acc"]
cons_concatnet_b = results_concatnet_b["cons"]
f1_concatnet_b = results_concatnet_b["f1"]

print("✅ Model, history, and experiment metadata loaded successfully!")

✅ Model, history, and experiment metadata loaded successfully!


# Flat CNN Base C

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')
#----------------------- model definition ---------------------------
img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc_cifar100_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu', name='fc_cifar100_2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, fine_pred, name='flat_cnn_base_c')
model.load_weights(weights_path, by_name=True)

#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
change_lr = LearningRateScheduler(scheduler)

# Training
history_base_c = model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=change_lr,
          validation_data=(x_test, y_test))

# Evaluation on test set
score_base_c = model.evaluate(x_test, y_test, verbose=0)
parameters_base_c = np.sum([K.count_params(w) for w in model.trainable_weights])

In [None]:
import os
import pickle
import zipfile
import numpy as np
from tensorflow.keras.models import load_model

# Ensure your trained model is named `model`
# and your training results exist: history_base_c, score_base_c, etc.

# --- Customize below if needed ---
save_dir = "results/flat_cnn_base_c"
model_name = "flat_cnn_base_c"
model_path = os.path.join(save_dir, "model.keras")
zip_path = f"{model_name}.zip"

# Create directory
os.makedirs(save_dir, exist_ok=True)

# Save the model
model.save(model_path)

# Save additional data
with open(os.path.join(save_dir, "history.pkl"), "wb") as f:
    pickle.dump(history_base_c, f)

with open(os.path.join(save_dir, "score.pkl"), "wb") as f:
    pickle.dump(score_base_c, f)

with open(os.path.join(save_dir, "params.pkl"), "wb") as f:
    pickle.dump(parameters_base_c, f)

meta = {
    "batch_size": batch_size,
    "epochs": epochs,
    "input_shape": input_shape,
    "num_classes": num_classes,
    "model_name": model_name,
    "optimizer": "SGD(learning_rate=0.001, momentum=0.9, nesterov=True)",
    "loss": "categorical_crossentropy",
    "metrics": ["accuracy"]
}
with open(os.path.join(save_dir, "meta.pkl"), "wb") as f:
    pickle.dump(meta, f)

# Zip everything
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(save_dir):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, start=save_dir)
            zipf.write(file_path, arcname=arcname)

print(f"Saved and zipped into: {zip_path}")

In [None]:
from google.colab import files

# Path to your zip file
zip_filename = 'flat_cnn_base_c.zip'

# Trigger the download
files.download(zip_filename)

In [28]:
import os
import pickle
import numpy as np
from tensorflow.keras.models import load_model

# --- Set directory ---
save_dir = ".\CIFAR10_models\saved_flat_cnn_c"

# 1. Load the full model (architecture + weights + optimizer state)
model = load_model(os.path.join(save_dir, "model.keras"))

# 2. Load training history
with open(os.path.join(save_dir, "history.pkl"), "rb") as f:
    flat_cnn_history_base_c = pickle.load(f)

# 3. Load evaluation score
with open(os.path.join(save_dir, "score.pkl"), "rb") as f:
    flat_cnn_score_base_c = pickle.load(f)

# 4. Load number of parameters
with open(os.path.join(save_dir, "params.pkl"), "rb") as f:
    flat_cnn_parameters_base_c = pickle.load(f)

# 5. Load meta information
with open(os.path.join(save_dir, "meta.pkl"), "rb") as f:
    flat_cnn_meta_base_c = pickle.load(f)

# --- Optional: Unpack meta into variables ---
flat_cnn_c_batch_size = flat_cnn_meta_base_c.get("batch_size")
flat_cnn_c_epochs = flat_cnn_meta_base_c.get("epochs")
flat_cnn_c_input_shape = flat_cnn_meta_base_c.get("input_shape")
flat_cnn_c_num_classes = flat_cnn_meta_base_c.get("num_classes")
flat_cnn_c_optimizer_used = flat_cnn_meta_base_c.get("optimizer")
flat_cnn_c_loss_used = flat_cnn_meta_base_c.get("loss")
flat_cnn_c_metrics_used = flat_cnn_meta_base_c.get("metrics")
flat_cnn_c_model_name = flat_cnn_meta_base_c.get("model_name")

print("✅ Model, history, evaluation, parameters, and metadata loaded successfully!")

✅ Model, history, evaluation, parameters, and metadata loaded successfully!


# B-CNN Base C

In [None]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper


img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(512, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(512, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(1024, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(1024, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='bcnn_base_c')
model.load_weights(weights_path, by_name=True)
#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_bcnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_b_cnn_c = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_b_cnn_c = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_b_cnn_c,cons_b_cnn_c,f1_b_cnn_c= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- B-CNN Base C ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_b_cnn_c[4])
print("Accuracy level 2:",score_b_cnn_c[5])
print("Accuracy level 3:",score_b_cnn_c[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_b_cnn_c)
print("Consistency:",cons_b_cnn_c)
print("f1:",f1_b_cnn_c)
print("Parameters:","{:,}".format(parameters_b_cnn_c))

In [None]:
import os
import pickle
import zipfile

# Set up save directory
save_dir = 'results/bcnn_base_c'
os.makedirs(save_dir, exist_ok=True)

# Save model
model.save(os.path.join(save_dir, 'bcnn_base_c.keras'))

# Save weights separately (optional redundancy)
model.save_weights(os.path.join(save_dir, 'bcnn_base_c.weights.h5'))

# Save training history
with open(os.path.join(save_dir, 'history_bcnn_base_c.pkl'), 'wb') as f:
    pickle.dump(history_bcnn_b.history, f)

# Save predictions
with open(os.path.join(save_dir, 'predictions_bcnn_base_c.pkl'), 'wb') as f:
    pickle.dump(predictions, f)

# Save evaluation metrics
with open(os.path.join(save_dir, 'evaluation_bcnn_base_c.pkl'), 'wb') as f:
    pickle.dump({
        'score': score_b_cnn_c,
        'parameters': int(parameters_b_cnn_c),
        'accuracy': float(acc_b_cnn_c),
        'consistency': float(cons_b_cnn_c),
        'f1_score': float(f1_b_cnn_c)
    }, f)

# Save loss weights
with open(os.path.join(save_dir, 'loss_weights_bcnn_base_c.pkl'), 'wb') as f:
    pickle.dump({
        'alpha': float(K.get_value(alpha)),
        'beta': float(K.get_value(beta)),
        'gamma': float(K.get_value(gamma))
    }, f)

# Save meta info
with open(os.path.join(save_dir, 'meta_bcnn_base_c.pkl'), 'wb') as f:
    pickle.dump({
        'bt_strategy': bt_strategy,
        'input_shape': input_shape,
        'num_classes': num_classes,
        'num_c_1': num_c_1,
        'num_c_2': num_c_2,
        'batch_size': batch_size,
        'epochs': epochs,
        'optimizer': 'SGD(learning_rate=0.003, momentum=0.9, nesterov=True)',
        'loss': ['categorical_crossentropy'] * 3,
        'metrics': ['accuracy'] * 3,
        'pretrained_weights_source': WEIGHTS_PATH
    }, f)

# Zip the folder for download
zip_path = 'bcnn_base_c.zip'
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(save_dir):
        for file in files:
            full_path = os.path.join(root, file)
            arcname = os.path.relpath(full_path, save_dir)
            zipf.write(full_path, arcname=arcname)

print("✅ All components saved and zipped successfully.")

In [None]:
from google.colab import files
files.download('bcnn_base_c.zip')

In [29]:
import os
import pickle
from tensorflow.keras.models import load_model

# Set directory where everything was saved
save_dir = '.\CIFAR10_models\saved_bcnn_c'

# --- 1. Load the full model (architecture + weights + optimizer state) ---
b_cnn_c_model = load_model(os.path.join(save_dir, 'bcnn_base_c.keras'))

# --- 2. Load the training history ---
with open(os.path.join(save_dir, 'history_bcnn_base_c.pkl'), 'rb') as f:
    history_bcnn_b = pickle.load(f)

# --- 3. Load the predictions ---
with open(os.path.join(save_dir, 'predictions_bcnn_base_c.pkl'), 'rb') as f:
    b_cnn_c_predictions = pickle.load(f)

# --- 4. Load the evaluation metrics ---
with open(os.path.join(save_dir, 'evaluation_bcnn_base_c.pkl'), 'rb') as f:
    evaluation_bcnn_base_c = pickle.load(f)
    score_b_cnn_c = evaluation_bcnn_base_c['score']
    parameters_b_cnn_c = evaluation_bcnn_base_c['parameters']
    acc_b_cnn_c = evaluation_bcnn_base_c['accuracy']
    cons_b_cnn_c = evaluation_bcnn_base_c['consistency']
    f1_b_cnn_c = evaluation_bcnn_base_c['f1_score']

# --- 5. Load the loss weights (alpha, beta, gamma values) ---
with open(os.path.join(save_dir, 'loss_weights_bcnn_base_c.pkl'), 'rb') as f:
    loss_weights_bcnn_base_c = pickle.load(f)
    b_cnn_c_alpha_value = loss_weights_bcnn_base_c['alpha']
    b_cnn_c_beta_value = loss_weights_bcnn_base_c['beta']
    b_cnn_c_gamma_value = loss_weights_bcnn_base_c['gamma']

# --- 6. Load the meta information (hyperparameters, dataset config, etc.) ---
with open(os.path.join(save_dir, 'meta_bcnn_base_c.pkl'), 'rb') as f:
    meta_bcnn_base_c = pickle.load(f)

# --- 7. (Optional) Extract meta variables if you want them directly ---
b_cnn_c_bt_strategy = meta_bcnn_base_c['bt_strategy']
b_cnn_c_input_shape = meta_bcnn_base_c['input_shape']
b_cnn_c_num_classes = meta_bcnn_base_c['num_classes']
b_cnn_c_num_c_1 = meta_bcnn_base_c['num_c_1']
b_cnn_c_num_c_2 = meta_bcnn_base_c['num_c_2']
b_cnn_c_batch_size = meta_bcnn_base_c['batch_size']
b_cnn_c_epochs = meta_bcnn_base_c['epochs']
b_cnn_c_optimizer_used = meta_bcnn_base_c['optimizer']
b_cnn_c_loss_used = meta_bcnn_base_c['loss']
b_cnn_c_metrics_used = meta_bcnn_base_c['metrics']
b_cnn_c_pretrained_weights_source = meta_bcnn_base_c['pretrained_weights_source']

print("✅ Model, history, predictions, evaluation, loss weights, and meta information loaded successfully!")

✅ Model, history, predictions, evaluation, loss weights, and meta information loaded successfully!


# BA-CNN Base C

In [None]:
# Best hyperparameters
# if True, the model uses BT-strategy for training
bt_strategy = True

# neurons of all dense layers on each branch
branch_neurons = 32 # Parsimonious version is 256

# neurons of all attention mechanism
att_neurons = 2048  # Parsimonious version is 64

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper


img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(branch_neurons, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(branch_neurons, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch_out = Dropout(0.5)(c_1_bch)
#c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch_out)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(branch_neurons, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(branch_neurons, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch_out = Dropout(0.5)(c_2_bch)
#c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch_out)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(branch_neurons, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(branch_neurons, activation='relu', name='fc_cifar10_2')(x)
x = BatchNormalization()(x)
x_out = Dropout(0.5)(x)

#-- Att for coarse 1---
# Coarse 1
sfcn_1_1 = Dense(att_neurons, name='fc1_1')(c_1_bch_out)
sfcn_1_1 = Dense(1, name='fc1_2')(sfcn_1_1)
# Coarse 2
sfcn_1_2 = Dense(att_neurons, name='fc1_3')(c_2_bch_out)
sfcn_1_2 = Dense(1, name='fc1_4')(sfcn_1_2)
# Fine
sfcn_1_3 = Dense(att_neurons, name='fc1_5')(x_out)
sfcn_1_3 = Dense(1, name='fc1_6')(sfcn_1_3)

score_vector_1 = Concatenate()([sfcn_1_1,sfcn_1_2,sfcn_1_3]) # Score vector 1
att_weights_1 = Activation('softmax', name='attention_weights_1')(score_vector_1) # Attention weights 1
weightned_sum_1 = Add()([c_1_bch_out*att_weights_1[0][0],c_2_bch_out*att_weights_1[0][1],x_out*att_weights_1[0][2]]) # Weightned sum 1

# Concat and prediction
coarse_1_concat = Concatenate()([c_1_bch_out,weightned_sum_1])
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(coarse_1_concat)


#-- Att for coarse 2---

# Coarse 1
sfcn_2_1 = Dense(att_neurons, name='fc2_1')(c_1_bch_out)
sfcn_2_1 = Dense(1, name='fc2_2')(sfcn_2_1)
# Coarse 2
sfcn_2_2 = Dense(att_neurons, name='fc2_3')(c_2_bch_out)
sfcn_2_2 = Dense(1, name='fc2_4')(sfcn_2_2)
# Fine
sfcn_2_3 = Dense(att_neurons, name='fc2_5')(x_out)
sfcn_2_3 = Dense(1, name='fc2_6')(sfcn_2_3)

score_vector_2 = Concatenate()([sfcn_2_1,sfcn_2_2,sfcn_2_3]) # Score vector 1
att_weights_2 = Activation('softmax', name='attention_weights_2')(score_vector_2) # Attention weights 1
weightned_sum_2 = Add()([c_1_bch_out*att_weights_2[0][0],c_2_bch_out*att_weights_2[0][1],x_out*att_weights_2[0][2]]) # Weightned sum 1

# Concat and prediction
coarse_2_concat = Concatenate()([c_2_bch_out,weightned_sum_2])
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(coarse_2_concat)


#-- Att for fine---

# Coarse 1
sfcn_3_1 = Dense(att_neurons, name='fc3_1')(c_1_bch_out)
sfcn_3_1 = Dense(1, name='fc3_2')(sfcn_3_1)
# Coarse 2
sfcn_3_2 = Dense(att_neurons, name='fc3_3')(c_2_bch_out)
sfcn_3_2 = Dense(1, name='fc3_4')(sfcn_3_2)
# Fine
sfcn_3_3 = Dense(att_neurons, name='fc3_5')(x_out)
sfcn_3_3 = Dense(1, name='fc3_6')(sfcn_3_3)

score_vector_3 = Concatenate()([sfcn_3_1,sfcn_3_2,sfcn_3_3]) # Score vector 1
att_weights_3 = Activation('softmax', name='attention_weights_3')(score_vector_3) # Attention weights 1
weightned_sum_3 = Add()([c_1_bch_out*att_weights_3[0][0],c_2_bch_out*att_weights_3[0][1],x_out*att_weights_3[0][2]]) # Weightned sum 3

# Concat and prediction
fine_concat = Concatenate()([x_out,weightned_sum_3])
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(fine_concat)




model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='bacnn_base_c')
model.load_weights(weights_path, by_name=True)
#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_bcnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_ba_cnn_c = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_ba_cnn_c = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_ba_cnn_c,cons_ba_cnn_c,f1_ba_cnn_c= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- BA-CNN Base C ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_ba_cnn_c[4])
print("Accuracy level 2:",score_ba_cnn_c[5])
print("Accuracy level 3:",score_ba_cnn_c[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_ba_cnn_c)
print("Consistency:",cons_ba_cnn_c)
print("f1:",f1_ba_cnn_c)
print("Parameters:","{:,}".format(parameters_ba_cnn_c))

In [None]:
import os
import pickle
import zipfile

# Directory to save model artifacts
save_dir = "results/bacnn_base_c"
os.makedirs(save_dir, exist_ok=True)

# Save model and weights
model.save(os.path.join(save_dir, "bacnn_base_c.keras"))
model.save_weights(os.path.join(save_dir, "bacnn_base_c.weights.h5"))

# Save training history
with open(os.path.join(save_dir, "history_bacnn_base_c.pkl"), "wb") as f:
    pickle.dump(history_bcnn_b.history, f)

# Save predictions
with open(os.path.join(save_dir, "predictions_bacnn_base_c.pkl"), "wb") as f:
    pickle.dump(predictions, f)

# Save evaluation
with open(os.path.join(save_dir, "evaluation_bacnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "score": score_ba_cnn_c,
        "parameters": int(parameters_ba_cnn_c),
        "accuracy": float(acc_ba_cnn_c),
        "consistency": float(cons_ba_cnn_c),
        "f1_score": float(f1_ba_cnn_c)
    }, f)

# Save loss weights
with open(os.path.join(save_dir, "loss_weights_bacnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "alpha": float(K.get_value(alpha)),
        "beta": float(K.get_value(beta)),
        "gamma": float(K.get_value(gamma))
    }, f)

# Save metadata
with open(os.path.join(save_dir, "meta_bacnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "model_name": "bacnn_base_c",
        "bt_strategy": bt_strategy,
        "branch_neurons": branch_neurons,
        "att_neurons": att_neurons,
        "input_shape": input_shape,
        "num_classes": num_classes,
        "num_c_1": num_c_1,
        "num_c_2": num_c_2,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "SGD(learning_rate=0.003, momentum=0.9, nesterov=True)",
        "loss": ["categorical_crossentropy"] * 3,
        "metrics": ["accuracy"] * 3,
        "pretrained_weights_source": WEIGHTS_PATH
    }, f)

# Zip everything
zip_path = "bacnn_base_c.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(save_dir):
        for file in files:
            full_path = os.path.join(root, file)
            arcname = os.path.relpath(full_path, save_dir)
            zipf.write(full_path, arcname=os.path.join("bacnn_base_c", arcname))

# Download the zip
from google.colab import files as gfiles
gfiles.download("bacnn_base_c.zip")

In [33]:
import os
import pickle
from tensorflow.keras.models import load_model

# Set directory where everything was saved
save_dir = "./CIFAR10_models/saved_bacnn_c/bacnn_base_c"

# --- 1. Load the full model (architecture + weights + optimizer state) ---
ba_cnn_c_model = load_model(os.path.join(save_dir, "bacnn_base_c.keras"))

# --- 2. Load the training history ---
with open(os.path.join(save_dir, "history_bacnn_base_c.pkl"), "rb") as f:
    history_bacnn_b = pickle.load(f)

# --- 3. Load the predictions ---
with open(os.path.join(save_dir, "predictions_bacnn_base_c.pkl"), "rb") as f:
    ba_cnn_c_predictions = pickle.load(f)

# --- 4. Load the evaluation scores ---
with open(os.path.join(save_dir, "evaluation_bacnn_base_c.pkl"), "rb") as f:
    ba_cnn_c_evaluation_data = pickle.load(f)
    score_ba_cnn_c = ba_cnn_c_evaluation_data["score"]
    parameters_ba_cnn_c = ba_cnn_c_evaluation_data["parameters"]
    acc_ba_cnn_c = ba_cnn_c_evaluation_data["accuracy"]
    cons_ba_cnn_c = ba_cnn_c_evaluation_data["consistency"]
    f1_ba_cnn_c = ba_cnn_c_evaluation_data["f1_score"]

# --- 5. Load the loss weights (alpha, beta, gamma values) ---
with open(os.path.join(save_dir, "loss_weights_bacnn_base_c.pkl"), "rb") as f:
    ba_cnn_c_loss_weights_data = pickle.load(f)
    ba_cnn_c_alpha_value = ba_cnn_c_loss_weights_data["alpha"]
    ba_cnn_c_beta_value = ba_cnn_c_loss_weights_data["beta"]
    ba_cnn_c_gamma_value = ba_cnn_c_loss_weights_data["gamma"]

# --- 6. Load the metadata (model config and training hyperparameters) ---
with open(os.path.join(save_dir, "meta_bacnn_base_c.pkl"), "rb") as f:
    ba_cnn_c_meta_data = pickle.load(f)

# --- 7. (Optional) Assign meta data variables separately if needed ---
ba_cnn_c_model_name = ba_cnn_c_meta_data["model_name"]
ba_cnn_c_bt_strategy = ba_cnn_c_meta_data["bt_strategy"]
ba_cnn_c_branch_neurons = ba_cnn_c_meta_data["branch_neurons"]
ba_cnn_c_att_neurons = ba_cnn_c_meta_data["att_neurons"]
ba_cnn_c_input_shape = ba_cnn_c_meta_data["input_shape"]
ba_cnn_c_num_classes = ba_cnn_c_meta_data["num_classes"]
ba_cnn_c_num_c_1 = ba_cnn_c_meta_data["num_c_1"]
ba_cnn_c_num_c_2 = ba_cnn_c_meta_data["num_c_2"]
ba_cnn_c_batch_size = ba_cnn_c_meta_data["batch_size"]
ba_cnn_c_epochs = ba_cnn_c_meta_data["epochs"]
ba_cnn_c_optimizer_info = ba_cnn_c_meta_data["optimizer"]
ba_cnn_c_loss_info = ba_cnn_c_meta_data["loss"]
ba_cnn_c_metrics_info = ba_cnn_c_meta_data["metrics"]
ba_cnn_c_pretrained_weights_source = ba_cnn_c_meta_data["pretrained_weights_source"]

print("✅ Model, history, predictions, evaluation, loss weights, and meta loaded successfully!")

✅ Model, history, predictions, evaluation, loss weights, and meta loaded successfully!


# H-CNN Base C

In [None]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper
img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch_flatt = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(512, activation='relu', name='c1_fc_cifar10_1')(c_1_bch_flatt)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch = Dense(512, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch_flatt = Flatten(name='c2_flatten')(x)
c_2_bch_concat = Concatenate()([c_1_bch_flatt,c_2_bch_flatt]) # Conectivity Pattern
c_2_bch = Dense(1024, activation='relu', name='c2_fc_cifar100_1')(c_2_bch_concat)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(1024, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x_flatt = Flatten(name='flatten')(x)
x = Concatenate()([c_2_bch_concat,x_flatt]) # Conectivity Pattern
x = Dense(4096, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu', name='fc2')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='hcnn_base_c')
model.load_weights(weights_path, by_name=True)
#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(learning_rate=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_bcnn_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_h_cnn_c = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_h_cnn_c = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_h_cnn_c,cons_h_cnn_c,f1_h_cnn_c= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- H-CNN Base C ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_h_cnn_c[4])
print("Accuracy level 2:",score_h_cnn_c[5])
print("Accuracy level 3:",score_h_cnn_c[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_h_cnn_c)
print("Consistency:",cons_h_cnn_c)
print("f1:",f1_h_cnn_c)
print("Parameters:","{:,}".format(parameters_h_cnn_c))

In [None]:
import os
import pickle
import zipfile

# Save directory
save_dir = "results/hcnn_base_c"
os.makedirs(save_dir, exist_ok=True)

# Save model and weights
model.save(os.path.join(save_dir, "hcnn_base_c.keras"))
model.save_weights(os.path.join(save_dir, "hcnn_base_c.weights.h5"))

# Save training history
with open(os.path.join(save_dir, "history_hcnn_base_c.pkl"), "wb") as f:
    pickle.dump(history_bcnn_b.history, f)

# Save predictions
with open(os.path.join(save_dir, "predictions_hcnn_base_c.pkl"), "wb") as f:
    pickle.dump(predictions, f)

# Save evaluation
with open(os.path.join(save_dir, "evaluation_hcnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "score": score_h_cnn_c,
        "parameters": int(parameters_h_cnn_c),
        "accuracy": float(acc_h_cnn_c),
        "consistency": float(cons_h_cnn_c),
        "f1_score": float(f1_h_cnn_c)
    }, f)

# Save dynamic loss weights
with open(os.path.join(save_dir, "loss_weights_hcnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "alpha": float(K.get_value(alpha)),
        "beta": float(K.get_value(beta)),
        "gamma": float(K.get_value(gamma))
    }, f)

# Save metadata
with open(os.path.join(save_dir, "meta_hcnn_base_c.pkl"), "wb") as f:
    pickle.dump({
        "model_name": "hcnn_base_c",
        "bt_strategy": bt_strategy,
        "input_shape": input_shape,
        "num_classes": num_classes,
        "num_c_1": num_c_1,
        "num_c_2": num_c_2,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "SGD(learning_rate=0.003, momentum=0.9, nesterov=True)",
        "loss": ["categorical_crossentropy"] * 3,
        "metrics": ["accuracy"] * 3,
        "pretrained_weights_source": WEIGHTS_PATH
    }, f)

# Zip and download
zip_path = "hcnn_base_c.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files in os.walk(save_dir):
        for file in files:
            full_path = os.path.join(root, file)
            arcname = os.path.relpath(full_path, save_dir)
            zipf.write(full_path, arcname=os.path.join("hcnn_base_c", arcname))


# Download the zip
from google.colab import files as gfiles
gfiles.download(zip_path)

In [34]:
import os
import pickle
from tensorflow.keras.models import load_model

# Define the directory where everything was saved
save_dir = ".\CIFAR10_models\saved_hcnn_c\hcnn_base_c"

# --- 1. Load the full model (architecture + weights + optimizer) ---
h_cnn_c_model = load_model(os.path.join(save_dir, "hcnn_base_c.keras"))

# --- 2. Load the training history ---
with open(os.path.join(save_dir, "history_hcnn_base_c.pkl"), "rb") as f:
    history_hcnn_b = pickle.load(f)

# --- 3. Load the predictions ---
with open(os.path.join(save_dir, "predictions_hcnn_base_c.pkl"), "rb") as f:
    h_cnn_c_predictions = pickle.load(f)

# --- 4. Load evaluation results ---
with open(os.path.join(save_dir, "evaluation_hcnn_base_c.pkl"), "rb") as f:
    h_cnn_c_evaluation_data = pickle.load(f)
    score_h_cnn_c = h_cnn_c_evaluation_data["score"]
    parameters_h_cnn_c = h_cnn_c_evaluation_data["parameters"]
    acc_h_cnn_c = h_cnn_c_evaluation_data["accuracy"]
    cons_h_cnn_c = h_cnn_c_evaluation_data["consistency"]
    f1_h_cnn_c = h_cnn_c_evaluation_data["f1_score"]

# --- 5. Load loss weights (alpha, beta, gamma values) ---
with open(os.path.join(save_dir, "loss_weights_hcnn_base_c.pkl"), "rb") as f:
    h_cnn_c_loss_weights_data = pickle.load(f)
    h_cnn_c_alpha_value = h_cnn_c_loss_weights_data["alpha"]
    h_cnn_c_beta_value = h_cnn_c_loss_weights_data["beta"]
    h_cnn_c_gamma_value = h_cnn_c_loss_weights_data["gamma"]

# --- 6. Load the metadata ---
with open(os.path.join(save_dir, "meta_hcnn_base_c.pkl"), "rb") as f:
    h_cnn_c_meta_data = pickle.load(f)

# --- 7. (Optional) Assign meta data to separate variables if needed ---
h_cnn_c_model_name = h_cnn_c_meta_data["model_name"]
h_cnn_c_bt_strategy = h_cnn_c_meta_data["bt_strategy"]
h_cnn_c_input_shape = h_cnn_c_meta_data["input_shape"]
h_cnn_c_num_classes = h_cnn_c_meta_data["num_classes"]
h_cnn_c_num_c_1 = h_cnn_c_meta_data["num_c_1"]
h_cnn_c_num_c_2 = h_cnn_c_meta_data["num_c_2"]
h_cnn_c_batch_size = h_cnn_c_meta_data["batch_size"]
h_cnn_c_epochs = h_cnn_c_meta_data["epochs"]
h_cnn_c_optimizer_info = h_cnn_c_meta_data["optimizer"]
h_cnn_c_loss_info = h_cnn_c_meta_data["loss"]
h_cnn_c_metrics_info = h_cnn_c_meta_data["metrics"]
h_cnn_c_pretrained_weights_source = h_cnn_c_meta_data["pretrained_weights_source"]

print("✅ Model, history, predictions, evaluation, loss weights, and metadata loaded successfully!")

✅ Model, history, predictions, evaluation, loss weights, and metadata loaded successfully!


# Add-net Base C

In [None]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper


img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(256, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch_out = Dense(256, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch_out)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(256, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(256, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch_out= Add()([c_1_bch_out,c_2_bch])
c_2_bch = BatchNormalization()(c_2_bch_out)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(256, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(256, activation='relu', name='fc2_cifar10_1')(x)
x = Add()([x,c_2_bch_out])
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='addnet_base_c')
model.load_weights(weights_path, by_name=True)
#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(lr=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_addnet_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_addnet_c = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_addnet_c = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_addnet_c,cons_addnet_c,f1_addnet_c= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- Add-net Base C ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_addnet_c[4])
print("Accuracy level 2:",score_addnet_c[5])
print("Accuracy level 3:",score_addnet_c[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_addnet_c)
print("Consistency:",cons_addnet_c)
print("f1:",f1_addnet_c)
print("Parameters:","{:,}".format(parameters_addnet_c))

In [None]:
import os
import pickle
import zipfile
from google.colab import files
from tensorflow.keras import backend as K

# Create a directory to store the model data
save_dir = "results/addnet_base_c"
os.makedirs(save_dir, exist_ok=True)

# Save the model architecture and weights
model.save(os.path.join(save_dir, "addnet_base_c.keras"))
model.save_weights(os.path.join(save_dir, "addnet_base_c.weights.h5"))

# Save training history
with open(os.path.join(save_dir, "history_addnet_base_c.pkl"), "wb") as f:
    pickle.dump(history_addnet_b.history, f)

# Save predictions
with open(os.path.join(save_dir, "predictions_addnet_base_c.pkl"), "wb") as f:
    pickle.dump(predictions, f)

# Save evaluation metrics
with open(os.path.join(save_dir, "evaluation_addnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "score": score_addnet_c,
        "parameters": int(parameters_addnet_c),
        "accuracy": float(acc_addnet_c),
        "consistency": float(cons_addnet_c),
        "f1_score": float(f1_addnet_c)
    }, f)

# Save loss weights
with open(os.path.join(save_dir, "loss_weights_addnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "alpha": float(K.get_value(alpha)),
        "beta": float(K.get_value(beta)),
        "gamma": float(K.get_value(gamma))
    }, f)

# Save meta information
with open(os.path.join(save_dir, "meta_addnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "model_name": "addnet_base_c",
        "bt_strategy": bt_strategy,
        "input_shape": input_shape,
        "num_classes": num_classes,
        "num_c_1": num_c_1,
        "num_c_2": num_c_2,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "SGD(learning_rate=0.003, momentum=0.9, nesterov=True)",
        "loss": ["categorical_crossentropy"] * 3,
        "metrics": ["accuracy"] * 3,
        "pretrained_weights_source": WEIGHTS_PATH
    }, f)

# Zip everything into one file
zip_path = "addnet_base_c.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files_list in os.walk(save_dir):
        for file in files_list:
            full_path = os.path.join(root, file)
            arcname = os.path.relpath(full_path, os.path.dirname(save_dir))
            zipf.write(full_path, arcname)

# Download the zip file to your computer
files.download(zip_path)


In [36]:
import os
import pickle
import zipfile
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K

# Define the directory where the zip file will be extracted
extract_dir = "./CIFAR10_models/saved_add_net_c/addnet_base_c"

# Load the model architecture and weights
add_net_c_model = load_model(os.path.join(extract_dir, "addnet_base_c.keras"))
add_net_c_model.load_weights(os.path.join(extract_dir, "addnet_base_c.weights.h5"))

# Load training history
with open(os.path.join(extract_dir, "history_addnet_base_c.pkl"), "rb") as f:
    history_addnet_c = pickle.load(f)

# Load predictions
with open(os.path.join(extract_dir, "predictions_addnet_base_c.pkl"), "rb") as f:
    add_net_c_predictions = pickle.load(f)

# Load evaluation metrics
with open(os.path.join(extract_dir, "evaluation_addnet_base_c.pkl"), "rb") as f:
    evaluation_addnet_c = pickle.load(f)
    score_addnet_c = evaluation_addnet_c["score"]
    parameters_addnet_c = evaluation_addnet_c["parameters"]
    acc_addnet_c = evaluation_addnet_c["accuracy"]
    consistency_addnet_c = evaluation_addnet_c["consistency"]
    f1_addnet_c = evaluation_addnet_c["f1_score"]

# Load loss weights
with open(os.path.join(extract_dir, "loss_weights_addnet_base_c.pkl"), "rb") as f:
    loss_weights_addnet_c = pickle.load(f)
    add_net_c_alpha = K.variable(loss_weights_addnet_c["alpha"])
    add_net_c_beta = K.variable(loss_weights_addnet_c["beta"])
    add_net_c_gamma = K.variable(loss_weights_addnet_c["gamma"])

# Load meta information
with open(os.path.join(extract_dir, "meta_addnet_base_c.pkl"), "rb") as f:
    meta_addnet_c = pickle.load(f)
    add_net_c_model_name = meta_addnet_c["model_name"]
    add_net_c_bt_strategy = meta_addnet_c["bt_strategy"]
    add_net_c_input_shape = meta_addnet_c["input_shape"]
    add_net_c_num_classes = meta_addnet_c["num_classes"]
    add_net_c_num_c_1 = meta_addnet_c["num_c_1"]
    add_net_c_num_c_2 = meta_addnet_c["num_c_2"]
    add_net_c_batch_size = meta_addnet_c["batch_size"]
    add_net_c_epochs = meta_addnet_c["epochs"]
    add_net_c_optimizer = meta_addnet_c["optimizer"]
    add_net_c_loss = meta_addnet_c["loss"]
    add_net_c_metrics = meta_addnet_c["metrics"]
    add_net_c_pretrained_weights_source = meta_addnet_c["pretrained_weights_source"]

# Concat-net Base C

In [None]:
# if True, the model uses BT-strategy for training
bt_strategy = True

In [None]:
#----------get VGG16 pre-trained weights--------
WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
                         WEIGHTS_PATH,
                         cache_subdir='models')

#----------------------- model definition ---------------------------
if bt_strategy == True:
  alpha = K.variable(value=0.98, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.01, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.01, dtype="float32", name="gamma") # A3 in paper
else:
  alpha = K.variable(value=0.33, dtype="float32", name="alpha") # A1 in paper
  beta = K.variable(value=0.33, dtype="float32", name="beta") # A2 in paper
  gamma = K.variable(value=0.34, dtype="float32", name="gamma") # A3 in paper


img_input = Input(shape=input_shape, name='input')

#--- block 1 ---
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
x = BatchNormalization()(x)
x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)

#--- block 2 ---
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)

#--- coarse 1 branch ---
c_1_bch = Flatten(name='c1_flatten')(x)
c_1_bch = Dense(512, activation='relu', name='c1_fc_cifar10_1')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_bch_out = Dense(512, activation='relu', name='c1_fc2')(c_1_bch)
c_1_bch = BatchNormalization()(c_1_bch_out)
c_1_bch = Dropout(0.5)(c_1_bch)
c_1_pred = Dense(num_c_1, activation='softmax', name='c1_predictions_cifar10')(c_1_bch)

#--- block 3 ---
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)

#--- coarse 2 branch ---
c_2_bch = Flatten(name='c2_flatten')(x)
c_2_bch = Dense(1024, activation='relu', name='c2_fc_cifar10_1')(c_2_bch)
c_2_bch = BatchNormalization()(c_2_bch)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_bch = Dense(1024, activation='relu', name='c2_fc2')(c_2_bch)
c_2_bch_out= Concatenate()([c_1_bch_out,c_2_bch])
c_2_bch = BatchNormalization()(c_2_bch_out)
c_2_bch = Dropout(0.5)(c_2_bch)
c_2_pred = Dense(num_c_2, activation='softmax', name='c2_predictions_cifar10')(c_2_bch)

#--- block 4 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
x = BatchNormalization()(x)
x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)

#--- block 5 ---
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
x = BatchNormalization()(x)
x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
x = BatchNormalization()(x)

#--- fine block ---
x = Flatten(name='flatten')(x)
x = Dense(4096, activation='relu', name='fc_cifar10_1')(x)
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
x = Dense(4096, activation='relu', name='fc2_cifar10_1')(x)
x = Concatenate()([x,c_2_bch_out])
x = BatchNormalization()(x)
x = Dropout(0.5)(x)
fine_pred = Dense(num_classes, activation='softmax', name='predictions_cifar10')(x)

model = Model(img_input, [c_1_pred, c_2_pred, fine_pred], name='concatnet_base_c')
model.load_weights(weights_path, by_name=True)
#----------------------- compile and fit ---------------------------
sgd = optimizers.SGD(lr=0.003, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              loss_weights=[alpha, beta, gamma],
              # optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
model.summary()

In [None]:
# Callbacks
change_lr = LearningRateScheduler(scheduler)
change_lw = LossWeightsModifier(alpha, beta, gamma)

if bt_strategy == True:
  cbks = [change_lr, change_lw]
else:
  cbks = [change_lr]

history_concatnet_b = model.fit(x_train, [y_c1_train, y_c2_train, y_train],
          batch_size=batch_size,
          epochs=epochs,
          verbose=0,
          callbacks=cbks,
          validation_data=(x_test, [y_c1_test, y_c2_test, y_test]))

score_concatnet_c = model.evaluate(x_test, [y_c1_test, y_c2_test, y_test], verbose=0)
parameters_concatnet_c = np.sum([K.count_params(w) for w in model.trainable_weights])

predictions = model.predict(x_test)
acc_concatnet_c,cons_concatnet_c,f1_concatnet_c= hierarchical_metrics([y_c1_test, y_c2_test, y_test],predictions)

# Results
print("--- Concat-net Base C ---")
print("--- Accuracy per level ---")
print("Accuracy level 1:",score_concatnet_c[4])
print("Accuracy level 2:",score_concatnet_c[5])
print("Accuracy level 3:",score_concatnett_c[6])
print("--- Hierarchical Metrics ---")
print("Accuracy:",acc_concatnet_c)
print("Consitency:",cons_concatnet_c)
print("f1:",f1_concatnet_c)
print("Parameters:","{:,}".format(parameters_concatnet_c))

In [None]:
import os
import pickle
import zipfile
from google.colab import files
from tensorflow.keras import backend as K

# Create a directory to store the model data
save_dir = "results/concatnet_base_c"
os.makedirs(save_dir, exist_ok=True)

# Save the model architecture and weights
model.save(os.path.join(save_dir, "concatnet_base_c.keras"))
model.save_weights(os.path.join(save_dir, "concatnet_base_c.weights.h5"))

# Save training history
with open(os.path.join(save_dir, "history_concatnet_base_c.pkl"), "wb") as f:
    pickle.dump(history_concatnet_b.history, f)

# Save predictions
with open(os.path.join(save_dir, "predictions_concatnet_base_c.pkl"), "wb") as f:
    pickle.dump(predictions, f)

# Save evaluation metrics
with open(os.path.join(save_dir, "evaluation_concatnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "score": score_concatnet_c,
        "parameters": int(parameters_concatnet_c),
        "accuracy": float(acc_concatnet_c),
        "consistency": float(cons_concatnet_c),
        "f1_score": float(f1_concatnet_c)
    }, f)

# Save loss weights (alpha, beta, gamma)
with open(os.path.join(save_dir, "loss_weights_concatnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "alpha": float(K.get_value(alpha)),
        "beta": float(K.get_value(beta)),
        "gamma": float(K.get_value(gamma))
    }, f)

# Save meta information
with open(os.path.join(save_dir, "meta_concatnet_base_c.pkl"), "wb") as f:
    pickle.dump({
        "model_name": "concatnet_base_c",
        "bt_strategy": bt_strategy,
        "input_shape": input_shape,
        "num_classes": num_classes,
        "num_c_1": num_c_1,
        "num_c_2": num_c_2,
        "batch_size": batch_size,
        "epochs": epochs,
        "optimizer": "SGD(learning_rate=0.003, momentum=0.9, nesterov=True)",
        "loss": ["categorical_crossentropy"] * 3,
        "metrics": ["accuracy"] * 3,
        "pretrained_weights_source": WEIGHTS_PATH
    }, f)

# Zip everything into one file
zip_path = "concatnet_base_c.zip"
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
    for root, _, files_list in os.walk(save_dir):
        for file in files_list:
            full_path = os.path.join(root, file)
            arcname = os.path.relpath(full_path, os.path.dirname(save_dir))
            zipf.write(full_path, arcname)

# Download the zip file to your computer
files.download(zip_path)

In [37]:
import os
import pickle
import zipfile
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K

# Define the directory where the zip file will be extracted
extract_dir = ".\CIFAR10_models\saved_concatnet_c\concatnet_base_c"

# Load the model architecture and weights
concat_net_c_model = load_model(os.path.join(extract_dir, "concatnet_base_c.keras"))
concat_net_c_model.load_weights(os.path.join(extract_dir, "concatnet_base_c.weights.h5"))

# Load training history
with open(os.path.join(extract_dir, "history_concatnet_base_c.pkl"), "rb") as f:
    history_concatnet_c = pickle.load(f)

# Load predictions
with open(os.path.join(extract_dir, "predictions_concatnet_base_c.pkl"), "rb") as f:
    concat_net_c_predictions = pickle.load(f)

# Load evaluation metrics
with open(os.path.join(extract_dir, "evaluation_concatnet_base_c.pkl"), "rb") as f:
    evaluation_concatnet_c = pickle.load(f)
    score_concatnet_c = evaluation_concatnet_c["score"]
    parameters_concatnet_c = evaluation_concatnet_c["parameters"]
    acc_concatnet_c = evaluation_concatnet_c["accuracy"]
    consistency_concatnet_c = evaluation_concatnet_c["consistency"]
    f1_concatnet_c = evaluation_concatnet_c["f1_score"]

# Load loss weights
with open(os.path.join(extract_dir, "loss_weights_concatnet_base_c.pkl"), "rb") as f:
    loss_weights_concatnet_c = pickle.load(f)
    concat_net_c_alpha = K.variable(loss_weights_concatnet_c["alpha"])
    concat_net_c_beta = K.variable(loss_weights_concatnet_c["beta"])
    concat_net_c_gamma = K.variable(loss_weights_concatnet_c["gamma"])

# Load meta information
with open(os.path.join(extract_dir, "meta_concatnet_base_c.pkl"), "rb") as f:
    meta_concatnet_c = pickle.load(f)
    concat_net_c_model_name = meta_concatnet_c["model_name"]
    concat_net_c_bt_strategy = meta_concatnet_c["bt_strategy"]
    concat_net_c_input_shape = meta_concatnet_c["input_shape"]
    concat_net_c_num_classes = meta_concatnet_c["num_classes"]
    concat_net_c_num_c_1 = meta_concatnet_c["num_c_1"]
    concat_net_c_num_c_2 = meta_concatnet_c["num_c_2"]
    concat_net_c_batch_size = meta_concatnet_c["batch_size"]
    concat_net_c_epochs = meta_concatnet_c["epochs"]
    concat_net_c_optimizer = meta_concatnet_c["optimizer"]
    concat_net_c_loss = meta_concatnet_c["loss"]
    concat_net_c_metrics = meta_concatnet_c["metrics"]
    concat_net_c_pretrained_weights_source = meta_concatnet_c["pretrained_weights_source"]

# Now all the saved variables and objects are loaded and available for use

# Summary

In [50]:
summary = {'':['Flat CNN Base B','B-CNN Base B','BA-CNN Base B','H-CNN Base B','Add-net Base B','Concat-net Base B'],'Coarse 1': [0,score_b_cnn_b[4],score_ba_cnn_b[4],score_h_cnn_b[4],score_addnet_b[4],score_concatnet_b[4]],'Coarse 2': [0,score_b_cnn_b[5],score_ba_cnn_b[5],score_h_cnn_b[5],score_addnet_b[5],score_concatnet_b[5]],'Fine': [score_base_b[1],score_b_cnn_b[6],score_ba_cnn_b[6],score_h_cnn_b[6],score_addnet_b[6],score_concatnet_b[6]],'h_Accuracy':[0,acc_b_cnn_b,acc_ba_cnn_b,acc_h_cnn_b,acc_addnet_b,acc_concatnet_b],'h_Consistency':[0,cons_b_cnn_b,cons_ba_cnn_b,cons_h_cnn_b,cons_addnet_b,cons_concatnet_b],'F1':[0,f1_b_cnn_b,f1_ba_cnn_b,f1_h_cnn_b,f1_addnet_b,f1_concatnet_b],'Parameters': [parameters_base_b ,parameters_b_cnn_b,parameters_ba_cnn_b,parameters_h_cnn_b,parameters_addnet_b,parameters_concatnet_b]}
summary = pd.DataFrame(summary)
summary['Parameters'] = (summary['Parameters'].astype(float)/1000000).round(2).astype(str) + 'MM'
summary = summary.set_index('')
summary.style.highlight_max()

Unnamed: 0,Coarse 1,Coarse 2,Fine,h_Accuracy,h_Consistency,F1,Parameters
,,,,,,,
Flat CNN Base B,0.0,0.0,0.831,0.0,0.0,0.0,7.85MM
B-CNN Base B,0.959,0.8698,0.8425,0.7889,0.9028,0.890433,12.38MM
BA-CNN Base B,0.9673,0.8773,0.8348,0.8121,0.9542,0.893133,8.72MM
H-CNN Base B,0.9598,0.8708,0.842,0.7917,0.9085,0.890867,29.16MM
Add-net Base B,0.9581,0.8646,0.8332,0.7942,0.9287,0.8853,8.57MM
Concat-net Base B,0.9566,0.8673,0.8364,0.7927,0.9227,0.886767,12.39MM


In [55]:
summary = {'':['Flat CNN Base C','B-CNN Base C','BA-CNN Base C','H-CNN Base C','Add-net Base B','Concat-net Base B'],'Coarse 1': [0,score_b_cnn_c[4],score_ba_cnn_c[4],score_h_cnn_c[4],score_addnet_c[4],score_concatnet_c[4]],'Coarse 2': [0,score_b_cnn_c[5],score_ba_cnn_c[5],score_h_cnn_c[5],score_addnet_c[5],score_concatnet_c[5]],'Fine': [flat_cnn_score_base_c[1],score_b_cnn_c[6],score_ba_cnn_c[6],score_h_cnn_c[6],score_addnet_c[6],score_concatnet_c[6]],'h_Accuracy':[0,acc_b_cnn_c,acc_ba_cnn_c,acc_h_cnn_c,acc_addnet_c,acc_concatnet_c],'Consistency':[0,cons_b_cnn_c,cons_ba_cnn_c,cons_h_cnn_c,consistency_addnet_c,consistency_concatnet_c],'F1':[0,f1_b_cnn_c,f1_ba_cnn_c,f1_h_cnn_c,f1_addnet_c,f1_concatnet_c],'Parameters': [flat_cnn_parameters_base_c ,parameters_b_cnn_c,parameters_ba_cnn_c,parameters_h_cnn_c,parameters_addnet_c,parameters_concatnet_c]}
summary = pd.DataFrame(summary)
summary['Parameters'] = (summary['Parameters'].astype(float)/1000000).round(2).astype(str) + 'MM'
summary = summary.set_index('')
summary.style.highlight_max()

Unnamed: 0,Coarse 1,Coarse 2,Fine,h_Accuracy,Consistency,F1,Parameters
,,,,,,,
Flat CNN Base C,0.0,0.0,0.8832,0.0,0.0,0.0,39.95MM
B-CNN Base C,0.9607,0.905,0.8909,0.8281,0.8996,0.918867,49.67MM
BA-CNN Base C,0.9833,0.9234,0.8887,0.8814,0.9847,0.9318,15.81MM
H-CNN Base C,0.9587,0.9049,0.8833,0.8263,0.9053,0.915633,108.39MM
Add-net Base B,0.9596,0.8987,0.8842,0.8334,0.9184,0.914167,18.6MM
Concat-net Base B,0.957,0.8978,0.8812,0.8307,0.9229,0.912,49.69MM
