In [1]:
import numpy as np
from copy import deepcopy
import tensorflow as tf
import tensorflow.keras as keras
import matplotlib.pyplot as plt
from tensorflow.keras import layers,regularizers,metrics,optimizers
import random
import pandas as pd
from scipy.linalg import sqrtm
import pickle
from typing import Any, Callable, Dict, List, Optional, Union
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2_as_graph
import math
import scipy.stats as st
from scipy.special import comb
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from collections import defaultdict
import itertools
import json
from collections import deque

In [2]:
# Configure GPU visibility and memory use through the TF1-compatibility session API.
# NOTE(review): ConfigProto and InteractiveSession are imported by name here but the
# visible code only uses the fully qualified tf.compat.v1 spellings below.
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
import os
# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
config=tf.compat.v1.ConfigProto()
# Cap the process at 80% of GPU memory and let the allocation grow on demand.
config.gpu_options.per_process_gpu_memory_fraction = 0.8
config.gpu_options.allow_growth=True
# NOTE(review): the session is created but never handed to Keras in the visible code;
# confirm these options actually take effect in the TensorFlow version in use.
sess=tf.compat.v1.Session(config=config) 

In [3]:
"""This algorithm is used to evaluate the structural redundancy of VGGNet-16
and outputs the evaluation criteria of hidden layer redundancy as well as 
the entire redundancy evaluation criteria under each pruning parameter. 
Here, "Lam" refers to the pruning parameter set used in the evaluation 
algorithm, and "repeats" represents the number of times the pruning network 
is repeatedly fine-tuned."""
# Pruning parameters to sweep; each value is passed to prune_model as `prune_rate`.
Lam=[1.0,0.9,0.8,0.7]
# Number of fine-tuning runs performed for every pruning parameter.
repeats=3

In [4]:
# Load the pre-split dataset; the pickle holds the 4-element list
# [x_train, y_train, x_test, y_test].
# NOTE(review): pickle.load can execute arbitrary code -- only open a data.pkl you trust.
with open('data.pkl', 'rb') as f:
    [x_train,y_train,x_test,y_test]=pickle.load(f)
# One-hot encode the class labels for the 3-class problem.
y_train_onehot=tf.keras.utils.to_categorical(y_train,num_classes=3)
y_test_onehot=tf.keras.utils.to_categorical(y_test,num_classes=3)

In [5]:
# Fine-tuning hyper-parameters, read as globals by retrain().
# Peak learning rate handed to WarmUpCosine as base_lr.
initial_lr = 0.1
# Per-step shrink factor applied to kernels by CustomWeightDecaySGD.
weight_decay = 1e-4
# Number of fine-tuning epochs per run.
epochs = 200
# Number of epochs spent in the linear learning-rate warm-up.
warmup_epochs = 5
# Mini-batch size used by datagen.flow and to derive steps per epoch.
batch_size = 32
# NOTE(review): image_size is not referenced anywhere in the visible cells.
image_size = 32

In [6]:
class WarmUpCosine(tf.keras.optimizers.schedules.LearningRateSchedule):
    """Learning-rate schedule: linear warm-up followed by a half-cosine decay.

    For ``step < warmup_steps`` the rate rises linearly from ``warmup_lr`` to
    ``base_lr``; afterwards it follows
    ``base_lr * 0.5 * (1 + cos(pi * progress))`` where ``progress`` is the
    fraction of the post-warm-up steps already taken.

    Note: ``progress`` is not clamped, so steps beyond ``total_steps`` keep
    following the cosine curve.

    Args:
        base_lr: Learning rate reached at the end of the warm-up.
        total_steps: Total number of optimizer steps in the schedule.
        warmup_steps: Number of steps in the linear warm-up phase.
        warmup_lr: Learning rate at step 0.
    """

    def __init__(self, base_lr, total_steps, warmup_steps, warmup_lr=0.0):
        super().__init__()
        self.base_lr = base_lr
        self.total_steps = total_steps
        self.warmup_steps = warmup_steps
        self.warmup_lr = warmup_lr

    def __call__(self, step):
        """Return the learning rate for optimizer step ``step`` (``None`` means 0)."""
        current = tf.cast(tf.constant(0) if step is None else step, tf.float32)
        n_warmup = tf.cast(self.warmup_steps, tf.float32)
        n_total = tf.cast(self.total_steps, tf.float32)

        # Both branches are evaluated; tf.where picks the one that applies.
        ramp_lr = self.warmup_lr + (self.base_lr - self.warmup_lr) * (current / n_warmup)
        cosine_lr = self.base_lr * 0.5 * (
            1.0 + tf.cos(math.pi * (current - n_warmup) / (n_total - n_warmup))
        )
        return tf.where(current < n_warmup, ramp_lr, cosine_lr)

    def get_config(self):
        """Return the constructor arguments so the schedule can be serialized."""
        return dict(
            base_lr=self.base_lr,
            total_steps=self.total_steps,
            warmup_steps=self.warmup_steps,
            warmup_lr=self.warmup_lr,
        )

In [7]:
class CustomWeightDecaySGD(tf.keras.optimizers.SGD):
    """SGD that additionally shrinks kernel weights after every update.

    After the regular SGD step, every variable whose name contains ``kernel``
    and does not contain ``bn`` (case-insensitive) is updated as
    ``var -= weight_decay * var``. The shrink factor is applied as-is; it is
    not multiplied by the learning rate.

    Args:
        weight_decay: Per-step multiplicative shrink factor.
        **kwargs: Forwarded unchanged to ``tf.keras.optimizers.SGD``.
    """

    def __init__(self, weight_decay, **kwargs):
        super().__init__(**kwargs)
        self.weight_decay = weight_decay

    def apply_gradients(self, grads_and_vars, name=None, experimental_aggregate_gradients=True):
        """Apply the SGD step, then the weight decay; return the parent's result."""
        # Callers frequently pass a one-shot ``zip(grads, vars)``. The parent
        # call would exhaust it and the decay loop below would then silently
        # see nothing, so materialise the pairs first.
        grads_and_vars = list(grads_and_vars)
        result = super().apply_gradients(grads_and_vars, name, experimental_aggregate_gradients)
        for _, var in grads_and_vars:
            if ('kernel' in var.name) and ('bn' not in var.name.lower()):
                var.assign_sub(self.weight_decay * var)
        # Hand back whatever the base optimizer returns instead of dropping it.
        return result

    def get_config(self):
        """Extend the SGD config with ``weight_decay`` for (de)serialization."""
        config = super().get_config()
        config.update({
            "weight_decay": float(self.weight_decay),  # make sure it is a plain float
        })
        return config

In [8]:
class LastNSaver(tf.keras.callbacks.Callback):
    """Keep the weights of the last ``n`` epochs and restore the best at the end.

    After every epoch that reports ``val_accuracy`` the current weights are
    stored in a bounded deque. When training ends, the model is reset to the
    stored weights with the highest validation accuracy.

    Note: this holds up to ``n`` full copies of the model weights in memory.

    Args:
        n: Number of most recent epochs to remember.
    """

    def __init__(self, n=10):
        super().__init__()
        self.n = n
        self.history = deque(maxlen=n)  # most recent n (val_acc, weights) pairs

    def on_epoch_end(self, epoch, logs=None):
        """Record (val_accuracy, weights) for this epoch when the metric exists."""
        # ``logs`` defaults to None; treat that as "no metrics" instead of
        # raising AttributeError on ``None.get``.
        val_acc = (logs or {}).get("val_accuracy")
        if val_acc is not None:
            # Snapshot the current weights alongside their validation accuracy.
            weights = self.model.get_weights()
            self.history.append((val_acc, weights))

    def on_train_end(self, logs=None):
        """Restore the best-scoring weights among the remembered epochs."""
        if not self.history:
            return
        # max() returns the first maximal entry, i.e. the earliest epoch on ties.
        best_acc, best_weights = max(self.history, key=lambda x: x[0])
        print(f" Using best val_acc={best_acc:.4f} from last {self.n} epochs")
        self.model.set_weights(best_weights)  # restore the best weights

In [9]:
def load_VGG():
    """Load the trained VGGNet-16 from ``VGG_16.h5``.

    The custom optimizer and learning-rate schedule classes are registered so
    that the optimizer state stored in the file can be deserialized.

    Returns:
        The Keras model read from ``VGG_16.h5``.
    """
    custom_classes = {
        'CustomWeightDecaySGD': CustomWeightDecaySGD,
        'WarmUpCosine': WarmUpCosine
    }
    return tf.keras.models.load_model('VGG_16.h5', custom_objects=custom_classes)

In [10]:
# Load the trained, unpruned VGGNet-16 used as the baseline below.
model=load_VGG()

In [11]:
# Print the layer-by-layer architecture and parameter counts of the baseline.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 64)        1728      
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 64)       256       
 ormalization)                                                   
                                                                 
 activation (Activation)     (None, 32, 32, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 64)        36864     
                                                                 
 batch_normalization_1 (Batc  (None, 32, 32, 64)       256       
 hNormalization)                                                 
                                                                 
 activation_1 (Activation)   (None, 32, 32, 64)        0

In [12]:
def VGG_model(NN,input_shape=(32,32,3),num_class=3,d=512):
    """Build a VGG-16 style network with configurable channel widths.

    The network has five convolutional stages with 2, 2, 3, 3 and 3
    convolutions. Each convolution is 3x3, 'same'-padded and bias-free, and is
    followed by batch normalization and ReLU. Every stage ends with 2x2 max
    pooling. The classifier is Flatten -> Dense(d) -> BN -> ReLU -> Dense(d)
    -> BN -> ReLU -> Dense(num_class, softmax).

    Args:
        NN: Sequence with the number of filters of each of the 13
            convolutions, in network order. Extra entries are ignored.
        input_shape: Shape of one input sample (height, width, channels).
        num_class: Number of output classes.
        d: Width of the two hidden dense layers.

    Returns:
        An uncompiled ``keras.models.Sequential`` model.

    Raises:
        ValueError: If ``NN`` has fewer than 13 entries.
    """
    convs_per_stage = (2, 2, 3, 3, 3)
    n_convs_total = sum(convs_per_stage)
    if len(NN) < n_convs_total:
        raise ValueError(
            f"NN must list {n_convs_total} channel counts, got {len(NN)}"
        )

    model = keras.models.Sequential()
    conv_idx = 0
    for n_convs in convs_per_stage:
        for _ in range(n_convs):
            conv_kwargs = dict(filters=NN[conv_idx], kernel_size=(3, 3),
                               padding='same', use_bias=False)
            # Only the very first layer declares the network input shape.
            if conv_idx == 0:
                conv_kwargs['input_shape'] = input_shape
            model.add(layers.Conv2D(**conv_kwargs))
            model.add(layers.BatchNormalization())
            model.add(layers.Activation('relu'))
            conv_idx += 1
        model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    model.add(layers.Flatten())
    for _ in range(2):
        model.add(layers.Dense(d))
        model.add(layers.BatchNormalization())
        model.add(layers.Activation('relu'))
    model.add(layers.Dense(num_class,activation='softmax'))
    return model

In [13]:
def JW(m, M):
    """Compute the binary MD-LP value for one pair of classes.

    Each component of ``m`` is divided by the squared norm of the matching row
    of ``M`` (rows that are entirely zero get weight 0). The resulting weight
    vector is projected onto every column of ``M``. The value returned is
    ``|sum of projections| / (sum of |projections| + 1e-8)``, truncated from
    below at 0.5.

    Args:
        m: 1-D tensor of shape [d], the mean of the Minkowski difference of a
           binary classification dataset.
        M: 2-D tensor of shape [d, N], the Minkowski difference set of the
           binary classification dataset.

    Returns:
        Scalar tensor, the binary MD-LP value (at least 0.5).
    """
    # Squared Euclidean norm of each of the d rows of M.
    sq_norms = tf.reduce_sum(tf.square(M), axis=1)
    # Reciprocal of the squared norms, with 0 substituted for all-zero rows.
    inv_sq_norms = tf.where(sq_norms != 0,
                            tf.math.reciprocal(sq_norms),
                            tf.zeros_like(sq_norms))
    # Approximately optimal weights as a [1, d] row vector.
    weight_row = tf.reshape(m * inv_sq_norms, [1, -1])
    # Projection of every difference vector onto the weights: shape [1, N].
    projections = tf.matmul(weight_row, M)
    signed_total = tf.reduce_sum(projections)
    abs_total = tf.reduce_sum(tf.abs(projections))
    ratio = tf.abs(signed_total) / (abs_total + 1e-8)
    return tf.maximum(ratio, 0.5)
def W(X, Y, k, n_c=3):
    """Return the k largest pairwise binary MD-LP values of one channel.

    Every unordered pair of the ``n_c`` classes defines a binary problem. For
    each pair the Minkowski difference set of the two classes' flattened
    samples is built and scored with ``JW``.

    Args:
        X: Tensor/array of shape [b, l, w]. Channel output.
        Y: Tensor/array of labels of shape [b]. Data labels.
        k: Number of the largest binary MD-LP values to keep.
        n_c: Number of classes.

    Returns:
        Tensor of shape [k], the top-k MD-LP values in descending order.
    """
    b, l, w = X.shape
    n_features = l * w
    flat = tf.reshape(X, [b, n_features])   # one row per sample

    def pair_score(cls_a, cls_b):
        """MD-LP of the binary problem cls_a vs. cls_b."""
        in_a = tf.reshape(tf.equal(Y, cls_a), [-1])
        in_b = tf.reshape(tf.equal(Y, cls_b), [-1])
        X1 = tf.boolean_mask(flat, in_a)
        X2 = tf.boolean_mask(flat, in_b)
        n1 = tf.shape(X1)[0]
        n2 = tf.shape(X2)[0]
        # Sum of all pairwise differences: n2 * sum(X1) - n1 * sum(X2).
        m_i = tf.reduce_sum(X1, axis=0) * tf.cast(n2, tf.float32) - tf.reduce_sum(X2, axis=0) * tf.cast(n1, tf.float32)
        m_i = m_i / tf.linalg.norm(m_i + 1e-8)
        # All n1*n2 pairwise differences via broadcasting, then laid out as
        # [features, pairs] as expected by JW.
        diffs = tf.reshape(X1[:, None, :] - X2[None, :, :], [-1, n_features])
        return JW(m_i, tf.transpose(diffs))

    pair_scores = tf.stack(
        [pair_score(i, j) for i, j in itertools.combinations(range(n_c), 2)]
    )
    top_values, _ = tf.math.top_k(pair_scores, k)
    return top_values

In [14]:
def prune_channel(x_L, y, prune_rate, nnn=3, alpha=2.5, eps=1e-8):
    """Score every channel of a layer output and select the channels to keep.

    Steps:
    - Each channel is scored independently: ``W`` returns its ``nnn`` largest
      pairwise MD-LP values, whose Euclidean norm is divided by ``sqrt(nnn)``.
    - The score ``s`` is mapped through
      ``(exp(alpha * (2*s - 1)) - 1) / (exp(alpha) - 1)`` to give the TCR
      measure, which spreads out values close to 1.
    - The layer redundancy criterion ``R_L`` is the mean of
      ``sign(TCR - mean(TCR))`` over the channels.
    - The pruning threshold is ``min + prune_rate * rms(TCR - min)``; channels
      whose TCR measure reaches it are retained.

    Args:
        x_L (Tensor): Output of a convolutional hidden layer, with shape
            [batch_size, height, width, channels].
        y (Tensor): Ground-truth labels corresponding to the input samples.
        prune_rate (float): Pruning parameter controlling how strict the
            threshold is.
        nnn (int, optional): Number of largest pairwise MD-LP values combined
            into each channel score (passed to ``W`` as ``k``).
        alpha (float, optional): LP transformation parameter.
        eps (float, optional): Small offset subtracted from the minimum and
            the mean (both floored at 0) before they are used.

    Returns:
        channel_i_label (ndarray): Indices of channels retained after pruning.
        R_L (float): Structural redundancy evaluation criterion of the layer.
    """
    _, _, _, c = x_L.shape
    alpha = tf.cast(alpha, tf.float32)
    norm_scale = tf.sqrt(float(nnn))

    channel_scores = []
    for ch in range(c):
        top_values = W(x_L[:,:,:,ch], y, nnn)
        score = tf.norm(top_values) / norm_scale
        channel_scores.append(
            (tf.exp(alpha * (2*score-1)) - 1.0) / (tf.exp(alpha) - 1.0)
        )
    jw = tf.stack(channel_scores)

    # Threshold: floored minimum plus prune_rate times the RMS distance from it.
    floor = tf.maximum(tf.reduce_min(jw) - eps, 0.0)
    rms_spread = tf.sqrt(tf.reduce_mean(tf.square(jw - floor)))
    threshold = floor + prune_rate * rms_spread

    # Redundancy criterion: balance of channels above vs. below the mean.
    centre = tf.maximum(tf.reduce_mean(jw) - eps, 0.0)
    R_L = tf.reduce_mean(tf.sign(jw - centre))

    kept = tf.where(jw >= threshold)[:,0]
    return kept.numpy(), R_L.numpy()

In [15]:
def prune_model(model,x,y,prune_rate):
    """Structured, layer-by-layer channel pruning of VGGNet-16 based on MD-LP.

    The network is walked in layer order while a manual forward pass is kept
    in ``x_L``:
    - For every convolution, the input channels are first restricted to those
      kept in the previous layer. The conv + BN + ReLU output is computed and
      passed to ``prune_channel`` to obtain the retained output channels.
    - The kernel and the BN parameters are then cut down to those channels and
      the layer is recomputed as the input of the next layer.
    - The first dense layer has its input rows restricted to the channels
      kept by the last convolution.

    Assumptions (taken from the code, not checked at runtime):
    - Layers are recognised by the substrings ``conv2d``, ``max_pooling2d``,
      ``activation``, ``batch_normalization`` and ``dense`` in their names.
    - The network input has 3 channels.
    - The last 7 layers are Dense/BN/Activation/Dense/BN/Activation/Dense.
    - The feature map entering Flatten is 1x1, so rows of the first dense
      kernel correspond one-to-one to channels.

    Args:
        model: Original Keras VGGNet-16.
        x: Network input samples (used for forward propagation and channel
           evaluation).
        y: Sample labels (used for metric calculation in prune_channel).
        prune_rate: Pruning parameter.

    Returns:
        wb_list: Weights of each convolutional / fully connected layer after
            pruning.
        bn_list: [gamma, beta, mean, var] of each BN layer after pruning.
        channel_label: Number of retained channels of each convolutional layer.
    """
    def conv_bn(inputs, kernel, gamma, beta, mean, var, epsilon):
        """Stride-1 SAME convolution followed by inference-mode batch norm."""
        out = tf.nn.conv2d(inputs, kernel, strides=1, padding="SAME")
        return tf.nn.batch_normalization(out, mean=mean,
                                         variance=var,
                                         offset=beta,
                                         scale=gamma, variance_epsilon=epsilon)

    x_L=x
    wb_list=[]
    bn_list=[]
    channel_old_label=[0,1,2]
    channel_label=[]
    first_dense_pending=True
    N=len(model.layers)
    for i in range(N):
        layer=model.layers[i]
        if "conv2d" in layer.name:
            bn_layer=model.layers[i+1]
            weight=layer.get_weights()
            gamma,beta,mean,var=bn_layer.get_weights()
            # Use the epsilon the BN layer was actually built with, so that
            # this manual forward pass matches what the model itself computes
            # (a hard-coded value can disagree with the layer's setting).
            bn_eps=bn_layer.epsilon
            # Drop the input channels pruned in the previous layer, then run
            # conv + BN + ReLU to obtain the features scored by prune_channel.
            weight[0]=weight[0][:,:,channel_old_label,:]
            x_L1=tf.nn.relu(conv_bn(x_L,weight[0],gamma,beta,mean,var,bn_eps))
            # Key step: choose the output channels that survive pruning.
            channel_new_label,r_l=prune_channel(x_L1,y,prune_rate)
            # Cut the kernel and BN parameters down to the retained channels
            # and recompute the layer. The ReLU is applied later, when the
            # loop reaches the matching activation layer.
            weight[0]=weight[0][:,:,:,channel_new_label]
            gamma=gamma[channel_new_label]
            beta=beta[channel_new_label]
            mean=mean[channel_new_label]
            var=var[channel_new_label]
            x_L=conv_bn(x_L,weight[0],gamma,beta,mean,var,bn_eps)
            print(len(channel_new_label))
            wb_list.append(weight)
            bn_list.append([gamma,beta,mean,var])
            channel_old_label=deepcopy(channel_new_label)
            channel_label.append(len(channel_new_label))
        if "max_pooling2d" in layer.name:
            x_L=tf.nn.max_pool(x_L,ksize=[1, 2, 2, 1],strides=[1, 2, 2, 1],padding='VALID',name="pool")
        # Activations of the convolutional part only (the classifier head is
        # copied, not re-evaluated).
        if i<(N-8) and "activation" in layer.name:
            x_L=tf.nn.relu(x_L)
        # BN layers of the classifier head are kept unchanged.
        if i>(N-7) and "batch_normalization" in layer.name:
            gamma,beta,mean,var=layer.get_weights()
            bn_list.append([gamma,beta,mean,var])
        if "dense" in layer.name:
            weight,bias=layer.get_weights()
            if first_dense_pending:
                # Only the first dense layer consumes convolutional channels.
                weight=weight[channel_new_label]
                first_dense_pending=False
            wb_list.append([weight,bias])
    return wb_list,bn_list,channel_label

In [16]:
def model_pr(model,wb_list,bn_list,channel_label):
    """Build the pruned network and fill it with the pruned parameters.

    A fresh ``VGG_model`` with the given channel widths is created. Walking
    its layers in order, convolution and dense layers take successive entries
    of ``wb_list`` and batch-normalization layers take successive entries of
    ``bn_list``.

    Args:
        model: Not used by this function.
        wb_list: Per-layer weights as returned by ``prune_model``: one
            ``[kernel]`` entry per convolution followed by ``[kernel, bias]``
            entries for the dense layers.
        bn_list: Per-BN-layer ``[gamma, beta, mean, var]``.
        channel_label: Number of channels of each convolutional layer.

    Returns:
        The pruned, uncompiled Keras model.
    """
    model_p = VGG_model(channel_label)
    wb_pos = 0   # next unread entry of wb_list (conv and dense share it)
    bn_pos = 0   # next unread entry of bn_list
    for layer in model_p.layers:
        layer_name = layer.name
        if "conv2d" in layer_name:
            params = layer.get_weights()
            params[0] = wb_list[wb_pos][0]
            layer.set_weights(params)
            wb_pos += 1
        if "batch_normalization" in layer_name:
            params = layer.get_weights()
            for slot in range(4):   # gamma, beta, moving mean, moving variance
                params[slot] = bn_list[bn_pos][slot]
            layer.set_weights(params)
            bn_pos += 1
        if "dense" in layer_name:
            params = layer.get_weights()
            params[0] = wb_list[wb_pos][0]
            params[1] = wb_list[wb_pos][1]
            layer.set_weights(params)
            wb_pos += 1
    return model_p

In [17]:
# Training-time augmentation: small random rotations, shifts and horizontal flips.
# All normalisation / whitening options are switched off.
datagen = ImageDataGenerator(
            featurewise_center=False,  # do not subtract the dataset mean
            samplewise_center=False,  # do not subtract each sample's own mean
            featurewise_std_normalization=False,  # do not divide by the dataset std
            samplewise_std_normalization=False,  # do not divide each sample by its std
            zca_whitening=False,  # no ZCA whitening
            rotation_range=15,  # randomly rotate images by up to 15 degrees
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images horizontally
            vertical_flip=False)  # never flip images vertically
# Compute the dataset statistics used by feature-wise normalisation / ZCA whitening.
# NOTE(review): all of those options are False above, so this call is presumably a no-op
# for the augmentation actually applied -- confirm before removing.
datagen.fit(x_train)

In [18]:
def retrain(model,x_train,y_train,x_test,y_test):
    """Fine-tune a pruned network with the original training recipe.

    The model is compiled with ``CustomWeightDecaySGD`` (Nesterov momentum
    0.9) on a ``WarmUpCosine`` schedule and trained on augmented batches from
    the global ``datagen``. ``LastNSaver`` restores the best weights of the
    last 20 epochs, judged by validation accuracy, when training ends.

    Hyper-parameters are read from the globals ``epochs``, ``warmup_epochs``,
    ``batch_size``, ``initial_lr`` and ``weight_decay``.

    Args:
        model: Keras model to fine-tune; it is compiled and trained in place.
        x_train: Training images.
        y_train: One-hot training labels (the loss is categorical
            cross-entropy).
        x_test: Validation images.
        y_test: One-hot validation labels.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    steps_per_epoch = x_train.shape[0] // batch_size
    total_steps = epochs * steps_per_epoch
    warmup_steps = warmup_epochs * steps_per_epoch
    lr_schedule = WarmUpCosine(initial_lr, total_steps, warmup_steps)
    optimizer = CustomWeightDecaySGD(weight_decay=weight_decay,learning_rate=lr_schedule,momentum=0.9,nesterov=True)
    loss_fn=tf.keras.losses.CategoricalCrossentropy()
    model.compile(optimizer=optimizer,loss=loss_fn,metrics=['accuracy'])
    saver = LastNSaver(n=20)
    # Train and validate on the labels that were passed in, rather than on
    # the notebook-level globals, so the parameters are actually honoured.
    return model.fit(datagen.flow(x_train, y_train,batch_size=batch_size),
                     steps_per_epoch=steps_per_epoch,
                     epochs=epochs,
                     validation_data=(x_test, y_test),verbose=2,callbacks=[saver])

In [19]:
"""These functions are used to calculate the FLOPs 
and the number of parameters of the network."""
def conv_flops_params(layer, input_shape):
    """Count multiply-accumulates and parameters of one Conv2D layer.

    Args:
        layer: Object exposing ``output_shape`` (batch, h, w, channels),
            ``kernel_size`` (k_h, k_w) and ``use_bias``.
        input_shape: Input shape (batch, h, w, channels); only the channel
            count is used.

    Returns:
        Tuple ``(flops, params, (h_out, w_out, cout))`` where ``flops`` is
        ``h_out * w_out * cin * cout * k_h * k_w`` and ``params`` is the
        kernel size plus ``cout`` bias terms when the layer uses a bias.
    """
    _, _, in_channels = input_shape[1:]
    out_h, out_w, out_channels = layer.output_shape[1:]
    kernel_h, kernel_w = layer.kernel_size
    flops = out_h * out_w * in_channels * out_channels * kernel_h * kernel_w
    params = in_channels * out_channels * kernel_h * kernel_w
    params = params + out_channels if layer.use_bias else params
    return flops, params, (out_h, out_w, out_channels)
def dense_flops_params(layer, input_shape):
    """Count multiply-accumulates and parameters of one Dense layer.

    Args:
        layer: Object exposing ``units`` and ``use_bias``.
        input_shape: Input shape; its last entry is taken as the fan-in.

    Returns:
        Tuple ``(flops, params, (units,))`` where ``flops`` is
        ``fan_in * units`` and ``params`` adds ``units`` bias terms when the
        layer uses a bias.
    """
    fan_in = input_shape[-1]
    fan_out = layer.units
    n_weights = fan_in * fan_out
    bias_terms = fan_out if layer.use_bias else 0
    return n_weights, n_weights + bias_terms, (fan_out,)
def compute_flops_params(model, input_shape=(32, 32, 3)):
    """Sum the multiply-accumulates and parameters of all Conv2D/Dense layers.

    Only ``Conv2D`` and ``Dense`` layers are counted; other layers (batch
    normalization, pooling, activations) contribute nothing to either total.

    Each layer is measured against its own ``input_shape``. Tracking the shape
    by hand across layers would miss pooling and ``Flatten``, and would give
    the wrong fan-in for the first dense layer whenever the final feature map
    is larger than 1x1.

    Args:
        model: Keras model whose ``layers`` are inspected.
        input_shape: Shape of one input sample, used for a dummy forward pass.

    Returns:
        Tuple ``(total_flops, total_params)``.
    """
    total_flops = 0
    total_params = 0
    # One dummy forward pass on a single all-zero sample before inspecting
    # the layers.
    _ = model(tf.zeros((1, *input_shape)))
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Conv2D):
            flops, params, _ = conv_flops_params(layer, layer.input_shape)
        elif isinstance(layer, tf.keras.layers.Dense):
            flops, params, _ = dense_flops_params(layer, layer.input_shape)
        else:
            continue
        total_flops += flops
        total_params += params
    return total_flops, total_params

In [20]:
def model_C(model):
    """Return the number of output channels of every convolutional layer.

    A variable counts as a convolution kernel when its name contains both
    ``conv2d`` and ``kernel``; the last axis of its 4-D value is the channel
    count.

    Args:
        model: Object exposing ``variables``, each with ``name`` and
            ``numpy()``.

    Returns:
        List of channel counts in the order of ``model.variables``.
    """
    channel_counts = []
    for variable in model.variables:
        value = variable.numpy()
        is_conv_kernel = "conv2d" in variable.name and "kernel" in variable.name
        if not is_conv_kernel:
            continue
        _, _, _, n_out = value.shape
        channel_counts.append(n_out)
    return channel_counts

In [21]:
def R_layers(model,x,y):
    """Evaluate the structural redundancy criterion of every conv layer.

    For each layer whose name contains ``conv2d``, the output of the layer two
    positions later (the activation that follows its batch normalization in
    this network) is computed for ``x``. It is scored with
    ``prune_channel(..., prune_rate=0, nnn=2)``.

    Args:
        model: Keras VGGNet-16.
        x: Input samples.
        y: Labels of the samples.

    Returns:
        R: Mean of the per-layer criteria.
        RR_L: List with the criterion of each convolutional layer.
    """
    layer_outputs = [layer.output for layer in model.layers]
    per_layer = []
    for idx, layer in enumerate(model.layers):
        if "conv2d" not in layer.name:
            continue
        print('start')
        # Sub-model ending at the activation that follows this convolution.
        probe = tf.keras.models.Model(inputs=model.input,
                                      outputs=layer_outputs[idx+2])
        activations = probe.predict(x)
        _, r_L = prune_channel(activations, y, 0, nnn=2)
        print('finish')
        r_L = float(r_L)
        per_layer.append(r_L)
        print(r_L)
    R = np.mean(np.array(per_layer))
    return R, per_layer

In [22]:
# Result accumulators.
# NOTE(review): the visible sweep loop stores its results in the `progress` dict instead;
# these module-level lists are not referenced again in the visible cells.
P_list=[]
E_list=[]
F_list=[]
#RP_list=[]
#RRP_list=[]
C_list=[]
# Baseline cost and accuracy of the unpruned network; the sweep divides by these.
flops,par=compute_flops_params(model)
loss, acc = model.evaluate(x_test, y_test_onehot)
# Channel counts of the unpruned convolutional layers.
C_0=model_C(model)
print(flops)

313722368


In [23]:
# JSON checkpoint that lets an interrupted sweep resume where it stopped.
SAVE_FILE = "training_VGGNet16_log.json"
def load_progress():
    """Return the saved sweep state, or a fresh state if none exists.

    Returns:
        Dict with the keys
        - ``results``: accuracies of the finished repeats of the current
          pruning parameter,
        - ``RR_L``: per-layer redundancy criteria,
        - ``P_list`` / ``E_list`` / ``F_list`` / ``C_list``: per-parameter
          parameter ratio, accuracy ratio, FLOPs and channel counts,
        - ``last_lam_idx`` / ``last_repeat``: position at which to resume,
        - ``RL_exist``: 1 once the redundancy criteria have been computed,
        - ``Cri_exist``: 1 once the pruned model of the current parameter has
          been built.
    """
    if os.path.exists(SAVE_FILE):
        with open(SAVE_FILE, "r") as f:
            return json.load(f)
    return {"results": [], 
            "RR_L": [],
            "P_list": [],
            "E_list": [],
            "F_list": [],
            "C_list": [],
            "last_lam_idx": 0,
            "last_repeat": 0,
            "RL_exist": 0,
            "Cri_exist": 0}
def save_progress(progress):
    """Write ``progress`` to ``SAVE_FILE`` as JSON without risking the old file.

    The state is first written to a temporary sibling file and then moved
    over ``SAVE_FILE`` with ``os.replace``. A crash or serialization error
    during the write therefore leaves the previous checkpoint intact instead
    of a truncated file that ``load_progress`` cannot parse.

    Args:
        progress: JSON-serializable dict as returned by ``load_progress``.
    """
    tmp_path = SAVE_FILE + ".tmp"
    with open(tmp_path, "w") as f:
        json.dump(progress, f)
    os.replace(tmp_path, SAVE_FILE)

In [24]:
# Restore the checkpointed sweep state (or start fresh) and read the resume position.
progress = load_progress()
# Index into Lam of the pruning parameter to resume with.
start_lr_idx = progress["last_lam_idx"]
# Number of repeats already finished for that pruning parameter.
start_repeat = progress["last_repeat"]
# 1 if the per-layer redundancy criteria were already computed in an earlier run.
If_RL = progress["RL_exist"]

In [25]:
# Compute the per-layer redundancy criteria of the unpruned network once and checkpoint
# them; skipped when the loaded progress says they already exist.
if If_RL == 0:
    model=load_VGG()
    R_L,RR_L=R_layers(model,x_train,y_train)
    progress["RR_L"].append(RR_L)
    progress["RL_exist"] = 1
    save_progress(progress)

start
finish
-0.25
start
finish
-0.0625
start
finish
0.046875
start
finish
0.03125
start
finish
0.03125
start
finish
0.171875
start
finish
0.171875
start
finish
0.39453125
start
finish
0.48046875
start
finish
0.73046875
start
finish
0.84375
start
finish
0.91015625
start
finish
0.94140625


In [26]:
# Show every stored list of per-layer criteria; each execution of the previous cell
# with If_RL == 0 appends one list.
print(progress["RR_L"])

[[-0.09375, 0.0, 0.09375, 0.09375, 0.0546875, 0.21875, 0.140625, 0.4609375, 0.61328125, 0.8125, 0.96875, 0.9765625, 0.9921875], [-0.25, -0.0625, 0.046875, 0.03125, 0.03125, 0.171875, 0.171875, 0.39453125, 0.48046875, 0.73046875, 0.84375, 0.91015625, 0.94140625]]


In [27]:
# Re-states the number of fine-tuning repeats set next to Lam above; keep both in sync.
repeats=3

In [28]:
# Resumable sweep: for every pruning parameter, prune the baseline once, then fine-tune
# the pruned network `repeats` times and record the accuracy of each run.
for lam_idx in range(start_lr_idx, len(Lam)):
    lam = Lam[lam_idx]
    for rep in range(start_repeat, repeats):
        print(f"\n lambda: Lam={lam}, Repeat={rep+1}/{repeats}")
        if progress["Cri_exist"] == 0:
            # First repeat for this parameter: prune the baseline and measure the result.
            model=load_VGG()
            wb_list,bn_list,channel_label=prune_model(model,x_train,y_train,lam)
            model_p=model_pr(model,wb_list,bn_list,channel_label)
            flops_p,par_p=compute_flops_params(model_p)
            P_=par_p/par
            F=flops_p/flops
            C_P=model_C(model_p)
            print(flops_p,flops)
            # Write the pruned model BEFORE checkpointing Cri_exist=1. Otherwise a crash
            # between the two steps makes the resume branch below load a missing or
            # stale VGG_16_pruned.h5.
            model_p.save("VGG_16_pruned.h5")
            progress["P_list"].append(P_)
            progress["F_list"].append([flops_p,F])
            progress["C_list"].append([C_P])
            progress["Cri_exist"] = 1
            save_progress(progress)
        else:
            # Later repeats (or a resumed run): start again from the saved pruned model,
            # i.e. from the weights as they were before any fine-tuning.
            model_p=tf.keras.models.load_model('VGG_16_pruned.h5',custom_objects={
                'CustomWeightDecaySGD': CustomWeightDecaySGD,
                'WarmUpCosine': WarmUpCosine})
            flops_p,par_p=compute_flops_params(model_p)
            F=flops_p/flops
            print(flops_p,flops)
        retrain(model_p,x_train,y_train_onehot,x_test,y_test_onehot)
        loss_p, acc_p = model_p.evaluate(x_test, y_test_onehot)
        print(f" Finished: Lam={lam}, Repeat={rep+1}, Acc={acc_p:.4f}")
        # Checkpoint after every finished repeat so a restart skips it.
        progress["results"].append(acc_p)
        progress["last_lam_idx"] = lam_idx
        progress["last_repeat"] = rep+1
        save_progress(progress)
    # Mean accuracy over the repeats, relative to the baseline accuracy.
    progress["E_list"].append(sum(progress["results"])/(repeats*acc))
    # Reset the per-parameter state and move the resume position to the next parameter.
    progress["results"]=[]
    progress["Cri_exist"] = 0
    progress["last_repeat"] = 0
    progress["last_lam_idx"] = lam_idx + 1
    start_repeat=0
    save_progress(progress)

In [29]:
[0.9447,0.9443,0.9472]

[0.9447, 0.9443, 0.9472]

In [30]:
[0.9440,0.9453,0.9487]

[0.944, 0.9453, 0.9487]

In [31]:
[0.9457,0.9450,0.9503]

[0.9457, 0.945, 0.9503]

In [32]:
[0.9540,0.9490,0.9437]

[0.954, 0.949, 0.9437]