In [1]:
import tensorflow as tf
import numpy as np
import copy 
from Dataset import *
from collections import defaultdict
from numpy import linalg as LA

print(tf.__version__)

from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2.2.0


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 1907810684105767984,
 name: "/device:XLA_CPU:0"
 device_type: "XLA_CPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 16820970885105611435
 physical_device_desc: "device: XLA_CPU device",
 name: "/device:XLA_GPU:0"
 device_type: "XLA_GPU"
 memory_limit: 17179869184
 locality {
 }
 incarnation: 1104621815741727784
 physical_device_desc: "device: XLA_GPU device",
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 10486821696
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 3261008697050369161
 physical_device_desc: "device: 0, name: GeForce RTX 2080 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5"]

In [2]:
l1_lambda = 0.00001  # Sparsity for L1
l2_lambda = 0.0001   # L2 lambda
gl_lambda = 0.001    # Group Lasso lambda
regular_lambda = 15.0 # 0.5 # regularization lambda

ex_k = 10       # The number of units increased in the expansion processing
loss_thr = 0.01 # Threshold of dynamic expansion
spl_thr = 0.05  # Threshold of split and duplication

class_num = 2

GL_var = [] # [var for var in tf.trainable_variables() if 'new' in var.name and ('bw' in var.name or 'tw' in var.name)]

In [3]:
class CustomModel(tf.keras.Model):
    def train_step(self, data):
        # Unpack the data. Its structure depends on your model and
        # on what you pass to `fit()`.
        x, y = data

        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)  # Forward pass
            # Compute the loss value
            # (the loss function is configured in `compile()`)
            loss = self.compiled_loss(y, y_pred, regularization_losses=self.losses)

        # Compute gradients
        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        
        # Cut-off ---------------------------------------------------------------------------------------------------------
        for var in trainable_vars:
            th_t = tf.fill(tf.shape(var), l1_lambda)
            zero_t = tf.zeros(tf.shape(var))
            var_temp = var - (th_t * tf.sign(var))
            var.assign(tf.where(tf.less(tf.abs(var), th_t), zero_t, var_temp))

        # Gropu Cut-off
        for var in GL_var:
            #print("in the group cut off part!")
            g_sum = tf.sqrt(tf.reduce_sum(tf.square(var), 0))
            th_t = gl_lambda
            gw = []
            for i in range(var.get_shape()[1]):
                temp_gw = var[:, i] - (th_t * var[:, i] / g_sum[i])
                gw_gl = tf.where(tf.less(g_sum[i], th_t), tf.zeros(tf.shape(var[:, i])), temp_gw)
                gw.append(gw_gl)
            var.assign(tf.stack(gw, 1))
        # -----------------------------------------------------------------------------------------------------------------

        # Update metrics (includes the metric that tracks the loss)
        self.compiled_metrics.update_state(y, y_pred)
        # Return a dict mapping metric names to current value
        return {m.name: m.result() for m in self.metrics}

def _regular_lambda_loss(y_true, y_pred, lam, prev_w, cur_w):
    regular_terms = []
    for prev_var, cur_var in zip(prev_w, cur_w):
        regular_terms.append(tf.nn.l2_loss(cur_var-prev_var))
    loss = tf.reduce_sum(lam * tf.reduce_sum(regular_terms) + tf.reduce_sum(tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)))
    return loss

def regular_lambda_loss(lam, prev_w, cur_w):
    def _temp_loss(y_true, y_pred):
        return _regular_lambda_loss(y_true, y_pred, lam, prev_w, cur_w)
    return _temp_loss

reg_l2 = tf.keras.regularizers.l2(l=l2_lambda)

def zero_node_count(model):
    for layer in model.layers:
        if layer.trainable == False: continue
        w = layer.get_weights()
        for _w in w:
            print(f"layer \"{layer.name}\" weight shape : {_w.shape}")
            if len(_w.shape) > 1:
                cnt = 0
                for j in range(_w.shape[0]):
                    if np.count_nonzero(_w[j, :]) == 0:
                        #print(f'{j} node is zero')
                        cnt += 1
                print(f"\nnumber of zero node : {cnt}")
            print(f"number of zero weight : {np.sum(_w==0)}")
            print(f"min value of weight : {np.min(np.abs(_w))}\n")

def new_task_model(tree_model, Task_number):
    inputs = tree_model.get_layer("input_layer").input
    h = tree_model.layers[-2].output
    outputs = tf.keras.layers.Dense(class_num , activation='softmax', name = f"T{Task_number}_output")(h)

    return CustomModel(inputs, outputs)

def selective_learning(model, dataset):
    selected_prev_params = dict()   
    selected_params = dict()
    all_indices = defaultdict(list) # nonzero unis 

    selected_model = model_t2
    n_layer = len(model_t2.layers)

    # 상위 레이어 부터 골라내기
    for i in range(n_layer-1,0,-1):
        w = selected_model.layers[i].get_weights()[0]
        b = selected_model.layers[i].get_weights()[1]
        if i == n_layer-1:
            for j in range(w.shape[0]):
                if np.count_nonzero(w[j, :])!= 0:
                    all_indices['layer%d'%i].append(j)
            selected_params['layer%d/weight'%(i)] = w[np.ix_(all_indices['layer%d'%i], list(range(class_num)))]
            selected_params['layer%d/biases'%(i)] = b
        else:
            top_indices = all_indices['layer%d'%(i+1)]
            for j in range(w.shape[0]):
                if np.count_nonzero(w[j, top_indices])!= 0 or i == 1: # input과 연결된 node는 전부 쓰기 위한 i == 1
                    all_indices['layer%d'%i].append(j)

            sub_weight = w[np.ix_(all_indices['layer%d'%i], top_indices)]
            sub_biases = b[all_indices['layer%d'%(i+1)]]
            selected_params['layer%d/weight'%i] = sub_weight
            selected_params['layer%d/biases'%i] = sub_biases
            selected_prev_params['layer%d/weight'%i] = sub_weight
            selected_prev_params['layer%d/biases'%i] = sub_biases

    print(f"seleted_params ------\n")
    for i in selected_params.items():
        print(i[0], i[1].shape)

    selected_w = []
    for i in range(1,4):
        if i == 1 : h = inputs = tf.keras.Input(shape=(model.input_shape[-1],))
        else : h = tf.keras.layers.Dense(selected_params[f"layer{i}/weight"].shape[0], activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2)(h)
        selected_w.append(selected_params[f"layer{i}/weight"])
        selected_w.append(selected_params[f"layer{i}/biases"])

    outputs = tf.keras.layers.Dense(class_num , activation='softmax', kernel_regularizer=reg_l2, bias_regularizer=reg_l2)(h)
    model_select = CustomModel(inputs, outputs)
    model_select.set_weights(selected_w)

    cur_w = model_select.trainable_variables
    prev_w = list(map(lambda x:x.numpy(), model_select.trainable_variables))
    model_select.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=regular_lambda_loss(regular_lambda, prev_w, cur_w), metrics=['accuracy'])
    model_select.summary()

    images_train, labels_train, images_test, labels_test = dataset
    history = model_select.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

    # union
    selected_w = model_select.get_weights()

    n_layer = len(model.layers)

    # seleted train weights 를 기존 모델에 결합 (상위 레이어부터 차례로 결합)
    for i in range(n_layer-1,0,-1):
        w = model.layers[i].get_weights()[0]
        b = model.layers[i].get_weights()[1]
        if i == n_layer-1:
            temp_weight = w
            temp_weight[np.ix_(all_indices['layer%d'%i], list(range(class_num)))] = selected_w[i*2-2]
            model.layers[i].set_weights([temp_weight, selected_w[i*2-1]])
        else:
            temp_weight = w
            temp_biases = b
            temp_weight[np.ix_(all_indices['layer%d'%i], all_indices['layer%d'%(i+1)])] = selected_w[i*2-2]
            temp_biases[all_indices['layer%d'%(i+1)]] = selected_w[i*2-1]
            model.layers[i].set_weights([temp_weight, temp_biases])

def network_expansion(model, ex_k, dataset):
    inputs = model.get_layer("input_layer").input

    for i, layer in enumerate(model.layers[1:-1]):
        if i == 0:
            new_h = tf.keras.layers.Dense(ex_k, activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = f"layer{i+1}_tw")(inputs)
            expanded_h = tf.keras.layers.Concatenate(name = f"layer{i+1}_tw_concat")([layer.output, new_h])
            new_output = layer.output
        else:
            new_h_add = tf.keras.layers.Dense(layer.output_shape[-1], activation='relu', kernel_regularizer=reg_l2, use_bias=False, name = f"layer{i+1}_bw")(new_h)
            new_output = tf.keras.layers.Add(name = f"layer{i+1}_bw_add")([new_h_add, layer(prev_output)])
            new_h = tf.keras.layers.Dense(ex_k, activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = f"layer{i+1}_tw")(expanded_h)
            expanded_h = tf.keras.layers.Concatenate(name = f"layer{i+1}_tw_concat")([new_output, new_h])
        prev_output = new_output
    
    new_h_add = tf.keras.layers.Dense(model.layers[-1].output_shape[-1], activation='relu', kernel_regularizer=reg_l2, use_bias=False, name = f"layer{len(model.layers)-1}_bw")(new_h)
    outputs = tf.keras.layers.Add(name = f"layer{len(model.layers)-1}_add")([new_h_add, model.layers[-1](new_output)])

    model_expansion = CustomModel(inputs, outputs)

    # cur_w = network_2.trainable_variables
    # prev_w = list(map(lambda x:x.numpy(), network_2.trainable_variables))
    model_expansion.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])#, run_eagerly=True)
    model_expansion.summary()
    #GL_var = [var for var in model_t3.trainable_variables if len(var.get_shape()) > 1] # [var for var in tf.trainable_variables() if 'new' in var.name and ('bw' in var.name or 'tw' in var.name)]

    images_train, labels_train, images_test, labels_test = dataset
    #(images_train, labels_train),(images_test, labels_test) = specific_load('emnist', add_channel = False, emnist_type = 'upper', choose = list(range(10)))
    history = model_expansion.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=6, batch_size=100, verbose=1)

    # --------------------------------------------- 골라내기
    extended_params = dict()

    n_layers = len(model.layers)

    for i in range(n_layers-2, 0, -1): # 2~1

        bw_layer = model_expansion.get_layer(f"layer{i+1}_bw")
        tw_layer = model_expansion.get_layer(f"layer{i}_tw")

        print(bw_layer.get_weights()[0].shape, tw_layer.get_weights()[0].shape)

        if i != n_layers-2 : prev_tw_w = extended_params[f"layer{i+1}_tw"][0]
        bw_w = bw_layer.get_weights()[0]
        tw_w = tw_layer.get_weights()[0]
        tw_b = tw_layer.get_weights()[1]
        

        useless = []
        for j in range(tw_w.shape[1]):
            if np.count_nonzero(tw_w[:, j]) == 0:
                useless.append(j)

        print(f"   [*] Expanding {i}th hidden unit, {ex_k - len(useless)} unit added")
        extended_params[bw_layer.name] = [np.delete(bw_w, useless, axis = 0)]
        extended_params[tw_layer.name] = [np.delete(tw_w, useless, axis = 1), np.delete(tw_b, useless)]
        if i != n_layers-2 and len(useless) != 0:
            extended_params[f"layer{i+1}_tw"][0] = np.delete(prev_tw_w, np.array(useless) + prev_tw_w.shape[0] - ex_k, axis = 0)
            
    # --------------------------------------- cur_w
    model_cur_w = []

    for l in model.layers[1:]:
        model_cur_w.append(l.get_weights()[0])
        model_cur_w.append(l.get_weights()[1])

    for weight in model_cur_w:
        print(weight.shape)
    # --------------------------------------- new_w
    model_new_w = []

    for i, value in enumerate(model_cur_w):
        temp_w = value
        idx = i//2
        if len(value.shape) > 1:
            if f'layer{idx+1}_bw' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_bw'][0]
                temp_w = np.concatenate((temp_w ,extend_weight), axis = 0)
            if f'layer{idx+1}_tw' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_tw'][0]
                temp_w = np.concatenate((temp_w ,extend_weight), axis = 1)
        else:
            if f'layer{idx+1}_tw' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_tw'][1]
                temp_w = np.concatenate((temp_w ,extend_weight))
        model_new_w.append(temp_w)
        print(temp_w.shape)

    return model_new_w

def split_expansion(model, model_prev_w, model_new_w, dataset):
    # find the highly drifted ones and split
    unit_indices = []
    for prev, cur in zip(model_prev_w[:-2], model_new_w[:-2]):
        if len(prev.shape) == 1 : continue # bias 는 거름
        next_dim = prev.shape[1]

        indices = []
        cosims = []
        for j in range(next_dim):
            cosim = LA.norm(prev[:, j] - cur[:prev.shape[0], j])
            #print(cosim, prev[:, j], cur[:prev.shape[0], j])
            if cosim > spl_thr: # spl_thr = 0.05
                indices.append(j)
                cosims.append(cosim)
        _temp = np.argsort(cosims)[:ex_k] # ex_k = 10 , 10개까지 추가 제한
        print("   [*] split N in layer: %d / %d"%(len(_temp), len(cosims)))
        indices = np.array(indices)[_temp]
        unit_indices.append(indices)

    prev_W_split = copy.deepcopy(model_new_w)
    for i, w in enumerate(model_prev_w[:-2]):
        temp = prev_W_split[i]
        if len(w.shape) >= 2:
            temp[:w.shape[0], :w.shape[1]] = w
        else:
            temp[:w.shape[0]] = w
        prev_W_split[i] = temp

    # ------------------------------------ 
    final_weight = []

    for i in range(len(unit_indices)):
        prev_w = np.copy(prev_W_split[i*2])
        cur_w = np.copy(model_new_w[i*2])
        indices = unit_indices[i]
        next_dim = prev_w.shape[1]
        if i >= 1:
            below_dim = prev_w.shape[0]
            below_indices = unit_indices[i-1]
            bottom_p_prev_ary, bottom_p_new_ary, bottom_c_prev_ary, bottom_c_new_ary = [], [], [], []
            for j in range(below_dim):
                if j in below_indices:
                    bottom_p_prev_ary.append(prev_w[j, :])
                    bottom_p_new_ary.append(cur_w[j, :])
                    bottom_c_prev_ary.append(cur_w[j, :])
                    bottom_c_new_ary.append(cur_w[j, :])
                else:
                    bottom_p_prev_ary.append(cur_w[j, :])
                    bottom_c_prev_ary.append(cur_w[j, :])
            prev_w = np.array( bottom_p_prev_ary + bottom_p_new_ary ).astype(np.float32)
            cur_w = np.array( bottom_c_prev_ary + bottom_c_new_ary ).astype(np.float32)

        prev_ary = []
        new_ary = []
        for j in range(next_dim):
            if j in indices:
                prev_ary.append(prev_w[:, j]) 
                new_ary.append(cur_w[:, j]) # will be expanded
            else:
                prev_ary.append(cur_w[:, j])
        # fully connected, L1
        expanded_w = np.array( prev_ary + new_ary ).T.astype(np.float32)
        expanded_b = np.concatenate((prev_W_split[i*2+1], np.random.rand(len(new_ary)))).astype(np.float32)

        final_weight.append([expanded_w, expanded_b])
        print(f"expanded_w shape = {expanded_w.shape}")
        print(f"expanded_b shape = {expanded_b.shape}")
    # ----------------------------------------

    node_number_expansion = [n.shape[0]-p.shape[0] for p, n in zip(model_prev_w[1:-2:2], model_new_w[1:-2:2])]
    node_number_split = [len(i) for i in unit_indices]
    node_number_add = [e+s for e, s in zip(node_number_expansion, node_number_split)]
    print(node_number_expansion, node_number_split, node_number_add)

    inputs = model.get_layer("input_layer").input

    for i, layer in enumerate(model.layers[1:-1]):
        if i == 0:
            new_h = tf.keras.layers.Dense(node_number_add[i], activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = f"layer{i+1}_tw_select")(inputs)

            expanded_h = tf.keras.layers.Concatenate(name = f"layer{i+1}_tw_concat_select")([layer.output, new_h])
            new_output = layer.output
        else:
            new_h_add = tf.keras.layers.Dense(layer.output_shape[-1], activation='relu', kernel_regularizer=reg_l2, use_bias=False, name = f"layer{i+1}_bw_select")(new_h)

            new_output = tf.keras.layers.Add(name = f"layer{i+1}_bw_add_select")([new_h_add, layer(prev_output)])
            new_h = tf.keras.layers.Dense(node_number_add[i], activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = f"layer{i+1}_tw_select")(expanded_h)

            expanded_h = tf.keras.layers.Concatenate(name = f"layer{i+1}_tw_concat_select")([new_output, new_h])
        prev_output = new_output

    new_h_add = tf.keras.layers.Dense(model.layers[-1].output_shape[-1], activation='relu', kernel_regularizer=reg_l2, use_bias=False, name = f"layer{len(model.layers)-1}_bw_select")(new_h)

    outputs = tf.keras.layers.Add(name = f"layer{len(model.layers)-1}_add_select")([new_h_add, model.layers[-1](new_output)])

    model_final = CustomModel(inputs, outputs)

    model_final.get_layer("layer1_tw_select").set_weights([final_weight[0][0][:,-node_number_add[0]:], final_weight[0][1][-node_number_add[0]:]])
    model_final.get_layer("layer2_bw_select").set_weights([final_weight[1][0][-node_number_add[0]:,:model_prev_w[3].shape[0]]])
    model_final.get_layer("layer2_tw_select").set_weights([final_weight[1][0][:,-node_number_add[1]:], final_weight[1][1][-node_number_add[1]:]])
    temp = model_final.get_layer("layer3_bw_select").get_weights()
    temp[0][:node_number_expansion[-1],:] = model_new_w[-2][-node_number_expansion[-1]:,:]

    model_final.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])#, run_eagerly=True)
    model_final.summary()

    images_train, labels_train, images_test, labels_test = dataset
    history = model_final.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=6, batch_size=100, verbose=1)
    # --------------------------------------------- 골라내기
    extended_params = dict()

    n_layers = len(model.layers)

    for i in range(n_layers-2, 0, -1): # 2~1

        bw_layer = model_final.get_layer(f"layer{i+1}_bw_select")
        tw_layer = model_final.get_layer(f"layer{i}_tw_select")

        print(bw_layer.get_weights()[0].shape, tw_layer.get_weights()[0].shape)

        bw_w = bw_layer.get_weights()[0]
        tw_w = tw_layer.get_weights()[0]
        tw_b = tw_layer.get_weights()[1]

        extended_params[bw_layer.name] = [bw_w]
        extended_params[tw_layer.name] = [tw_w, tw_b]

    # --------------------------------------- cur_w
    model_cur_w = []

    for l in model.layers[1:]:
        model_cur_w.append(l.get_weights()[0])
        model_cur_w.append(l.get_weights()[1])

    for weight in model_cur_w:
        print(weight.shape)
    # --------------------------------------- new_w
    model_new_w = []

    for i, value in enumerate(model_cur_w):
        temp_w = value
        idx = i//2
        if len(value.shape) > 1:
            if f'layer{idx+1}_bw_select' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_bw_select'][0]
                temp_w = np.concatenate((temp_w ,extend_weight), axis = 0)
            if f'layer{idx+1}_tw_select' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_tw_select'][0]
                temp_w = np.concatenate((temp_w ,extend_weight), axis = 1)
        else:
            if f'layer{idx+1}_tw_select' in extended_params:
                extend_weight = extended_params[f'layer{idx+1}_tw_select'][1]
                temp_w = np.concatenate((temp_w ,extend_weight))
        model_new_w.append(temp_w)
        print(temp_w.shape)

    return model_new_w

def make_models_by_weight(weight, Task_num, prev_models):
    models = []
    inputs = h = tf.keras.Input(shape=(weight[0].shape[0],), name = "input_layer")

    for w in weight[:-2:2]:
        h = tf.keras.layers.Dense(w.shape[1], activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2)(h)
    
    for T in range(1, Task_num + 1):
        outputs = tf.keras.layers.Dense(class_num , activation='softmax', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = f'T{T}_output')(h)
        new_model = CustomModel(inputs, outputs)
        new_model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
        models.append(new_model)

    models[-1].set_weights(weight)

    for m, p_m in zip(models[:-1], prev_models):
        temp = p_m.layers[-1].get_weights()[0]
        temp_shape = temp.shape[0]
        extend_shape = weight[-2].shape[0]
        
        new_temp = np.concatenate((temp, np.zeros((extend_shape-temp_shape, class_num))), axis = 0)

        new_temp_weight = [new_temp, copy.deepcopy(p_m.layers[-1].get_weights()[1])]
        m.layers[-1].set_weights(new_temp_weight)

    return models

In [4]:
inputs = tf.keras.Input(shape=(784,), name = "input_layer")
h = tf.keras.layers.Dense(312, activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2)(inputs)
h = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = 'T1_feature')(h)
outputs = tf.keras.layers.Dense(class_num , activation='softmax', kernel_regularizer=reg_l2, bias_regularizer=reg_l2, name = 'T1_output')(h)

model_t1 = CustomModel(inputs, outputs)
model_t1.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])#, run_eagerly=True)
model_t1.summary()

Model: "custom_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 312)               244920    
_________________________________________________________________
T1_feature (Dense)           (None, 128)               40064     
_________________________________________________________________
T1_output (Dense)            (None, 2)                 258       
Total params: 285,242
Trainable params: 285,242
Non-trainable params: 0
_________________________________________________________________


In [5]:
#(images_train, labels_train),(images_test, labels_test) = load_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [4,5])
_ = model_t1.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [6]:
zero_node_count(model_t1)

layer "dense" weight shape : (784, 312)

number of zero node : 218
number of zero weight : 100791
min value of weight : 0.0

layer "dense" weight shape : (312,)
number of zero weight : 21
min value of weight : 0.0

layer "T1_feature" weight shape : (312, 128)

number of zero node : 1
number of zero weight : 6695
min value of weight : 0.0

layer "T1_feature" weight shape : (128,)
number of zero weight : 18
min value of weight : 0.0

layer "T1_output" weight shape : (128, 2)

number of zero node : 6
number of zero weight : 12
min value of weight : 0.0

layer "T1_output" weight shape : (2,)
number of zero weight : 0
min value of weight : 0.0005245982902124524



In [7]:
model_t1_prev_w = copy.deepcopy(model_t1.get_weights())
model_t1.trainable = False

# cur_w = network_2.trainable_variables
# prev_w = list(map(lambda x:x.numpy(), network_2.trainable_variables))
model_t2 = new_task_model(tree_model = model_t1, Task_number = 2)
model_t2.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_t2.summary()

Model: "custom_model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 784)]             0         
_________________________________________________________________
dense (Dense)                (None, 312)               244920    
_________________________________________________________________
T1_feature (Dense)           (None, 128)               40064     
_________________________________________________________________
T2_output (Dense)            (None, 2)                 258       
Total params: 285,242
Trainable params: 258
Non-trainable params: 284,984
_________________________________________________________________


In [8]:
#(images_train, labels_train),(images_test, labels_test) = load_fashion_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [0,1])
_ = model_t2.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [9]:
zero_node_count(model_t2)

layer "T2_output" weight shape : (128, 2)

number of zero node : 0
number of zero weight : 2
min value of weight : 0.0

layer "T2_output" weight shape : (2,)
number of zero weight : 0
min value of weight : 0.5772326588630676



In [10]:
selective_learning(model_t2, (images_train, labels_train, images_test, labels_test))

seleted_params ------

layer3/weight (128, 2)
layer3/biases (2,)
layer2/weight (311, 128)
layer2/biases (128,)
layer1/weight (784, 311)
layer1/biases (311,)
Model: "custom_model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 784)]             0         
_________________________________________________________________
dense_1 (Dense)              (None, 311)               244135    
_________________________________________________________________
dense_2 (Dense)              (None, 128)               39936     
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 258       
Total params: 284,329
Trainable params: 284,329
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

In [11]:
history = model_t2.evaluate(images_test,labels_test)

#(images_train, labels_train),(images_test, labels_test) = load_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [4,5])
_ = model_t1.evaluate(images_test,labels_test)



In [12]:
if history[0] > loss_thr:
    ### distory_graph 나중에 추가할 것
    print(f"{history[0]} > {loss_thr}")
    #(images_train, labels_train),(images_test, labels_test) = load_fashion_mnist()
    (images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [0,1])
    model_new_w = network_expansion(model_t2, ex_k, (images_train, labels_train, images_test, labels_test))
else:
    model_new_w = model_t2.get_weights
    

0.012575163505971432 > 0.01
Model: "custom_model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None, 784)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 312)          244920      input_layer[0][0]                
__________________________________________________________________________________________________
layer1_tw (Dense)               (None, 10)           7850        input_layer[0][0]                
__________________________________________________________________________________________________
layer1_tw_concat (Concatenate)  (None, 322)          0           dense[0][0]                      
                                                         

In [13]:
final_weight = split_expansion(model_t2, model_t1_prev_w, model_new_w, (images_train, labels_train, images_test, labels_test))

   [*] split N in layer: 10 / 25
   [*] split N in layer: 2 / 2
expanded_w shape = (784, 332)
expanded_b shape = (332,)
expanded_w shape = (332, 140)
expanded_b shape = (140,)
[10, 10] [10, 2] [20, 12]
Model: "custom_model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_layer (InputLayer)        [(None, 784)]        0                                            
__________________________________________________________________________________________________
dense (Dense)                   (None, 312)          244920      input_layer[0][0]                
__________________________________________________________________________________________________
layer1_tw_select (Dense)        (None, 20)           15700       input_layer[0][0]                
_________________________________________________________________________________

In [14]:
new_model_t1, new_model_t2 = make_models_by_weight(weight = final_weight, Task_num = 2, prev_models = [model_t1])

_ = new_model_t2.evaluate(images_test,labels_test)
#(images_train, labels_train),(images_test, labels_test) = load_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [4,5])
_ = new_model_t1.evaluate(images_test,labels_test)



In [15]:
model_t1_prev_w = copy.deepcopy(new_model_t1.get_weights())
new_model_t1.trainable = False
new_model_t2.trainable = False

# cur_w = network_2.trainable_variables
# prev_w = list(map(lambda x:x.numpy(), network_2.trainable_variables))
model_t3 = new_task_model(tree_model = new_model_t1, Task_number = 3)
model_t3.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_t3.summary()

#(images_train, labels_train),(images_test, labels_test) = specific_load('emnist', add_channel = False, emnist_type = 'upper', choose = list(range(10)))
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [2,3])
_ = model_t3.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

selective_learning(model_t3, (images_train, labels_train, images_test, labels_test))

history = model_t3.evaluate(images_test,labels_test)

if history[0] > loss_thr:
    ### distory_graph 나중에 추가할 것
    print(f"{history[0]} > {loss_thr}")
    model_new_w = network_expansion(model_t3, ex_k, (images_train, labels_train, images_test, labels_test))
else:
    model_new_w = model_t3.get_weights

final_weight = split_expansion(model_t3, model_t1_prev_w, model_new_w, (images_train, labels_train, images_test, labels_test))

new_model_t1, new_model_t2, new_model_t3 = make_models_by_weight(weight = final_weight, Task_num = 3, prev_models = [new_model_t1, new_model_t2])

Model: "custom_model_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 784)]             0         
_________________________________________________________________
dense_4 (Dense)              (None, 332)               260620    
_________________________________________________________________
dense_5 (Dense)              (None, 140)               46620     
_________________________________________________________________
T3_output (Dense)            (None, 2)                 282       
Total params: 307,522
Trainable params: 282
Non-trainable params: 307,240
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
seleted_params ------

layer3/weight (128, 2)
layer3/biases (2,)
layer2/weight (311, 128)
layer2/biases (128,)
layer1/weight (784, 

In [16]:
_ = new_model_t3.evaluate(images_test,labels_test)
#(images_train, labels_train),(images_test, labels_test) = load_fashion_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [0,1])
_ = new_model_t2.evaluate(images_test,labels_test)
#(images_train, labels_train),(images_test, labels_test) = load_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [4,5])
_ = new_model_t1.evaluate(images_test,labels_test)



In [24]:
temp_2 = model_t2.get_weights()
temp_2_1 = new_model_t2.get_weights()

In [25]:
layer = 1
for w1, w2 in zip(temp_2, temp_2_1):
    print("layer :", layer)
    if len(w1.shape) > 1:
        print(np.count_nonzero(w1 != w2[:w1.shape[0], :w1.shape[1]]))
    else:
        print(np.count_nonzero(w1 != w2[:w1.shape[0]]))
    layer = layer + 1

layer : 1
169568
layer : 2
305
layer : 3
36063
layer : 4
118
layer : 5
0
layer : 6
0


In [29]:
model_t1_prev_w = copy.deepcopy(new_model_t1.get_weights())
new_model_t1.trainable = False

# cur_w = network_2.trainable_variables
# prev_w = list(map(lambda x:x.numpy(), network_2.trainable_variables))
model_t4 = new_task_model(tree_model = new_model_t1, Task_number = 4)
model_t4.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_t4.summary()

#(images_train, labels_train),(images_test, labels_test) = specific_load('emnist', add_channel = False, emnist_type = 'upper', choose = list(range(10)))
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [6,7])
_ = model_t4.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

selective_learning(model_t4, (images_train, labels_train, images_test, labels_test))

history = model_t4.evaluate(images_test,labels_test)

if history[0] > loss_thr:
    ### distory_graph 나중에 추가할 것
    print(f"{history[0]} > {loss_thr}")
    model_new_w = network_expansion(model_t4, ex_k, (images_train, labels_train, images_test, labels_test))
else:
    model_new_w = model_t3.get_weights

final_weight = split_expansion(model_t4, model_t1_prev_w, model_new_w, (images_train, labels_train, images_test, labels_test))

new_model_t1, new_model_t2, new_model_t3, new_model_t4 = make_models_by_weight(weight = final_weight, Task_num = 4, prev_models = [new_model_t1, new_model_t2, new_model_t3])

Model: "custom_model_14"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_layer (InputLayer)     [(None, 784)]             0         
_________________________________________________________________
dense_9 (Dense)              (None, 352)               276320    
_________________________________________________________________
dense_10 (Dense)             (None, 160)               56480     
_________________________________________________________________
T4_output (Dense)            (None, 2)                 322       
Total params: 333,122
Trainable params: 322
Non-trainable params: 332,800
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
seleted_params ------

layer3/weight (128, 2)
layer3/biases (2,)
layer2/weight (311, 128)
layer2/biases (128,)
layer1/weight (784,

In [30]:
_ = new_model_t4.evaluate(images_test,labels_test)
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [2,3])
_ = new_model_t3.evaluate(images_test,labels_test)
#(images_train, labels_train),(images_test, labels_test) = load_fashion_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [0,1])
_ = new_model_t2.evaluate(images_test,labels_test)
#(images_train, labels_train),(images_test, labels_test) = load_mnist()
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [4,5])
_ = new_model_t1.evaluate(images_test,labels_test)



In [None]:
model_t1_prev_w = copy.deepcopy(new_model_t1.get_weights())
new_model_t1.trainable = False

# cur_w = network_2.trainable_variables
# prev_w = list(map(lambda x:x.numpy(), network_2.trainable_variables))
model_t5 = new_task_model(tree_model = new_model_t1, Task_number = 4)
model_t5.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_t5.summary()

#(images_train, labels_train),(images_test, labels_test) = specific_load('emnist', add_channel = False, emnist_type = 'upper', choose = list(range(10)))
(images_train, labels_train),(images_test, labels_test) = specific_load('mnist', add_channel = False, choose = [8,9])
_ = model_t5.fit(images_train, labels_train, validation_data=(images_test, labels_test), epochs=10, batch_size=100, verbose=1)

selective_learning(model_t5, (images_train, labels_train, images_test, labels_test))

history = model_t5.evaluate(images_test,labels_test)

if history[0] > loss_thr:
    ### distory_graph 나중에 추가할 것
    print(f"{history[0]} > {loss_thr}")
    model_new_w = network_expansion(model_t5, ex_k, (images_train, labels_train, images_test, labels_test))
else:
    model_new_w = model_t3.get_weights

final_weight = split_expansion(model_t5, model_t1_prev_w, model_new_w, (images_train, labels_train, images_test, labels_test))

new_model_t1, new_model_t2, new_model_t3, new_model_t4, new_model_t5 = make_models_by_weight(weight = final_weight, Task_num = 4, prev_models = [new_model_t1, new_model_t2, new_model_t3, new_model_t4])