In [1]:
import numpy as np
import copy
from scipy.optimize import linear_sum_assignment

from keras.models import model_from_json
import keras
import pickle

C:\Users\isabe\Miniconda3\envs\cv\lib\site-packages\numpy\.libs\libopenblas.NOIJJG62EMASZI6NYURL6JBKM4EVBGM7.gfortran-win_amd64.dll
C:\Users\isabe\Miniconda3\envs\cv\lib\site-packages\numpy\.libs\libopenblas.XWYDX2IKJW2NMTWSFYNGFUWKQU3LYTCZ.gfortran-win_amd64.dll


## SaSD local search approach to compare (Convolutional) Neural Networks

### Get Distance-Score without changing order

In [2]:
def getWeightsArray(mod):
    """Collect the first weight array of every Dense and Conv2D layer of a model.

    Parameters
    ----------
    mod : model object exposing ``layers``; every layer must offer ``get_weights()``.

    Returns
    -------
    list of np.ndarray
        One array per Dense/Conv2D layer, in layer order. Only the first entry
        of ``layer.get_weights()`` is kept (the kernel; any further entries,
        such as a bias, are ignored).
    """
    return [
        np.array(layer.get_weights()[0])
        for layer in mod.layers
        if isinstance(layer, keras.layers.core.Dense)
        or isinstance(layer, keras.layers.convolutional.Conv2D)
    ]

In [3]:
# gets two vectors with all weights (! and not just the connection indices) and gives back edit distance
def editDistanceSigns(a,b):
    """Fraction of relevant positions at which two weight vectors disagree in sign.

    A position is *relevant* unless both vectors are exactly zero there.
    A position counts as *same* when both entries are strictly negative or
    both are strictly positive. The result is ``(relevant - same) / relevant``.

    Parameters
    ----------
    a, b : array-like of equal shape
        The actual weight values (not connection indices). Plain lists are
        accepted as well.

    Returns
    -------
    float in [0, 1], or the integer ``0`` when no position is relevant
    (i.e. both inputs are zero everywhere or empty).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    if a.shape != b.shape:
        raise ValueError("editDistanceSigns needs inputs of equal shape, got "
                         + str(a.shape) + " and " + str(b.shape))

    # Element-wise masks replace the former sort-based index-set intersections;
    # the counts are identical but computed in a single linear pass.
    both_zero = np.count_nonzero((a == 0) & (b == 0))
    nums = a.size - both_zero
    if nums == 0:
        return 0

    same = np.count_nonzero(((a < 0) & (b < 0)) | ((a > 0) & (b > 0)))
    return (nums - same) / nums

In [4]:
def getScoreLayers(layer1, layer2):
    """Mean sign edit distance between matching units of two weight tensors.

    Unit ``i`` of ``layer1`` is compared with unit ``i`` of ``layer2``; no
    re-ordering takes place. Units live on the last axis.

    Parameters
    ----------
    layer1, layer2 : np.ndarray of identical shape
        A 4-D array is treated as a conv kernel tensor
        (height, width, channels, kernels); anything else is indexed like a
        dense weight matrix (neurons of previous layer, neurons of this layer).

    Returns
    -------
    The sum of the per-unit ``editDistanceSigns`` values divided by the
    number of units.
    """
    assert layer1.shape == layer2.shape

    # number of kernels/neurons
    numUnits = layer1.shape[-1]
    isConv = len(layer1.shape) == 4

    def unitWeights(layer, idx):
        # all incoming weights of one kernel (flattened) or one neuron
        if isConv:
            return layer[:, :, :, idx].flatten()
        return layer[:, idx]

    total = 0
    for idx in range(numUnits):
        total += editDistanceSigns(unitWeights(layer1, idx), unitWeights(layer2, idx))

    return total / numUnits

In [5]:
# returns distance score between two models (without interchanging order of neurons)
def getScoreModels(mod1, mod2):
    """Per-layer sign distance between two models, keeping the unit order as is.

    Parameters
    ----------
    mod1, mod2 : models accepted by ``getWeightsArray``; both must yield the
        same number of weight arrays, with matching shapes layer by layer.

    Returns
    -------
    np.ndarray of shape (number of conv/dense layers,)
        Entry ``k`` is ``getScoreLayers`` applied to the k-th weight arrays.
    """
    # weight arrays of the conv and dense layers
    weightsNN1 = getWeightsArray(mod1)
    weightsNN2 = getWeightsArray(mod2)

    numLayers = len(weightsNN1)
    assert len(weightsNN2) == numLayers

    editDistance = np.zeros(numLayers)
    for k in range(numLayers):
        # getScoreLayers only reads its inputs, so no defensive copies are needed
        editDistance[k] = getScoreLayers(weightsNN1[k], weightsNN2[k])

    return editDistance

## Functions to change 2 kernels/neurons (neighbourhood)

In [6]:
def change_neurons(mod2, layer, n1, n2):
    """Return a new model equal to ``mod2`` with units ``n1`` and ``n2`` of one layer swapped.

    Swapping two units means swapping their incoming weights in the chosen
    layer and the matching input slices of the following layer. Biases are
    left untouched (they do not count towards SaSD and were not pruned).

    The index layout of ``mod2.get_weights()`` is taken to be
    ``[W0, b0, W1, b1, W2, b2, W3, b3, W4, b4]`` with W0/W1 conv kernels
    (height, width, channels, kernels) and W2..W4 dense matrices
    (neurons_old, neurons_new).

    Parameters
    ----------
    mod2 : model whose weights are read (it is not modified).
    layer : int
        0 or 1 for the two conv layers, 2 or 3 for the first two dense layers.
        Any other value yields an unchanged copy.
    n1, n2 : int
        Indices of the two kernels/neurons to exchange.

    Returns
    -------
    A freshly loaded model carrying the permuted weights.
    """
    # initialize new (empty) model
    mod2_new = getModelFromFile("../tickets/conv2.json", "../tickets/zeros.h5")
    # working copy of the weights; the right-hand sides below use integer-array
    # indexing, which always yields a copy, so in-place swaps are safe
    weights = mod2.get_weights()

    # first conv layer
    if layer == 0:
        # swap kernels n1 and n2
        weights[0][:, :, :, [n1, n2]] = weights[0][:, :, :, [n2, n1]]
        # swap the matching input channels of the second conv layer
        weights[2][:, :, [n1, n2], :] = weights[2][:, :, [n2, n1], :]

    # second conv layer
    elif layer == 1:
        # swap kernels n1 and n2
        weights[2][:, :, :, [n1, n2]] = weights[2][:, :, :, [n2, n1]]

        # swap the rows of the first dense layer fed by these two kernels.
        # Slice ends are exclusive, so a block is [n*block_size, (n+1)*block_size);
        # the former "+block_size-1" end left the last row of each block unswapped.
        # NOTE(review): this assumes each kernel feeds one contiguous block of
        # rows. With a channels-last Flatten the rows of kernel n would instead
        # be n::num_kernels -- verify against the architecture in conv2.json.
        num_kernels = weights[2].shape[3]
        block_size = weights[4].shape[0] // num_kernels
        rows1 = slice(n1 * block_size, (n1 + 1) * block_size)
        rows2 = slice(n2 * block_size, (n2 + 1) * block_size)
        block1 = weights[4][rows1, :].copy()
        weights[4][rows1, :] = weights[4][rows2, :]
        weights[4][rows2, :] = block1

    # first dense layer
    elif layer == 2:
        # swap neurons n1 and n2
        weights[4][:, [n1, n2]] = weights[4][:, [n2, n1]]
        # swap the matching inputs of the next dense layer
        weights[6][[n1, n2], :] = weights[6][[n2, n1], :]

    # second dense layer
    elif layer == 3:
        # swap neurons n1 and n2
        weights[6][:, [n1, n2]] = weights[6][:, [n2, n1]]
        # swap the matching inputs of the next dense layer
        weights[8][[n1, n2], :] = weights[8][[n2, n1], :]

    mod2_new.set_weights(weights)
    return mod2_new

## Local search (simple hill climbing towards a smaller distance, to begin with)

In [7]:
def SaSD_local_search(mod1, mod2, n_iter=100, layer_probs=(1, 0, 0, 0)):
    """Hill-climbing search for a unit ordering of ``mod2`` that lowers its distance to ``mod1``.

    Starting from ``mod2`` as given, each step picks a random layer and two
    random units in it, swaps them via ``change_neurons`` and keeps the swap
    only if the mean of the per-layer distances strictly decreases.
    Progress is printed every 10 steps.

    Parameters
    ----------
    mod1 : reference model.
    mod2 : model whose units are permuted (the passed object is not modified).
    n_iter : int, default 100
        Number of search steps (the stopping criterion). Steps that draw the
        same unit twice are skipped but still count.
    layer_probs : sequence of 4 floats, default (1, 0, 0, 0)
        Probability of picking layer 0, 1, 2, 3. The default only ever touches
        the first conv layer; ``(0.1, 0.1, 0.4, 0.4)`` weights the layers by
        their number of units (64 + 64 + 256 + 256). The output layer cannot
        be re-ordered and is never picked.

    Returns
    -------
    np.ndarray
        Per-layer distances of the best ordering found.
    """
    # start with beginning solution (the way it is)
    dist = getScoreModels(mod1, mod2)

    # number of kernels/neurons per layer (last axis of each weight array),
    # read from the model instead of hard-coding 64 and 256
    layer_sizes = [w.shape[-1] for w in getWeightsArray(mod2)]

    for i in range(n_iter):

        # each few steps, print dist to see how it goes
        if i % 10 == 0:
            print("i =", i, ", dist =", dist)

        # go to a random neighbour: pick a layer, then two units of that layer
        layer = np.random.choice([0, 1, 2, 3], p=layer_probs)
        n1 = np.random.randint(layer_sizes[layer])
        n2 = np.random.randint(layer_sizes[layer])

        if n1 != n2:
            mod2_new = change_neurons(mod2, layer=layer, n1=n1, n2=n2)
            dist_new = getScoreModels(mod1, mod2_new)
            # only move to the neighbour if it is strictly better
            if np.mean(dist_new) < np.mean(dist):
                dist = dist_new
                mod2 = mod2_new
    return dist

# First-layer-first SaSD (actually reorders the weights of mod2!)

In [8]:
def _signDistanceMatrix(units1, units2):
    """Pairwise sign edit distances between the columns of two (n, k) arrays.

    Entry ``[j1, j2]`` equals ``editDistanceSigns(units1[:, j1], units2[:, j2])``:
    positions where both columns are zero are ignored, and the distance is the
    share of the remaining positions whose signs are not both negative or both
    positive (0 when no position remains).
    """
    def coincidences(mask1, mask2):
        # [j1, j2] = number of rows where both masks are True. The counts are
        # integers far below 2**53, so float64 products are exact.
        return np.dot(mask1.T.astype(np.float64), mask2.astype(np.float64))

    same = coincidences(units1 > 0, units2 > 0) + coincidences(units1 < 0, units2 < 0)
    nums = units1.shape[0] - coincidences(units1 == 0, units2 == 0)

    dist = np.zeros_like(nums)
    np.divide(nums - same, nums, out=dist, where=nums > 0)
    return dist


def compareLayers(layer1, layer2, num_outputs=10):
    """Best one-to-one matching between the units of two layers under sign distance.

    Parameters
    ----------
    layer1, layer2 : np.ndarray of identical shape
        Conv kernels (height, width, channels, kernels) or dense weights
        (neurons last layer, neurons this layer). Units are on the last axis.
    num_outputs : int, default 10
        A dense layer with exactly this many neurons is treated as the output
        layer, whose neurons must keep their order.

    Returns
    -------
    (score, row_ind, col_ind)
        ``score`` is the mean distance over the matched unit pairs. Unit
        ``row_ind[i]`` of ``layer1`` is matched with unit ``col_ind[i]`` of
        ``layer2``. For the output layer both index sequences are
        ``range(num_outputs)``.
    """
    assert layer1.shape == layer2.shape

    # k is nr of kernels/neurons
    k = layer1.shape[-1]

    # one column per unit; for conv kernels this is the flattened
    # (height, width, channels) block of each kernel
    units1 = np.asarray(layer1).reshape(-1, k)
    units2 = np.asarray(layer2).reshape(-1, k)

    # all k*k pairwise distances at once instead of a double Python loop
    editMatrix = _signDistanceMatrix(units1, units2)

    # output layer: only compute the distance, the order of output neurons is fixed
    if len(layer1.shape) != 4 and layer1.shape[1] == num_outputs:
        summed_dist = 0
        for j in range(num_outputs):
            summed_dist += float(editMatrix[j, j])
        return summed_dist / num_outputs, range(num_outputs), range(num_outputs)

    # minimum-cost assignment of layer2 units to layer1 units
    row_ind, col_ind = linear_sum_assignment(editMatrix)
    minCost = editMatrix[row_ind, col_ind].sum()

    return minCost / k, row_ind, col_ind



def compareModels(mod1, mod2):
    """First-layer-first SaSD: greedily re-order the units of ``mod2`` layer by layer.

    For every conv/dense layer, front to back:

    1. the inputs of the ``mod2`` layer are permuted to follow the unit order
       chosen for the previous layer,
    2. ``compareLayers`` finds the best matching of its units to those of
       ``mod1``,
    3. the units are re-ordered accordingly and stored as the new weights.

    Biases are copied unchanged. The weight list of ``mod2`` is assumed to
    alternate kernel/bias, so the kernel of layer ``k`` sits at index ``2*k``
    -- TODO confirm for architectures other than conv2.

    Parameters
    ----------
    mod1 : reference model.
    mod2 : model to be re-ordered (the passed object is not modified).

    Returns
    -------
    (editDistance, mod2_new)
        ``editDistance`` is an array with one distance per layer;
        ``mod2_new`` is a freshly loaded model carrying the re-ordered weights.
    """
    # weight arrays of the conv and dense layers
    weightsNN1 = getWeightsArray(mod1)
    weightsNN2 = getWeightsArray(mod2)

    # weights and biases of mod2; the re-ordered kernels are written into this list
    weightsNN2_new = mod2.get_weights()

    numLayers = len(weightsNN1)
    assert len(weightsNN2) == numLayers

    editDistance = np.zeros(numLayers)

    prevOrder = None      # unit order chosen for the previous layer
    prevWasConv = False   # whether the previous layer was a conv layer

    for k in range(numLayers):
        layerNN1 = weightsNN1[k]
        layerNN2 = weightsNN2[k].copy()
        isConv = len(layerNN1.shape) == 4

        # make the inputs of this layer follow the order of the previous layer
        if prevOrder is not None:
            if isConv:
                # conv after conv: permute the input channels
                layerNN2 = layerNN2[:, :, prevOrder, :]
            elif prevWasConv:
                # first dense layer after the last conv layer: every kernel feeds
                # one block of block_size rows. Blocks are the half-open ranges
                # [i*block_size, (i+1)*block_size); the former "+block_size-1"
                # slice end left the last row of each block in place.
                # NOTE(review): contiguous per-kernel blocks are assumed here.
                # With a channels-last Flatten the rows of kernel i would be
                # i::len(prevOrder) -- verify against the architecture.
                block_size = layerNN2.shape[0] // len(prevOrder)
                rows = (prevOrder[:, None] * block_size + np.arange(block_size)).ravel()
                layerNN2[:rows.size, :] = weightsNN2[k][rows, :]
            else:
                # dense after dense: permute the input rows
                layerNN2 = layerNN2[prevOrder, :]

        editDistance[k], _, order = compareLayers(layerNN1, layerNN2)
        prevOrder = np.asarray(order)
        prevWasConv = isConv

        # store the layer with its units (last axis) in the matched order;
        # this works for conv and dense layers alike
        weightsNN2_new[2 * k] = layerNN2[..., prevOrder]

    # same template files as used by change_neurons
    mod2_new = getModelFromFile("../tickets/conv2.json", "../tickets/zeros.h5")
    mod2_new.set_weights(weightsNN2_new)

    return editDistance, mod2_new

# Use both approaches (take greedy solution and try to get better with local search)

In [9]:
def SaSD_local_search_plus_first(mod1, mod2, n_iter=3000, layer_probs=(0.1, 0.1, 0.4, 0.4)):
    """Greedy first-layer-first matching followed by a local search.

    The start solution is the re-ordered model returned by ``compareModels``.
    Each step then swaps two random units of a random layer via
    ``change_neurons`` and keeps the swap if the mean per-layer distance does
    not get worse (ties are accepted). The current best distance is printed
    every 100 steps.

    Parameters
    ----------
    mod1 : reference model.
    mod2 : model whose units are permuted (the passed object is not modified).
    n_iter : int, default 3000
        Number of search steps (the stopping criterion). Steps that draw the
        same unit twice are skipped but still count.
    layer_probs : sequence of 4 floats, default (0.1, 0.1, 0.4, 0.4)
        Probability of picking layer 0, 1, 2, 3; the default is proportional
        to the number of units per layer (64 + 64 + 256 + 256 = 640). The
        output layer cannot be re-ordered and is never picked.

    Returns
    -------
    np.ndarray
        Per-layer distances of the best ordering found.
    """
    # start with beginning solution (take first-layer-first method)
    dist, mod2 = compareModels(mod1, mod2)

    print("orig dist is", dist)

    # number of kernels/neurons per layer (last axis of each weight array),
    # read from the model instead of hard-coding 64 and 256
    layer_sizes = [w.shape[-1] for w in getWeightsArray(mod2)]

    for i in range(n_iter):
        # every 100 steps print momentary best dist
        if i % 100 == 0:
            print(dist)

        # go to a random neighbour: pick a layer, then two units of that layer
        layer = np.random.choice([0, 1, 2, 3], p=layer_probs)
        n1 = np.random.randint(layer_sizes[layer])
        n2 = np.random.randint(layer_sizes[layer])

        # if it is not the same unit twice, swap them and compute the score
        if n1 != n2:
            mod2_new = change_neurons(mod2, layer=layer, n1=n1, n2=n2)
            dist_new = getScoreModels(mod1, mod2_new)
            # if the score is no worse than before, move to this neighbour
            if np.mean(dist_new) <= np.mean(dist):
                dist = dist_new
                mod2 = mod2_new

    return dist

## Extract saved Models

In [10]:
# function to get CNN from json and h5 files
def getModelFromFile(json_file, h5_file):
    """Build a model from a JSON architecture file and load its weights.

    Parameters
    ----------
    json_file : path of the file holding the model structure as JSON.
    h5_file : path of the weights file passed to ``model.load_weights``.

    Returns
    -------
    The model created by ``model_from_json`` with the weights loaded.
    """
    # read the model structure; the context manager closes the file even if
    # reading fails, and no local name shadows the ``json`` module
    with open(json_file, "r") as structure_file:
        loaded_json = structure_file.read()
    model = model_from_json(loaded_json)

    # load weights in model
    model.load_weights(h5_file)
    return model

In [11]:
def is_WT(his_WT, his_orig):
    """Decide whether a pruned network counts as a winning ticket (WT).

    Both arguments are training histories with a ``"val_loss"`` sequence.
    The ticket wins if its minimal validation loss is reached no later than
    in the original network and is less than 2% above the original minimum.
    """
    ticket_loss = his_WT["val_loss"]
    orig_loss = his_orig["val_loss"]

    reaches_min_no_later = np.argmin(ticket_loss) <= np.argmin(orig_loss)
    min_within_tolerance = np.min(ticket_loss) < 1.02 * np.min(orig_loss)

    return reaches_min_no_later and min_within_tolerance

In [12]:
def _load_tickets(folder, prefix, keep_if_WT):
    """Load the models of one ticket folder, filtered by the winning-ticket test.

    For each run number 0..19 the training histories
    ``his_orig_s0.1_nr<i>`` and ``his_<prefix>_s0.1_nr<i>`` are unpickled from
    ``../tickets/<folder>/``. The model ``<prefix>_s0.1_nr<i>.h5`` is loaded
    when ``is_WT(ticket history, original history)`` equals ``keep_if_WT``.

    NOTE: ``pickle.load`` can execute arbitrary code; only use it on history
    files you created yourself.
    """
    base = '../tickets/' + folder + '/'
    tickets = []
    for i in range(0, 20):
        # extract histories; ``with`` closes the files right after reading
        with open(base + 'his_orig_s0.1_nr' + str(i), "rb") as his_file:
            his_orig = pickle.load(his_file)
        with open(base + 'his_' + prefix + '_s0.1_nr' + str(i), "rb") as his_file:
            his_ticket = pickle.load(his_file)
        # WT check: min epoch same or earlier, min val_loss smaller or at most 2% higher
        if bool(is_WT(his_ticket, his_orig)) == keep_if_WT:
            tickets.append(getModelFromFile("../tickets/conv2.json",
                                            base + prefix + "_s0.1_nr" + str(i) + ".h5"))
    return tickets


# winning tickets per data set: keep a candidate only if it really is a WT
WTs_CIFAR = _load_tickets("WTs_CIFAR", "WT", True)
WTs_CINIC = _load_tickets("WTs_CINIC", "WT", True)
WTs_SVHN = _load_tickets("WTs_SVHN", "WT", True)

# random tickets: keep a candidate only if it is NOT a WT
randoms = _load_tickets("random", "random", False)
        

In [13]:
# Now we can start comparing!
# Only compare WTs_CIFAR 0 and 1 for a test run of 100 iterations
# (far too few, but it already takes some time).
# As called here, the search only swaps kernels of layer 0 (p = [1,0,0,0]).
SaSD_local_search(WTs_CIFAR[0], WTs_CIFAR[1])

i = 0 , dist = [0.91676019 0.90696023 0.97946739 0.97484517 0.9744076 ]
i = 10 , dist = [0.90596834 0.90667595 0.97946739 0.97484517 0.9744076 ]
i = 20 , dist = [0.88724792 0.90785737 0.97946739 0.97484517 0.9744076 ]
i = 30 , dist = [0.87565735 0.9078186  0.97946739 0.97484517 0.9744076 ]
i = 40 , dist = [0.8739515  0.90591506 0.97946739 0.97484517 0.9744076 ]
i = 50 , dist = [0.85855347 0.90639821 0.97946739 0.97484517 0.9744076 ]
i = 60 , dist = [0.85060135 0.90539502 0.97946739 0.97484517 0.9744076 ]
i = 70 , dist = [0.84383092 0.90454424 0.97946739 0.97484517 0.9744076 ]
i = 80 , dist = [0.83971199 0.90468196 0.97946739 0.97484517 0.9744076 ]
i = 90 , dist = [0.8368128  0.90580851 0.97946739 0.97484517 0.9744076 ]


array([0.8368128 , 0.90580851, 0.97946739, 0.97484517, 0.9744076 ])