## Extracting structural features from Winning Tickets

Load the weights of the previously created winning tickets. Then extract (some of) the following features:
* overall sign distribution in the conv1 layer
* distribution of the number of weights left per kernel in the conv1 layer
* distribution of weight vs. no weight for each position in the kernel, separately, for the conv1 layer
* conditional probabilities for positive and negative weights to be neighbours in a kernel of the conv1 layer.

In [1]:
# import necessary libraries

import tensorflow_datasets as tfds
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [2]:
def load_and_prep_cifar(batch_size, shuffle_size):
    """Load CIFAR-10 through TFDS and build preprocessed train/test pipelines.

    Args:
        batch_size: number of examples per batch.
        shuffle_size: size of the shuffle buffer.

    Returns:
        A tuple (train_dataset, test_dataset). Each dataset yields batches of
        (float32 image, one-hot target with depth 10).
    """
    # the dataset info object is returned by tfds but not needed here
    splits, _ = tfds.load(
        name="cifar10",
        split=["train", "test"],
        as_supervised=True,
        with_info=True,
    )
    raw_train, raw_test = splits

    def _to_model_input(img, target):
        # uint8 -> float32, followed by a rough rescaling:
        # img/128 - 1 maps 0 to -1 and 255 to roughly 0.992
        scaled = (tf.cast(img, tf.float32) / 128.) - 1.
        # class index -> one-hot vector
        return scaled, tf.one_hot(target, depth=10)

    def prepare_cifar10_data(ds):
        ds = ds.map(_to_model_input)
        # the preprocessing above is deterministic, so cache its result
        ds = ds.cache()
        ds = ds.shuffle(shuffle_size)
        ds = ds.batch(batch_size)
        return ds.prefetch(2)

    return raw_train.apply(prepare_cifar10_data), raw_test.apply(prepare_cifar10_data)

In [3]:
# create the model

class CNN2Model(tf.keras.Model):
    """Small CNN: two 3x3 conv layers, one max-pooling layer and three dense layers.

    The getters/setters below expose the layer weights in two groups:
    (conv1, conv2, dense3) and (dense1, dense2).
    """

    def __init__(self):
        super().__init__()

        # biases start at a tiny non-zero constant so they are not treated like pruned (exactly 0.0) values
        self.bias_in = tf.keras.initializers.Constant(value=0.0000000001)

        conv_kwargs = dict(
            filters=64,
            kernel_size=3,
            activation="relu",
            padding="same",
            kernel_initializer='glorot_uniform',
            bias_initializer=self.bias_in,
        )
        dense_kwargs = dict(
            kernel_initializer='glorot_uniform',
            bias_initializer=self.bias_in,
        )

        # NOTE(review): layers are created in the same order as before; saved weight
        # files presumably depend on this order -- confirm before reordering.
        # Shape comments assume 32x32 input images.
        self.conv1 = tf.keras.layers.Conv2D(**conv_kwargs)  # [batch_size,32,32,64]
        self.conv2 = tf.keras.layers.Conv2D(**conv_kwargs)  # [batch_size,32,32,64]
        self.maxpool = tf.keras.layers.MaxPooling2D(
            pool_size=(2, 2),
            strides=(2, 2),
            input_shape=(32, 32, 64),
        )  # [batch_size,16,16,64]
        self.flatten = tf.keras.layers.Flatten()  # [batch_size,16384]
        self.dense1 = tf.keras.layers.Dense(256, activation="relu", **dense_kwargs)  # [batch_size,256]
        self.dense2 = tf.keras.layers.Dense(256, activation="relu", **dense_kwargs)  # [batch_size,256]
        self.dense3 = tf.keras.layers.Dense(10, activation="softmax", **dense_kwargs)  # [batch_size,10]

    @tf.function
    def call(self, inputs):
        """Run the forward pass and return the softmax output of the last dense layer."""
        pipeline = (
            self.conv1,
            self.conv2,
            self.maxpool,
            self.flatten,
            self.dense1,
            self.dense2,
            self.dense3,
        )
        x = inputs
        for layer in pipeline:
            x = layer(x)
        return x

    def get_conv_weights(self):
        """Return the weights of conv1, conv2 and dense3 (in that order) as a tuple."""
        return tuple(layer.get_weights() for layer in (self.conv1, self.conv2, self.dense3))

    def set_conv_weights(self,weights_conv1, weights_conv2, weights_dense3):
        """Set the weights of conv1, conv2 and dense3 from the given weight lists."""
        targets = (self.conv1, self.conv2, self.dense3)
        values = (weights_conv1, weights_conv2, weights_dense3)
        for layer, layer_weights in zip(targets, values):
            layer.set_weights(layer_weights)

    def get_dense_weights(self):
        """Return the weights of dense1 and dense2 (in that order) as a tuple."""
        return tuple(layer.get_weights() for layer in (self.dense1, self.dense2))

    def set_dense_weights(self,weights_dense1, weights_dense2):
        """Set the weights of dense1 and dense2 from the given weight lists."""
        for layer, layer_weights in zip((self.dense1, self.dense2), (weights_dense1, weights_dense2)):
            layer.set_weights(layer_weights)

## Extracting features:

In [4]:
# load Isabel's stored weights into a model
train_dataset, test_dataset = load_and_prep_cifar(batch_size=60, shuffle_size=512)
model = CNN2Model()
# Call the model once on a single batch before loading the stored weights.
# Only the first batch is pulled from the pipeline; the previous
# list(train_dataset) iterated over (and kept in memory) the whole training set.
first_batch = next(iter(train_dataset))
model(first_batch[0])
model.load_weights("isabels_wts/WT_s0.1_nr0.h5")

# get the pruning rate: fraction of all model weights (everything returned by
# model.get_weights()) that are not exactly zero
all_weights = model.get_weights()
# 1 for every non-zero entry, 0 for every pruned entry, over all weight arrays;
# built as one NumPy array instead of extending a Python list element by element
int_weights = np.concatenate([w.astype(bool).astype(int).ravel() for w in all_weights])
pruned_amount = np.mean(int_weights)
print(f"The sparse model is pruned to {pruned_amount} of its original size")

# get kernel weights of the conv1 layer in a neat numpy array
# get_conv_weights()[0] is the conv1 weight list; its first entry is the kernel
conv1_weights = model.get_conv_weights()[0][0]
print("conv1 shape: ",np.shape(conv1_weights))
# move the filter axis to the front so that axis 0 indexes the individual kernels
conv1_weights_t = np.transpose(conv1_weights, (3, 0, 1, 2))
print("conv1 transposed shape: ",conv1_weights_t.shape)

The sparse model is pruned to 0.08808055156612289 of its original size
conv1 shape:  (3, 3, 3, 64)
conv1 transposed shape:  (64, 3, 3, 3)


In [18]:
# per-position survival rate: for every position of the kernel, the fraction of
# kernels (axis 0) whose weight at that position is non-zero, i.e. not pruned

unpruned_mask = conv1_weights_t != 0
pos_probs = unpruned_mask.mean(axis=0)
print("Probability that the weight is not pruned for each position in 3*3*3 kernel: \n",pos_probs)

Probability that the weight is not pruned for each position in 3*3*3 kernel: 
 [[[0.265625 0.3125   0.25    ]
  [0.34375  0.328125 0.359375]
  [0.296875 0.296875 0.265625]]

 [[0.28125  0.234375 0.3125  ]
  [0.328125 0.390625 0.40625 ]
  [0.40625  0.421875 0.34375 ]]

 [[0.171875 0.1875   0.28125 ]
  [0.375    0.265625 0.359375]
  [0.34375  0.328125 0.3125  ]]]


In [19]:
# extract distribution of signs

# count strictly positive / strictly negative entries; exact zeros (pruned weights) are ignored
pos_values = int(np.count_nonzero(conv1_weights > 0.0))
neg_values = int(np.count_nonzero(conv1_weights < 0.0))
# guard the ratio: a heavily pruned layer may have no negative weights left,
# which previously raised a ZeroDivisionError
if neg_values:
    ratio = pos_values / neg_values
else:
    ratio = float("inf") if pos_values else float("nan")
print(f"There are {pos_values} positive weights and {neg_values} negative weights in the conv1 layer. Therfore the ratio is {ratio}.")

There are 361 positive weights and 181 negative weights in the conv1 layer. Therfore the ratio is 1.9944751381215469.


In [7]:
# TODO: extract the distribution of the number of weights left per kernel


In [8]:
# TODO: do this when averaging over all winning tickets