## CP-decomposition with tensor power method
This notebook runs through an example process of CP decomposition. In this case, we will use ResNet50. The first thing to do is to load the model and the test dataset.

In [None]:
import tensorflow as tf
import numpy as np

# Network selector: exactly one of these assignments is left uncommented.
# Later cells branch on this string.
#mod_name = 'mobilenet'
mod_name = 'resnet50'
#mod_name = 'vgg16'

# Only the last layer of the loaded Keras model is kept as `model`; it is
# treated as a model itself below (its `.layers` are iterated).
# Presumably the saved file wraps the bare network behind extra layers --
# TODO confirm against how the .h5 file was exported.
# NOTE(review): the path is fixed to ResNet50.h5 regardless of mod_name.
model = tf.keras.models.load_model('../models/ResNet50.h5').layers[-1]

In [None]:
import imagenet

# Per-network input preprocessing, keyed by mod_name. Every entry is a lambda
# so that nothing is looked up or executed for networks that are not selected.
_preprocess_by_model = {
    'mobilenet': lambda img: tf.keras.applications.mobilenet_v2.preprocess_input(img),
    'resnet50': lambda img: tf.nn.bias_add(img, [-123.68, -116.779, -103.939]),
    'vgg16': lambda img: tf.keras.applications.imagenet_utils.preprocess_input(
        img, mode='torch'),
}

# load_ds is called with (224,224) -- presumably the target image size.
train_ds = imagenet.load_ds((224,224))

# An unknown mod_name leaves the dataset without any preprocessing step.
_preprocess = _preprocess_by_model.get(mod_name)
if _preprocess is not None:
    train_ds = train_ds.map(lambda x, y: (_preprocess(x), y),
                            num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Cache the preprocessed elements, then shuffle with a 5000-element buffer and
# repeat indefinitely (consumers bound each pass with an explicit step count).
train_ds = train_ds.cache().shuffle(5000).repeat()

This decomposition method has two phases: rank estimation and the actual decomposition. To estimate the ranks, we first decompose the whole network, assigning a rank of 5 to every layer.

In [None]:
# Initial rank table for the loss measurements: every Conv2D / Dense layer
# starts at rank 5. DepthwiseConv2D layers are explicitly left out.
laynames = [
    layer.name
    for layer in model.layers
    if isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense))
    and not isinstance(layer, tf.keras.layers.DepthwiseConv2D)
]
ranks = {name: 5 for name in laynames}

In [None]:
dsize = 50000
orig_acc = 0.9245 #0.9245 resnet 0.9046 mobnet 0.9106 vgg
# Uncomment to measure orig_acc instead of using the stored value:
#model.compile(loss='sparse_categorical_crossentropy',
#              metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
#_, orig_acc = model.evaluate(train_ds.batch(64), steps=dsize//64+1)

# Accuracy drop recorded per layer, kept apart for Conv2D layers (c_losses)
# and all other layers (d_losses).
c_losses = {}
d_losses = {}
old_model = model

bsz = 64
steps = dsize // bsz + 1  # one pass over dsize samples, rounded up

# Stage i decomposes the first i+1 layers at their current rank, starting from
# the model produced by the previous stage.
for i in range(len(ranks)):
    curr_ranks = {name: ranks[name] for name in laynames[:i + 1]}
    dec_model = CP_optimize(old_model, curr_ranks)
    dec_model.compile(optimizer=tf.keras.optimizers.SGD(1e-4, momentum=0.9),
                      loss='sparse_categorical_crossentropy',
                      metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
    ds = train_ds.batch(bsz)
    # Fine-tune for one pass, then measure the accuracy reached at this stage.
    dec_model.fit(ds, steps_per_epoch=steps)
    _, stage_acc = dec_model.evaluate(ds, steps=steps)

    # The drop is attributed to the layer added at this stage; its type is
    # read from the previous-stage model.
    newest = laynames[i]
    is_conv = isinstance(old_model.get_layer(newest), tf.keras.layers.Conv2D)
    bucket = c_losses if is_conv else d_losses
    bucket[newest] = orig_acc - stage_acc

    # Hand the decomposed model to the next stage and drop the extra name.
    old_model = dec_model
    del dec_model


In this notebook the accuracy of the original network and the accuracy losses for each layer are already measured to save time, but the code used is included in case you want to try again with something different. 

In [None]:
# Precalculated losses for the imagenet val set, use them if you want to save time.
# Each entry maps a network name to (conv-layer losses, dense-layer losses).
# The key order inside each dict is kept as-is because later cells iterate them.
_precalculated = {
    'mobilenet': (
        {'block_5_expand': 0.4155, 'block_2_project': 0.8937, 'block_12_expand': 0.8193,
         'block_1_expand': 0.4621, 'block_12_project': 0.8844, 'block_4_expand': 0.8683,
         'block_13_project': 0.8988, 'block_11_expand': 0.8746, 'block_6_project': 0.8973,
         'block_3_project': 0.8972, 'block_9_expand': 0.3242, 'block_7_project': 0.7914,
         'block_10_expand': 0.5996, 'block_14_expand': 0.8979, 'block_10_project': 0.8935,
         'block_7_expand': 0.8828, 'block_14_project': 0.8958, 'block_1_project': 0.8962,
         'Conv_1': 0.8895, 'block_4_project': 0.7647, 'Conv1': 0.0924, 'block_13_expand': 0.8577,
         'block_3_expand': 0.7847, 'expanded_conv_project': 0.8981, 'block_9_project': 0.3996,
         'block_8_project': 0.4170, 'block_16_expand': 0.8765, 'block_2_expand': 0.8829,
         'block_5_project': 0.3563, 'block_6_expand': 0.5946, 'block_15_project': 0.8926,
         'block_8_expand': 0.5607, 'block_16_project': 0.8960, 'block_15_expand': 0.8879,
         'block_11_project': 0.8817},
        {'predictions': 0.8806},
    ),
    'resnet50': (
        {'conv4_block1_0_conv': 0.2273, 'conv2_block1_1_conv': -0.0045, 'conv2_block2_2_conv': -0.0182,
         'conv2_block3_2_conv': -0.0207, 'conv2_block1_3_conv': -0.0085, 'conv4_block1_2_conv': 0.0539,
         'conv5_block2_2_conv': 0.6786, 'conv3_block3_1_conv': -0.0103, 'conv4_block4_2_conv': 0.2060,
         'conv4_block3_3_conv': 0.1362, 'conv5_block3_1_conv': 0.7771, 'conv2_block2_3_conv': -0.0261,
         'conv3_block4_1_conv': 0.0084, 'conv4_block3_2_conv': 0.1776, 'conv2_block3_1_conv': -0.0318,
         'conv2_block2_1_conv': -0.0207, 'conv1_conv': 0.0042, 'conv3_block3_3_conv': -0.0166,
         'conv3_block2_2_conv': 0.0179, 'conv5_block1_0_conv': 0.6709, 'conv3_block2_1_conv': 0.0093,
         'conv4_block1_1_conv': 0.0247, 'conv4_block5_2_conv': 0.2173, 'conv4_block4_3_conv': 0.1755,
         'conv4_block2_3_conv': 0.1486, 'conv4_block6_3_conv': 0.2816, 'conv3_block4_2_conv': 0.0310,
         'conv5_block3_2_conv': 0.7877, 'conv3_block3_2_conv': -0.0064, 'conv4_block2_2_conv': 0.1924,
         'conv4_block6_1_conv': 0.2759, 'conv3_block4_3_conv': 0.0094, 'conv5_block3_3_conv': 0.7747,
         'conv2_block3_3_conv': -0.0290, 'conv3_block1_1_conv': -0.0330, 'conv4_block4_1_conv': 0.1903,
         'conv4_block1_3_conv': 0.1939, 'conv3_block1_3_conv': 0.0232, 'conv4_block5_1_conv': 0.2371,
         'conv5_block2_1_conv': 0.6987, 'conv4_block5_3_conv': 0.1825, 'conv4_block6_2_conv': 0.3194,
         'conv3_block1_0_conv': 0.0493, 'conv5_block1_3_conv': 0.6562, 'conv5_block2_3_conv': 0.6573,
         'conv5_block1_1_conv': 0.4090, 'conv2_block1_2_conv': -0.0043, 'conv3_block2_3_conv': 0.0015,
         'conv4_block2_1_conv': 0.1752, 'conv2_block1_0_conv': -0.0003, 'conv3_block1_2_conv': -0.0163,
         'conv5_block1_2_conv': 0.4568, 'conv4_block3_1_conv': 0.1720},
        {'predictions': 0.8903},
    ),
    'vgg16': (
        {'block5_conv3': 0.8361, 'block1_conv1': -0.0190, 'block1_conv2': 0.0200,
         'block5_conv2': 0.8152, 'block5_conv1': 0.7409, 'block3_conv1': 0.1064,
         'block4_conv2': 0.6069, 'block2_conv2': 0.0401, 'block3_conv2': 0.1969,
         'block4_conv3': 0.6990, 'block4_conv1': 0.4949, 'block3_conv3': 0.2242,
         'block2_conv1': 0.0262},
        {'predictions': 0.8725, 'fc2': 0.8683, 'fc1': 0.8749},
    ),
}

# For any other mod_name, c_losses / d_losses are left untouched.
if mod_name in _precalculated:
    c_losses, d_losses = _precalculated[mod_name]


With the calculated losses, we assign a rank to each layer, proportional to its loss. Convolutional layers and FC (Dense) layers have their ranks assigned separately. For Conv layers, we give an average rank of 150 for the network, whereas the average rank for FC layers is 300, just like in the original paper. Keep in mind that later in the process we will detect and NOT decompose the layers that would actually be slowed down by the decomposition, so the average rank could be higher than the one specified.

In [None]:
# rank calculation
def _allocate_ranks(losses, avg_rank, min_rank=5):
    """Split a rank budget of avg_rank * len(losses) among layers by loss.

    Every layer first receives the integer part of its loss-proportional share
    of the budget, but never less than min_rank. Whatever is left of the budget
    is then handed out one rank at a time to the layer with the highest
    Sainte-Lague quotient loss / (2 * rank + 1). If the min_rank floor already
    used up the budget, nothing more is handed out.

    Args:
        losses: dict mapping layer name -> measured accuracy loss.
        avg_rank: target average rank per layer.
        min_rank: lower bound applied to every layer's rank.

    Returns:
        dict mapping layer name -> assigned rank, in the key order of losses.

    Raises:
        ZeroDivisionError: if losses is non-empty and its values sum to zero.
    """
    budget = avg_rank * len(losses)
    total_loss = sum(losses.values())
    alloc = {name: max(int(budget * loss / total_loss), min_rank)
             for name, loss in losses.items()}

    remaining = budget - sum(alloc.values())
    while remaining > 0:
        # max() always yields a layer (the first one on ties), so the winner
        # can never be unbound or left over from a previous allocation, even
        # when no quotient is positive.
        winner = max(losses, key=lambda name: losses[name] / (2 * alloc[name] + 1))
        alloc[winner] += 1
        remaining -= 1
    return alloc


# Every layer needs a measured loss in one of the two dicts.
unmeasured = [name for name in laynames
              if name not in c_losses and name not in d_losses]
if unmeasured:
    raise KeyError('no measured loss for layers: %s' % unmeasured)

# Conv layers share an average rank of 150, the remaining (Dense) layers 300.
# Conv results are merged last so a name present in both dicts counts as conv.
new_ranks = _allocate_ranks(d_losses, 300)
new_ranks.update(_allocate_ranks(c_losses, 150))
for name in laynames:
    ranks[name] = new_ranks[name]

# eliminate from the process layers that would be slowed down by it
# Keras dxdxCxN -> dxdxR CxR RxN --- NCD2WoHo > R(CWiHi+D2WoHo+NWoHo) speedup
for name in laynames:
    layer = model.get_layer(name)
    R = ranks[name]
    if isinstance(layer, tf.keras.layers.Conv2D):
        _, Wi, Hi, C = layer.input_shape
        _, Wo, Ho, N = layer.output_shape
        # Kernel area; equals D**2 for square kernels but also covers
        # rectangular ones.
        kernel_h, kernel_w = layer.kernel_size
        D2 = kernel_h * kernel_w
        if R * (C*Wi*Hi + D2*Wo*Ho + N*Wo*Ho) >= N*C*D2*Wo*Ho:
            ranks.pop(name)
    else:
        _, C = layer.input_shape
        _, N = layer.output_shape
        if R >= C*N/(C+N): # K < CN/(C+N) to accelerate
            ranks.pop(name)

print('Ranks:\n', ranks)

Now we decompose the network again, this time with the definitive ranks, optimizing each layer one by one and training for a single epoch after each one for fine-tuning.

In [None]:
# Network decomposition

old_model = model
del model  # only old_model keeps the original network referenced from here on

# Layers that still have a rank, in network order. Stage k decomposes the
# first k of them, starting from the model produced by the previous stage.
to_decompose = [name for name in laynames if name in ranks]
n_stages = len(ranks)
bsz = 64

for stage in range(1, n_stages + 1):
    print('Stage %d/%d' % (stage, n_stages), 16*'-')
    curr_ranks = {name: ranks[name] for name in to_decompose[:stage]}

    dec_model = CP_optimize(old_model, curr_ranks)
    del old_model
    dec_model.compile(optimizer=tf.keras.optimizers.SGD(1e-4, momentum=0.9),
                      loss='sparse_categorical_crossentropy',
                      metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
    ds = train_ds.batch(bsz)
    # One fine-tuning pass over dsize samples after each newly decomposed layer.
    dec_model.fit(ds, steps_per_epoch=dsize//bsz+1)

    # After the last stage dec_model stays bound: it is the final network.
    if stage < n_stages:
        old_model = dec_model
        del dec_model
print('Done')

We finally add the preprocessing layers to the model and save it.

In [None]:
dec_model.summary()

# Wrap the decomposed network in a model that takes uint8 input of shape
# (224,224,3), casts it to float32 and applies the same per-network
# preprocessing that was applied to the training dataset, then save it.
# `tf.keras` is used throughout because a bare `keras` name is never imported.
netin = tf.keras.Input((224,224,3), dtype='uint8')
x = tf.cast(netin, 'float32')
if mod_name == 'mobilenet':
    x = tf.keras.applications.mobilenet_v2.preprocess_input(x)
elif mod_name == 'resnet50':
    x = tf.nn.bias_add(x, [-123.68, -116.779, -103.939])
elif mod_name == 'vgg16':
    x = tf.keras.applications.imagenet_utils.preprocess_input(x, mode='torch')
x = dec_model(x)
dec_model = tf.keras.Model(inputs=netin, outputs=x)
dec_model.save('%s_CP' % mod_name)