In [1]:
import tensorflow as tf
import numpy as np

In [2]:
class Conv2DDecomp(tf.keras.layers.Layer):
    def __init__(self, filters, rank, kernel_size, strides=(1,1), 
                 padding='valid', activation=None, use_bias=True, **kwargs):
        super(Conv2DDecomp, self).__init__(**kwargs)
        self.filters = filters
        self.rank = rank
        self.kernel_size = 2*(kernel_size,) if isinstance(kernel_size, int) else kernel_size
        self.strides = 2*(strides,) if isinstance(strides, int) else strides
        self.padding = padding
        self.activation = activation
        self.use_bias = use_bias
        self.in_layer = tf.keras.layers.Conv2D(rank, kernel_size=1, activation=None, 
                                              use_bias=False)
        if self.kernel_size != (1,1) or self.strides != (1,1):
            self.conv_layer = tf.keras.layers.DepthwiseConv2D(kernel_size=kernel_size, strides=strides, 
                                                           padding=padding, activation=None, use_bias=False)
        else:
            self.conv_layer = None
        self.out_layer = tf.keras.layers.Conv2D(filters, kernel_size=1, activation=activation, 
                                              use_bias=use_bias)

    def call(self, inputs):
        if self.conv_layer:
            return self.out_layer(self.conv_layer(self.in_layer(inputs)))
        else:
            return self.out_layer(self.in_layer(inputs))
    
    def get_config(self):
        config = super(Conv2DDecomp, self).get_config()
        config.update({'filters':self.filters, 'rank':self.rank, 'kernel_size':self.kernel_size, 
                       'strides':self.strides, 'padding':self.padding, 'activation':self.activation, 
                       'use_bias':self.use_bias})
        return config
    
    
class DenseDecomp(tf.keras.layers.Layer):
    def __init__(self, units, rank, activation=None, use_bias=True, **kwargs):
        super(DenseDecomp, self).__init__(**kwargs)
        self.units = units
        self.rank = rank
        self.activation = activation
        self.use_bias = use_bias
        self.in_layer = tf.keras.layers.Dense(rank, activation=None, use_bias=False)
        self.out_layer = tf.keras.layers.Dense(units, activation=activation, use_bias=use_bias)

    def call(self, inputs):
        return self.out_layer(self.in_layer(inputs))
    
    def get_config(self):
        config = super(DenseDecomp, self).get_config()
        config.update({'units':self.units, 'rank':self.rank, 
                       'activation':self.activation, 
                       'use_bias':self.use_bias})
        return config
    

In [46]:
a = np.arange(12).reshape((3,2,2))
b = np.arange(4).reshape((2,2))
print(a)
print(b)
c = np.tensordot(a,np.arange(1,3),axes=0) #c[1,1,0] = a[:,1,1].b[0,:]
print(c)
#array([[['a[:,0,0].b[0,:]', 'a[:,0,0].b[1,:]'],
#        ['a[:,1,0].b[0,:]', 'a[:,1,0].b[1,:]']],
#       [['a[:,0,1].b[0,:]', 'a[:,0,1].b[1,:]'],
#        ['a[:,1,1].b[0,:]', 'a[:,1,1].b[1,:]']]]

[[[ 0  1]
  [ 2  3]]

 [[ 4  5]
  [ 6  7]]

 [[ 8  9]
  [10 11]]]
[[0 1]
 [2 3]]
[[[[ 0  0]
   [ 1  2]]

  [[ 2  4]
   [ 3  6]]]


 [[[ 4  8]
   [ 5 10]]

  [[ 6 12]
   [ 7 14]]]


 [[[ 8 16]
   [ 9 18]]

  [[10 20]
   [11 22]]]]


In [3]:
def norm_dist(a, b):
    return np.sum((a - b) ** 2) ** 0.5

# Keras dxdxCxN -> dxdxR CxR RxN

def CP_decompose(weight_tensor, rank): # use TPM to decompose 4d tensor
    X = weight_tensor.copy()
    U = np.empty(X.shape[:2] + (rank,), 'float32')
    V = np.empty((X.shape[2], rank), 'float32')
    W = np.empty((X.shape[3], rank), 'float32')
    for i in range(rank):
        Ui = np.random.randn(np.prod(U.shape[:2])).reshape(U.shape[:2])
        Ui /= np.sum(Ui**2) ** 0.5
        Vi = np.random.randn(V.shape[0])
        Vi /= np.sum(Vi**2) ** 0.5
        Wi = np.random.randn(W.shape[0])
        Wi /= np.sum(Wi**2) ** 0.5
        while True:
            Up, Vp, Wp = Ui.copy(), Vi.copy(), Wi.copy()
            Ui = np.tensordot(np.tensordot(X, Vi, axes=(2,0)), Wi, axes=(2,0)) # X x2 Vi x3 Wi
            Ui /= np.sum(Ui**2) ** 0.5
            Vi = np.tensordot(np.tensordot(X, Ui, axes=((0,1),(0,1))), Wi, axes=(1,0)) # X x1 Ui x3 Wi
            Vi /= np.sum(Vi**2) ** 0.5
            Wi = np.tensordot(np.tensordot(X, Ui, axes=((0,1),(0,1))), Vi, axes=(0,0)) # X x1 Ui x2 Vi
            Wi /= np.sum(Wi**2) ** 0.5
            if norm_dist(Ui,Up) < 1e-3 and norm_dist(Vi,Vp) < 1e-3 and norm_dist(Wi,Wp) < 1e-3:
                break
        d = np.dot(np.tensordot(np.tensordot(X, Ui, axes=((0,1),(0,1))), Vi, axes=(0,0)), Wi) # d 
        U[...,i] = Ui
        V[:,i] = Vi * d ** 0.5
        W[:,i] = Wi * d ** 0.5
        X -= np.tensordot(np.tensordot(Ui, Vi, axes=0), Wi, axes=0) 
    
    return U.reshape(U.shape+(1,)), V.reshape((1,1)+V.shape), W.T.reshape((1,1)+W.T.shape)

def SVD_decompose(weight_tensor, rank):
    conv = len(weight_tensor.shape) == 4
    weight_tensor = weight_tensor.squeeze()
    U, D, Q = np.linalg.svd(weight_tensor, full_matrices=False)
    D = np.sqrt(D[:rank])          
    U = U[:,:rank] * D
    Q = np.transpose(Q[:rank,:].T * D)
    if conv:
        return U.reshape((1,1)+U.shape), Q.reshape((1,1)+Q.shape)
    else:
        return U, Q
        

In [7]:
a = np.array([[1,2],[3,4]])
b = np.array([0.5, 0.1])
a * b

array([[0.5, 0.2],
       [1.5, 0.4]])

In [4]:
def Decomp_clone(layer, ranks, fullcopy=False):
    if layer.name in ranks and not isinstance(layer, (Conv2DDecomp, DenseDecomp)):
        config = layer.get_config()
        if isinstance(layer, tf.keras.layers.Conv2D):
            w_shape = layer.weights[0].shape
            name = config['name']
            kernel = config['kernel_size']
            stride = config['strides']
            pad = config['padding']
            act = config['activation']
            bias = config['use_bias']
            return Conv2DDecomp(w_shape[-1], ranks[layer.name], kernel_size=kernel, strides=stride, 
                                padding=pad, activation=act, use_bias=bias, name=name)
        elif isinstance(layer, tf.keras.layers.Dense):
            w_shape = layer.weights[0].shape
            name = config['name']
            act = config['activation']
            bias = config['use_bias']
            return DenseDecomp(w_shape[-1], ranks[layer.name], activation=act, use_bias=bias, name=name)
    elif fullcopy:
        return layer.__class__.from_config(layer.get_config())
    else:
        return layer

def CP_optimize(model, layer_ranks=5, fullcopy=False):
    ranks = layer_ranks if isinstance(layer_ranks, dict) else dict()
    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Conv2D):
            #kernel_shape = layer.weights[0].shape #TODO detectar si capa es mas rapida o no (?)
            if isinstance(layer, tf.keras.layers.DepthwiseConv2D):
                pass
            elif isinstance(layer_ranks, int):
                ranks[layer.name] = layer_ranks
    clone = tf.keras.models.clone_model(model, clone_function=lambda l: Decomp_clone(l, ranks, fullcopy))
    
    # prueba para rangos definitivos
    
    for i in range(len(clone.layers)):
        layer = model.get_layer(index=i)
        clone_layer = clone.get_layer(index=i)
        if layer.name in ranks and not isinstance(layer, (Conv2DDecomp, DenseDecomp)):
            if hasattr(clone_layer, 'conv_layer') and clone_layer.conv_layer:
                U, V, W = CP_decompose(layer.weights[0].numpy(), ranks[layer.name])
                clone_layer.conv_layer.set_weights([U])
            else:
                V, W = SVD_decompose(layer.weights[0].numpy(), ranks[layer.name])
            clone_layer.in_layer.set_weights([V])
            if layer.get_config()['use_bias']:
                b = layer.weights[1].numpy()
                clone_layer.out_layer.set_weights([W,b])
            else:
                clone_layer.out_layer.set_weights([W])
        else:
            clone_layer.set_weights(layer.get_weights())

    return clone


In [12]:
#old_model = model
model = tf.keras.models.load_model('../models/ResNet50.h5')
model = model.layers[-1]

W0305 18:51:24.957443 140364455229184 hdf5_format.py:187] No training configuration found in the save file, so the model was *not* compiled. Compile it manually.


In [15]:
c_model.compile(optimizer=tf.keras.optimizers.SGD(1e-4, momentum=0.9),
          loss='sparse_categorical_crossentropy',
          metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
bsz = 8
ds = train_ds.batch(bsz)
_, acc = c_model.evaluate(ds, steps=50000//bsz+1) 



In [16]:
c_losses['block4_conv2'] = 0.9106 - acc

In [16]:
netin = tf.keras.Input((224,224,3), dtype='uint8')
x = tf.cast(netin, 'float32')
x = tf.nn.bias_add(x, [-123.68, -116.779, -103.939])
#x = tf.keras.applications.mobilenet_v2.preprocess_input(x)
#x = tf.keras.applications.imagenet_utils.preprocess_input(x, mode='torch')
x = dec_model(x)
cp_model = tf.keras.Model(inputs=netin, outputs=x)
cp_model.save('ResNet50_CP')

In [20]:
del c_model

In [None]:
old_model

In [122]:
test = (mid(x_test[:1,:]).numpy() - decmid(x_test[:1,:]).numpy())# chequear tensores de pesos

In [18]:
import pdb
weights = model.layers[2].weights[0].numpy()
pdb.set_trace()
A,B,C = CP_decompose(weights, 5)

--Return--
> <ipython-input-18-856b239d1747>(3)<module>()->None
-> pdb.set_trace()
(Pdb) b 4
Breakpoint 11 at <ipython-input-18-856b239d1747>:4
(Pdb) c
> <ipython-input-18-856b239d1747>(4)<module>()->None
-> A,B,C = CP_decompose(weights, 5)
(Pdb) s
--Call--
> <ipython-input-17-2c4b09038d1e>(6)CP_decompose()
-> def CP_decompose(weight_tensor, rank): # use TPM to decompose 4d tensor
(Pdb) b 32
Breakpoint 12 at <ipython-input-17-2c4b09038d1e>:32
(Pdb) c
> <ipython-input-17-2c4b09038d1e>(32)CP_decompose()
-> X -= np.tensordot(np.tensordot(Ui, Vi, axes=0), Wi, axes=0)
(Pdb) b 34
Breakpoint 13 at <ipython-input-17-2c4b09038d1e>:34
(Pdb) disable 12
Disabled breakpoint 12 at <ipython-input-17-2c4b09038d1e>:32
(Pdb) c
> <ipython-input-17-2c4b09038d1e>(34)CP_decompose()
-> return U.reshape(U.shape+(1,)), V.reshape((1,1)+V.shape), W.T.reshape((1,1)+W.T.shape)
(Pdb) np.all(Vi==V[:,4])
False
(Pdb) V.shape
(32, 5)
(Pdb) rank
5
(Pdb) Vi
array([-1.52300991e-02,  1.69562959e-02, -2.28119212e-02,  4.016

BdbQuit: 

In [6]:
import imagenet_full as imagenet

train_ds = imagenet.load_ds((224,224))
#train_ds = train_ds.map(lambda x,y: (tf.nn.bias_add(x, [-123.68, -116.779, -103.939]) ,y), 
#train_ds = train_ds.map(lambda x,y: (tf.keras.applications.mobilenet_v2.preprocess_input(x), y),
train_ds = train_ds.map(lambda x,y: (tf.keras.applications.imagenet_utils.preprocess_input(x,mode='torch'), y),
                       num_parallel_calls=tf.data.experimental.AUTOTUNE)
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(5000).repeat()

In [6]:
model = tf.keras.models.load_model('VGG16bn_CP')

In [7]:
#import pdb
#pdb.set_trace()
ranks = dict()
laynames = []
for layer in model.layers:
    if isinstance(layer, (tf.keras.layers.Conv2D, tf.keras.layers.Dense)):
        #kernel_shape = layer.weights[0].shape #TODO detectar si capa es mas rapida o no (?)
        if isinstance(layer, tf.keras.layers.DepthwiseConv2D):
            pass
        else: #isinstance(layer_ranks, int):
            ranks[layer.name] = 5
            laynames.append(layer.name)

In [None]:
#import pdb

dsize = 50000      
orig_acc = 0.9245 #0.9245 resnet 0.9046 mobnet 0.9106 vgg
#model.compile(loss='sparse_categorical_crossentropy',
#              metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
#_, orig_acc = model.evaluate(train_ds.batch(64), steps=dsize//64+1)
c_losses = dict()
d_losses = dict()
old_model = model
#del model
#pdb.set_trace()
for i in range(len(ranks)):
    #if i<9:
    #    continue
    curr_ranks = dict()
    for j in laynames[0:i+1]:
        curr_ranks[j] = ranks[j]
    dec_model = CP_optimize(old_model, curr_ranks)
    dec_model.compile(optimizer=tf.keras.optimizers.SGD(1e-4, momentum=0.9),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
    bsz = 64
    ds = train_ds.batch(bsz)
    dec_model.fit(ds, steps_per_epoch=dsize//bsz+1) 
    _, acc = dec_model.evaluate(ds, steps=dsize//bsz+1) 
    #hist.history['sparse_top_k_categorical_accuracy'][0]
    if isinstance(old_model.get_layer(laynames[i]), tf.keras.layers.Conv2D):
        c_losses[laynames[i]] = orig_acc - acc
    else:
        d_losses[laynames[i]] = orig_acc - acc
    del old_model
    old_model = dec_model
    del dec_model
#del old_model



In [15]:
len(c_losses)

53

In [31]:
old_model.save('vggtest.h5')

In [14]:
print(c_losses)
print(d_losses)

{'conv5_block1_1_conv': 0.4090348596572876, 'conv1_conv': 0.004223477363586414, 'conv4_block6_3_conv': 0.28155721378326415, 'conv4_block6_2_conv': 0.3194208855628967, 'conv3_block4_1_conv': 0.008379470825195301, 'conv4_block4_1_conv': 0.19028485965728759, 'conv3_block4_3_conv': 0.009438466548919666, 'conv2_block3_2_conv': -0.020652640342712414, 'conv4_block1_3_conv': 0.1938614363670349, 'conv4_block4_3_conv': 0.17547906589508055, 'conv4_block2_1_conv': 0.1751793737411499, 'conv2_block2_1_conv': -0.020732570171356213, 'conv3_block1_1_conv': -0.033020782947540295, 'conv3_block1_2_conv': -0.016316819667816174, 'conv3_block3_2_conv': -0.006446290493011486, 'conv2_block1_1_conv': -0.004508126258850109, 'conv3_block3_1_conv': -0.010282624244689953, 'conv2_block2_2_conv': -0.01823495674133302, 'conv2_block3_3_conv': -0.02902458953857423, 'conv3_block2_1_conv': 0.009278606891632069, 'conv5_block1_0_conv': 0.6709034557342529, 'conv5_block2_2_conv': 0.6786160434484482, 'conv4_block3_3_conv': 0.1

In [8]:
#cmobnet {'block_13_project': 0.8987771887332201, 'block_5_expand': 0.4155011475086212, 'block_13_expand': 0.8576966263353825, 'block_6_project': 0.8972786273807287, 'block_7_expand': 0.8827925343066454, 'block_8_expand': 0.5607217252254486, 'block_16_expand': 0.8765185568481684, 'block_12_expand': 0.8193134739995003, 'block_4_project': 0.7647058963775635, 'block_9_project': 0.3996163606643677, 'block_3_project': 0.8971787230111659, 'block_6_expand': 0.5945892035961151, 'block_10_expand': 0.5996443629264832, 'block_3_expand': 0.7847266793251038, 'block_5_project': 0.356317937374115, 'Conv1': 0.09235137701034546, 'block_2_expand': 0.8828524760901928, 'block_12_project': 0.8843710180372, 'block_2_project': 0.8937020609155297, 'block_14_project': 0.8958200272172689, 'block_16_project': 0.8960398165509105, 'block_10_project': 0.893502252176404, 'block_15_project': 0.8926430772989988, 'block_1_expand': 0.4621363580226898, 'block_7_project': 0.7913802862167358, 'expanded_conv_project': 0.898097840603441, 'block_11_expand': 0.8746003974229097, 'Conv_1': 0.8895460506901145, 'block_9_expand': 0.324248731136322, 'block_4_expand': 0.8682664781808853, 'block_14_expand': 0.8978980323299766, 'block_15_expand': 0.8879475854337215, 'block_11_project': 0.8816935885697603, 'block_1_project': 0.8962396243587136, 'block_8_project': 0.4169996976852417}
#dmobnet {'predictions': 0.880595}
c_losses = {'conv5_block1_1_conv': 0.4090348596572876, 'conv1_conv': 0.004223477363586414, 'conv4_block6_3_conv': 0.28155721378326415, 'conv4_block6_2_conv': 0.3194208855628967, 'conv3_block4_1_conv': 0.008379470825195301, 'conv4_block4_1_conv': 0.19028485965728759, 'conv3_block4_3_conv': 0.009438466548919666, 'conv2_block3_2_conv': -0.020652640342712414, 'conv4_block1_3_conv': 0.1938614363670349, 'conv4_block4_3_conv': 0.17547906589508055, 'conv4_block2_1_conv': 0.1751793737411499, 'conv2_block2_1_conv': -0.020732570171356213, 'conv3_block1_1_conv': -0.033020782947540295, 'conv3_block1_2_conv': -0.016316819667816174, 'conv3_block3_2_conv': -0.006446290493011486, 'conv2_block1_1_conv': -0.004508126258850109, 'conv3_block3_1_conv': -0.010282624244689953, 'conv2_block2_2_conv': -0.01823495674133302, 'conv2_block3_3_conv': -0.02902458953857423, 'conv3_block2_1_conv': 0.009278606891632069, 'conv5_block1_0_conv': 0.6709034557342529, 'conv5_block2_2_conv': 0.6786160434484482, 'conv4_block3_3_conv': 0.13619674396514891, 'conv4_block3_2_conv': 0.1776369924545288, 'conv3_block1_0_conv': 0.04926021051406859, 'conv3_block4_2_conv': 0.030977797031402576, 'conv2_block1_0_conv': -0.00027220296859742366, 'conv5_block3_3_conv': 0.7747437621355057, 'conv4_block5_2_conv': 0.2172989363670349, 'conv4_block4_2_conv': 0.2060297245979309, 'conv3_block3_3_conv': -0.016576576709747326, 'conv4_block6_1_conv': 0.2759426350593567, 'conv5_block1_3_conv': 0.6561975593566894, 'conv5_block3_2_conv': 0.7876513714790344, 'conv2_block1_2_conv': -0.004328299045562756, 'conv4_block5_1_conv': 0.2371198649406433, 'conv4_block1_2_conv': 0.05385584783554076, 'conv2_block3_1_conv': -0.03180192756652833, 'conv2_block2_3_conv': -0.026147354125976574, 'conv5_block3_1_conv': 0.7770615483522415, 'conv4_block2_3_conv': 0.1485648865699768, 'conv5_block2_3_conv': 0.6573364849090576, 'conv4_block3_1_conv': 0.17196245145797728, 'conv4_block1_0_conv': 0.2273292655944824, 'conv5_block1_2_conv': 0.4568289632797241, 'conv5_block2_1_conv': 0.6986568088531494, 'conv3_block1_3_conv': 0.023245226860046375, 'conv2_block1_3_conv': -0.008464324951171887, 'conv3_block2_3_conv': 0.0014860744476318244, 'conv4_block5_3_conv': 0.18253226232528685, 'conv3_block2_2_conv': 0.017870378017425526, 'conv4_block2_2_conv': 0.19238282394409179, 'conv4_block1_1_conv': 0.024723779678344715}
d_losses = {'predictions': 0.8903128209412098}
#cvgg = {'block1_conv1': -0.018951, 'block4_conv2': 0.606949, 'block4_conv1': 0.49485304293632504, 'block3_conv1': 0.10637735662460324, 'block2_conv2': 0.040078573799133266, 'block5_conv1': 0.7409071558475494, 'block5_conv2': 0.8152352580189705, 'block1_conv2': 0.0199649864196777, 'block3_conv2': 0.19688838062286373, 'block5_conv3': 0.8360919216036796, 'block3_conv3': 0.22423964080810543, 'block2_conv1': 0.026242964839935268, 'block4_conv3': 0.6989938550949096}
#dvgg = {'predictions': 0.872506, 'fc2': 0.868267, 'fc1': 0.874926}

In [10]:
dsize = 50000

In [13]:
c_total_ranks = 150 * len(c_losses)
c_total_loss = sum(c_losses.values())
d_total_ranks = 300 * len(d_losses)
d_total_loss = sum(d_losses.values())
#def_ranks = dict.fromkeys(laynames)
for i in range(len(laynames)):
    name = laynames[i] 
    if name in c_losses:
        curr_rank = max(int(150 * len(c_losses) * c_losses[name] / c_total_loss), 5)
        ranks[name] = curr_rank
        c_total_ranks -= curr_rank
    else:
        curr_rank = max(int(300 * len(d_losses) * d_losses[name] / d_total_loss), 5)
        ranks[name] = curr_rank
        d_total_ranks -= curr_rank       
while c_total_ranks > 0:
    maxscore = 0
    for i in c_losses.keys():
        score = c_losses[i] / (2 * ranks[i] + 1)
        if maxscore < score:
            maxscore = score
            max_i = i
    ranks[max_i] += 1
    c_total_ranks -= 1
while d_total_ranks > 0:
    maxscore = 0
    for i in d_losses.keys():
        score = d_losses[i] / (2 * ranks[i] + 1)
        if maxscore < score:
            maxscore = score
            max_i = i
    ranks[max_i] += 1
    d_total_ranks -= 1
    
for i in laynames: # Keras dxdxCxN -> dxdxR CxR RxN --- NCD2WoHo > R(CWiHi+D2WoHo+NWoHo) speedup
    layer = model.get_layer(i)
    R = ranks[i]
    if isinstance(layer, tf.keras.layers.Conv2D):
        _, Wi, Hi, C = layer.input_shape
        _, Wo, Ho, N = layer.output_shape
        D = layer.kernel_size[0]
        if R * (C*Wi*Hi + D**2*Wo*Ho + N*Wo*Ho) >= N*C*D**2*Wo*Ho:
            ranks.pop(i)
    else:
        _, C = layer.input_shape
        _, N = layer.output_shape
        if R >= C*N/(C+N): # K < CN/(C+N) para acelerar
            ranks.pop(i)
            
print('Ranks:\n', ranks)

old_model = model
del model
for i in range(len(ranks)):
    #if i<11:
    #    continue
    print('Stage %d/%d' % (i+1, len(ranks)), 16*'-')
    curr_ranks = dict()
    for j in laynames:
        if j in ranks:
            curr_ranks[j] = ranks[j]
            if len(curr_ranks) > i:
                break

    dec_model = CP_optimize(old_model, curr_ranks)
    del old_model
    dec_model.compile(optimizer=tf.keras.optimizers.SGD(1e-4, momentum=0.9),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
    bsz = 64
    ds = train_ds.batch(bsz)
    dec_model.fit(ds, steps_per_epoch=dsize//bsz+1)
    #dec_model.compile(optimizer=tf.keras.optimizers.SGD(2e-4, momentum=0.9),
    #      loss='sparse_categorical_crossentropy',
    #      metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])
    bsz = 32
    #ds = train_ds.batch(bsz)
    #dec_model.fit(ds, steps_per_epoch=dsize//bsz+1, epochs=2)
    if i+1 < len(ranks):
        old_model = dec_model
        del dec_model
print('Done')

Ranks:
 {'conv2_block3_2_conv': 5, 'conv5_block1_2_conv': 360, 'conv2_block1_3_conv': 5, 'conv4_block3_3_conv': 107, 'conv3_block1_1_conv': 5, 'conv3_block1_2_conv': 5, 'conv4_block5_1_conv': 187, 'conv3_block2_3_conv': 5, 'conv2_block2_3_conv': 5, 'conv4_block4_3_conv': 138, 'conv4_block3_1_conv': 135, 'conv1_conv': 5, 'conv4_block3_2_conv': 140, 'conv3_block1_3_conv': 18, 'conv2_block2_1_conv': 5, 'conv3_block3_3_conv': 5, 'conv3_block4_3_conv': 7, 'conv4_block4_2_conv': 162, 'conv3_block4_1_conv': 6, 'conv4_block1_1_conv': 19, 'conv3_block4_2_conv': 24, 'conv4_block2_3_conv': 117, 'conv3_block3_1_conv': 5, 'conv3_block2_2_conv': 14, 'conv4_block1_3_conv': 153, 'conv4_block6_2_conv': 252, 'conv2_block3_1_conv': 5, 'conv5_block2_2_conv': 536, 'conv5_block3_2_conv': 622, 'conv2_block1_0_conv': 5, 'conv3_block2_1_conv': 7, 'conv4_block2_2_conv': 152, 'predictions': 300, 'conv4_block5_3_conv': 144, 'conv2_block3_3_conv': 5, 'conv4_block5_2_conv': 171, 'conv4_block2_1_conv': 138, 'conv2_b

In [14]:
dec_model.save('ResNet50_CP') #18:50 - 1:00

W0306 00:50:50.089661 140364455229184 deprecation.py:506] From /home/alexis/tensorflow/lib/python3.5/site-packages/tensorflow/python/ops/resource_variable_ops.py:1817: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [15]:
dec_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2DDecomp)       (None, 112, 112, 64) 644         conv1_pad[45][0]                 
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[43][0]                
___________________________________________________________________________________________

In [66]:
laynames[0:3]

['Conv1', 'expanded_conv_project', 'block_1_expand']

In [26]:
model.compile(loss='sparse_categorical_crossentropy',
              metrics=['accuracy', tf.keras.metrics.SparseTopKCategoricalAccuracy()])

In [33]:
dec_model.compile(optimizer=tf.keras.optimizers.SGD(1e-3, momentum=0.9),
              loss='sparse_categorical_crossentropy',
              metrics=[tf.keras.metrics.SparseTopKCategoricalAccuracy()])

In [41]:
bsz = 64
ds = train_ds.batch(bsz)


In [32]:
model.evaluate(ds, steps=5000//bsz) #top5 0.9245



[1.024785041809082, 0.7558093070983887, 0.9244791865348816]

In [39]:
dec_model.evaluate(x_test, y_test) #0.4873



[1.2850342988967896, 0.5472999811172485]

In [121]:
mid = tf.keras.Model(inputs=model.inputs, outputs=model.layers[2].output)
decmid = tf.keras.Model(inputs=dec_model.inputs, outputs=dec_model.layers[2].output)

In [57]:
model.summary()
dec_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

NameError: name 'dec_model' is not defined

In [83]:
dec_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

In [75]:
help(super)

Help on class super in module builtins:

class super(object)
 |  super() -> same as super(__class__, <first argument>)
 |  super(type) -> unbound super object
 |  super(type, obj) -> bound super object; requires isinstance(obj, type)
 |  super(type, type2) -> bound super object; requires issubclass(type2, type)
 |  Typical use to call a cooperative superclass method:
 |  class C(B):
 |      def meth(self, arg):
 |          super().meth(arg)
 |  This works for class methods too:
 |  class C(B):
 |      @classmethod
 |      def cmeth(cls, arg):
 |          super().cmeth(arg)
 |  
 |  Methods defined here:
 |  
 |  __get__(self, instance, owner, /)
 |      Return an attribute of instance, which is of type owner.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __init__(self, /, *args, **kwargs)
 |      Initialize self.  See help(type(self)) for accurate signature.
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new obje

In [8]:
net_in = tf.keras.layers.Input((32,32,3))
net_out = tf.keras.layers.Conv2D(32, kernel_size=5, activation='relu')(net_in)
net_out = tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu')(net_out)
net_out = tf.keras.layers.GlobalAvgPool2D()(net_out)
net_out = tf.keras.layers.Dense(10, activation='softmax')(net_out)
model = tf.keras.Model(inputs=net_in, outputs=net_out)

In [27]:
lay = model.layers[-1]
lay.output_shape

(None, 10)

In [6]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

In [5]:
lr = 0.0001

model.compile(optimizer=tf.keras.optimizers.SGD(lr, momentum=0.9),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [26]:
model.fit(x_train, y_train, batch_size=64, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fa0a24b4c18>

In [38]:
dec_model.fit(x_train, y_train, batch_size=4, epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f97ed030ba8>

In [39]:
model.layers[1].name

'conv2d_2'

In [126]:
model.save('test.h5')

In [12]:
model = tf.keras.models.load_model('../models/mobilenet-f16_trt/')

In [None]:
model.su