In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import time
from tqdm import tqdm

In [2]:
sns.set()
tf.enable_eager_execution()
tf.set_random_seed(1867)

In [3]:
#Prepare MNIST Data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

#将数据切片，缓冲区1000，batch64
dataset_train = tf.data.Dataset.from_tensor_slices((
    tf.cast(x_train/255, tf.float32),
    tf.cast(y_train, tf.int64)
)).shuffle(1000).batch(64)

dataset_test = tf.data.Dataset.from_tensor_slices((
    tf.cast(x_test/255, tf.float32),
    tf.cast(y_test, tf.int64)
)).batch(64)

In [4]:
#Model Definition
model_orig = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 5, activation=tf.nn.relu, input_shape=(28, 28, 1)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Conv2D(64, 5, activation=tf.nn.relu),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.75),
    tf.keras.layers.Dense(10, use_bias=False),
])

In [5]:
def train_model(model):
    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_or_create_global_step()

    training_losses = []
    training_accuracy = []

    for epoch in range(10):
        epoch_loss_avg = tf.contrib.eager.metrics.Mean()
        epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
        for x, y in tqdm(dataset_train, total=round(len(x_train)/64)):
            x = tf.reshape(x, [-1, 28, 28, 1])
            with tf.GradientTape() as tape:
                outputs = model(x)
                loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights), global_step)
            epoch_loss_avg(loss)
            epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
        training_losses.append(epoch_loss_avg.result())
        training_accuracy.append(epoch_accuracy.result())

    return training_losses, training_accuracy

In [6]:
def test(model, dataset):
    epoch_loss_avg = tf.contrib.eager.metrics.Mean()
    epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
    for x, y in dataset:
        x = tf.reshape(x, [-1, 28, 28, 1])
        outputs = model(x)
        loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
        epoch_loss_avg(loss)
        epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
    return epoch_loss_avg.result().numpy(), epoch_accuracy.result().numpy()

In [7]:
loss1, accuracies1 = train_model(model_orig)

100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:29<00:00, 31.16it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 32.98it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 33.02it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 33.15it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 32.97it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 32.81it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 32.84it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:28<00:00, 32.73it/s]
100%|███████████████████████████████████

In [8]:
def unit_prune(dense_model, percentile):
    conv_layer_cnt = 2
    prev_kept_columns = None
    pruned_model = tf.keras.models.Sequential()
    num_layers = len(dense_model.trainable_weights)
    filter_channel_norms = []
    filter_norms = []
    weights_thislayer_np = dense_model.trainable_weights[0].numpy()

    #conv layer
    for i_conv_layer in range(conv_layer_cnt):
        weights_nextlayer_np = dense_model.trainable_weights[i_conv_layer*2+2].numpy()
        for i_filter in range(weights_thislayer_np.shape[3]):
            for d3 in range(weights_thislayer_np.shape[2]):
                filter_channel_norms.append(np.linalg.norm(weights_thislayer_np[:, :, d3, i_filter], ord=2, axis=None))
            filter_norms.append(np.linalg.norm(filter_channel_norms, ord=2))
            filter_channel_norms.clear()
        critical_value = np.percentile(filter_norms, percentile)
        keep_mask = filter_norms >= critical_value
        weights_thislayer_np = weights_thislayer_np[:, :, :, keep_mask]
        if i_conv_layer < conv_layer_cnt-1 :
            weights_nextlayer_np = weights_nextlayer_np[:, :, keep_mask, :]
        else :
            flatten_mask = np.zeros(shape=(len(keep_mask), len(weights_nextlayer_np)//len(keep_mask)))
            flatten_mask[:] = np.array(keep_mask).reshape(-1, 1)
            flatten_mask = flatten_mask.reshape(1, -1)
            flatten_mask = flatten_mask[0]
            weights_nextlayer_np = weights_nextlayer_np[np.argwhere(flatten_mask)[:, 0], :]
        bias = dense_model.trainable_weights[i_conv_layer*2+1].numpy()[keep_mask]
        if i_conv_layer == 0 :
            new_layer = tf.keras.layers.Conv2D(weights_thislayer_np.shape[3], 5, activation=tf.nn.relu, input_shape=(28, 28, 1))
        else :
            new_layer = tf.keras.layers.Conv2D(weights_thislayer_np.shape[3], 5, activation=tf.nn.relu)
        pruned_model.add(new_layer)
        new_layer.set_weights([weights_thislayer_np, bias])
        weights_thislayer_np = weights_nextlayer_np
        filter_norms.clear()
        
        pruned_model.add(tf.keras.layers.MaxPool2D(2, 2))
    
    #flatten_layer
    new_layer = tf.keras.layers.Flatten()
    pruned_model.add(new_layer)
    
    new_layer = tf.keras.layers.Dense(1024, activation=tf.nn.relu)
    pruned_model.add(new_layer)
    new_layer.set_weights([weights_thislayer_np, dense_model.trainable_weights[5].numpy()])
    #fc layer
    pruned_model.add(tf.keras.layers.Dropout(0.75))
    new_layer = tf.keras.layers.Dense(10, use_bias=False)
    pruned_model.add(new_layer)
    new_layer.set_weights(dense_model.layers[7].get_weights())
    
    return pruned_model

In [32]:
p_model = unit_prune(model_orig, 90)

In [34]:
loss_, accu_ = test(model_ft, dataset_test)
print(loss_)
print(accu_)

0.040772115297962735
0.9875


In [17]:
def fine_tuning(pruned_model):
    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_or_create_global_step()

    training_losses = []
    training_accuracy = []

    for epoch in range(4):
        epoch_loss_avg = tf.contrib.eager.metrics.Mean()
        epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
        for x, y in tqdm(dataset_train, total=round(len(x_train)/64)):
            with tf.GradientTape() as tape:
                x = tf.reshape(x, [-1, 28, 28, 1])
                outputs = pruned_model(x)
                loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
            grads = tape.gradient(loss, pruned_model.trainable_weights)
            optimizer.apply_gradients(zip(grads, pruned_model.trainable_weights), global_step)
            epoch_loss_avg(loss)
            epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
        training_losses.append(epoch_loss_avg.result())
        training_accuracy.append(epoch_accuracy.result())

    return pruned_model

In [33]:
model_ft = fine_tuning(p_model)

100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:20<00:00, 45.84it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:20<00:00, 46.62it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:20<00:00, 46.84it/s]
100%|████████████████████████████████████████████████████████████████████████████████| 938/938 [00:19<00:00, 46.98it/s]


In [None]:
print(model_ft.trainable_weights[0])