In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import time
import os
from tqdm import tqdm

os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
#tf.device('/gpu:1')

sns.set()
tf.enable_eager_execution()
tf.set_random_seed(1867)

In [2]:
#Prepare MNIST Data
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()

#将数据切片，缓冲区1000，batch64
dataset_train = tf.data.Dataset.from_tensor_slices((
    tf.cast(x_train/255, tf.float32),
    tf.cast(y_train, tf.int64)
)).shuffle(1000).batch(64)

dataset_test = tf.data.Dataset.from_tensor_slices((
    tf.cast(x_test/255, tf.float32),
    tf.cast(y_test, tf.int64)
)).batch(64)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
#Model Definition
model_orig = tf.keras.Sequential([
    tf.keras.layers.Conv2D(32, 5, activation=tf.nn.relu, input_shape=(32, 32, 3), kernel_regularizer=tf.keras.regularizers.l1(0.001)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Conv2D(64, 5, activation=tf.nn.relu, kernel_regularizer=tf.keras.regularizers.l1(0.001)),
    tf.keras.layers.MaxPool2D(2, 2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(1024, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.75),
    tf.keras.layers.Dense(10, use_bias=False),
])

In [28]:
def train_model(model):
    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_or_create_global_step()

    training_losses = []
    training_accuracy = []

    for epoch in range(10):
        epoch_loss_avg = tf.contrib.eager.metrics.Mean()
        epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
        for x, y in tqdm(dataset_train, total=round(len(x_train)/64)):
            x = tf.reshape(x, [-1, 32, 32, 3])
            y = y[:, 0]
            with tf.GradientTape() as tape:
                outputs = model(x)
                loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
            grads = tape.gradient(loss, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights), global_step)
            epoch_loss_avg(loss)
            epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
        training_losses.append(epoch_loss_avg.result())
        training_accuracy.append(epoch_accuracy.result())

    return training_losses, training_accuracy


In [32]:
def test(model, dataset):
    epoch_loss_avg = tf.contrib.eager.metrics.Mean()
    epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
    for x, y in dataset:
        x = tf.reshape(x, [-1, 32, 32, 3])
        y = y[:, 0]
        outputs = model(x)
        loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
        epoch_loss_avg(loss)
        epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
    return epoch_loss_avg.result().numpy(), epoch_accuracy.result().numpy()


In [33]:
#Train Model
def unit_prune(dense_model, percentile):
    conv_layer_cnt = 2
    pruned_model = tf.keras.models.Sequential()
    filter_channel_norms = []
    filter_norms = []
    weights_thislayer_np = dense_model.trainable_weights[0].numpy()

    # conv layer
    for i_conv_layer in range(conv_layer_cnt):
        weights_nextlayer_np = dense_model.trainable_weights[i_conv_layer * 2 + 2].numpy()
        for i_filter in range(weights_thislayer_np.shape[3]):
            filter_norms.append(np.mean(np.fabs(weights_thislayer_np[:, :, :, i_filter])))
        critical_value = np.percentile(filter_norms, percentile)
        keep_mask = filter_norms >= critical_value
        weights_thislayer_np = weights_thislayer_np[:, :, :, keep_mask]
        if i_conv_layer < conv_layer_cnt - 1:
            weights_nextlayer_np = weights_nextlayer_np[:, :, keep_mask, :]
        else:
            flatten_mask = np.zeros(shape=(len(keep_mask), len(weights_nextlayer_np) // len(keep_mask)))
            flatten_mask[:] = np.array(keep_mask).reshape(-1, 1)
            flatten_mask = flatten_mask.reshape(1, -1)
            flatten_mask = flatten_mask[0]
            weights_nextlayer_np = weights_nextlayer_np[np.argwhere(flatten_mask)[:, 0], :]
        bias = dense_model.trainable_weights[i_conv_layer * 2 + 1].numpy()[keep_mask]
        if i_conv_layer == 0:
            new_layer = tf.keras.layers.Conv2D(weights_thislayer_np.shape[3], 5, activation=tf.nn.relu,
                                               input_shape=(32, 32, 1))
        else:
            new_layer = tf.keras.layers.Conv2D(weights_thislayer_np.shape[3], 5, activation=tf.nn.relu)
        pruned_model.add(new_layer)
        new_layer.set_weights([weights_thislayer_np, bias])
        weights_thislayer_np = weights_nextlayer_np
        filter_norms.clear()
        pruned_model.add(tf.keras.layers.MaxPool2D(2, 2))

    # flatten_layer
    new_layer = tf.keras.layers.Flatten()
    pruned_model.add(new_layer)

    new_layer = tf.keras.layers.Dense(1024, activation=tf.nn.relu)
    pruned_model.add(new_layer)
    new_layer.set_weights([weights_thislayer_np, dense_model.trainable_weights[5].numpy()])
    # fc layer
    pruned_model.add(tf.keras.layers.Dropout(0.75))
    new_layer = tf.keras.layers.Dense(10, use_bias=False)
    pruned_model.add(new_layer)
    new_layer.set_weights(dense_model.layers[7].get_weights())

    return pruned_model

In [34]:
def fine_tuning(pruned_model, epoch_num):
    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_or_create_global_step()

    training_losses = []
    training_accuracy = []

    for epoch in range(epoch_num):
        epoch_loss_avg = tf.contrib.eager.metrics.Mean()
        epoch_accuracy = tf.contrib.eager.metrics.Accuracy()
        for x, y in tqdm(dataset_train, total=round(len(x_train)/64)):
            with tf.GradientTape() as tape:
                x = tf.reshape(x, [-1, 32, 32, 3])
                y = y[:, 0]
                outputs = pruned_model(x)
                loss = tf.losses.softmax_cross_entropy(tf.one_hot(y, 10), outputs)
            grads = tape.gradient(loss, pruned_model.trainable_weights)
            optimizer.apply_gradients(zip(grads, pruned_model.trainable_weights), global_step)
            epoch_loss_avg(loss)
            epoch_accuracy(tf.argmax(outputs, axis=1, output_type=tf.int64), y)
        training_losses.append(epoch_loss_avg.result())
        training_accuracy.append(epoch_accuracy.result())

    return pruned_model

In [36]:
train_model(model_orig)

782it [00:07, 103.73it/s]                         
782it [00:08, 91.73it/s]                          
782it [00:07, 103.24it/s]                         
782it [00:07, 103.97it/s]                         
782it [00:07, 104.23it/s]                         
782it [00:07, 100.09it/s]                         
782it [00:07, 104.31it/s]                         
782it [00:07, 103.85it/s]                         
782it [00:08, 92.38it/s]                          
782it [00:07, 103.27it/s]                         


([<tf.Tensor: id=1713754, shape=(), dtype=float64, numpy=0.1430478488652469>,
  <tf.Tensor: id=1867861, shape=(), dtype=float64, numpy=0.11510063390976386>,
  <tf.Tensor: id=2021968, shape=(), dtype=float64, numpy=0.11258356908009962>,
  <tf.Tensor: id=2176075, shape=(), dtype=float64, numpy=0.1015678774129273>,
  <tf.Tensor: id=2330182, shape=(), dtype=float64, numpy=0.09721637058995969>,
  <tf.Tensor: id=2484289, shape=(), dtype=float64, numpy=0.08750068436405334>,
  <tf.Tensor: id=2638396, shape=(), dtype=float64, numpy=0.07658848277253845>,
  <tf.Tensor: id=2792503, shape=(), dtype=float64, numpy=0.0721656753707801>,
  <tf.Tensor: id=2946610, shape=(), dtype=float64, numpy=0.07511396806203591>,
  <tf.Tensor: id=3100717, shape=(), dtype=float64, numpy=0.06678990963900037>],
 [<tf.Tensor: id=1713759, shape=(), dtype=float64, numpy=0.95076>,
  <tf.Tensor: id=1867866, shape=(), dtype=float64, numpy=0.96042>,
  <tf.Tensor: id=2021973, shape=(), dtype=float64, numpy=0.96294>,
  <tf.Tenso

In [9]:
model_pruned = unit_prune(model_orig, 90)

In [10]:
model_ft = model_pruned

In [16]:
model_ft = fine_tuning(model_ft, 1)
print(test(model_ft, dataset_test))

100%|██████████| 938/938 [00:07<00:00, 119.95it/s]


(0.048614128166201705, 0.9856)


In [19]:
model_pruned_30 = unit_prune(model_orig, 30)
model_ft_30 = fine_tuning(model_pruned_30, 1)
print(test(model_ft_30, dataset_test))

model_pruned_30 = unit_prune(model_ft_30, 30/0.7)
model_ft_30 = fine_tuning(model_pruned_30, 1)
print(test(model_ft_30, dataset_test))

model_pruned_30 = unit_prune(model_ft_30, 30/0.7/0.7)
model_ft_30 = fine_tuning(model_pruned_30, 1)
print(test(model_ft_30, dataset_test))


100%|██████████| 938/938 [00:07<00:00, 118.55it/s]
  0%|          | 2/938 [00:00<00:49, 18.90it/s]

(0.02926730891900012, 0.9887)


100%|██████████| 938/938 [00:07<00:00, 120.16it/s]
  0%|          | 2/938 [00:00<00:50, 18.68it/s]

(0.03253549118105016, 0.9882)


100%|██████████| 938/938 [00:07<00:00, 119.39it/s]


(0.04158904143478603, 0.9847)


In [18]:
model_ft_30.trainable_weights[0].shape

TensorShape([Dimension(5), Dimension(5), Dimension(1), Dimension(5)])

In [37]:
loss_orig, accu_orig = test(model_orig, dataset_test)
print(loss_orig)
print(accu_orig)

2.7692928458475006
0.6656
