In [1]:
%tensorflow_version 1.12.0

`%tensorflow_version` only switches the major version: 1.x or 2.x.
You set: `1.12.0`. This will be interpreted as: `1.x`.


TensorFlow 1.x selected.


In [2]:
from keras.preprocessing.image import ImageDataGenerator as IDG
from keras.utils import to_categorical


def data_generator(X, y, num_classes, batch_size=32):
    return IDG(rescale=1. / 225, shear_range=0.2, zoom_range=0.2, horizontal_flip=True) \
        .flow(X, to_categorical(y, num_classes), batch_size=batch_size)

Using TensorFlow backend.


In [0]:
from tensorflow import keras
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Input
import numpy as np
import os
import keras.backend as K
import tensorflow as tf

class Agent:

    def __init__(self, env):

        self.state_dim = env.state_size #ex (Nl,Ml), Nl=Number of filters, Ml= filter size but in one dimension (3,3,3)==>9
        self.action_size = env.action_size #Number of filters in one layer (Nl)
        self.env=env
        #self.discount_factor = 0.99
        self.learning_rate = 0.01
        self.states, self.actions, self.rewards = [], [], []
        self.model = self._build_model()
        if os.path.exists('pruning_agent.h5'):
            self.model.load_weights('./saved_model/pruning_agent.h5')

    def _build_model(self):
        model = Sequential()
        model.add(Conv2D(32, (7, 7), activation='relu', padding="same", input_shape=(self.state_dim[0],self.state_dim[1],1)))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Conv2D(64, (7, 7), padding="same", activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Conv2D(64, (7, 7), padding="same", activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Conv2D(64, (7, 7), padding="same", activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2), padding="same"))
        model.add(Flatten())
        model.add(Dense(24, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='sigmoid'))
        def custom_loss(reward):
            def loss(y_true,y_pred):
                reward=K.max(y_true)
                log_like=K.log(y_pred)
                #sum_log=K.sum(log_like)
                return log_like*reward
            return loss
        model.compile(loss=custom_loss(3), optimizer=Adam(lr=self.learning_rate), metrics=['accuracy'])
        return model


In [5]:
import sys
sys.path.append('/content/Pruning Agent')
!pip install tfkerassurgeon



In [0]:
from tensorflow import keras
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.datasets import cifar10
from tensorflow.keras import Input
from tensorflow.keras.optimizers import Adam
from tfkerassurgeon.operations import delete_channels

#from utils import data_generator

import os
import math
import numpy as np

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 


class Cifar10VGG16:

    def __init__(self, b=0.5):
        (self.x_train, self.y_train), (self.x_test, self.y_test) = cifar10.load_data()
        self.model = self.__build_model() #Entry model (VGG IN THIS EXAMPLE)
        self.num_classes = 10
        self.b = b  #Defined in the paper
        self.action_size = None
        self.state_size = None
        self.epochs = 2
        self.base_model_accuracy = None
        self._current_state = 0 #entier allant de 0 à nb_filtre d'une couche donnée
        self.layer_name = None

    def __build_model(self):
        """Builds the VGG16 Model
        """
        input_shape = self.x_train.shape[1:]
        input_tensor = Input(shape=input_shape)
        model=Sequential()
        vgg = VGG16(include_top=False, input_tensor=input_tensor, weights='imagenet')
        flatten = Flatten(name='Flatten')(vgg.output)
        prediction = Dense(10, activation='softmax')(flatten)
        model = Model(input_tensor, prediction)
        model.compile(loss="binary_crossentropy", optimizer=Adam(lr=0.01), metrics=['accuracy'])
        return model


    def _accuracy_term(self, new_model):
        train_data_generator = data_generator(self.x_train, self.y_train, self.num_classes)
        eval_data_generator = data_generator(self.x_test, self.y_test, self.num_classes)
        train_steps = train_data_generator.n // train_data_generator.batch_size
        validation_steps = eval_data_generator.n // eval_data_generator.batch_size
        new_model.fit_generator(generator=train_data_generator, steps_per_epoch=train_steps, epochs=self.epochs,
                                validation_data=eval_data_generator, validation_steps=validation_steps,verbose=0)

        p_hat = new_model.evaluate_generator(eval_data_generator, eval_data_generator.n, verbose=0)[1] #accuracy of the new model... used in the reward function
        print('Accuracy of the new model', p_hat)
        if not self.base_model_accuracy:
            print('Calculating the accuracy of the base line model')
            self.base_model_accuracy = self.model.evaluate_generator(eval_data_generator, eval_data_generator.n,
                                                                     verbose=1)[1]
        accuracy_term = (self.b - (self.base_model_accuracy - p_hat)) / self.b #first part of the reward function 1- (p-p*)/b
        return accuracy_term

    def _efficiency_term(self,action):
        '''
        Defined in the paper
        '''
        return math.log10(self.action_size /( self.action_size - len(action)))

    
    def get_reward(self,action):
        new_model = delete_channels(self.model, layer=self.model.get_layer(self.layer_name), channels=action)
        new_model.compile(loss="binary_crossentropy", optimizer=Adam(lr=0.01), metrics=['accuracy'])
        return self._accuracy_term(new_model) + 100*self._efficiency_term(action) +1
        #return 100*self._efficiency_term(action)

In [7]:
env = Cifar10VGG16()
env.layer_name='block1_conv1'
x = env.model.get_layer('block1_conv1').get_weights()[0]
y=x.transpose(3,0,1,2).reshape(64,27)
env.state_size=y.shape
env.action_size=y.shape[0]

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [0]:
env.x_train=env.x_train[:100,:,:,:]
env.y_train=env.y_train[:100,:]
env.x_test=env.x_test[:100,:,:,:]
env.y_test=env.y_test[:100,:]

In [0]:
agent=Agent(env)

In [0]:

input = np.expand_dims(y, axis=2)
input = np.expand_dims(input, axis=0)

In [0]:
things=[]

In [0]:
for i in range(300):
    print("*************************************",i,"********************************************")
    a=agent.model.predict(input)
    action = np.where(a > 0.5, 0, 1)
    action = np.where(action[0] == 1)[0]
    things.append(action)
    reward=env.get_reward(action)
    print("the reward is:", reward)
    output=np.ones((1,64))*reward
    agent.model.fit(input,output)


************************************* 0 ********************************************
Deleting 37/64 channels from layer: block1_conv1
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Accuracy of the new model 0.81399983
Calculating the accuracy of the base line model
the reward is: 39.481621101699275
Train on 1 samples
************************************* 1 ********************************************
Deleting 36/64 channels from layer: block1_conv1
Accuracy of the new model 0.814
the reward is: 37.90219474100395
Train on 1 samples
************************************* 2 ********************************************
Deleting 46/64 channels from layer: block1_conv1
Accuracy of the new model 0.8139997
the reward is: 57.09074676884882
Train on 1 samples
************************************* 3 ********************************************
Deleting 46/64 channels from layer: block1_conv1
Accuracy of the new model 0.81399983
the reward is: 57.090747007267396
Train on 1 samples
************************************* 4 ********************************************
Deleting 46/64 channels from layer: block1_conv1
Accuracy of the new model 0.81399995
the reward is: 57.090747245