In [1]:
# !git clone https://github.com/GabrielGlzSa/ModelCompression.git
# %cd ModelCompression/
# !git checkout --track origin/CompressionV2
# !pip install Pillow \
#     h5py \
#     keras_preprocessing \
#     matplotlib \
#     mock \
#     numpy \
#     scipy \
#     sklearn \
#     pandas \
#     future \
#     portpicker \
#     enum34 \
#     tensorflow==2.6.2 \
#     tensorflow_datasets==4.0.1\
#     tensorflow_transform\
#     tensorboard_plugin_profile\
#     seaborn\
#     pyparsing==2.4.7\
#     tf_agents \
#     tensorflow-model-optimization

In [2]:
from google.colab import drive
import sys

drive.mount('/content/drive/')
%cd ./drive/MyDrive/Colab Projects/
%ls

sys.path.insert(0, './ModelCompression')
dataset = 'mnist'

Mounted at /content/drive/
/content/drive/MyDrive/Colab Projects
 Adadeep.ipynb   [0m[01;34mdata[0m/                     [01;34mModelCompression[0m/
 [01;34mcheckpoints[0m/   'DQN Optimization.ipynb'  'Test compressors.ipynb'


In [3]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import logging
import pandas as pd
from IPython.display import clear_output
from utils import load_dataset
from CompressionTechniques import *
from replay_buffer import ReplayBuffer
from environments import *
from custom_layers import ROIEmbedding, ROIEmbedding1D
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s -%(levelname)s - %(funcName)s -  %(message)s')

# Agent 

In [4]:
class DQNAgent:
    def __init__(self, name, state_shape, n_actions, epsilon=0.0, layer_type='fc'):
        """A simple DQN agent"""
        self.name = name
        
        if layer_type=='fc':
          input = tf.keras.layers.Input(shape=(None, state_shape[-1]))
          x = ROIEmbedding1D(n_bins=[32, 16, 8 ,4, 2, 1])(input)
          x = tf.keras.layers.Dense(512, activation='relu')(x)
          output = tf.keras.layers.Dense(n_actions, activation=tf.keras.activations.linear)(x)
        else:          
          input = tf.keras.layers.Input(shape=(None, None, state_shape[-1]))
          x = tf.keras.layers.Conv2D(64, kernel_size=3)(input)
          x = tf.keras.layers.Conv2D(64, kernel_size=3)(x)
          x = ROIEmbedding(n_bins=[(4,4), (2,2), (1,1)])(x)
          x = tf.keras.layers.Dense(512, activation='relu')(x)
          output = tf.keras.layers.Dense(n_actions, activation=tf.keras.activations.linear)(x)
        self.model = tf.keras.Model(inputs=input, outputs=output, name=name)
        self.model.summary()
        self.weights = self.model.trainable_weights
        self.epsilon = epsilon

    def get_symbolic_qvalues(self, state_t):
        """takes agent's observation, returns qvalues. Both are tf Tensors"""
        qvalues = self.model(state_t)
        return qvalues

    def get_qvalues(self, state_t):
        """Same as symbolic step except it operates on numpy arrays"""
        qvalues = self.model(state_t)
        return qvalues

    def sample_actions(self, qvalues):
        """pick actions given qvalues. Uses epsilon-greedy exploration strategy. """
        epsilon = self.epsilon
        batch_size, n_actions = qvalues.shape
        random_actions = np.random.choice(n_actions, size=batch_size)
        best_actions = qvalues.argmax(axis=-1)
        should_explore = np.random.choice([0, 1], batch_size, p=[1 - epsilon, epsilon])
        return np.where(should_explore, random_actions, best_actions)


# Evaluate agent

In [5]:

def evaluate_adadeep(env, conv_agent, fc_agent, n_games=1, greedy=True, t_max=10000):
    """ Plays n_games full games. If greedy, picks actions as argmax(qvalues). Returns mean reward. """
    rewards = []
    acc = []
    weights = []
    infos = []
    for _ in range(n_games):
        s = env.reset()
        reward = 0
        df = pd.DataFrame()
        for k in range(len(env.layer_name_list)):
          next_layer_name = env.layer_name_list[env._layer_counter]
          layer = env.model.get_layer(next_layer_name)
          was_conv = False
          random_feature = s[np.random.choice(s.shape[0], size=1)]
          if isinstance(layer, tf.keras.layers.Conv2D):
              qvalues = conv_agent.get_qvalues(random_feature).numpy()
              action = qvalues.argmax(axis=-1)[0] if greedy else conv_agent.sample_actions(qvalues)[0]
          if isinstance(layer, tf.keras.layers.Dense):
              qvalues = fc_agent.get_qvalues(random_feature).numpy()
              action = qvalues.argmax(axis=-1)[0] if greedy else fc_agent.sample_actions(qvalues)[0]

          new_s, r, done, info = env.step(action)
          s = env.get_current_state()
          if done:
              s = env.reset()
              break

          row = {'state': s, 'action': action, 'reward': r, 'next_state': new_s, 'done':done, 'info':info}
          df = df.append(row, ignore_index = True)

        # Calculate reward using stats before and after compression
        before_stats = df.iloc[0]['info']
        after_stats = df.iloc[-1]['info']
        reward = after_stats['weights_after']/before_stats['weights_before'] + after_stats['acc_after']

        rewards.append(reward)
        acc.append(info['acc_after'])
        weights.append(info['weights_after'])
    print(infos)
    return np.mean(rewards), np.mean(acc), np.mean(weights)
    
def play_and_record_adadeep(conv_agent, fc_agent, env, conv_replay, fc_replay, dataset, n_steps=1, debug=False):
    """
    Play the game for exactly n steps, record every (s,a,r,s', done) to replay buffer.
    Whenever game ends, add record with done=True and reset the game.
    It is guaranteed that env has done=False when passed to this function.

    PLEASE DO NOT RESET ENV UNLESS IT IS "DONE"

    :returns: return sum of rewards over time
    """
    # initial state
    s = env.reset()
    # Play the game for n_steps as per instructions above
    rewards = 0
    
    for it in range(n_steps):
        df = pd.DataFrame()
        for k in range(len(env.layer_name_list)):
          next_layer_name = env.layer_name_list[env._layer_counter]
          layer = env.model.get_layer(next_layer_name)
          was_conv = False
          if isinstance(layer, tf.keras.layers.Conv2D):
              random_image = s[np.random.choice(s.shape[0], size=1)]
              qvalues = conv_agent.get_qvalues(random_image).numpy()
              action = conv_agent.sample_actions(qvalues)[0]
              was_conv = True
          if isinstance(layer, tf.keras.layers.Dense):
            random_image = s[np.random.choice(s.shape[0], size=1)]
            qvalues = fc_agent.get_qvalues(random_image).numpy()
            action = fc_agent.sample_actions(qvalues)[0]

          new_s, r, done, info = env.step(action)

          info['was_conv'] = was_conv

          if debug:
            print('Iteration: {} of {}'.format(it, n_steps), s.shape, action, r, new_s.shape, done, info)


          row = {'state': s, 'action': action, 'reward': r, 'next_state': new_s, 'done':done, 'info':info}
          df = df.append(row, ignore_index = True)

          s = env.get_current_state()
          if done:
              s = env.reset()
              break

        # Calculate reward using stats before and after compression
        before_stats = df.iloc[0]['info']
        after_stats = df.iloc[-1]['info']

        reward = after_stats['weights_after']/before_stats['weights_before'] + after_stats['acc_after']

        # Set the same reward for all actions.
        df['reward'] = reward
        
        for idx, row in df.iterrows():
          was_conv = row['info']['was_conv']
          if was_conv:
            for idx, feature in enumerate(row['state']):
              conv_replay.add(feature, row['action'], row['reward'], row['next_state'][idx], row['done'])
          else:
            for idx, feature in enumerate(row['state']):
              fc_replay.add(feature, row['action'], row['reward'], row['next_state'][idx], row['done'])
        
        fc_replay.save('./data/{}_adadeep_fc_replay.pkl'.format(dataset))
        conv_replay.save('./data/{}_adadeep_conv_replay.pkl'.format(dataset))
    return reward

In [6]:
# % rm -r ./data/full_model/test_mnist/

# Create environment

In [7]:
def make_env(dataset):
    train_ds, val_ds, test_ds, input_shape, num_classes = load_dataset(dataset)
    train_ds, valid_ds, test_ds, input_shape, num_classes = load_dataset(dataset)

    model_path = './data/full_model/test_'+dataset
    try:
      model = tf.keras.models.load_model(model_path, compile=True)
    except OSError:
      optimizer = tf.keras.optimizers.Adam()
      loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
      train_metric = tf.keras.metrics.SparseCategoricalAccuracy()
      model = tf.keras.Sequential([tf.keras.layers.Conv2D(32, (3, 3), activation='relu', name='conv2d_0',
                                                          input_shape=input_shape),
                                    tf.keras.layers.Conv2D(32, (3, 3), activation='relu', name='conv2d_1'),
                                    tf.keras.layers.MaxPool2D((2, 2), 2),
                                    tf.keras.layers.Flatten(),
                                    tf.keras.layers.Dense(128, activation='relu', name='dense_0'),
                                    tf.keras.layers.Dense(128, activation='relu', name='dense_1'),
                                    tf.keras.layers.Dense(num_classes, activation='softmax', name='dense_softmax')
                                    ])
      model.compile(optimizer=optimizer, loss=loss_object, metrics=train_metric)
      model.fit(train_ds, epochs=5, validation_data=valid_ds)
      model.save(model_path)

#     model = tf.keras.applications.VGG16(
#     include_top=True,
#     weights="imagenet",
#     input_tensor=None,
#     input_shape=None,
#     pooling=None,
#     classes=1000,
#     classifier_activation="softmax",
# )

    w_comprs = ['InsertDenseSVD', 'InsertDenseSparse', 'DeepCompression'] # 'InsertDenseSVDCustom'
    c_comprs = ['InsertSVDConv','DepthwiseSeparableConvolution', 'SparseConnectionsCompression'] #SparseConvolutionCompression
    l_comprs = ['FireLayerCompression', 'MLPCompression','ReplaceDenseWithGlobalAvgPool']
    compressors_list = w_comprs + c_comprs + l_comprs

    parameters = {}
    parameters['DeepCompression'] = {'layer_name': 'dense_0', 'threshold': 0.001}
    parameters['ReplaceDenseWithGlobalAvgPool'] = {'layer_name': 'dense_1'}
    parameters['InsertDenseSVD'] = {'layer_name': 'dense_0', 'units': 16}
    parameters['InsertDenseSVDCustom'] = {'layer_name': 'dense_0', 'units': 16}
    parameters['InsertDenseSparse'] = {'layer_name': 'dense_0', 'verbose': True, 'units': 16}
    parameters['InsertSVDConv'] = {'layer_name': 'conv2d_1', 'units': 8}
    parameters['DepthwiseSeparableConvolution'] = {'layer_name': 'conv2d_1'}
    parameters['FireLayerCompression'] = {'layer_name': 'conv2d_1'}
    # parameters['MLPCompression'] = {'layer_name': 'conv2d_1'}
    parameters['SparseConnectionsCompression'] = {'layer_name': 'conv2d_1', 'epochs': 20,
                                                  'target_perc': 0.75, 'conn_perc_per_epoch': 0.1}

    layer_name_list = ['conv2d_1','dense_0', 'dense_1']
    # env = LayerEnv(compressors_list, model_path, parameters,
    #              train_ds, val_ds,
    #              layer_name_list, input_shape, features_type='input_shape')

    env = AdaDeepEnv(compressors_list, model_path, parameters,
                 train_ds, val_ds,
                 layer_name_list, input_shape, current_state_source='layer_input', next_state_source='layer_output')
    
    return env

env = make_env(dataset)
env.model.summary()

2022-04-05 20:16:23,914 -INFO - read_from_directory -  Load dataset info from ./data/mnist/3.0.1
2022-04-05 20:16:24,431 -INFO - read_from_directory -  Field info.citation from disk and from code do not match. Keeping the one from code.
2022-04-05 20:16:24,434 -INFO - download_and_prepare -  Reusing dataset mnist (./data/mnist/3.0.1)
2022-04-05 20:16:24,436 -INFO - as_dataset -  Constructing tf.data.Dataset for split ['train[:80%]', 'train[80%:]', 'test'], from ./data/mnist/3.0.1
2022-04-05 20:16:29,452 -INFO - read_from_directory -  Load dataset info from ./data/mnist/3.0.1
2022-04-05 20:16:29,466 -INFO - read_from_directory -  Field info.citation from disk and from code do not match. Keeping the one from code.
2022-04-05 20:16:29,473 -INFO - download_and_prepare -  Reusing dataset mnist (./data/mnist/3.0.1)
2022-04-05 20:16:29,479 -INFO - as_dataset -  Constructing tf.data.Dataset for split ['train[:80%]', 'train[80%:]', 'test'], from ./data/mnist/3.0.1


TypeError: ignored

# Create DQN for model compression


In [None]:
fc_state_dim = (1,)
fc_n_actions = len(env.dense_compressors)
conv_state_dim = list(env.get_current_state().shape)[1:]
conv_n_actions = len(env.conv_compressors)

fc_agent = DQNAgent("dqn_agent_fc", fc_state_dim, fc_n_actions, epsilon=0.9, layer_type='fc')
fc_target_network = DQNAgent("target_network_fc", fc_state_dim, fc_n_actions, layer_type='fc')


conv_agent = DQNAgent("dqn_agent_conv", conv_state_dim, conv_n_actions, epsilon=0.9, layer_type='cnn')
conv_target_network = DQNAgent("target_network_conv", conv_state_dim, conv_n_actions, layer_type='cnn')

def load_weigths_into_target_network(agent, target_network):
    """ assign target_network.weights variables to their respective agent.weights values. """
    for i in range(len(agent.model.layers)):
        target_network.model.layers[i].set_weights(agent.model.layers[i].get_weights())


load_weigths_into_target_network(fc_agent, fc_target_network)
load_weigths_into_target_network(conv_agent, conv_target_network)

for w, w2 in zip(fc_agent.weights, fc_target_network.weights):
    tf.assert_equal(w, w2)
for w, w2 in zip(conv_agent.weights, conv_target_network.weights):
    tf.assert_equal(w, w2)

print("It works!")

# Training func


In [None]:
def moving_average(x, span=100, **kw):
    return pd.DataFrame({'x': np.asarray(x)}).x.ewm(span=span, **kw).mean().values


mean_weights_history = []
mean_acc_history = []
mean_rw_history = []
td_loss_history = []




def sample_batch(exp_replay, batch_size):
    obs_batch, act_batch, reward_batch, next_obs_batch, is_done_batch = exp_replay.sample(batch_size)
    return {
        'state': obs_batch,
        'action': act_batch,
        'rewards': reward_batch,
        'next_state': next_obs_batch,
        'done': is_done_batch,
    }


@tf.function
def training_loop(state, action, rewards, next_state, done, agent, target_agent, loss_optimizer, n_actions, gamma=0.99):
    state = tf.cast(state, tf.float32)
    action = tf.cast(action, tf.int32)
    next_state = tf.cast(next_state, tf.float32)

    rewards = tf.cast(rewards, tf.float32)
    done = 1 - tf.cast(done, tf.float32)

    reference_qvalues = rewards + gamma * tf.reduce_max(target_agent.get_qvalues(next_state), axis=1)
    reference_qvalues = reference_qvalues * (1 - done) - done

    masks = tf.one_hot(action, n_actions)
    with tf.GradientTape() as tape:
        q_values = agent.get_qvalues(state)
        q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
        td_loss = tf.reduce_mean((q_action - reference_qvalues) ** 2)

    gradients = tape.gradient(td_loss, agent.weights)
    loss_optimizer.apply_gradients(zip(gradients, agent.weights))
    return td_loss



# Generate some replay buffer samples

In [None]:

fc_agent.epsilon = 0.9
conv_agent.epsilon = 0.9
min_epsilon = 0.1
optimizer = tf.keras.optimizers.Adam(1e-5)
iterations = 1000

fc_exp_replay = ReplayBuffer(10 ** 5)
conv_exp_replay = ReplayBuffer(10 ** 5)

try:
  fc_exp_replay.load('./data/{}_adadeep_fc_replay.pkl'.format(dataset))
  conv_exp_replay.load('./data/{}_adadeep_conv_replay.pkl'.format(dataset))
except FileNotFoundError:
  pass

print('There are {} conv and {} fc instances.'.format(len(conv_exp_replay._storage),len(fc_exp_replay._storage)))
play_and_record_adadeep(conv_agent, fc_agent, env, conv_exp_replay, fc_exp_replay, dataset, 32, debug=True)

print('There are {} conv and {} fc instances.'.format(len(conv_exp_replay._storage),len(fc_exp_replay._storage)))

# RFL Training

In [None]:
from tqdm import tqdm
logging.basicConfig(level=logging.WARNING, format='%(asctime)s -%(levelname)s - %(funcName)s -  %(message)s')

with tqdm(total=iterations,
          bar_format="{l_bar}{bar}|{n}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}, {postfix[0]}: {postfix[1]:.4f} {postfix[2]}: {postfix[3][0]:.2f}, {postfix[3][1]:.2f} & {postfix[3][2]:.2f}]",
          postfix=["Epsilon", conv_agent.epsilon, 'Last 3 RW', dict({0:0,1:0,2:0})]) as t:
    for i in range(iterations):
        # play
        play_and_record_adadeep(conv_agent, fc_agent, env, conv_exp_replay, fc_exp_replay, dataset, 1, debug=True)

        # train fc
        batch_data = sample_batch(conv_exp_replay, batch_size=32)
        batch_data['agent'] = conv_agent
        batch_data['target_agent'] = conv_target_network
        batch_data['loss_optimizer'] = optimizer
        batch_data['n_actions'] = conv_n_actions
        conv_loss_t = training_loop(**batch_data)

        # train
        batch_data = sample_batch(fc_exp_replay, batch_size=32)
        batch_data['agent'] = fc_agent
        batch_data['target_agent'] = fc_target_network
        batch_data['loss_optimizer'] = optimizer
        batch_data['n_actions'] = fc_n_actions
        fc_loss_t = training_loop(**batch_data)
        td_loss_history.append(conv_loss_t+fc_loss_t)

        # adjust agent parameters
        if i % 10 == 0:
            load_weigths_into_target_network(conv_agent, conv_target_network)
            conv_target_network.model.save_weights('./checkpoints/{}_my_checkpoint_conv'.format(dataset))
            conv_agent.epsilon = max(conv_agent.epsilon * 0.98, min_epsilon)
            t.postfix[1] = conv_agent.epsilon

            load_weigths_into_target_network(fc_agent, fc_target_network)
            fc_target_network.model.save_weights('./checkpoints/{}_my_checkpoint_fc'.format(dataset))
            fc_agent.epsilon = conv_agent.epsilon

            rw, acc, weights = evaluate_adadeep(make_env(dataset), conv_agent, fc_agent, n_games=3)
            mean_rw_history.append(rw)
            mean_acc_history.append(acc)
            mean_weights_history.append(weights)
            t.postfix[3][2] = mean_rw_history[-1]
            try:
                t.postfix[3][1] = mean_rw_history[-2]
            except IndexError:
                t.postfix[3][1] = 0
            try:
                t.postfix[3][0] = mean_rw_history[-3]
            except IndexError:
                t.postfix[3][0] = 0
        t.update()


        if i%100==0:
            clear_output(True)
            plt.subplot(1, 3, 1)
            plt.title("mean reward per game")
            plt.plot(mean_rw_history)
            plt.grid()

            assert not np.isnan(td_loss_history).any()

            plt.subplot(1, 3, 2)
            plt.title("TD loss history (moving average)")
            plt.plot(moving_average(np.array(td_loss_history), span=100, min_periods=100))
            plt.grid()

            plt.subplot(1, 4, 3)
            plt.title("Weights history")
            plt.plot(mean_weights_history)
            plt.grid()

            plt.subplot(1, 4, 4)
            plt.title("Acc history)")
            plt.plot(mean_acc_history)
            plt.grid()
            plt.show()