In [6]:
import tensorflow as tf
import tensorflow_datasets as tfds
import logging
from uuid import uuid4
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import partial
import gc
import os

from CompressionLibrary.reinforcement_models import DuelingDQNAgentBigger as DuelingDQNAgent
from CompressionLibrary.reward_functions import reward_MnasNet as calculate_reward
from CompressionLibrary.environments import ModelCompressionSVDIntEnv
from CompressionLibrary.utils import calculate_model_weights

from sklearn.neighbors import NearestCentroid
from scipy.spatial import distance_matrix
from tensorflow.keras import layers
from tensorflow.keras.models import Model


In [2]:
# Agent checkpoints evaluated in this notebook, identified by the suffix of
# their name. Other suffixes that have been used: 'fashion_mnist', 'kmnist',
# 'mnist', 'fashion_mnist-kmnist-mnist'.
agents_names = [
    'LeNet_DDQN_discrete_tuning_zero_rw_FM_best_img_' + suffix
    for suffix in ['fashion_mnist-kmnist', 'fashion_mnist-mnist', 'kmnist-mnist']
]
dataset_names = ['fashion_mnist','kmnist', 'mnist']
# Unique identifier written into every result row of this run.
# NOTE(review): the format has no '-' between minutes and seconds ('%M%S');
# left untouched so identifiers keep the same layout as earlier runs.
run_id = datetime.now().strftime('%Y-%m-%d-%H-%M%S-') + str(uuid4())

print(agents_names)

strategy = tf.distribute.OneDeviceStrategy(device="/gpu:0")
data_path = "./data/"

log_name = 'DDQN_SVD10_test_agents'
test_filename = data_path + 'stats/DDQN_SVD10_{}_tests'.format(log_name)

agents_path = data_path+'agents/DDQN/checkpoints/'

# Neither logging.FileHandler (below) nor DataFrame.to_csv (used when the
# results are written under `test_filename`) creates missing parent
# directories, so make sure both output folders exist before they are needed.
os.makedirs(data_path + 'logs', exist_ok=True)
os.makedirs(data_path + 'stats', exist_ok=True)

if strategy:
    print('Number of devices: {}'.format(strategy.num_replicas_in_sync))



logging.basicConfig(level=logging.DEBUG, handlers=[
    logging.FileHandler(data_path + f'logs/{log_name}.log', 'w+')],
    format='%(asctime)s -%(levelname)s - %(funcName)s -  %(message)s')
# basicConfig does nothing when the root logger already has handlers, so the
# level is also set explicitly.
logging.root.setLevel(logging.DEBUG)

# Only let errors from the 'tensorflow' logger through.
log = logging.getLogger('tensorflow')
log.setLevel(logging.ERROR)

logger = logging.getLogger()

# Parameters shared in training and testing env
current_state = 'layer_input'
next_state = 'layer_output'
tuning_epochs = 0
tuning_mode = 'final'

batch_size_per_replica = 128
tuning_batch_size = batch_size_per_replica * strategy.num_replicas_in_sync


# Env variables
training_state_set_source = 'test_all'
training_num_feature_maps = -1
reward_step = True


# Testing variables
testing_state_set_source = 'test_best'
testing_num_feature_maps = -1
eval_n_samples = 1

# Autoencoder
latent_dim = 64

verbose = 0

epsilon_start_value = 1.0



# Layers of the LeNet model handed to the compression environments.
layer_name_list = ['conv2d_1',  'dense', 'dense_1']


['LeNet_DDQN_discrete_tuning_zero_rw_FM_best_img_fashion_mnist-kmnist', 'LeNet_DDQN_discrete_tuning_zero_rw_FM_best_img_fashion_mnist-mnist', 'LeNet_DDQN_discrete_tuning_zero_rw_FM_best_img_kmnist-mnist']
Number of devices: 1


# Model creation and data loading.


In [3]:
def create_model(dataset_name, train_ds, valid_ds):
    """Build the LeNet classifier for `dataset_name` and restore or train its weights.

    The weights are loaded from ``./data/models/lenet_<dataset_name>/cp.ckpt``.
    If loading fails, the model is trained on `train_ds` (validated on
    `valid_ds`) while a checkpoint callback keeps the weights with the best
    `val_loss` at that same path.

    Args:
        dataset_name: Name used to build the checkpoint path.
        train_ds: Dataset passed to `model.fit` when no checkpoint can be loaded.
        valid_ds: Validation dataset passed to `model.fit`.

    Returns:
        The compiled `tf.keras.Model` named 'LeNet'.
    """
    checkpoint_path = f"./data/models/lenet_{dataset_name}/cp.ckpt"
    optimizer = tf.keras.optimizers.Adam(1e-5)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    train_metric = tf.keras.metrics.SparseCategoricalAccuracy()

    # Named `inputs` rather than `input` so the builtin is not shadowed.
    inputs = tf.keras.layers.Input((28,28,1))
    x = tf.keras.layers.Conv2D(6, (5,5), padding='SAME', activation='sigmoid', name='conv2d')(inputs)
    x = tf.keras.layers.AveragePooling2D((2,2), strides=2, name='avg_pool_1')(x)
    x = tf.keras.layers.Conv2D(16, (5,5), padding='VALID', activation='sigmoid', name='conv2d_1')(x)
    x = tf.keras.layers.AveragePooling2D((2,2), strides=2, name='avg_pool_2')(x)
    x = tf.keras.layers.Flatten(name='flatten')(x)
    x = tf.keras.layers.Dense(120, activation='sigmoid', name='dense')(x)
    x = tf.keras.layers.Dense(84, activation='sigmoid', name='dense_1')(x)
    x = tf.keras.layers.Dense(10, activation='softmax', name='predictions')(x)

    model = tf.keras.Model(inputs, x, name='LeNet')
    model.compile(optimizer=optimizer, loss=loss_object,
                    metrics=[train_metric])

    try:
        model.load_weights(checkpoint_path).expect_partial()
    except Exception:
        # `Exception` instead of a bare `except:` so KeyboardInterrupt and
        # SystemExit are not swallowed and turned into a long training run.
        logging.getLogger(__name__).warning(
            'Could not load weights from %s; training LeNet from scratch.',
            checkpoint_path, exc_info=True)
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)
        # NOTE: after fit() the in-memory model holds the weights of the final
        # epoch; the best-`val_loss` weights are only in the checkpoint file.
        model.fit(train_ds,
          epochs=3000,
          validation_data=valid_ds,
          callbacks=[cp_callback])

    return model

class Autoencoder(Model):
    """Dense autoencoder with a single hidden (latent) layer.

    The encoder flattens its input and maps it to `latent_dim` ReLU units.
    The decoder maps that code to 784 sigmoid units reshaped to (28, 28).
    """

    def __init__(self, latent_dim):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder: flatten -> latent code.
        encoder_layers = [
            layers.Flatten(),
            layers.Dense(latent_dim, activation='relu'),
        ]
        self.encoder = tf.keras.Sequential(encoder_layers)

        # Decoder: latent code -> 784 values -> 28x28 image.
        decoder_layers = [
            layers.Dense(784, activation='sigmoid'),
            layers.Reshape((28, 28)),
        ]
        self.decoder = tf.keras.Sequential(decoder_layers)

    def call(self, x):
        """Return the decoder's reconstruction of the encoded `x`."""
        return self.decoder(self.encoder(x))


def dataset_preprocessing_img2img(img, label):
    """Cast `img` to float32, divide by 255 and return it as both input and target.

    `label` is accepted so the function can be mapped over (image, label)
    pairs, but it is not used.
    """
    scaled = tf.cast(img, tf.float32) / 255.0
    return scaled, scaled

def dataset_preprocessing_img2label(img, label):
    """Cast `img` to float32, divide by 255 and return it with its unchanged `label`."""
    scaled = tf.cast(img, tf.float32) / 255.0
    return scaled, label

def load_dataset(dataset_name, dataset_preprocessing, batch_size=128):
    """Load a TFDS dataset and build batched train/validation/test pipelines.

    The 'train' split is divided 80/20 into training and validation data; the
    'test' split is used as is. Every pipeline maps `dataset_preprocessing`
    over the examples, caches the result, batches it and prefetches. Only the
    training pipeline is shuffled (buffer of 1000, reshuffled each iteration).

    Args:
        dataset_name: Name passed to `tfds.load`.
        dataset_preprocessing: Function mapped over the (image, label) pairs.
        batch_size: Batch size of the three pipelines.

    Returns:
        Tuple `(train_ds, valid_ds, test_ds, input_shape, num_classes)`.
        `input_shape` is always `(28, 28, 1)`; `num_classes` comes from the
        dataset's label feature.
    """
    splits, info = tfds.load(dataset_name, as_supervised=True, with_info=True, shuffle_files=True,
                                split=['train[:80%]', 'train[80%:]','test'])
    train_examples, validation_examples, test_examples = splits

    num_classes = info.features['label'].num_classes

    # The shape is fixed rather than read from `info`; the value returned to
    # callers is the same one the function has always returned.
    input_shape = (28,28,1)

    def build_pipeline(examples, shuffle=False):
        # Shared map -> cache -> [shuffle] -> batch -> prefetch chain.
        pipeline = examples.map(dataset_preprocessing, num_parallel_calls=tf.data.AUTOTUNE).cache()
        if shuffle:
            pipeline = pipeline.shuffle(buffer_size=1000, reshuffle_each_iteration=True)
        return pipeline.batch(batch_size).prefetch(tf.data.AUTOTUNE)

    train_ds = build_pipeline(train_examples, shuffle=True)
    valid_ds = build_pipeline(validation_examples)
    test_ds = build_pipeline(test_examples)

    return train_ds, valid_ds, test_ds, input_shape, num_classes



def get_best_per_class(tf_dataset, autoencoder):
    """Pick, for every class, the image whose latent code is closest to the class centroid.

    All batches of `tf_dataset` are materialised as NumPy arrays and encoded
    with `autoencoder.encoder`. A `NearestCentroid` model fitted on the codes
    provides one centroid per class, and for each class the member with the
    smallest distance to its own centroid is selected.

    Args:
        tf_dataset: Batched dataset yielding (images, labels).
        autoencoder: Model exposing an `encoder` callable.

    Returns:
        A `tf.data.Dataset` of (image, label) pairs with one element per class,
        batched into a single batch. Each label is a length-1 array.
    """
    image_batches = []
    label_batches = []
    for x, y in tfds.as_numpy(tf_dataset):
        image_batches.append(x)
        label_batches.append(y)

    x_train = np.concatenate(image_batches, axis=0)
    y_train = np.concatenate(label_batches, axis=0)

    # Only the latent codes are needed; no decoder pass is run because the
    # reconstructions are not part of the result.
    encoded_imgs = autoencoder.encoder(x_train).numpy()

    clf = NearestCentroid()
    clf.fit(encoded_imgs, y_train)

    best_images = []
    labels_best = []
    # Row i of `centroids_` belongs to `classes_[i]`, so iterate over the
    # fitted classes instead of assuming the labels are exactly 0..K-1.
    for centroid_idx, class_number in enumerate(clf.classes_):
        class_members = np.argwhere(y_train == class_number).flatten()
        # Distance of each class member to its own class centroid only.
        own_centroid = clf.centroids_[[centroid_idx]]
        distances = distance_matrix(encoded_imgs[class_members], own_centroid)[:, 0]
        best_member = class_members[np.argmin(distances)]
        best_images.append(x_train[best_member])
        # Double-bracket indexing keeps each label as an array of shape (1,).
        labels_best.append(y_train[[best_member]])
        logger.debug(f'Best member of {class_number} is {best_member} with class {labels_best[-1]}')

    num_classes = len(clf.classes_)
    return tf.data.Dataset.from_tensor_slices((best_images, labels_best)).batch(num_classes)

def generate_dataset_best_img(dataset_name, latent_dim, batch_size):
    """Train an autoencoder on `dataset_name` and derive per-class "best image" datasets.

    An `Autoencoder` with `latent_dim` units is fitted for 50 epochs on the
    image-to-image version of the dataset. The labelled version is then loaded
    and `get_best_per_class` is applied to its train, validation and test
    pipelines.

    Returns:
        Tuple `(train_state_ds, valid_state_ds, test_state_ds, ft_train_ds,
        ft_valid_ds, ft_test_ds, input_shape, num_classes)`.
    """
    autoencoder = Autoencoder(latent_dim)
    autoencoder.compile(optimizer='adam', loss=tf.keras.losses.MeanSquaredError())

    # Image -> image pipelines; only train/validation are needed for fitting.
    recon_train_ds, recon_valid_ds, _, _, _ = load_dataset(
        dataset_name, dataset_preprocessing_img2img, batch_size)
    autoencoder.fit(
        recon_train_ds,
        epochs=50,
        shuffle=True,
        validation_data=recon_valid_ds,
        verbose=0,
    )

    # Image -> label pipelines, returned to the caller and used to pick the
    # representative image of every class.
    labelled = load_dataset(dataset_name, dataset_preprocessing_img2label, batch_size)
    ft_train_ds, ft_valid_ds, ft_test_ds, input_shape, num_classes = labelled

    state_datasets = tuple(
        get_best_per_class(split_ds, autoencoder)
        for split_ds in (ft_train_ds, ft_valid_ds, ft_test_ds)
    )

    return (*state_datasets, ft_train_ds, ft_valid_ds, ft_test_ds, input_shape, num_classes)

# Module-level image shape: 28x28 pixels, one channel.
# NOTE(review): create_environments() binds its own local `input_shape` from
# generate_dataset_best_img(), so this global does not appear to be read by the
# code visible in this notebook - confirm before removing.
input_shape = (28,28,1)

def create_environments(dataset_names, num_feature_maps, state_set_source):
    """Create one `ModelCompressionSVDIntEnv` per dataset.

    For every dataset the autoencoder-based "best image" datasets are generated
    and an environment is built around a LeNet factory (`create_model` bound to
    that dataset). Shared settings (`latent_dim`, `tuning_batch_size`,
    `tuning_epochs`, `layer_name_list`, `current_state`, `next_state`,
    `verbose`, `tuning_mode`, `strategy`) are read from module-level variables.

    Args:
        dataset_names: Iterable of dataset names.
        num_feature_maps: Forwarded to the environment unchanged.
        state_set_source: 'test_all' to use the full test pipeline as state
            dataset, 'test_best' to use the per-class best test images.

    Returns:
        List of environments, in the order of `dataset_names`.

    Raises:
        ValueError: If `state_set_source` is not one of the supported values.
    """
    # Validate before any expensive work. Without this check an unknown value
    # would leave `state_ds` unbound, or silently reuse the previous
    # iteration's dataset.
    valid_sources = ('test_all', 'test_best')
    if state_set_source not in valid_sources:
        raise ValueError(
            f'Unknown state_set_source {state_set_source!r}; expected one of {valid_sources}.')

    w_comprs = ['InsertDenseSVD']
    l_comprs = ['MLPCompression']
    compressors_list = w_comprs + l_comprs

    # One parameter dict per compressor, all values initially unset.
    parameters = {
        compressor: {'layer_name': None, 'percentage': None}
        for compressor in compressors_list
    }

    environments = []
    for dataset in dataset_names:
        train_state_ds, valid_state_ds, test_state_ds, train_ds, valid_ds, test_ds, input_shape, num_classes = generate_dataset_best_img(dataset, latent_dim, tuning_batch_size)
        state_ds = test_ds if state_set_source == 'test_all' else test_state_ds

        new_func = partial(create_model, dataset_name=dataset, train_ds=train_ds, valid_ds=valid_ds)
        env = ModelCompressionSVDIntEnv(
                reward_func=calculate_reward,
                compressors_list=compressors_list,
                create_model_func=new_func,
                compr_params=parameters,
                train_ds=train_ds,
                validation_ds=valid_ds,
                test_ds=test_ds,
                layer_name_list=layer_name_list,
                input_shape=input_shape,
                tuning_batch_size=tuning_batch_size,
                tuning_epochs=tuning_epochs,
                state_ds=state_ds,
                current_state_source=current_state,
                next_state_source=next_state,
                num_feature_maps=num_feature_maps,
                verbose=verbose,
                tuning_mode=tuning_mode,
                strategy=strategy)

        environments.append(env)

    return environments

# One environment per dataset for each state source: the full test set and the
# per-class best test images.
test_all_envs = create_environments(
    dataset_names,
    num_feature_maps=training_num_feature_maps,
    state_set_source=training_state_set_source,
)
test_envs = create_environments(
    dataset_names,
    num_feature_maps=testing_num_feature_maps,
    state_set_source=testing_state_set_source,
)

# Observation shapes and the action list are read from the first environment.
conv_shape, dense_shape = test_all_envs[0].observation_space()
action_space = test_all_envs[0].action_space()
num_actions = len(action_space)

# Both agents get the same number of actions.
conv_n_actions = num_actions
fc_n_actions = num_actions

print(conv_shape, dense_shape)
print(f'The action space is {action_space}')

[10000    14    14    16] (10000, 400)
The action space is [5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100]


# Evaluate

In [4]:
def play_and_record(conv_agent, fc_agent,env, conv_replay, fc_replay,run_id, test_number, dataset_name, save_name, n_games=10, exploration=True):
    """Run `n_games` compression episodes on `env` and append one CSV row per episode.

    In every episode the layers listed in `env.original_layer_name_list` are
    visited in turn. The agent matching the layer type picks an action
    (`conv_agent` for Conv2D layers, `fc_agent` for Dense layers), the action
    is applied with `env.step`, and the episode ends when the environment
    reports `done`.

    Args:
        conv_agent: Agent queried through `get_qvalues` / `sample_actions`
            for Conv2D layers.
        fc_agent: Agent queried the same way for Dense layers; its states are
            squeezed and zero-padded to width 400 first.
        env: Environment; this function uses `reset`, `step`, `get_state`,
            `model`, `original_layer_name_list` and `_layer_counter`.
        conv_replay: Replay buffer for conv transitions; only touched when
            `exploration` is true.
        fc_replay: Replay buffer for dense transitions; only touched when
            `exploration` is true.
        run_id: Stored in the 'run_id' column of every row.
        test_number: Stored in the 'test_number' column of every row.
        dataset_name: Stored in the 'dataset' column and passed to the replay
            buffers.
        save_name: CSV path. The file is created with a header if it does not
            exist, otherwise rows are appended without a header.
        n_games: Number of episodes to play.
        exploration: Forwarded to `sample_actions`. When true, transitions are
            also collected and pushed to the replay buffers at episode end.

    Returns:
        Tuple `(mean reward, mean test accuracy, mean weights)` over the
        episodes. The reward of an episode is the `r` of its last step; the
        other two come from `info['test_acc_after']` and
        `info['weights_after']`.

    NOTE(review): the exploration path calls `calculate_td_error_conv` and
    `calculate_td_error_fc`, which are neither defined nor imported in the
    notebook cells visible here - confirm they exist before calling this
    function with `exploration=True`.
    """
    # initial state
    s = env.reset()

    # Local logger; shadows the module-level `logger` inside this function.
    logger = logging.getLogger(__name__)
    rewards = []
    acc = []
    weights = []
    infos = []  # never read in this function
    total_time = 0

    for it in range(n_games):
        start = datetime.now()
        last_conv_data = None  # assigned but never read in this function
        skip_add_replay = False  # updated below but never read in this function
        data = []
        for k in range(1, len(env.original_layer_name_list)+1):
            tf.keras.backend.clear_session()
            # Get the current layer name
            current_layer_name = env.original_layer_name_list[env._layer_counter]
            # Get the layer.
            layer = env.model.get_layer(current_layer_name)

            if env._layer_counter+1<len(env.original_layer_name_list):
                
                # Get the next layer name
                next_layer_name = env.original_layer_name_list[env._layer_counter+1]
                # Get the layer.
                next_layer = env.model.get_layer(next_layer_name)
                if isinstance(layer, tf.keras.layers.Conv2D) and not isinstance(next_layer, tf.keras.layers.Conv2D):
                    logger.debug('Last convolutional layer.')
                    skip_add_replay = True
                else:
                    skip_add_replay = False

            was_conv = True
            # Choose agent depending on layer type.
            # NOTE(review): if the layer is neither Conv2D nor Dense, `action`
            # is not assigned in this iteration.
            if isinstance(layer, tf.keras.layers.Conv2D):
                # Calculate q values for batch of images
                qvalues = conv_agent.get_qvalues(s)
                action = conv_agent.sample_actions(qvalues.numpy(), exploration=exploration)[0]
            if isinstance(layer, tf.keras.layers.Dense):
                was_conv = False
                # Zero-pad the squeezed state along its last axis to a fixed
                # width of 400 before it is given to the dense agent.
                s = np.squeeze(s)
                temp = np.zeros(shape=(s.shape[0], 400))
                temp[:, :s.shape[-1]] = s
                s = temp
                qvalues = fc_agent.get_qvalues(s)
                action = fc_agent.sample_actions(qvalues.numpy(), exploration=exploration)[0]

            # Only the first sampled action ([0]) is applied to the layer.
            
            logger.debug(f'Action for layer {current_layer_name} layer is {action}')

            # Apply action
            new_s, r, done, info = env.step(action) 
            # r * = 100
            

            logger.debug(f'Iteration {it} - Layer {current_layer_name} {k}/{len(env.original_layer_name_list)}\tChosen action {action} has {r} reward.')
            logger.debug(info)

            num_inst = s.shape[0]

            # Use input of next layer instead of output of current for all states except final.
            if not done:
                new_s = env.get_state('current_state')

            if exploration:
                # Same squeeze + zero-padding to width 400 as for the dense
                # agent's input, applied to the next state before it is stored.
                new_s = np.squeeze(new_s)
                temp = np.zeros(shape=(new_s.shape[0], 400))
                temp[:, :new_s.shape[-1]] = new_s
                new_s = temp
                data.append([s, action, r, new_s, done, was_conv])

            
            s = env.get_state('current_state')

            if done:
                if exploration:
                    for row in data:
                        # Replace r with _ for assigning the same reward to all actions of episode.
                        # This unpacking rebinds the outer `s` and `done`;
                        # `s` is replaced by env.reset() right after the loop.
                        s, a, rw, sn, done, conv = row
                        # NOTE(review): `num_inst` on the next line still holds
                        # its previous value; it is refreshed two lines below.
                        actions_batch = np.array([a]*num_inst)
                        done_float = 1.0 if done else 0.0
                        num_inst = s.shape[0]
                        # NOTE(review): the conv branch uses `r` (reward of the
                        # last env.step) for the TD error, the fc branch uses
                        # the per-step `rw`; both store `r` in the replay
                        # buffer. Confirm which reward is intended.
                        if conv:
                            logger.debug(f'Conv replay has {len(conv_replay)} examples.')
                            td_errors = calculate_td_error_conv(s, actions_batch, [r]*num_inst, sn, done_float )
                            td_errors = np.reshape(np.abs(td_errors), -1)
                            conv_replay.add_multiple(s, [a]*num_inst, [r]*num_inst, sn, td_errors, [done]*num_inst, dataset_name)
                            logger.debug(f'Conv replay has {len(conv_replay)} examples.')
                        else:
                            logger.debug(f'FC replay has {len(fc_replay)} examples.')
                            td_errors = calculate_td_error_fc(s, actions_batch, [rw]*num_inst, sn, done_float )
                            td_errors = np.reshape(np.abs(td_errors), -1)
                            fc_replay.add_multiple(s, [a]*num_inst, [r]*num_inst, sn, td_errors, [done]*num_inst, dataset_name)
                            logger.debug(f'FC replay has {len(fc_replay)} examples.')
                        # Goes through the root logger (`logging.debug`), not
                        # the local `logger`.
                        logging.debug(f'Layer TD error is {td_errors}')
                s = env.reset()
                break

        gc.collect()
        

        # `info` is the dict returned by the last env.step of this episode; it
        # is extended in place and written out as one CSV row.
        # Using 0f as actions are percentages without decimals.
        info['actions'] = ','.join(['{:.0f}'.format(x) for x in info['actions']] )
        info['run_id'] = run_id
        info['test_number'] = test_number
        info['game_id'] = it
        info['dataset'] = dataset_name
        del info['layer_name']
        rewards.append(r)
        acc.append(info['test_acc_after'])
        weights.append(info['weights_after'])
        new_row = pd.DataFrame(info, index=[0])
        if not os.path.isfile(save_name):
            new_row.to_csv(save_name, index=False)
        else: # else it exists so append without writing the header
            new_row.to_csv(save_name, mode='a', index=False, header=False)

        # Correct reward is the last value of r.
        
        end = datetime.now()
        time_diff = (end - start).total_seconds()
        total_time += time_diff
        logger.info(f'Took {time_diff} seconds for one compression.')

    # NOTE(review): `total_time/n_games` divides by zero when n_games == 0.
    logger.info(f'Evaluation of {n_games} took {total_time} secs. An average of {total_time/n_games} secs per game.')

    return np.mean(rewards), np.mean(acc), np.mean(weights)

# Evaluation of results

In [7]:
# Two agents share the evaluation: one for dense layers, one for conv layers.
fc_agent = DuelingDQNAgent(name="ddqn_agent_fc", state_shape=dense_shape,
                        n_actions=fc_n_actions, epsilon=epsilon_start_value, layer_type='fc')


conv_agent = DuelingDQNAgent(
    name="ddqn_agent_conv", state_shape=conv_shape, n_actions=conv_n_actions, epsilon=epsilon_start_value, layer_type='cnn')

# One progress-bar step per (dataset, agent) pair.
iterations = len(dataset_names) * len(agents_names)

# play_and_record only touches the replay buffers when exploration=True, so
# none are needed for this evaluation.
conv_exp_replay = None
fc_exp_replay = None

with tqdm(total=iterations) as t:
    for idx, dataset_name in enumerate(dataset_names):
        env_all = test_all_envs[idx]
        env_best = test_envs[idx]
        # Each agent is evaluated on both environments of the dataset; the
        # suffix selects the CSV file the results are appended to.
        eval_targets = [(env_all, '_all.csv'), (env_best, '_best.csv')]

        for agent_name in agents_names:
            conv_agent.model.load_weights(agents_path+agent_name+'_conv.ckpt')
            fc_agent.model.load_weights(agents_path+agent_name+'_fc.ckpt')

            for eval_env, csv_suffix in eval_targets:
                rw, acc, weights = play_and_record(conv_agent, fc_agent, eval_env, conv_exp_replay, fc_exp_replay,run_id=run_id,test_number=agent_name, dataset_name=dataset_name,save_name=test_filename+csv_suffix, n_games=eval_n_samples, exploration=False)

            # Advance the bar manually: it was created with `total=` and does
            # not wrap an iterable, so it never moves on its own.
            t.update(1)

                  

  0%|          | 0/9 [01:02<?, ?it/s]
