In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import tqdm
import matplotlib.pyplot as plt
import pandas as pd
from CompressionLibrary.environments import ModelCompressionSVDIntEnv
from CompressionLibrary.custom_layers import ROIEmbedding
from CompressionLibrary.utils import calculate_reward
from uuid import uuid4
from datetime import datetime
from tqdm import tqdm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import logging
from functools import partial
from IPython.display import clear_output
import scipy.signal as scignal
%matplotlib inline

  from .autonotebook import tqdm as notebook_tqdm


# Parameters

In [2]:
# Datasets the generalist agent is trained on; one environment is created per name.
dataset_names = ['fashion_mnist', 'kmnist']

# Identifier of this run: a timestamp followed by a random UUID.
# NOTE(review): the format has no separator between minutes and seconds ('%M%S') - confirm this is intended.
run_id = datetime.now().strftime('%Y-%m-%d-%H-%M%S-') + str(uuid4())

# Root folder used below for logs, stats, figures and agent checkpoints.
# NOTE(review): absolute, machine-specific Windows path; the sub-folders are assumed to exist already.
data_path = "G:\\Python project\\MODEL COMPRESSION\\ModelCompressionRL\\data\\"

agent_name = 'PPO_DISCRETE_MKI_GENERALIST'

# The agent is evaluated every `test_frequency` RL iterations.
test_frequency = 10

# Re-assigned to 1 in the training-loop cell before it is first used.
test_counter = 0


# PPO hyperparameters.
# clip_ratio: clipping range of the surrogate objective in train_policy.
clip_ratio = 0.2
# gamma / lam: discount settings intended for advantage estimation -
# verify that they are actually passed to the replay buffer where it is built.
gamma = 0.99
lam = 0.97
policy_learning_rate = 1e-5
value_function_learning_rate = 1e-5
# Maximum number of actor / critic gradient steps per RL iteration.
train_policy_iterations = 80
train_value_iterations = 80
# Actor updates stop early once the estimated KL exceeds 1.5 * target_kl.
target_kl = 0.01

# All log records go to a single file; mode 'w+' truncates an existing log of the same name.
log_name = '-'.join(dataset_names)
logging.basicConfig(level=logging.DEBUG, handlers=[
    logging.FileHandler(data_path + f'logs\\{agent_name}_{log_name}.log', 'w+')],
    format='%(asctime)s -%(levelname)s - %(funcName)s -  %(message)s')
logging.root.setLevel(logging.DEBUG)

# Only let TensorFlow's own logger emit errors, so it does not flood the DEBUG log.
log = logging.getLogger('tensorflow')
log.setLevel(logging.ERROR)
logger = logging.getLogger(__name__)



# Output locations for per-game statistics and for the agent checkpoints.
exploration_filename = data_path + f'stats\\{agent_name}_training.csv'
test_filename = data_path + f'stats\\{agent_name}_testing.csv'
agents_path = data_path+'agents\\PPO\\{}\\{}_{}'.format(agent_name,agent_name, log_name)


# Passed to the environments as current_state_source / next_state_source.
current_state = 'layer_weights'
next_state = 'layer_weights'
# Layers of the LeNet model that the agent is asked to act on, in order.
layer_name_list = ['conv2d_1',  'dense', 'dense_1']


# Number of complete games played per dataset in each training / testing round.
n_games_training = 5
n_games_testing = 1

# One sample per layer per training game.
replay_num_samples = len(layer_name_list) * n_games_training
verbose = 0
rl_iterations = 1000
# NOTE(review): eval_n_samples and batch_size_per_replica-derived values are only partly used in the visible cells.
eval_n_samples = 5
# Passed to the environments as num_feature_maps.
n_samples_mode = 256
batch_size_per_replica = 32
# Batch size used when loading the datasets and handed to the environments for tuning.
tuning_batch_size = 128
tuning_mode = 'final'
# Batch size requested from the replay buffer for each PPO update.
rl_batch_size = batch_size_per_replica
tuning_epochs = 0
strategy = None



# NOTE(review): not referenced again in the visible cells - possibly a leftover.
mean_rw_history = np.zeros(rl_iterations//test_frequency)

# Load Dataset and creation of LeNet

In [3]:
def create_model(dataset_name, train_ds, valid_ds):
    """Build the LeNet classifier for ``dataset_name`` and restore or train its weights.

    The network takes 28x28x1 images and ends in a 10-way softmax. Weights are
    loaded from ``./data/models/lenet_<dataset_name>/cp.ckpt`` when that
    checkpoint can be read; otherwise the model is trained on ``train_ds`` and
    the best weights (lowest ``val_loss`` on ``valid_ds``) are written there.

    Args:
        dataset_name: Name used to locate the checkpoint of this dataset.
        train_ds: Batched training dataset, only used when no checkpoint loads.
        valid_ds: Batched validation dataset monitored by the checkpoint callback.

    Returns:
        The compiled ``tf.keras.Model`` named ``'LeNet'``.
    """
    checkpoint_path = f"./data/models/lenet_{dataset_name}/cp.ckpt"
    optimizer = tf.keras.optimizers.Adam(1e-5)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    train_metric = tf.keras.metrics.SparseCategoricalAccuracy()

    # `inputs` instead of `input` so the Python builtin is not shadowed.
    inputs = tf.keras.layers.Input((28,28,1))
    x = tf.keras.layers.Conv2D(6, (5,5), padding='SAME', activation='sigmoid', name='conv2d')(inputs)
    x = tf.keras.layers.AveragePooling2D((2,2), strides=2, name='avg_pool_1')(x)
    x = tf.keras.layers.Conv2D(16, (5,5), padding='VALID', activation='sigmoid', name='conv2d_1')(x)
    x = tf.keras.layers.AveragePooling2D((2,2), strides=2, name='avg_pool_2')(x)
    x = tf.keras.layers.Flatten(name='flatten')(x)
    x = tf.keras.layers.Dense(120, activation='sigmoid', name='dense')(x)
    x = tf.keras.layers.Dense(84, activation='sigmoid', name='dense_1')(x)
    x = tf.keras.layers.Dense(10, activation='softmax', name='predictions')(x)

    model = tf.keras.Model(inputs, x, name='LeNet')
    model.compile(optimizer=optimizer, loss=loss_object,
                    metrics=[train_metric])

    try:
        model.load_weights(checkpoint_path).expect_partial()
    except Exception as error:
        # `Exception` (not a bare except) so that Ctrl-C / SystemExit during
        # loading is propagated instead of silently starting a long training run.
        logging.getLogger(__name__).info(
            f'Could not load weights from {checkpoint_path} ({error!r}); training LeNet from scratch.')
        cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_loss', save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)
        model.fit(train_ds,
          epochs=3000,
          validation_data=valid_ds,
          callbacks=[cp_callback])
        # fit() leaves the last-epoch weights in memory; reload the best
        # checkpoint so a fresh training and a later cached load return the
        # same model.
        model.load_weights(checkpoint_path).expect_partial()

    return model

def dataset_preprocessing(img, label):
    """Cast an image to float32 and divide it by 255; the label is returned untouched."""
    scaled = tf.cast(img, tf.float32) / 255.0
    return scaled, label

def load_dataset(dataset_name, batch_size=128):
    """Load a TFDS dataset and return batched train/validation/test pipelines.

    The TFDS ``train`` split is divided 80/20 into training and validation
    data; the TFDS ``test`` split is used as is. Every image goes through
    ``dataset_preprocessing``. Only the training pipeline is shuffled.

    Args:
        dataset_name: Name understood by ``tfds.load``.
        batch_size: Batch size applied to all three pipelines.

    Returns:
        Tuple ``(train_ds, valid_ds, test_ds, input_shape, num_classes)``.
    """
    splits, info = tfds.load(dataset_name, as_supervised=True, with_info=True, shuffle_files=True,
                                split=['train[:80%]', 'train[80%:]','test'])

    train_examples, validation_examples, test_examples = splits

    num_classes = info.features['label'].num_classes

    # Fixed on purpose: the LeNet built by create_model only accepts 28x28x1
    # inputs, so the shape reported by the dataset metadata is not used.
    input_shape = (28,28,1)

    train_ds = (train_examples
                .map(dataset_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
                .shuffle(buffer_size=1000, reshuffle_each_iteration=True)
                .batch(batch_size)
                .prefetch(tf.data.AUTOTUNE))
    valid_ds = (validation_examples
                .map(dataset_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
                .batch(batch_size)
                .prefetch(tf.data.AUTOTUNE))
    test_ds = (test_examples
               .map(dataset_preprocessing, num_parallel_calls=tf.data.AUTOTUNE)
               .batch(batch_size)
               .prefetch(tf.data.AUTOTUNE))

    return train_ds, valid_ds, test_ds, input_shape, num_classes

# Create Environments

In [4]:
def make_env(create_model, train_ds, valid_ds, test_ds, reward_func, input_shape, layer_name_list, num_feature_maps, tuning_batch_size, tuning_epochs, verbose=0, tuning_mode='final', current_state_source='layer_input', next_state_source='layer_output', strategy=None, model_path='./data'):
    """Create a ModelCompressionSVDIntEnv offering the two compressors used in this notebook.

    Every argument is forwarded unchanged to ``ModelCompressionSVDIntEnv``;
    ``create_model`` is passed as ``create_model_func`` and ``valid_ds`` as
    ``validation_ds``. The compressor list and their (initially empty)
    parameter templates are fixed here.

    Returns:
        The constructed environment.
    """
    # Same order as before: the weight compressor first, then the layer compressor.
    compressor_names = ['InsertDenseSVD', 'MLPCompression']

    # One independent parameter template per compressor.
    compressor_parameters = {
        compressor: {'layer_name': None, 'percentage': None}
        for compressor in compressor_names
    }

    env_kwargs = dict(
        compressors_list=compressor_names,
        create_model_func=create_model,
        compr_params=compressor_parameters,
        train_ds=train_ds,
        validation_ds=valid_ds,
        test_ds=test_ds,
        layer_name_list=layer_name_list,
        input_shape=input_shape,
        reward_func=reward_func,
        tuning_batch_size=tuning_batch_size,
        tuning_epochs=tuning_epochs,
        tuning_mode=tuning_mode,
        current_state_source=current_state_source,
        next_state_source=next_state_source,
        num_feature_maps=num_feature_maps,
        verbose=verbose,
        strategy=strategy,
        model_path=model_path,
    )
    return ModelCompressionSVDIntEnv(**env_kwargs)

def create_environments(dataset_names):
    """Create one compression environment per dataset name.

    For each dataset the data pipelines are loaded with ``tuning_batch_size``
    and ``create_model`` is bound to that dataset, so the environment receives
    a model factory that already carries its dataset arguments. All other
    settings come from the notebook-level configuration variables.

    Args:
        dataset_names: Iterable of dataset names accepted by ``load_dataset``.

    Returns:
        List of environments, in the same order as ``dataset_names``.
    """
    environments = []
    for dataset in dataset_names:
        train_ds, valid_ds, test_ds, input_shape, _ = load_dataset(dataset, tuning_batch_size)

        # Bind the dataset-specific arguments once; wrapping the result in a
        # second partial with the same keywords added nothing.
        create_model_func = partial(create_model, dataset_name=dataset, train_ds=train_ds, valid_ds=valid_ds)

        env = make_env(
                create_model=create_model_func,
                train_ds=train_ds,
                valid_ds=valid_ds,
                test_ds=test_ds,
                input_shape=input_shape,
                reward_func = calculate_reward,
                layer_name_list=layer_name_list,
                num_feature_maps=n_samples_mode,
                tuning_batch_size=tuning_batch_size,
                tuning_epochs = tuning_epochs,
                verbose=verbose,
                tuning_mode=tuning_mode,
                current_state_source=current_state,
                next_state_source=next_state,
                strategy=strategy,
                model_path=data_path)

        environments.append(env)

    return environments

# Build one environment per dataset and print the architecture of the first one's model.
envs = create_environments(dataset_names)
envs[0].model.summary()

# Observation shapes reported by the first environment.
# NOTE(review): conv_shape and dense_shape are not used again in the visible cells.
conv_shape, dense_shape = envs[0].observation_space()

# Size of the discrete action set; it defines the width of the actor's output layer.
n_actions = len(envs[0].action_space())
logger.debug(f'Number of actions {n_actions}')


Model: "LeNet"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 28, 28, 6)         156       
                                                                 
 avg_pool_1 (AveragePooling2  (None, 14, 14, 6)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 10, 10, 16)        2416      
                                                                 
 avg_pool_2 (AveragePooling2  (None, 5, 5, 16)         0         
 D)                                                              
                                                                 
 flatten (Flatten)           (None, 400)               0     

# Replay Buffer

In [5]:
def cumulative_discounted_rewards(rw, discount):
    """Return the discounted cumulative sum of ``rw`` along axis 0.

    Element ``t`` of the result is ``rw[t] + discount * rw[t+1] +
    discount**2 * rw[t+2] + ...``. It is computed by running a first-order
    IIR filter over the reversed sequence and reversing the output again.
    """
    numerator = [1]
    denominator = [1, float(-discount)]
    backwards = rw[::-1]
    accumulated = scignal.lfilter(numerator, denominator, backwards, axis=0)
    return accumulated[::-1]

class ReplayBufferMultipleDatasetsPPO(object):
    """On-policy PPO buffer that keeps a separate store for every dataset.

    Scalar quantities (action, reward, value, log-probability, advantage,
    return) live in arrays of shape ``(num_datasets, size)``; states are kept
    in one Python list per dataset because their shapes may differ.

    Args:
        size: Number of transitions each dataset row can hold.
        dataset_names: Names of the datasets; each one gets its own row.
        gamma: Discount factor used for returns and advantages.
        lam: Extra discount applied (together with ``gamma``) to the advantages.
    """

    def __init__(self, size, dataset_names, gamma=0.99, lam=0.95):

        self.dataset_names = dataset_names
        self.gamma = gamma
        self.lam = lam
        self._maxsize = size

        # Dictionary to get the row index of each dataset.
        self.dataset_dict = dict(zip(dataset_names, range(len(self.dataset_names))))

        # Create variables to store data.
        self.reset()

    def __len__(self):
        """Total number of stored states over all datasets."""
        return sum(len(self._states[name]) for name in self.dataset_names)

    @staticmethod
    def _as_scalar(value):
        """Convert a Python number or a size-1 array/tensor into a Python scalar."""
        return np.asarray(value).item()

    def reset(self):
        """Drop every stored transition and rewind all write cursors."""
        # Use the names given to this instance, not a notebook-level global.
        self._states = {name: [] for name in self.dataset_names}
        num_datasets = len(self.dataset_names)
        self._actions = np.zeros((num_datasets, self._maxsize), dtype=np.int32)
        self._advantages = np.zeros((num_datasets, self._maxsize), dtype=np.float32)
        self._rewards = np.zeros((num_datasets, self._maxsize), dtype=np.float32)
        self._returns = np.zeros((num_datasets, self._maxsize), dtype=np.float32)
        self._values = np.zeros((num_datasets, self._maxsize), dtype=np.float32)
        self._logprobs = np.zeros((num_datasets, self._maxsize), dtype=np.float32)

        # Next write position and start of the currently open trajectory, per dataset.
        self._next_idx = np.zeros((num_datasets), dtype=np.int32)
        self._trajectory_idx = np.zeros((num_datasets), dtype=np.int32)

    def add(self, s, a, rw, value, logprobs, dataset_name):
        """Store one transition in the row of ``dataset_name``.

        ``a``, ``rw``, ``value`` and ``logprobs`` may be Python numbers or
        size-1 arrays/tensors. ``value`` is used by ``finish_trajectory`` as
        the value estimate of state ``s``.
        """
        dataset_index = self.dataset_dict[dataset_name]
        # Write only at this dataset's own cursor. Indexing with the whole
        # cursor array wrote at every dataset's position and overwrote
        # earlier samples.
        slot = self._next_idx[dataset_index]

        self._states[dataset_name].append(s)
        self._actions[dataset_index, slot] = self._as_scalar(a)
        self._rewards[dataset_index, slot] = self._as_scalar(rw)
        self._values[dataset_index, slot] = self._as_scalar(value)
        self._logprobs[dataset_index, slot] = self._as_scalar(logprobs)

        # Each row holds `_maxsize` entries, so that is where the cursor wraps
        # (dividing by the number of datasets gave a fractional, too small modulus).
        self._next_idx[dataset_index] = (slot + 1) % self._maxsize

    def finish_trajectory(self, dataset_name, last_value=0):
        """Close the open trajectory of ``dataset_name`` and fill in its advantages and returns.

        ``last_value`` is appended after the final reward and value of the
        trajectory (use 0 when the trajectory ended in a terminal state).
        """
        dataset_index = self.dataset_dict[dataset_name]

        # When the row has just been filled, the cursor has wrapped to 0; the
        # number of stored states then marks the real end of the trajectory.
        path_end = max(self._next_idx[dataset_index], len(self._states[dataset_name]))
        path_slice = slice(self._trajectory_idx[dataset_index], path_end)

        # Retrieve rewards and V(s)
        rewards = np.append(self._rewards[dataset_index][path_slice], last_value)
        values = np.append(self._values[dataset_index][path_slice], last_value)

        # Calculate advantage and reward to go.
        deltas = rewards[:-1] + self.gamma * values[1:] - values[:-1]
        self._advantages[dataset_index][path_slice] = cumulative_discounted_rewards(deltas, self.gamma * self.lam)
        self._returns[dataset_index][path_slice] = cumulative_discounted_rewards(rewards, self.gamma)[:-1]

        self._trajectory_idx[dataset_index] = self._next_idx[dataset_index]

    def sample(self, batch_size):
        """Draw up to ``batch_size`` transitions, split evenly over the datasets.

        Each dataset contributes ``batch_size // num_datasets`` transitions
        chosen without replacement, or all of its transitions when it holds
        no more than that.

        Returns:
            Tuple of tensors ``(states, actions, advantages, returns, logprobs)``.
            States are squeezed, stacked as a ragged tensor, given a trailing
            axis of size 1 and converted to a dense tensor.
        """
        s = []
        actions = []
        advantages = []
        returns = []
        logprobs = []

        num_datasets = len(self.dataset_names)
        batch_counter = 0
        recommended_batch_size = batch_size//num_datasets
        for dataset_name in self._states.keys():
            if batch_counter + recommended_batch_size > batch_size:
                recommended_batch_size = batch_size - batch_counter

            num_stored = len(self._states[dataset_name])
            # Take the recommended share, or everything if the store is not larger than it.
            num_to_draw = recommended_batch_size if recommended_batch_size < num_stored else num_stored
            batch = np.random.choice(num_stored, num_to_draw, replace=False)
            batch_counter += num_to_draw

            for batch_element in batch:
                # Remove dimensions of size 1 so that it can be stacked.
                s.append(tf.squeeze(self._states[dataset_name][batch_element]))

            dataset_index = self.dataset_dict[dataset_name]
            actions.extend(self._actions[dataset_index][batch])
            advantages.extend(self._advantages[dataset_index][batch])
            returns.extend(self._returns[dataset_index][batch])
            logprobs.extend(self._logprobs[dataset_index][batch])

        # Stack feature maps and add depth of 1.
        s = tf.expand_dims(tf.ragged.stack(s), axis=-1)
        return (s.to_tensor(),
                tf.convert_to_tensor(actions, dtype=tf.int32),
                tf.convert_to_tensor(advantages, dtype=tf.float32),
                tf.convert_to_tensor(returns, dtype=tf.float32),
                tf.convert_to_tensor(logprobs, dtype=tf.float32))
    
    
# Pass the configured PPO discount settings explicitly; without them the
# buffer falls back to its own defaults and the notebook-level `gamma` / `lam`
# values are never used.
exp_replay = ReplayBufferMultipleDatasetsPPO(replay_num_samples, dataset_names, gamma=gamma, lam=lam)

# Agent

In [6]:
class PPOAgent:
    """Actor and critic networks that share a convolutional feature extractor."""

    def __init__(self, name, input_channels, n_actions):
        """Build the actor and critic Keras models.

        Args:
            name: Prefix of the actor model's name (the critic is always named 'critic').
            input_channels: Number of channels of the input feature maps.
            n_actions: Number of discrete actions, i.e. the number of actor logits.
        """

        # Height and width are left undefined so that inputs of any spatial size are accepted.
        frames = tf.keras.layers.Input(shape=(None, None, input_channels))
        x = tf.keras.layers.Conv2D(64, 3, strides=1, activation='relu')(frames)
        x = tf.keras.layers.Conv2D(64, 3, strides=1, activation='relu')(x)
        x = tf.keras.layers.Conv2D(64, 3, strides=1, activation='relu')(x)
        # Project-specific layer; the model summary printed below this cell shows a
        # fixed-length output of 5440 features for these bin sizes.
        x = ROIEmbedding(n_bins=[(8,8),(4,4), (2,2), (1,1)])(x)
        # Critic head: one linear output.
        c = tf.keras.layers.Dense(256, activation='relu')(x)
        critic_output = tf.keras.layers.Dense(1, activation='linear')(c)
        # Actor head: one unnormalised logit per action.
        a = tf.keras.layers.Dense(256, activation='relu')(x)
        actor_output =  tf.keras.layers.Dense(n_actions, activation='linear')(a)
    
        # Both models start from the same input, so the convolutional layers are shared.
        self.actor = tf.keras.Model(inputs=frames, outputs=actor_output, name=name+'actor')
        self.critic = tf.keras.Model(inputs=frames, outputs=critic_output, name='critic')
        self.n_actions = n_actions
        self.logger = logging.getLogger(__name__)


    def sample_actions(self, states, greedy=False):
        """Choose one action per state from the actor's logits.

        Args:
            states: Batch of states accepted by the actor model.
            greedy: If True, pick the most probable action; otherwise draw
                the action from the categorical distribution given by the logits.

        Returns:
            Tuple ``(logits, action)``.
        """
        logits = self.actor(states)
        self.logger.debug(f'Logits are {logits}')
        if greedy:
            softmax_prob = tf.nn.softmax(logits)
            self.logger.debug(f'Softmax probabilities are {softmax_prob}.')
            action = tf.math.argmax(softmax_prob, axis=-1)
            self.logger.debug(f'Chosen action due to high probability was {action}.')
        else:
            # One sample per batch row; squeeze removes the sample axis.
            action = tf.squeeze(tf.random.categorical(logits, 1), axis=1)
            self.logger.debug(f'Random chosen action was {action}.')
        return logits, action

input_channels = 1
agent = PPOAgent(name="PPO", input_channels=input_channels, n_actions=n_actions)

# Resume from a previous run when checkpoints exist. `Exception` instead of a
# bare except, so that Ctrl-C is not swallowed; the reason is logged so that a
# broken checkpoint is not mistaken for a missing one.
try:
    agent.actor.load_weights(agents_path+'actor.chkpt')
    agent.critic.load_weights(agents_path+'critic.chkpt')
except Exception as error:
    print('No saved model was found.')
    logging.getLogger(__name__).info(f'Agent weights were not restored: {error!r}')

# summary() prints by itself and returns None, so it is not wrapped in print().
agent.actor.summary()
agent.critic.summary()


No saved model was found.
Model: "PPOactor"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, None, None, 1)]   0         
                                                                 
 conv2d (Conv2D)             (None, None, None, 64)    640       
                                                                 
 conv2d_1 (Conv2D)           (None, None, None, 64)    36928     
                                                                 
 conv2d_2 (Conv2D)           (None, None, None, 64)    36928     
                                                                 
 roi_embedding (ROIEmbedding  (None, 5440)             0         
 )                                                               
                                                                 
 dense_2 (Dense)             (None, 256)               1392896   
                                

# Sample generators

In [7]:

def get_log_prob(logits, a):
    """Return the log-probability of actions ``a`` under the policy given by ``logits``.

    The logits are turned into log-probabilities with a log-softmax and a
    one-hot mask of width ``n_actions`` selects the entry of each action.
    """
    action_mask = tf.one_hot(a, n_actions)
    all_log_probs = tf.nn.log_softmax(logits)
    return tf.reduce_sum(action_mask * all_log_probs, axis=1)

def play_and_record(env, run_id, test_number, dataset_name, save_name, n_games=1, save_replay=False, greedy=False):
    """Play ``n_games`` compression games and append one statistics row per game to a CSV file.

    In every game the agent picks one action per entry of
    ``env.layer_name_list``. When ``save_replay`` is True each transition is
    stored in the global ``exp_replay`` buffer and the trajectory is closed at
    the end of the game.

    Args:
        env: Compression environment to act in.
        run_id: Identifier written to the ``run_id`` column.
        test_number: Value written to the ``test_number`` column.
        dataset_name: Dataset of ``env``; selects the replay-buffer row.
        save_name: Path of the CSV file the statistics are appended to.
        n_games: Number of games to play.
        save_replay: Whether transitions are recorded for training.
        greedy: Whether the agent acts greedily instead of sampling.

    Returns:
        Tuple with the mean reward, mean test accuracy after compression and
        mean number of weights after compression over the played games.
    """
    import os  # local import: only needed to check whether the CSV already exists

    logger = logging.getLogger(__name__)

    # initial state
    s = env.reset()
    rewards = []
    acc = []
    weights = []
    total_time = 0

    for game_id in range(n_games):
        start = datetime.now()
        for layer_number in range(1, len(env.layer_name_list)+1):
            tf.keras.backend.clear_session()

            # Get the current layer name
            current_layer_name = env.layer_name_list[env._layer_counter]
            # Choose action
            logits, action = agent.sample_actions(s, greedy=greedy)
            logger.debug(f'Action for layer {current_layer_name} layer is {action}')

            if save_replay:
                # The buffer computes advantages from V(s_t), so the critic is
                # evaluated on the state the action was chosen in, not on the
                # state that follows it.
                value_s = agent.critic(s)
                log_probability = get_log_prob(logits, action)

            new_s, r, done, info = env.step(action[0].numpy())
            logger.debug(f'Iteration {game_id} - Layer {current_layer_name} {layer_number}/{len(env.layer_name_list)}\tChosen action {action} has {r} reward.')
            logger.debug(info)

            if save_replay:
                exp_replay.add(s, action, r, value_s, log_probability, dataset_name)

            # The next state is read from the environment, as before.
            new_s = env.get_state('current_state')
            s = new_s

            if done:
                s = env.reset()
                break

        # Only close a trajectory when transitions were actually recorded.
        if save_replay:
            exp_replay.finish_trajectory(last_value=0, dataset_name=dataset_name)

        actions = info['actions']
        # Convert actions to str in one column.
        info['actions'] = ','.join(map(str, actions))
        info['run_id'] = run_id
        info['test_number'] = test_number
        info['game_id'] = game_id
        info['dataset'] = dataset_name
        del info['layer_name']
        reward = info['reward']

        rewards.append(reward)
        acc.append(info['test_acc_after'])
        weights.append(info['weights_after'])
        new_row = pd.DataFrame(info, index=[0])
        # Write the column header only when the file is created; appending it
        # with every row scatters header lines through the data.
        write_header = not os.path.isfile(save_name)
        new_row.to_csv(save_name, mode='a', index=False, header=write_header)
        end = datetime.now()
        time_diff = (end - start).total_seconds()
        total_time += time_diff
        logger.info(f'Took {time_diff} seconds for one compression.')

    logger.info(f'Evaluation of {n_games} took {total_time} secs. An average of {total_time/n_games} secs per game.')

    return np.mean(rewards), np.mean(acc), np.mean(weights)

# Training function

In [8]:
# Separate Adam optimizers for the actor and the critic, each with its own
# learning rate. `clipvalue=1.0` asks Keras to clip the gradients element-wise.
policy_optimizer = tf.keras.optimizers.Adam(policy_learning_rate, clipvalue=1.0)
value_function_optimizer = tf.keras.optimizers.Adam(value_function_learning_rate,clipvalue=1.0)


@tf.function#(experimental_relax_shapes=True)
def train_policy(states, actions, log_probabilities, advantages, entropy_coefficient=0.01):
    """Run one gradient step on the actor with the clipped PPO objective.

    Args:
        states: Batch of states.
        actions: Actions taken in ``states``.
        log_probabilities: Log-probabilities of ``actions`` under the policy
            that collected the data.
        advantages: Advantage estimate of each action.
        entropy_coefficient: Weight of the entropy bonus.

    Returns:
        Tuple ``(policy_loss, kl)`` where ``kl`` is the mean difference between
        the old and the updated log-probabilities of the taken actions.
    """
    with tf.GradientTape() as tape:
        logits = agent.actor(states)
        ratio = tf.exp(get_log_prob(logits, actions) - log_probabilities)
        # Largest / smallest surrogate value the clipping allows for each sample.
        clipped_advantages = tf.where(advantages > 0,
                                      (1 + clip_ratio) * advantages,
                                      (1 - clip_ratio) * advantages)
        surrogate = tf.reduce_mean(tf.minimum(ratio * advantages, clipped_advantages))

        # Mean per-sample entropy of the policy. It is SUBTRACTED from the loss
        # so that minimising the loss encourages exploration; adding it (as the
        # code did before) drives the policy towards a deterministic choice.
        per_sample_entropy = -tf.reduce_sum(tf.nn.softmax(logits) * tf.nn.log_softmax(logits), axis=-1)
        entropy = tf.reduce_mean(per_sample_entropy)

        policy_loss = -surrogate - entropy_coefficient * entropy

    gradients = tape.gradient(policy_loss, agent.actor.trainable_weights)
    policy_optimizer.apply_gradients(zip(gradients, agent.actor.trainable_weights))

    # Evaluated after the update; the caller uses it for early stopping.
    kl = tf.reduce_mean(log_probabilities - get_log_prob(agent.actor(states), actions))
    return policy_loss, kl

@tf.function#(experimental_relax_shapes=True)
def train_value_function(states, returns):
    """Run one gradient step on the critic, regressing its output onto ``returns``.

    Args:
        states: Batch of states.
        returns: One return target per state, shape ``(batch,)``.

    Returns:
        The mean squared error before the update.
    """
    with tf.GradientTape() as tape:
        # The critic ends in Dense(1), so its output is (batch, 1). Drop the
        # last axis before subtracting: (batch,) - (batch, 1) would broadcast
        # to (batch, batch) and compare every return with every prediction.
        predicted_values = tf.squeeze(agent.critic(states), axis=-1)
        value_loss = tf.reduce_mean((returns - predicted_values)**2)

    gradients = tape.gradient(value_loss, agent.critic.trainable_weights)
    value_function_optimizer.apply_gradients(zip(gradients, agent.critic.trainable_weights))
    return value_loss


# Training loop

In [9]:
# Do not emit NumPy warnings for divisions by zero or invalid floating-point operations.
np.seterr(divide='ignore', invalid='ignore')


num_datasets = len(dataset_names)

# One row per test round, plus row 0 for the values measured before any compression.
num_tests = (rl_iterations//test_frequency) + 1

# Test history per dataset: number of weights, test accuracy and reward.
weights_history_tests = np.zeros(shape=(num_tests, num_datasets))
acc_history_tests = np.zeros(shape=(num_tests, num_datasets))
rw_history_tests = np.zeros(shape=(num_tests, num_datasets))
# Row that the next test round writes to (row 0 is the baseline filled below).
test_counter = 1

# One slot for every possible actor update over the whole run.
policy_losses = np.zeros(rl_iterations*train_policy_iterations, dtype=np.float32)

# Baseline row: statistics each environment reports for the uncompressed model.
for idx, env in enumerate(envs):
    weights_history_tests[0, idx ] = env.weights_before
    acc_history_tests[0, idx] = env.test_acc_before

# Main PPO loop. In every iteration: collect games on each dataset, update the
# actor and the critic from the replay buffer, clear the buffer and, every
# `test_frequency` iterations, evaluate the greedy policy, save the agent and
# refresh the progress figure.
#
# The progress bar shows the last three test means of reward (postfix[0]),
# number of weights (postfix[1]) and accuracy (postfix[2]); the initial
# postfix entries are listed in that same order.
with tqdm(total=rl_iterations,
          bar_format="{l_bar}{bar}|{n}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}, Last 3 RW: {postfix[0][0]:.2f}, {postfix[0][1]:.2f} & {postfix[0][2]:.2f} W: {postfix[1][0]:.2f}, {postfix[1][1]:.2f} & {postfix[1][2]:.2f} Acc: {postfix[2][0]:.2f}, {postfix[2][1]:.2f} & {postfix[2][2]:.2f}].",
          postfix=[
              dict({0: 0, 1: 0, 2: np.mean(rw_history_tests[0])}),
              dict({0: 0, 1: 0, 2: np.mean(weights_history_tests[0])}),
              dict({0: 0, 1: 0, 2: np.mean(acc_history_tests[0])})]) as t:

    for i in range(1, rl_iterations+1):
        # Collect on-policy samples from every dataset.
        for idx, env in enumerate(envs):
            dataset_name = dataset_names[idx]
            logger.info(f'Generating samples for dataset {dataset_name}.')
            rw, acc, weights = play_and_record(env, run_id=run_id, test_number=i, dataset_name=dataset_name, save_name=exploration_filename, n_games=n_games_training, greedy=False,  save_replay=True)
            logger.info(f'Training stats for dataset {dataset_name} were rw:{rw}, acc:{acc}, w:{weights}.')

        # Train actor
        for idx_policy_loss in range(train_policy_iterations):
            states, actions, advantages, returns, logprobs = exp_replay.sample(rl_batch_size)
            policy_loss, kl = train_policy(states=states, actions=actions, log_probabilities=logprobs, advantages=advantages)
            # `i` starts at 1, so shift it to get a zero-based slot; without the
            # shift the last iteration writes past the end of the array.
            policy_losses[(i - 1)*train_policy_iterations + idx_policy_loss] = policy_loss
            # Stop early when the policy has moved too far from the data-collecting one.
            if kl > 1.5 * target_kl:
                break

        # Train critic
        for _ in range(train_value_iterations):
            states, _, _, returns, _ = exp_replay.sample(rl_batch_size)
            train_value_function(states=states, returns=returns)

        # Clear replay data.
        exp_replay.reset()

        if i % test_frequency == 0:
            logger.info(f'Testing datasets.')
            for idx, env in enumerate(envs):
                dataset_name = dataset_names[idx]
                logger.info(f'Testing agent for dataset {dataset_name}.')
                rw, acc, weights = play_and_record(env, run_id=run_id, test_number=i, dataset_name=dataset_name,save_name=test_filename,n_games=n_games_testing, greedy=True, save_replay=False)
                logger.info(f'Testing stats for dataset {dataset_name} were rw:{rw}, acc:{acc}, w:{weights}.')
                rw_history_tests[test_counter, idx] = rw
                acc_history_tests[test_counter, idx] = acc
                weights_history_tests[test_counter, idx] = weights

            agent.actor.save_weights(agents_path+'actor.chkpt')
            agent.critic.save_weights(agents_path+'critic.chkpt')

            # Refresh the three most recent test means of every metric. Rows
            # before the start of the history are shown as 0; a negative NumPy
            # index would silently wrap to the end of the array instead of
            # raising IndexError.
            tracked_histories = (rw_history_tests, weights_history_tests, acc_history_tests)
            for metric_slot, history in enumerate(tracked_histories):
                for age in range(3):
                    row = test_counter - age
                    t.postfix[metric_slot][2 - age] = np.mean(history[row]) if row >= 0 else 0

            #clear_output(True)

            test_counter += 1
            fig = plt.figure(figsize=(12,6))
            ax1 = fig.add_subplot(131)
            ax2 = fig.add_subplot(132)
            ax3 = fig.add_subplot(133)
            ax1.title.set_text('Accuracy')
            for idx, dataset_name in enumerate(dataset_names):
                ax1.plot(acc_history_tests[:test_counter, idx])
            ax1.legend(dataset_names)
            ax2.title.set_text('Weights')
            for idx, dataset_name in enumerate(dataset_names):
                ax2.plot(weights_history_tests[:test_counter, idx])
            ax2.legend(dataset_names)
            ax3.title.set_text('Reward')
            for idx, dataset_name in enumerate(dataset_names):
                ax3.plot(rw_history_tests[:test_counter, idx])
            ax3.legend(dataset_names)
            plt.xlabel('Epochs')
            plt.savefig(data_path + f'figures\\{agent_name}.png', dpi=1200)
            plt.close()

        t.update()
    
         

  2%|▏         |17/1000 [1:08:27<65:58:53, 241.64s/it, Last 3 RW: 0.10, 0.10 & 0.10 W: 61253.00, 61253.00 & 61253.00 Acc: 0.54, 0.54 & 0.54].


KeyboardInterrupt: 