# INVASE algoritmus (KERAS változat)


Reference: Jinsung Yoon, James Jordon, Mihaela van der Schaar, 
           "INVASE: Instance-wise Variable Selection using Neural Networks," 

           International Conference on Learning Representations (ICLR), 2019.

Paper Link: https://openreview.net/forum?id=BJg_roAcK7

Contact: jsyoon0823@gmail.com

# **Data generation**


---



In [None]:
# Necessary packages
import numpy as np 

In [None]:
"""
Generating 6 synthetic datasets

x ~ N(0,I) where d = 11 or 100

P(y[:, 1] = 1 | x) = 1/(1+logit) (so P(y[:, 0] = 1 | x) = logit/(1+logit)), where logit for each synthetic dataset is
- syn1: logit = exp(x1 * x2)
- syn2: logit = exp(x3^2 + x4^2 + x5^2 + x6^2 -4)
- syn3: logit = exp(-10 sin(0.2 * x7) + |x8| + x9 + exp(-x10) - 2.4)
- syn4: If x11 < 0, follows syn1, else if x11 >= 0, follows syn2
- syn5: If x11 < 0, follows syn1, else if x11 >= 0, follows syn3
- syn6: If x11 < 0, follows syn2, else if x11 >= 0, follows syn3
"""

def generate_x (n, dim):
  """Draw a feature matrix with independent standard-normal entries.

  Args:
    - n: number of samples (rows)
    - dim: number of features (columns)

  Returns:
    - array of shape (n, dim) drawn with np.random.randn, i.e. from NumPy's
      global random state
  """
  return np.random.randn(n, dim)

def _logit_syn1(x):
  """Logit of syn1: exp(x[:, 0] * x[:, 1])."""
  return np.exp(x[:, 0]*x[:, 1])


def _logit_syn2(x):
  """Logit of syn2: exp(sum of squares of columns 2..5, minus 4)."""
  return np.exp(np.sum(x[:, 2:6]**2, axis = 1) - 4.0)


def _logit_syn3(x):
  """Logit of syn3, built from columns 6..9."""
  return np.exp(-10 * np.sin(0.2*x[:, 6]) + abs(x[:, 7]) + \
                x[:, 8] + np.exp(-x[:, 9]) - 2.4)


# Datasets driven by a single logit for every sample.
_SINGLE_LOGITS = {'syn1': _logit_syn1,
                  'syn2': _logit_syn2,
                  'syn3': _logit_syn3}

# Datasets that switch on the sign of x[:, 10]:
# (logit used when x[:, 10] < 0, logit used otherwise).
_SWITCHED_LOGITS = {'syn4': (_logit_syn1, _logit_syn2),
                    'syn5': (_logit_syn1, _logit_syn3),
                    'syn6': (_logit_syn2, _logit_syn3)}


def generate_y(x, data_type):
  """Sample one-hot binary labels for one of the synthetic datasets.

  Args:
    - x: feature matrix of shape (n, dim); syn1-syn3 read columns 0..9,
         syn4-syn6 additionally read column 10
    - data_type: one of 'syn1', ..., 'syn6'

  Returns:
    - y: array of shape (n, 2); y[:, 0] ~ Bernoulli(logit / (1 + logit)) and
         y[:, 1] = 1 - y[:, 0]

  Raises:
    - ValueError: if data_type is not one of the six supported names
  """
  # number of samples
  n = x.shape[0]

  # Logit computation
  if data_type in _SINGLE_LOGITS:
    logit = _SINGLE_LOGITS[data_type](x)
  elif data_type in _SWITCHED_LOGITS:
    negative_logit, non_negative_logit = _SWITCHED_LOGITS[data_type]
    # Select per sample instead of mixing with 0/1 masks: an overflowed (inf)
    # logit in the branch that is NOT selected would otherwise turn into
    # inf * 0 = NaN and poison the probability of that sample.
    logit = np.where(x[:, 10] < 0, negative_logit(x), non_negative_logit(x))
  else:
    raise ValueError("Unknown data_type %r; expected one of 'syn1' to 'syn6'"
                     % (data_type,))

  # Compute P(Y=0|X)
  prob_0 = np.reshape((logit / (1+logit)), [n, 1])

  # Sampling process (draws from NumPy's global random state)
  y = np.zeros([n, 2])
  y[:, 0] = np.reshape(np.random.binomial(1, prob_0), [n,])
  y[:, 1] = 1-y[:, 0]

  return y

def generate_ground_truth(x, data_type):
  """Build the 0/1 matrix marking which features are relevant per sample.

  Args:
    - x: feature matrix of shape (n, d)
    - data_type: synthetic dataset name ('syn1' to 'syn6')

  Returns:
    - array of shape (n, d) with 1 at relevant features and 0 elsewhere;
      any other data_type yields an all-zero matrix
  """
  n_samples, n_features = x.shape
  relevant = np.zeros([n_samples, n_features])

  # Column ranges used by the three base datasets
  first_pair, middle_four, last_four = slice(0, 2), slice(2, 6), slice(6, 10)

  # Datasets whose relevant columns are the same for every sample
  fixed_columns = {'syn1': first_pair,
                   'syn2': middle_four,
                   'syn3': last_four}
  # Datasets that switch on the sign of column 10:
  # (columns when x[:, 10] < 0, columns when x[:, 10] >= 0)
  switched_columns = {'syn4': (first_pair, middle_four),
                      'syn5': (first_pair, last_four),
                      'syn6': (middle_four, last_four)}

  if data_type in fixed_columns:
    relevant[:, fixed_columns[data_type]] = 1
  elif data_type in switched_columns:
    is_negative = x[:, 10] < 0
    is_non_negative = x[:, 10] >= 0
    # The switching feature itself is always relevant
    relevant[:, 10] = 1
    columns_if_negative, columns_otherwise = switched_columns[data_type]
    relevant[is_negative, columns_if_negative] = 1
    relevant[is_non_negative, columns_otherwise] = 1

  return relevant

    
def generate_dataset(n = 10000, dim = 11, data_type = 'syn1', seed = 0):
  """Create features, labels and ground-truth relevance for one dataset.

  Args:
    - n: number of samples
    - dim: number of features
    - data_type: synthetic dataset name passed to generate_y and
                 generate_ground_truth
    - seed: value used to re-seed NumPy's global random state

  Returns:
    - x: features produced by generate_x(n, dim)
    - y: labels produced by generate_y(x, data_type)
    - ground_truth: relevance matrix from generate_ground_truth(x, data_type)
  """
  # Re-seed the global generator so repeated calls with equal arguments
  # reproduce the same draws
  np.random.seed(seed)

  features = generate_x(n, dim)
  labels = generate_y(features, data_type)
  relevance = generate_ground_truth(features, data_type)

  return features, labels, relevance

# **Utility functions**
(1) Feature performance metrics

(2) Prediction performance metrics

(3) Bernoulli sampling


---



In [None]:
# Necessary packages
from sklearn.metrics import roc_auc_score, average_precision_score, accuracy_score

In [None]:
def feature_performance_metric (ground_truth, importance_score):
  """Per-sample TPR and FDR (in percent) of the selected features.

  Args:
    - ground_truth: (n, d) matrix, 1 where a feature is truly relevant
    - importance_score: (n, d) matrix, 1 where a feature was selected

  Returns:
    - mean_tpr: mean over samples of the true positive rate
    - std_tpr: standard deviation over samples of the true positive rate
    - mean_fdr: mean over samples of the false discovery rate
    - std_fdr: standard deviation over samples of the false discovery rate
  """
  selected = np.asarray(importance_score)
  # Only as many ground-truth rows as there are score rows are evaluated
  truth = np.asarray(ground_truth)[:selected.shape[0]]

  # Keeps the ratios finite for rows with no relevant / no selected feature
  eps = 1e-8

  # All rows are reduced at once instead of looping over samples in Python
  # tpr: selected-and-relevant / relevant
  tpr_nom = np.sum(selected * truth, axis = 1).astype(float)
  tpr_den = np.sum(truth, axis = 1).astype(float)
  tpr = 100 * tpr_nom / (tpr_den + eps)

  # fdr: selected-but-irrelevant / selected
  fdr_nom = np.sum(selected * (1-truth), axis = 1).astype(float)
  fdr_den = np.sum(selected, axis = 1).astype(float)
  fdr = 100 * fdr_nom / (fdr_den + eps)

  mean_tpr = np.mean(tpr)
  std_tpr = np.std(tpr)
  mean_fdr = np.mean(fdr)
  std_fdr = np.std(fdr)

  return mean_tpr, std_tpr, mean_fdr, std_fdr

In [None]:
def prediction_performance_metric (y_test, y_hat):
  """Score predictions using column 1 of the labels and of the predictions.

  Args:
    - y_test: label matrix; column 1 is used as the binary target
    - y_hat: prediction matrix; column 1 is used as the predicted score

  Returns:
    - auc: roc_auc_score of the scores
    - apr: average_precision_score of the scores
    - acc: accuracy_score of the scores thresholded at 0.5
  """
  target = y_test[:, 1]
  score = y_hat[:, 1]
  # Hard decision: 1.0 where the score exceeds 0.5, else 0.0
  decision = 1.*(score > 0.5)

  auc = roc_auc_score (target, score)
  apr = average_precision_score (target, score)
  acc = accuracy_score (target, decision)

  return auc, apr, acc

In [None]:
def bernoulli_sampling (prob):
  """Draw one Bernoulli sample per entry of a 2-D probability matrix.

  Args:
    - prob: (n, d) matrix of success probabilities

  Returns:
    - samples: (n, d) matrix of 0/1 draws taken from NumPy's global
               random state
  """
  n_rows, n_cols = prob.shape
  return np.random.binomial(n = 1, p = prob, size = (n_rows, n_cols))

# **INVASE Module**

---



In [None]:
# Necessary packages
from keras.layers import Input, Dense, Multiply
from keras.layers import BatchNormalization
from keras.models import Sequential, Model
from keras.optimizers import Adam
from keras import regularizers
from keras import backend as K

import tensorflow as tf
import numpy as np

In [None]:
class invase():
  """INVASE (actor + critic + baseline) or INVASE- (actor + critic) model.

  The actor maps features to per-feature selection probabilities, the critic
  predicts the label from the features multiplied element-wise by a sampled
  0/1 selection, and (for 'invase' only) the baseline predicts the label from
  all features.

  Args:
    - x_train: training features; only x_train.shape[1] is read here
    - y_train: training labels; only y_train.shape[1] is read here
    - model_type: 'invase' or 'invase_minus'
    - model_parameters: dict with the keys
      - lamda: weight of the selection-probability penalty in the actor loss
      - actor_h_dim: hidden units per actor layer
      - critic_h_dim: hidden units per critic / baseline layer
      - n_layer: number of layers (n_layer - 2 extra hidden layers are added)
      - batch_size: number of samples per training step
      - iteration: number of training steps
      - activation: activation of the hidden layers
      - learning_rate: learning rate of the Adam optimizers

  Raises:
    - ValueError: if model_type is not 'invase' or 'invase_minus'
  """

  def __init__(self, x_train, y_train, model_type, model_parameters):

    # Fail early: every later branch only handles these two names and would
    # otherwise hit an undefined variable in the middle of training.
    if model_type not in ('invase', 'invase_minus'):
      raise ValueError("model_type must be 'invase' or 'invase_minus', got %r"
                       % (model_type,))

    self.lamda = model_parameters['lamda']
    self.actor_h_dim = model_parameters['actor_h_dim']
    self.critic_h_dim = model_parameters['critic_h_dim']
    self.n_layer = model_parameters['n_layer']
    self.batch_size = model_parameters['batch_size']
    self.iteration = model_parameters['iteration']
    self.activation = model_parameters['activation']
    self.learning_rate = model_parameters['learning_rate']

    self.dim = x_train.shape[1]
    self.label_dim = y_train.shape[1]

    self.model_type = model_type

    # Every model gets its own Adam instance. An optimizer keeps per-variable
    # state (slots, step counter), so one instance must not be shared between
    # models with different weights.

    # Build and compile critic
    self.critic = self.build_critic()
    self.critic.compile(loss='categorical_crossentropy',
                        optimizer=Adam(self.learning_rate), metrics=['acc'])

    # Build and compile the actor
    self.actor = self.build_actor()
    self.actor.compile(loss=self.actor_loss,
                       optimizer=Adam(self.learning_rate))

    if self.model_type == 'invase':
      # Build and compile the baseline
      self.baseline = self.build_baseline()
      self.baseline.compile(loss='categorical_crossentropy',
                            optimizer=Adam(self.learning_rate),
                            metrics=['acc'])


  def actor_loss(self, y_true, y_pred):
    """Policy-gradient loss of the actor.

    Args:
      - y_true: the matrix assembled in train(), concatenated along axis 1:
                [selection | critic output | baseline output | label] for
                'invase', [selection | critic output | label] for
                'invase_minus'
      - y_pred: selection probabilities produced by the actor

    Returns:
      - custom_actor_loss: scalar loss tensor
    """
    # Column offsets of the pieces packed into y_true
    critic_end = self.dim + self.label_dim
    baseline_end = self.dim + 2*self.label_dim

    # Actor output (the sampled 0/1 selection)
    actor_out = y_true[:, :self.dim]
    # Critic output
    critic_out = y_true[:, self.dim:critic_end]

    if self.model_type == 'invase':
      # Baseline output
      baseline_out = y_true[:, critic_end:baseline_end]
      # Ground truth label
      y_out = y_true[:, baseline_end:]
    elif self.model_type == 'invase_minus':
      # Ground truth label
      y_out = y_true[:, critic_end:]

    # Critic loss (cross entropy; 1e-8 guards log(0))
    critic_loss = -tf.reduce_sum(y_out * tf.math.log(critic_out + 1e-8),
                                 axis = 1)

    if self.model_type == 'invase':
      # Baseline loss
      baseline_loss = -tf.reduce_sum(y_out * tf.math.log(baseline_out + 1e-8),
                                     axis = 1)
      # Reward: how much the critic beats the baseline
      reward = -(critic_loss - baseline_loss)
    elif self.model_type == 'invase_minus':
      reward = -critic_loss

    # Log-likelihood of the sampled selection under the actor's probabilities
    log_likelihood = tf.reduce_sum(
        actor_out * tf.math.log(y_pred + 1e-8) + \
        (1-actor_out) * tf.math.log(1-y_pred + 1e-8), axis = 1)

    # Policy gradient objective minus the selection-probability penalty
    objective = reward * log_likelihood - \
                self.lamda * tf.reduce_mean(y_pred, axis = 1)

    # The objective is maximised, so its negative mean is the loss
    custom_actor_loss = tf.reduce_mean(-objective)

    return custom_actor_loss


  def build_actor(self):
    """Build the actor: features -> per-feature selection probabilities.

    Returns:
      - Keras Model with one (dim,) input and a (dim,) sigmoid output
    """
    actor_model = Sequential()
    actor_model.add(Dense(self.actor_h_dim, activation=self.activation,
                          kernel_regularizer=regularizers.l2(1e-3),
                          input_dim = self.dim))
    for _ in range(self.n_layer - 2):
      actor_model.add(Dense(self.actor_h_dim, activation=self.activation,
                            kernel_regularizer=regularizers.l2(1e-3)))
    actor_model.add(Dense(self.dim, activation = 'sigmoid',
                          kernel_regularizer=regularizers.l2(1e-3)))

    feature = Input(shape=(self.dim,), dtype='float32')
    selection_probability = actor_model(feature)

    return Model(feature, selection_probability)


  def build_critic (self):
    """Build the critic: (features, selection) -> label probabilities.

    Returns:
      - Keras Model with inputs [feature, selection], both (dim,), whose
        element-wise product feeds a network with a softmax output of size
        label_dim
    """
    critic_model = Sequential()

    critic_model.add(Dense(self.critic_h_dim, activation=self.activation,
                           kernel_regularizer=regularizers.l2(1e-3),
                           input_dim = self.dim))
    critic_model.add(BatchNormalization())
    for _ in range(self.n_layer - 2):
      critic_model.add(Dense(self.critic_h_dim, activation=self.activation,
                             kernel_regularizer=regularizers.l2(1e-3)))
      critic_model.add(BatchNormalization())
    critic_model.add(Dense(self.label_dim, activation ='softmax',
                           kernel_regularizer=regularizers.l2(1e-3)))

    ## Inputs
    # Features
    feature = Input(shape=(self.dim,), dtype='float32')
    # Binary selection
    selection = Input(shape=(self.dim,), dtype='float32')

    # Element-wise multiplication
    critic_model_input = Multiply()([feature, selection])
    y_hat = critic_model(critic_model_input)

    return Model([feature, selection], y_hat)


  def build_baseline (self):
    """Build the baseline: all features -> label probabilities.

    Returns:
      - Keras Model with one (dim,) input and a softmax output of size
        label_dim; it uses the same layer layout as the critic network
    """
    baseline_model = Sequential()

    baseline_model.add(Dense(self.critic_h_dim, activation=self.activation,
                             kernel_regularizer=regularizers.l2(1e-3),
                             input_dim = self.dim))
    baseline_model.add(BatchNormalization())
    for _ in range(self.n_layer - 2):
      baseline_model.add(Dense(self.critic_h_dim, activation=self.activation,
                               kernel_regularizer=regularizers.l2(1e-3)))
      baseline_model.add(BatchNormalization())
    baseline_model.add(Dense(self.label_dim, activation ='softmax',
                             kernel_regularizer=regularizers.l2(1e-3)))

    # Input
    feature = Input(shape=(self.dim,), dtype='float32')
    # Output
    y_hat = baseline_model(feature)

    return Model(feature, y_hat)


  def train(self, x_train, y_train):
    """Run self.iteration training steps on random mini-batches.

    Each step updates the critic, then (for 'invase') the baseline, then the
    actor. Progress is printed every 100 iterations.

    Args:
      - x_train: training features
      - y_train: training labels
    """
    for iter_idx in range(self.iteration):

      ## Train critic
      # Select a random batch of samples (drawn with replacement)
      idx = np.random.randint(0, x_train.shape[0], self.batch_size)
      x_batch = x_train[idx,:]
      y_batch = y_train[idx,:]

      # Generate a batch of selection probability.
      # verbose=0: predict() is called several times per iteration, and its
      # progress bar would otherwise flood the output.
      selection_probability = self.actor.predict(x_batch, verbose=0)
      # Sampling the features based on the selection_probability
      selection = bernoulli_sampling(selection_probability)
      # Critic loss ([loss, accuracy] because the critic tracks 'acc')
      critic_loss = self.critic.train_on_batch([x_batch, selection], y_batch)
      # Critic output
      critic_out = self.critic.predict([x_batch, selection], verbose=0)

      ## Assemble what actor_loss needs into a single y_true matrix:
      # selection, critic_out, (baseline_out) and ground truth (y_batch)
      if self.model_type == 'invase':
        # Baseline loss ([loss, accuracy])
        baseline_loss = self.baseline.train_on_batch(x_batch, y_batch)
        # Baseline output
        baseline_out = self.baseline.predict(x_batch, verbose=0)
        y_batch_final = np.concatenate((selection,
                                        np.asarray(critic_out),
                                        np.asarray(baseline_out),
                                        y_batch), axis = 1)
      elif self.model_type == 'invase_minus':
        y_batch_final = np.concatenate((selection,
                                        np.asarray(critic_out),
                                        y_batch), axis = 1)

      # Train the actor
      actor_loss = self.actor.train_on_batch(x_batch, y_batch_final)

      # Print the progress; the message is only built when it is shown
      if iter_idx % 100 == 0:
        dialog = 'Iterations: ' + str(iter_idx) + \
                 ', critic accuracy: ' + str(critic_loss[1])
        if self.model_type == 'invase':
          dialog += ', baseline accuracy: ' + str(baseline_loss[1])
        dialog += ', actor loss: ' + str(np.round(actor_loss,4))
        print(dialog)


  def importance_score(self, x):
    """Return the actor's selection probabilities for x as a NumPy array."""
    feature_importance = self.actor.predict(x)
    return np.asarray(feature_importance)


  def predict(self, x):
    """Predict label probabilities from a sampled feature selection.

    The selection is re-sampled from the actor's probabilities on every
    call, so repeated calls can return different predictions.

    Args:
      - x: features

    Returns:
      - y_hat: critic output for x with the sampled selection applied
    """
    # Generate a batch of selection probability
    selection_probability = self.actor.predict(x)
    # Sampling the features based on the selection_probability
    selection = bernoulli_sampling(selection_probability)
    # Prediction
    y_hat = self.critic.predict([x, selection])

    return np.asarray(y_hat)


# **Main**


---

(1) Data generation

(2) Train INVASE or INVASE-

(3) Evaluate INVASE on ground truth feature importance and prediction

In [None]:
# Necessary packages
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse

In [None]:
def main (args):
  """Main function for INVASE.
  
  Args:
    - data_type: synthetic data type (syn1 to syn6)
    - train_no: the number of samples for training set
    - test_no: the number of samples for testing set
    - dim: the number of features
    - model_type: invase or invase_minus
    - model_parameters:
      - actor_h_dim: hidden state dimensions for actor
      - critic_h_dim: hidden state dimensions for critic
      - n_layer: the number of layers
      - batch_size: the number of samples in mini batch
      - iteration: the number of iterations
      - activation: activation function of models
      - learning_rate: learning rate of model training
      - lamda: hyper-parameter of INVASE
    
  Returns:
    - performance:
      - mean_tpr: mean value of true positive rate
      - std_tpr: standard deviation of true positive rate
      - mean_fdr: mean value of false discovery rate
      - std_fdr: standard deviation of false discovery rate
      - auc: area under roc curve
      - apr: average precision score
      - acc: accuracy
  """
  
  # Generate dataset
  x_train, y_train, g_train = generate_dataset (n = args.train_no, 
                                                dim = args.dim, 
                                                data_type = args.data_type, 
                                                seed = 0)
  
  # The test set needs its own seed: generate_dataset re-seeds the global
  # generator, so reusing seed 0 would reproduce the training samples and
  # the model would be evaluated on data it was trained on.
  x_test, y_test, g_test = generate_dataset (n = args.test_no,
                                             dim = args.dim, 
                                             data_type = args.data_type, 
                                             seed = 1)
  
  model_parameters = {'lamda': args.lamda,
                      'actor_h_dim': args.actor_h_dim, 
                      'critic_h_dim': args.critic_h_dim,
                      'n_layer': args.n_layer,
                      'batch_size': args.batch_size,
                      'iteration': args.iteration, 
                      'activation': args.activation, 
                      'learning_rate': args.learning_rate}
  
  # Train the model
  model = invase(x_train, y_train, args.model_type, model_parameters)
 
  model.train(x_train, y_train)    
    
  ## Evaluation
  # Compute importance score and binarise it at 0.5
  g_hat = model.importance_score(x_test)
  importance_score = 1.*(g_hat > 0.5)
    
  # Evaluate the performance of feature importance
  mean_tpr, std_tpr, mean_fdr, std_fdr = \
  feature_performance_metric(g_test, importance_score)
   
  # Print the performance of feature importance.
  # '\\%' emits the same backslash-percent text as before without relying on
  # the invalid escape sequence '\%'.
  print('TPR mean: ' + str(np.round(mean_tpr,1)) + '\\%, ' + \
        'TPR std: ' + str(np.round(std_tpr,1)) + '\\%, ')
  print('FDR mean: ' + str(np.round(mean_fdr,1)) + '\\%, ' + \
        'FDR std: ' + str(np.round(std_fdr,1)) + '\\%, ')
  
  # Predict labels
  y_hat = model.predict(x_test)
    
  # Evaluate the performance of the label prediction
  auc, apr, acc = prediction_performance_metric(y_test, y_hat)
   
  # Print the performance of the label prediction
  print('AUC: ' + str(np.round(auc, 3)) + \
        ', APR: ' + str(np.round(apr, 3)) + \
        ', ACC: ' + str(np.round(acc, 3)))
  
  performance = {'mean_tpr': mean_tpr, 'std_tpr': std_tpr,
                 'mean_fdr': mean_fdr, 'std_fdr': std_fdr,
                 'auc': auc, 'apr': apr, 'acc': acc}
  
  return performance

In [None]:
# Parameters handed to the model / experiment
class args_in:
  """Plain namespace holding the settings that main() reads from `args`.

  Only annotations are declared here; the values are assigned as class
  attributes after the class definition.
  """
  # Dataset settings (passed to generate_dataset by main)
  data_type: str
  train_no: int
  test_no: int
  dim : int
  # Passed to the invase constructor as model_type
  model_type : str
  # Model hyper-parameters (collected into model_parameters by main)
  actor_h_dim : int
  critic_h_dim :int
  n_layer: int
  batch_size : int
  iteration : int
  activation : str
  learning_rate : float
  lamda : float


# Settings for this run; each entry becomes a class attribute of args_in,
# which main() reads as its `args`
_experiment_settings = {
  'data_type': 'syn1',
  'train_no': 10000,
  'test_no': 10000,
  'dim': 11,
  'model_type': 'invase',
  'actor_h_dim': 100,
  'critic_h_dim': 200,
  'n_layer': 3,
  'batch_size': 1000,
  'iteration': 10000,
  'activation': 'relu',
  'learning_rate': 0.0001,
  'lamda': 0.1,
}
for _setting_name, _setting_value in _experiment_settings.items():
  setattr(args_in, _setting_name, _setting_value)

# Run the experiment and keep the metrics dictionary returned by main()
performance = main(args_in)







[1;30;43mA streamkimeneten csak az utolsó 5000 sor látható.[0m
Iterations: 8400, critic accuracy: 0.6510000228881836, baseline accuracy: 0.9990000128746033, actor loss: -0.256
Iterations: 8500, critic accuracy: 0.6510000228881836, baseline accuracy: 1.0, actor loss: -0.306
Iterations: 8600, critic accuracy: 0.6320000290870667, baseline accuracy: 1.0, actor loss: -0.2261
Iterations: 8700, critic accuracy: 0.6349999904632568, baseline accuracy: 1.0, actor loss: -0.2165
Iterations: 8800, critic accuracy: 0.6589999794960022, baseline accuracy: 1.0, actor loss: -0.3351
Iterations: 8900, critic accuracy: 0.6240000128746033, baseline accuracy: 0.9990000128746033, actor loss: -0.2357
Iterations: 9000, critic accuracy: 0.6629999876022339, baseline accuracy: 0.9990000128746033, actor loss: -0.2537
Iterations: 9100, critic accuracy: 0.6359999775886536, baseline accuracy: 1.0, actor loss: -0.29
Iterations: 9200, critic accuracy: 0.656000018119812, baseline accuracy: 1.0, actor loss: -0.2401
Iter

# **Main**


---

(1) Data generation

(2) Train INVASE or INVASE-

(3) Evaluate INVASE on ground truth feature importance and prediction

In [None]:
# Necessary packages
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse

In [None]:
def main (args):
  """Main function for INVASE.
  
  Args:
    - data_type: synthetic data type (syn1 to syn6)
    - train_no: the number of samples for training set
    - test_no: the number of samples for testing set
    - dim: the number of features
    - model_type: invase or invase_minus
    - model_parameters:
      - actor_h_dim: hidden state dimensions for actor
      - critic_h_dim: hidden state dimensions for critic
      - n_layer: the number of layers
      - batch_size: the number of samples in mini batch
      - iteration: the number of iterations
      - activation: activation function of models
      - learning_rate: learning rate of model training
      - lamda: hyper-parameter of INVASE
    
  Returns:
    - performance:
      - mean_tpr: mean value of true positive rate
      - std_tpr: standard deviation of true positive rate
      - mean_fdr: mean value of false discovery rate
      - std_fdr: standard deviation of false discovery rate
      - auc: area under roc curve
      - apr: average precision score
      - acc: accuracy
  """
  
  # Generate dataset
  x_train, y_train, g_train = generate_dataset (n = args.train_no, 
                                                dim = args.dim, 
                                                data_type = args.data_type, 
                                                seed = 0)
  
  # The test set needs its own seed: generate_dataset re-seeds the global
  # generator, so reusing seed 0 would reproduce the training samples and
  # the model would be evaluated on data it was trained on.
  x_test, y_test, g_test = generate_dataset (n = args.test_no,
                                             dim = args.dim, 
                                             data_type = args.data_type, 
                                             seed = 1)
  
  model_parameters = {'lamda': args.lamda,
                      'actor_h_dim': args.actor_h_dim, 
                      'critic_h_dim': args.critic_h_dim,
                      'n_layer': args.n_layer,
                      'batch_size': args.batch_size,
                      'iteration': args.iteration, 
                      'activation': args.activation, 
                      'learning_rate': args.learning_rate}
  
  # Train the model
  model = invase(x_train, y_train, args.model_type, model_parameters)
 
  model.train(x_train, y_train)    
    
  ## Evaluation
  # Compute importance score and binarise it at 0.5
  g_hat = model.importance_score(x_test)
  importance_score = 1.*(g_hat > 0.5)
    
  # Evaluate the performance of feature importance
  mean_tpr, std_tpr, mean_fdr, std_fdr = \
  feature_performance_metric(g_test, importance_score)
   
  # Print the performance of feature importance.
  # '\\%' emits the same backslash-percent text as before without relying on
  # the invalid escape sequence '\%'.
  print('TPR mean: ' + str(np.round(mean_tpr,1)) + '\\%, ' + \
        'TPR std: ' + str(np.round(std_tpr,1)) + '\\%, ')
  print('FDR mean: ' + str(np.round(mean_fdr,1)) + '\\%, ' + \
        'FDR std: ' + str(np.round(std_fdr,1)) + '\\%, ')
  
  # Predict labels
  y_hat = model.predict(x_test)
    
  # Evaluate the performance of the label prediction
  auc, apr, acc = prediction_performance_metric(y_test, y_hat)
   
  # Print the performance of the label prediction
  print('AUC: ' + str(np.round(auc, 3)) + \
        ', APR: ' + str(np.round(apr, 3)) + \
        ', ACC: ' + str(np.round(acc, 3)))
  
  performance = {'mean_tpr': mean_tpr, 'std_tpr': std_tpr,
                 'mean_fdr': mean_fdr, 'std_fdr': std_fdr,
                 'auc': auc, 'apr': apr, 'acc': acc}
  
  return performance

In [None]:
# Parameters handed to the model / experiment
class args_in:
  """Plain namespace holding the settings that main() reads from `args`.

  Only annotations are declared here; the values are assigned as class
  attributes after the class definition.
  """
  # Dataset settings (passed to generate_dataset by main)
  data_type: str
  train_no: int
  test_no: int
  dim : int
  # Passed to the invase constructor as model_type
  model_type : str
  # Model hyper-parameters (collected into model_parameters by main)
  actor_h_dim : int
  critic_h_dim :int
  n_layer: int
  batch_size : int
  iteration : int
  activation : str
  learning_rate : float
  lamda : float


# Settings for this run; each entry becomes a class attribute of args_in,
# which main() reads as its `args`
_experiment_settings = {
  'data_type': 'syn1',
  'train_no': 10000,
  'test_no': 10000,
  'dim': 11,
  'model_type': 'invase',
  'actor_h_dim': 100,
  'critic_h_dim': 200,
  'n_layer': 3,
  'batch_size': 1000,
  'iteration': 10000,
  'activation': 'relu',
  'learning_rate': 0.0001,
  'lamda': 0.1,
}
for _setting_name, _setting_value in _experiment_settings.items():
  setattr(args_in, _setting_name, _setting_value)

# Run the experiment and keep the metrics dictionary returned by main()
performance = main(args_in)







[1;30;43mA streamkimeneten csak az utolsó 5000 sor látható.[0m
Iterations: 8400, critic accuracy: 0.6510000228881836, baseline accuracy: 0.9990000128746033, actor loss: -0.256
Iterations: 8500, critic accuracy: 0.6510000228881836, baseline accuracy: 1.0, actor loss: -0.306
Iterations: 8600, critic accuracy: 0.6320000290870667, baseline accuracy: 1.0, actor loss: -0.2261
Iterations: 8700, critic accuracy: 0.6349999904632568, baseline accuracy: 1.0, actor loss: -0.2165
Iterations: 8800, critic accuracy: 0.6589999794960022, baseline accuracy: 1.0, actor loss: -0.3351
Iterations: 8900, critic accuracy: 0.6240000128746033, baseline accuracy: 0.9990000128746033, actor loss: -0.2357
Iterations: 9000, critic accuracy: 0.6629999876022339, baseline accuracy: 0.9990000128746033, actor loss: -0.2537
Iterations: 9100, critic accuracy: 0.6359999775886536, baseline accuracy: 1.0, actor loss: -0.29
Iterations: 9200, critic accuracy: 0.656000018119812, baseline accuracy: 1.0, actor loss: -0.2401
Iter