In [1]:
import pandas as pd
from keras.models import Sequential, Model
import keras.layers as layers
from keras.optimizers import Adam
from keras import backend as K
import numpy as np
import random
import os
import pdb
import matplotlib.pyplot as plt
from copy import deepcopy
import tensorflow as tf
import time
from tensorflow.python.client import device_lib
import sys
import keras
 
print("TF INFO:", device_lib.list_local_devices())
print('GPU INFO:', K.tensorflow_backend._get_available_gpus())

config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.9
K.set_session(tf.Session(config=config))
K.tensorflow_backend.set_session(tf.Session(config=config))


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


TF INFO: [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6510251721243927723
]
GPU INFO: []


# DQN Algorithm

In [2]:
class DQNAgent:
    def __init__(self, df_batch, state_size, action_size,
                 minibatch_size=128, gamma=.95, lr=0.001, units=128, priority_aplha = 0.5, 
                 lookback=3, is_lstm=False, layers=4, bcq=0,
                 copy_online_to_target_ep=100, eval_after=100, mode="normal"):
        """
        creates a DQN Agent for batch learning
        param: df_batch is the batch data in MDP format
        param: state_size
        param: action_size
        param: minibatch_size 
        param: gamma
        param: lr
        param: units
        param: priority_aplha for Prioritized Experience Reply. 0 Makes it Vanilla Experience Reply
        param: copy_online_to_target_ep copies current network to terget network. meaningless for Double DQN
        param: eval_after 
        param: lookback how many historical states are inluced including the current one
        param: is_lstm is for layer type
        param: layers is for number of layers including output layer
        param: mode is for baselines. normal is no baseline. other options are: random, 0, 1, 2, 3
        
        """
        
        #adding priority as noise in batch
        df_batch.at[:, 'weight'] = 0.0
        for i, row in df_batch.iterrows():
            df_batch.at[i, 'priority'] = (np.random.uniform(0, 0.001))**priority_aplha
        
        # setting parameters
        self.state_size = state_size
        self.action_size = action_size
        self.batch = df_batch
        
        self.minibatch_size = minibatch_size
        self.gamma = gamma
        self.learning_rate = lr
        self.units = units
        self.priority_aplha = priority_aplha
        self.lookback = lookback
        self.is_lstm = is_lstm
        self.layers = layers
        if self.layers<3:
            print("MIN LAYERS SHOULD BE 3. FORCING 3 LAYERS (including output)")
            self.layers = 3
        self.bcq = bcq
        
        self.copy_online_to_target_ep = copy_online_to_target_ep
        self.eval_after = eval_after
        
        self.batch = self._setup_lookback_states(self.batch)
        
        self.mode = mode
        
        # setting up the models
        self.model_1 = self._build_model()
        self.model_2 = self._build_model()
        
        
        # evaluation variables
        self.ecrs = []
        self.IS = []
        self.WIS = []
        self.PDIS = []
        self.PDWIS = []
        self.DR = []
        self.remediations = []
    
    
    def _setup_lookback_states(self, df):
        curr_ep = -1
        for i, row in df.iterrows():
            if curr_ep!=row['episode_id']:
                curr_ep = row['episode_id']
                prevs = []
                for j in range(self.lookback-1):
                    prevs.append(np.full(shape=self.state_size, fill_value=0))
                state = row['state'] 
                prevs.append(state)
                pervs = deepcopy(prevs)
            df.at[i, 'state'] = np.array(prevs)
            prevs = deepcopy(prevs[1:])
            prevs.append(row['next_state'])
            pervs = deepcopy(prevs)
            df.at[i, 'next_state'] = np.array(prevs)
            prevs = deepcopy(prevs)    
        
        for i, row in df.iterrows():
            state, next_state = self.get_transformed_state(row)
            df.at[i, 'state'] = state
            df.at[i, 'next_state'] = next_state
        
        
        return df
    
    def get_transformed_state(self, row):
        if self.is_lstm:
            state = row['state'].reshape(1, self.lookback, self.state_size)
            next_state = row['next_state'].reshape(1, self.lookback, self.state_size)
        else:
            state = row['state'].reshape(1, self.state_size * self.lookback)
            next_state = row['next_state'].reshape(1, self.state_size * self.lookback)
        
        return state, next_state
    
    def _build_model(self):
        """
        Standard DQN model
        """
        model = Sequential()
        
        if self.is_lstm:
            # 1 layer
            model.add(layers.LSTM(self.units, input_shape=(self.lookback, self.state_size), 
                                  activation='relu', kernel_regularizer=keras.regularizers.l2(), 
                                  return_sequences=True, kernel_initializer='glorot_normal'))
            
            for i in range(self.layers-3):
                model.add(layers.LSTM(self.units, activation='relu', kernel_regularizer=keras.regularizers.l2(), 
                                      return_sequences=True, kernel_initializer='glorot_normal'))
            # 1 layer
            model.add(layers.LSTM(self.units, activation='relu', kernel_regularizer=keras.regularizers.l2(), 
                                  return_sequences=False, kernel_initializer='glorot_normal'))
        else:
            model.add(layers.Dense(self.units, input_dim=self.state_size * self.lookback, activation='relu', 
                                   kernel_regularizer=keras.regularizers.l2(), kernel_initializer='glorot_normal'))
            for i in range(self.layers-2):
                model.add(layers.Dense(self.units, activation='relu', 
                                       kernel_regularizer=keras.regularizers.l2(), kernel_initializer='glorot_normal'))
            
        # 1 layer
        model.add(layers.Dense(self.action_size, activation='linear', 
                               kernel_regularizer=keras.regularizers.l2(), kernel_initializer='glorot_normal'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate), metrics=[tf.keras.metrics.RootMeanSquaredError(), 'mae'])
        return model
    
    def act(self, state):
        act_values = self.model_2.predict(state)
        return np.argmax(act_values[0]), np.max(act_values[0])
    
    def state_value(self, state):
        act_values = self.model_2.predict(state)
        return np.sum(act_values[0]) 
    
    def q_value(self, state, action):
        act_values = self.model_2.predict(state)
        return act_values[0][action] 
    
    def _filter_bcq(self, row, ns_act_values):
        if self.bcq==0:
            return np.argmax(ns_act_values)
        
        gw = self.batch.loc[self.batch['cluster']==row['ns_cluster']].sample(100)
        gwa = gw.groupby(['action']).count()['episode_id'].tolist()
        gwap = np.array(gwa)/100
        
        for i, p in enumerate(gwap):
            if p<self.bcq:
                ns_act_values[i] = -9999
        
        return np.argmax(ns_act_values)
    
    def _fit_model(self, row):
        i = row.name
        state, action, reward, next_state, done = row['state'], row['action'], row['reward'], row['next_state'], row['done']
            
        target_q = reward
        
        if not done:    
            if self.mode=="normal":
                ns_act_values = self.model_1.predict(next_state)[0]
#                 a_prime = np.argmax(ns_act_values)
                a_prime = self._filter_bcq(row, ns_act_values)
            elif self.mode=="random":
                a_prime = np.random.choice(range(self.action_size))
            else:
                a_prime = int(self.mode)
            
            target_ns_act_values = self.model_2.predict(next_state)[0]
            target_ns_q = target_ns_act_values[a_prime]
                           

            target_q = reward + self.gamma*target_ns_q

            self.batch.loc[i, 'pred_action'] = a_prime
            self.batch.loc[i, 'pred_q'] = target_q
        
        target_f = self.model_1.predict(state)

        # Prioritized Experience Reply with noise
        
        self.batch.loc[i, 'priority'] = (abs(target_q - target_f[0][action]) + np.random.uniform(0, 0.001))**self.priority_aplha

        target_f[0][action] = target_q
        self.model_1.fit(state, target_f, epochs=1, verbose=0)
        
    
    def _learn_minibatch(self):
        priority_sum = self.batch['priority'].sum()
        self.batch['weight'] = self.batch['priority']/priority_sum
        minibatch = self.batch.sample(self.minibatch_size, weights=self.batch['weight'])
        minibatch.apply(self._fit_model, axis=1) 
            
    
    def ecr_reward(self):
        reward = 0.0
        count = 0
        for i, row in self.batch.loc[self.batch['transition_id']==0].iterrows():
            state = row['state']
            reward += self.act(state)[1]
            count += 1
            
        ecr = reward/count
        self.ecrs.append(ecr)
        return ecr

    
    def get_ips(self, action):
        if self.action_size==2:
            ips = 1.0/0.5
        else:
            if action == 0 or action == 1:
                ips = 1.0/0.1
            else:
                ips = 1.0/0.4
        return ips
        
    
    
    def get_eval(self):
        # set up roh with action_predicted
        self.batch['roh'] = -1.0
        curr_ep = -1
        for i, row in self.batch.iterrows():
            if row['episode_id']!=curr_ep:
                roh_t = 1
                curr_ep = row['episode_id']
            if row['action']!=row['pred_action']:
                ips = 0
            else:
                ips = self.get_ips(row['action'])
                
            roh_t *= ips
            self.batch.at[i, 'roh'] = roh_t
        
        total_eps = len(self.batch['episode_id'].unique())
        # equation found in emma brunskill's lecture note
        
        # as each roh is calculated multiplicatively, the last roh is the entire multiplication result
        # summing (gamma**t)*(R_t^i) will result to delayed reward
        # we can take the last roh (where done is true) and the delayed reward for each ep
        a_is = sum(self.batch.loc[self.batch['done']==True].apply(lambda x: x['roh'] * x['delayed_reward'], axis=1))
        isamp = a_is/total_eps
        
        # for weighted important samplng
        sum_roh = sum(self.batch.loc[self.batch['done']==True, 'roh'])
        if sum_roh==0:
            wis = 0
        else:
            wis = a_is/sum_roh
        
        pdis = 0
        for transition_id in self.batch['transition_id'].unique():
            d = self.batch.loc[self.batch['transition_id']==transition_id]
            a_pdis = (self.gamma**transition_id) * sum(d.apply(lambda x: x['roh'] * x['reward'], axis=1))
            pdis += (a_pdis/total_eps)
        
        curr_ep = -1
        trans = -1
        dr = 0
        pdwis = 0
        pdwis_nom = 0
        pdwis_denom = 0
        for i, row in self.batch.iterrows():
            if curr_ep!=row['episode_id']:
                curr_ep = row['episode_id']
                trans = 0
            if row['transition_id']!=trans:
                print("ERROR:", curr_ep, row['transition_id'], trans)
            
            gamma_t = self.gamma**row['transition_id']
            q_pi_e = self.q_value(row['state'], row['action'])
            v_pi_e = self.state_value(row['state'])
            roh_t_i = row['roh']
            if trans!=0:
                roh_t_sub_1_i = self.batch.loc[(self.batch['episode_id']==row['episode_id']) &
                                                  (self.batch['transition_id']==(trans-1)), 'roh'].tolist()[0]
            else:
                roh_t_sub_1_i = 1
            
            dr += ((gamma_t*roh_t_i*(row['reward'] - q_pi_e)) + (gamma_t*roh_t_sub_1_i*v_pi_e))
            
            # PDWIS
            sum_roh_t = float(sum(self.batch.loc[self.batch['transition_id']==row['transition_id']]['roh']))
            if sum_roh_t==0:
                w_t_i = 0
            else:
                w_t_i = row['roh']/sum_roh_t
            pdwis_nom += (w_t_i * gamma_t * row['reward'])
            pdwis_denom += (w_t_i * gamma_t)
            
            trans += 1
        
        dr = dr/total_eps
        pdwis = pdwis_nom/pdwis_denom
        
        
        self.IS.append(isamp)
        self.WIS.append(wis)
        self.PDIS.append(pdis)
        self.PDWIS.append(pdwis)
        self.DR.append(dr)
        
        return isamp, wis, pdis, pdwis, dr
        
    
    def predict(self):
        self.batch['pred_action'] = -1
        self.batch['pred_q'] = 0
        self.batch.apply(self._predict_row, axis=1)
    
    def _predict_row(self, row):
        i = row.name
        state = row['state']
        act, q = self.act(state)
        self.batch.loc[i, 'pred_action'] = act
        self.batch.loc[i, 'pred_q'] = q
    
    def learn(self, epoch):
        for i in range(epoch):
            self._learn_minibatch()
            
            if (i+1)%self.copy_online_to_target_ep==0:
                self.model_2.set_weights(self.model_1.get_weights())
            
            if (i+1)%self.eval_after==0:
                t1 = time.time()
                self.predict()
                
                ecr = self.ecr_reward()
                isamp, wis, pdis, pdwis, dr = self.get_eval()
                t2 = time.time()
                print("Eval Time:", (t2-t1))
                print("--epoch: {}/{} | ECR: {:.5f} | IS: {:.5f} | WIS: {:.5f} | PDIS: {:.5f} | PDWIS: {:.5f} | DR: {:.5f} --".format(i+1, epoch, ecr, isamp, wis, pdis, pdwis, dr))
                self.summary()
        
        self.model_2.set_weights(self.model_1.get_weights())
        self.predict()
                
    
    def get_all_eval_df(self):
        eval_df = pd.DataFrame(columns=['ECR', 'IS', 'WIS', 'PDIS', 'PDWIS', 'DR', 'REMEDIATION'])
        
        eval_df['ECR'] = self.ecrs
        eval_df['IS'] = self.IS
        eval_df['WIS'] = self.WIS
        eval_df['PDIS'] = self.PDIS
        eval_df['PDWIS'] = self.PDWIS
        eval_df['DR'] = self.DR
        eval_df['REMEDIATION'] = self.remediations
        
        return eval_df
    
    
    def summary(self):
        pred_const = len(self.batch.loc[self.batch['pred_action'] == 3]) 
        pred_active = len(self.batch.loc[self.batch['pred_action'] == 2])
        pred_pass = len(self.batch.loc[self.batch['pred_action'] == 1])
        pred_none = len(self.batch.loc[self.batch['pred_action'] == 0])

        self.remediations.append({"constructive": pred_const, "active": pred_active, "passive": pred_pass,
                                "none": pred_none})
        print("Pred-> Constructive: {}, Active: {}, Passive: {}, None: {}"
              .format(pred_const, pred_active, pred_pass, pred_none))
        
    
    

# Result

In [3]:
def summary_result(df):

    true_const = len(df.loc[df['action'] == 3])
    true_active = len(df.loc[df['action'] == 2])
    true_pass = len(df.loc[df['action'] == 1])
    true_none = len(df.loc[df['action'] == 0])
    
    pred_const = len(df.loc[df['pred_action'] == 3]) 
    pred_active = len(df.loc[df['pred_action'] == 2])
    pred_pass = len(df.loc[df['pred_action'] == 1])
    pred_none = len(df.loc[df['pred_action'] == 0])
     
    
    print("True-> Constructive: {}, Active: {}, Passive: {}, None: {}"
          .format(true_const, true_active, true_pass, true_none))
    print("Pred-> Constructive: {}, Active: {}, Passive: {}, None: {}"
          .format(pred_const, pred_active, pred_pass, pred_none))
    
    
    
    true_reward = df.loc[df['done']==True]['reward']
    pred_reward = df.loc[df['transition_id']==0]['pred_q']
    true_reward_mean = np.mean(true_reward)
    true_reward_std = np.std(true_reward)
    pred_reward_mean = np.mean(pred_reward)
    pred_reward_std = np.std(pred_reward)
    
    print("-> True Reward: {:.5f}/{:.5f}, Pred Reward: {:.5f}/{:.5f}".format(
        true_reward_mean, true_reward_std, pred_reward_mean, pred_reward_std))

    
    ret_dict = {"True": {"Constructive": true_const, "Active": true_active, "Passive": true_pass, 
                         "None": true_none, "RewardMean": true_reward_mean, "RewardStd": true_reward_std},
               "Pred": {"Constructive": pred_const, "Active": pred_active, "Passive": pred_pass, 
                         "None": pred_none, "RewardMean": pred_reward_mean, "RewardStd": pred_reward_std}
               }
    
    
    
    return ret_dict

# Run Program

In [6]:
df_org = pd.read_pickle('../temp/df_all_norm_cluster.pkl')
df = df_org.copy()
epoch = 20000
df['reward'] = df['delayed_reward']
result_dir = '../result/'
def run(agent, run_name, epoch):
    print("===== STARTING ", run_name, "=====")
    agent.learn(epoch)
    result_df = agent.batch
    eval_df = agent.get_all_eval_df()
    eval_df.to_pickle(result_dir + run_name +'_eval.pkl')
    summary_result(result_df)
    result_df.to_pickle(result_dir + run_name +'_result.pkl')

    return result_df, eval_df

for is_lstm in [True]:
    for lookback in [1]:
        for random_state in [1,2,3,4,5]:
            np.random.seed(random_state)
            random.seed(random_state)
            tf.set_random_seed(random_state)
            print("TF INFO:", device_lib.list_local_devices())
            print('GPU INFO:', K.tensorflow_backend._get_available_gpus())

            config = tf.ConfigProto(log_device_placement=True)
            config.gpu_options.per_process_gpu_memory_fraction = 0.9
            K.set_session(tf.Session(config=config))
            K.tensorflow_backend.set_session(tf.Session(config=config))

            
            run_name = "DQN_is_lstm_"+ str(is_lstm) + "_lookback_" + str(lookback) + "_run_" + str(epoch) + "_rs_" + str(random_state)
            agent = DQNAgent(df_batch=df.copy(), state_size=len(df.iloc[0]['state']), action_size=4, 
                             copy_online_to_target_ep=100, eval_after=100, bcq=0.1,
                             lookback=lookback, layers=4, is_lstm=is_lstm, mode="normal")
            result_df, eval_df = run(agent, run_name, epoch)



TF INFO: [name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1448305049376309946
]
GPU INFO: []
===== STARTING  DQN_is_lstm_True_lookback_1_run_20000_rs_1 =====
Eval Time: 23.228199243545532
--epoch: 100/20000 | ECR: 5.71830 | IS: 1.62567 | WIS: 63.33600 | PDIS: 1.62567 | PDWIS: 0.52367 | DR: 54.08142 --
Pred-> Constructive: 795, Active: 0, Passive: 1341, None: 2862
Eval Time: 23.105072259902954
--epoch: 200/20000 | ECR: 13.42545 | IS: 1.36899 | WIS: 66.67000 | PDIS: 1.36899 | PDWIS: 0.51939 | DR: 147.72490 --
Pred-> Constructive: 0, Active: 0, Passive: 1571, None: 3427
Eval Time: 23.135251998901367
--epoch: 300/20000 | ECR: 16.08883 | IS: 11.54320 | WIS: 63.34127 | PDIS: 10.96524 | PDWIS: 8.09803 | DR: 186.97350 --
Pred-> Constructive: 2029, Active: 0, Passive: 2969, None: 0
Eval Time: 23.255128145217896
--epoch: 400/20000 | ECR: 23.15277 | IS: 13.94529 | WIS: 60.70486 | PDIS: 13.19203 | PDWIS: 4.91045 | DR: 278.71897 --
Pred-> Constructive: 182

Eval Time: 23.495348930358887
--epoch: 3900/20000 | ECR: 44.86792 | IS: 6.79936 | WIS: 53.51576 | PDIS: 6.52196 | PDWIS: 25.51824 | DR: 357.53618 --
Pred-> Constructive: 697, Active: 0, Passive: 4173, None: 128
Eval Time: 23.504768133163452
--epoch: 4000/20000 | ECR: 45.42400 | IS: 25.08642 | WIS: 64.19486 | PDIS: 23.05495 | PDWIS: 7.75403 | DR: 592.49287 --
Pred-> Constructive: 2511, Active: 0, Passive: 2130, None: 357
Eval Time: 23.716893911361694
--epoch: 4100/20000 | ECR: 46.62831 | IS: 15.12631 | WIS: 66.96830 | PDIS: 14.14867 | PDWIS: 3.05442 | DR: 780.22356 --
Pred-> Constructive: 0, Active: 4685, Passive: 106, None: 207
Eval Time: 25.172689199447632
--epoch: 4200/20000 | ECR: 43.18216 | IS: 37.16895 | WIS: 68.67112 | PDIS: 32.94580 | PDWIS: 10.70768 | DR: 580.60529 --
Pred-> Constructive: 2088, Active: 1721, Passive: 0, None: 1189
Eval Time: 23.509796857833862
--epoch: 4300/20000 | ECR: 43.43054 | IS: 15.12631 | WIS: 71.00253 | PDIS: 14.14867 | PDWIS: 3.18752 | DR: 656.33333 --

Eval Time: 23.24097204208374
--epoch: 7800/20000 | ECR: 46.50555 | IS: 30.11683 | WIS: 67.00081 | PDIS: 26.78469 | PDWIS: 5.59874 | DR: 801.92532 --
Pred-> Constructive: 1007, Active: 2570, Passive: 1316, None: 105
Eval Time: 23.101923942565918
--epoch: 7900/20000 | ECR: 46.42586 | IS: 20.67336 | WIS: 74.75028 | PDIS: 18.97644 | PDWIS: 6.21954 | DR: 642.72002 --
Pred-> Constructive: 1167, Active: 2842, Passive: 231, None: 758
Eval Time: 23.274250030517578
--epoch: 8000/20000 | ECR: 47.54500 | IS: 31.98224 | WIS: 76.09333 | PDIS: 29.46989 | PDWIS: 6.58909 | DR: 726.17092 --
Pred-> Constructive: 2707, Active: 1337, Passive: 899, None: 55
Eval Time: 23.235345363616943
--epoch: 8100/20000 | ECR: 47.55012 | IS: 22.00004 | WIS: 76.35829 | PDIS: 20.22640 | PDWIS: 5.52371 | DR: 666.92026 --
Pred-> Constructive: 3470, Active: 121, Passive: 284, None: 1123
Eval Time: 23.29237723350525
--epoch: 8200/20000 | ECR: 48.38805 | IS: 31.27642 | WIS: 63.79736 | PDIS: 28.69280 | PDWIS: 4.85658 | DR: 766.7

Eval Time: 23.12685227394104
--epoch: 11700/20000 | ECR: 54.07612 | IS: 27.18717 | WIS: 73.17528 | PDIS: 25.06094 | PDWIS: 10.07452 | DR: 639.37466 --
Pred-> Constructive: 1909, Active: 958, Passive: 2043, None: 88
Eval Time: 23.193170070648193
--epoch: 11800/20000 | ECR: 52.26047 | IS: 30.61383 | WIS: 72.61581 | PDIS: 28.47739 | PDWIS: 7.85819 | DR: 684.92038 --
Pred-> Constructive: 3071, Active: 437, Passive: 1461, None: 29
Eval Time: 23.16294813156128
--epoch: 11900/20000 | ECR: 51.18113 | IS: 28.51315 | WIS: 77.27807 | PDIS: 26.34628 | PDWIS: 8.79722 | DR: 656.78468 --
Pred-> Constructive: 2187, Active: 867, Passive: 1446, None: 498
Eval Time: 23.16779112815857
--epoch: 12000/20000 | ECR: 54.31620 | IS: 33.59469 | WIS: 81.42142 | PDIS: 30.88688 | PDWIS: 7.87117 | DR: 723.72724 --
Pred-> Constructive: 2990, Active: 824, Passive: 293, None: 891
Eval Time: 23.11808204650879
--epoch: 12100/20000 | ECR: 56.15858 | IS: 36.64736 | WIS: 80.89412 | PDIS: 33.12160 | PDWIS: 13.47617 | DR: 679

Eval Time: 23.090989112854004
--epoch: 15600/20000 | ECR: 50.98170 | IS: 22.95046 | WIS: 71.38920 | PDIS: 21.15869 | PDWIS: 8.72598 | DR: 572.88458 --
Pred-> Constructive: 1998, Active: 1676, Passive: 1272, None: 52
Eval Time: 23.029202938079834
--epoch: 15700/20000 | ECR: 54.30980 | IS: 23.08245 | WIS: 70.39470 | PDIS: 21.12947 | PDWIS: 8.43606 | DR: 643.96321 --
Pred-> Constructive: 1697, Active: 1809, Passive: 1454, None: 38
Eval Time: 23.106637716293335
--epoch: 15800/20000 | ECR: 55.23373 | IS: 21.80466 | WIS: 61.89505 | PDIS: 20.08919 | PDWIS: 5.35422 | DR: 714.47442 --
Pred-> Constructive: 2219, Active: 2149, Passive: 612, None: 18
Eval Time: 23.024715900421143
--epoch: 15900/20000 | ECR: 55.29870 | IS: 15.09005 | WIS: 70.40820 | PDIS: 14.18457 | PDWIS: 4.02118 | DR: 732.40693 --
Pred-> Constructive: 2832, Active: 986, Passive: 1170, None: 10
Eval Time: 23.091798067092896
--epoch: 16000/20000 | ECR: 57.39984 | IS: 24.60619 | WIS: 72.21524 | PDIS: 22.59617 | PDWIS: 13.01149 | DR:

Eval Time: 23.073830127716064
--epoch: 19400/20000 | ECR: 55.10122 | IS: 27.50701 | WIS: 77.51705 | PDIS: 25.25355 | PDWIS: 6.78950 | DR: 694.57572 --
Pred-> Constructive: 3427, Active: 744, Passive: 290, None: 537
Eval Time: 23.028486013412476
--epoch: 19500/20000 | ECR: 56.20119 | IS: 25.40559 | WIS: 75.99246 | PDIS: 23.51556 | PDWIS: 8.36126 | DR: 622.83638 --
Pred-> Constructive: 2608, Active: 961, Passive: 1220, None: 209
Eval Time: 23.203238010406494
--epoch: 19600/20000 | ECR: 53.64435 | IS: 514.62584 | WIS: 62.98028 | PDIS: 347.78389 | PDWIS: 9.68571 | DR: 1255.56328 --
Pred-> Constructive: 3190, Active: 873, Passive: 895, None: 40
Eval Time: 23.127691984176636
--epoch: 19700/20000 | ECR: 55.37136 | IS: 43.97344 | WIS: 79.91629 | PDIS: 38.47946 | PDWIS: 8.45117 | DR: 782.66684 --
Pred-> Constructive: 2106, Active: 1554, Passive: 1305, None: 33
Eval Time: 23.023164987564087
--epoch: 19800/20000 | ECR: 56.23463 | IS: 27.31451 | WIS: 74.81007 | PDIS: 25.02977 | PDWIS: 9.62104 | DR

Eval Time: 23.57548689842224
--epoch: 3100/20000 | ECR: 42.23773 | IS: 0.29343 | WIS: 7.14500 | PDIS: 0.29343 | PDWIS: 0.10891 | DR: 783.84671 --
Pred-> Constructive: 1171, Active: 0, Passive: 3827, None: 0
Eval Time: 23.625163078308105
--epoch: 3200/20000 | ECR: 46.40624 | IS: 16.65128 | WIS: 68.83117 | PDIS: 15.30533 | PDWIS: 3.14628 | DR: 867.77574 --
Pred-> Constructive: 2621, Active: 1, Passive: 1490, None: 886
Eval Time: 23.573643922805786
--epoch: 3300/20000 | ECR: 51.55694 | IS: 1.69790 | WIS: 27.56250 | PDIS: 1.69790 | PDWIS: 0.34483 | DR: 590.27659 --
Pred-> Constructive: 2069, Active: 0, Passive: 2929, None: 0
Eval Time: 23.617965936660767
--epoch: 3400/20000 | ECR: 51.01484 | IS: 0.29343 | WIS: 7.14500 | PDIS: 0.29343 | PDWIS: 0.08804 | DR: 837.06058 --
Pred-> Constructive: 1179, Active: 0, Passive: 2608, None: 1211
Eval Time: 23.594804286956787
--epoch: 3500/20000 | ECR: 49.45610 | IS: 15.08583 | WIS: 69.76187 | PDIS: 13.81815 | PDWIS: 4.75971 | DR: 719.20088 --
Pred-> Con

Eval Time: 23.661354064941406
--epoch: 7000/20000 | ECR: 53.78802 | IS: 24.88313 | WIS: 84.11685 | PDIS: 23.13658 | PDWIS: 6.88654 | DR: 734.06697 --
Pred-> Constructive: 1440, Active: 1, Passive: 1046, None: 2511
Eval Time: 23.602411031723022
--epoch: 7100/20000 | ECR: 53.68912 | IS: 24.21385 | WIS: 78.77843 | PDIS: 22.44382 | PDWIS: 5.43559 | DR: 730.37998 --
Pred-> Constructive: 2929, Active: 0, Passive: 408, None: 1661
Eval Time: 23.62943696975708
--epoch: 7200/20000 | ECR: 54.12590 | IS: 25.03893 | WIS: 77.88557 | PDIS: 23.30384 | PDWIS: 5.92129 | DR: 765.80932 --
Pred-> Constructive: 1952, Active: 2, Passive: 1322, None: 1722
Eval Time: 23.60665798187256
--epoch: 7300/20000 | ECR: 60.81148 | IS: 26.35291 | WIS: 77.63397 | PDIS: 24.37432 | PDWIS: 6.04916 | DR: 834.71798 --
Pred-> Constructive: 2762, Active: 2, Passive: 617, None: 1617
Eval Time: 23.634979009628296
--epoch: 7400/20000 | ECR: 64.00617 | IS: 25.22694 | WIS: 80.06857 | PDIS: 23.38701 | PDWIS: 5.33284 | DR: 1056.34024 

Eval Time: 23.607611894607544
--epoch: 10900/20000 | ECR: 56.68507 | IS: 33.45228 | WIS: 81.32922 | PDIS: 30.76535 | PDWIS: 6.23240 | DR: 921.73530 --
Pred-> Constructive: 2269, Active: 164, Passive: 750, None: 1815
Eval Time: 23.62635588645935
--epoch: 11000/20000 | ECR: 56.56310 | IS: 22.52864 | WIS: 78.36748 | PDIS: 20.05944 | PDWIS: 4.09862 | DR: 892.57674 --
Pred-> Constructive: 1840, Active: 800, Passive: 438, None: 1920
Eval Time: 23.555312156677246
--epoch: 11100/20000 | ECR: 57.37336 | IS: 20.57150 | WIS: 77.62380 | PDIS: 19.03455 | PDWIS: 5.54005 | DR: 766.14390 --
Pred-> Constructive: 1367, Active: 592, Passive: 1220, None: 1819
Eval Time: 23.62041997909546
--epoch: 11200/20000 | ECR: 57.32335 | IS: 12.06919 | WIS: 70.97596 | PDIS: 10.93169 | PDWIS: 3.86500 | DR: 735.65898 --
Pred-> Constructive: 1322, Active: 650, Passive: 142, None: 2884
Eval Time: 23.555766105651855
--epoch: 11300/20000 | ECR: 53.80455 | IS: 16.95667 | WIS: 74.85912 | PDIS: 15.59618 | PDWIS: 4.17784 | DR:

Eval Time: 24.041085958480835
--epoch: 14700/20000 | ECR: 56.93413 | IS: 31.95864 | WIS: 77.45622 | PDIS: 29.27131 | PDWIS: 8.07386 | DR: 732.35642 --
Pred-> Constructive: 2130, Active: 559, Passive: 255, None: 2054
Eval Time: 23.672898054122925
--epoch: 14800/20000 | ECR: 55.12465 | IS: 11.22736 | WIS: 75.09322 | PDIS: 10.03523 | PDWIS: 4.12645 | DR: 794.04076 --
Pred-> Constructive: 812, Active: 1905, Passive: 284, None: 1997
Eval Time: 23.59976100921631
--epoch: 14900/20000 | ECR: 55.72088 | IS: 218.21888 | WIS: 61.69672 | PDIS: 179.89480 | PDWIS: 15.75444 | DR: 742.33478 --
Pred-> Constructive: 2435, Active: 676, Passive: 237, None: 1650
Eval Time: 23.684120893478394
--epoch: 15000/20000 | ECR: 55.61370 | IS: 22.77682 | WIS: 79.58608 | PDIS: 20.40119 | PDWIS: 4.57791 | DR: 741.53623 --
Pred-> Constructive: 2096, Active: 360, Passive: 154, None: 2388
Eval Time: 23.845811128616333
--epoch: 15100/20000 | ECR: 54.02284 | IS: 25.16545 | WIS: 78.90911 | PDIS: 22.81585 | PDWIS: 9.01342 | 

Eval Time: 23.67101216316223
--epoch: 18500/20000 | ECR: 58.42499 | IS: 8.92497 | WIS: 67.51786 | PDIS: 8.53249 | PDWIS: 2.58720 | DR: 642.44103 --
Pred-> Constructive: 1978, Active: 1143, Passive: 874, None: 1003
Eval Time: 23.72392201423645
--epoch: 18600/20000 | ECR: 59.27981 | IS: 6.04696 | WIS: 62.82387 | PDIS: 5.73071 | PDWIS: 2.02379 | DR: 587.37732 --
Pred-> Constructive: 1091, Active: 860, Passive: 1219, None: 1828
Eval Time: 23.717600107192993
--epoch: 18700/20000 | ECR: 58.99765 | IS: 14.45560 | WIS: 64.73448 | PDIS: 13.39205 | PDWIS: 4.15584 | DR: 686.01780 --
Pred-> Constructive: 1151, Active: 1586, Passive: 1568, None: 693
Eval Time: 23.72558307647705
--epoch: 18800/20000 | ECR: 56.20476 | IS: 14.84476 | WIS: 68.44402 | PDIS: 13.91412 | PDWIS: 3.73848 | DR: 803.89703 --
Pred-> Constructive: 1380, Active: 1164, Passive: 918, None: 1536
Eval Time: 23.69684910774231
--epoch: 18900/20000 | ECR: 57.02449 | IS: 7.91609 | WIS: 63.58990 | PDIS: 7.57949 | PDWIS: 1.82001 | DR: 686.

Eval Time: 24.61218023300171
--epoch: 2200/20000 | ECR: 48.01443 | IS: 12.63502 | WIS: 57.23959 | PDIS: 12.04072 | PDWIS: 4.86723 | DR: 516.07679 --
Pred-> Constructive: 1459, Active: 0, Passive: 2616, None: 923
Eval Time: 24.395246028900146
--epoch: 2300/20000 | ECR: 54.32758 | IS: 25.89296 | WIS: 55.88863 | PDIS: 23.80650 | PDWIS: 4.84184 | DR: 770.12749 --
Pred-> Constructive: 3583, Active: 0, Passive: 381, None: 1034
Eval Time: 24.465481996536255
--epoch: 2400/20000 | ECR: 54.24813 | IS: 25.89296 | WIS: 58.48057 | PDIS: 23.80650 | PDWIS: 5.46611 | DR: 757.50729 --
Pred-> Constructive: 3807, Active: 0, Passive: 23, None: 1168
Eval Time: 24.446573972702026
--epoch: 2500/20000 | ECR: 58.50730 | IS: 25.89296 | WIS: 58.48057 | PDIS: 23.80650 | PDWIS: 5.49163 | DR: 805.96035 --
Pred-> Constructive: 3598, Active: 0, Passive: 70, None: 1330
Eval Time: 24.4287531375885
--epoch: 2600/20000 | ECR: 57.12244 | IS: 0.29343 | WIS: 7.14500 | PDIS: 0.29343 | PDWIS: 0.12219 | DR: 648.67330 --
Pred->

Eval Time: 24.334674835205078
--epoch: 6100/20000 | ECR: 51.38121 | IS: 25.35821 | WIS: 76.14303 | PDIS: 23.50997 | PDWIS: 6.06890 | DR: 693.19177 --
Pred-> Constructive: 2892, Active: 0, Passive: 2098, None: 8
Eval Time: 24.247824907302856
--epoch: 6200/20000 | ECR: 52.35087 | IS: 25.35821 | WIS: 76.14303 | PDIS: 23.50997 | PDWIS: 5.84805 | DR: 742.34926 --
Pred-> Constructive: 3148, Active: 76, Passive: 1147, None: 627
Eval Time: 24.423388957977295
--epoch: 6300/20000 | ECR: 52.90328 | IS: 24.82359 | WIS: 69.70286 | PDIS: 22.84140 | PDWIS: 5.41026 | DR: 743.85647 --
Pred-> Constructive: 3572, Active: 839, Passive: 364, None: 223
Eval Time: 24.23771595954895
--epoch: 6400/20000 | ECR: 51.65062 | IS: 30.56378 | WIS: 72.60761 | PDIS: 27.52103 | PDWIS: 5.61670 | DR: 790.76060 --
Pred-> Constructive: 924, Active: 3515, Passive: 359, None: 200
Eval Time: 24.224169969558716
--epoch: 6500/20000 | ECR: 48.56059 | IS: 6.34086 | WIS: 61.76000 | PDIS: 6.12087 | PDWIS: 2.55220 | DR: 393.66196 --


Eval Time: 24.407958984375
--epoch: 10000/20000 | ECR: 57.27486 | IS: 20.50411 | WIS: 74.65794 | PDIS: 19.00223 | PDWIS: 5.68148 | DR: 641.59955 --
Pred-> Constructive: 2662, Active: 1825, Passive: 143, None: 368
Eval Time: 24.571167945861816
--epoch: 10100/20000 | ECR: 57.41568 | IS: 27.23191 | WIS: 75.37870 | PDIS: 24.95054 | PDWIS: 6.61217 | DR: 784.83920 --
Pred-> Constructive: 3285, Active: 806, Passive: 408, None: 499
Eval Time: 24.36076307296753
--epoch: 10200/20000 | ECR: 59.47313 | IS: 24.21024 | WIS: 75.76153 | PDIS: 22.41247 | PDWIS: 6.98109 | DR: 703.73726 --
Pred-> Constructive: 2963, Active: 1295, Passive: 127, None: 613
Eval Time: 24.221719980239868
--epoch: 10300/20000 | ECR: 56.21910 | IS: 23.64318 | WIS: 74.13588 | PDIS: 21.69360 | PDWIS: 4.76919 | DR: 909.33504 --
Pred-> Constructive: 2484, Active: 1425, Passive: 1, None: 1088
Eval Time: 24.27505612373352
--epoch: 10400/20000 | ECR: 53.97899 | IS: 25.60123 | WIS: 77.46981 | PDIS: 23.67014 | PDWIS: 6.37374 | DR: 699.1

Eval Time: 29.853420734405518
--epoch: 13900/20000 | ECR: 58.30970 | IS: 11.81392 | WIS: 63.05075 | PDIS: 11.23494 | PDWIS: 2.56060 | DR: 678.18430 --
Pred-> Constructive: 1544, Active: 2789, Passive: 74, None: 591
Eval Time: 29.775308847427368
--epoch: 14000/20000 | ECR: 57.38099 | IS: 7.36176 | WIS: 78.57918 | PDIS: 7.04893 | PDWIS: 2.72042 | DR: 545.46893 --
Pred-> Constructive: 2146, Active: 1708, Passive: 595, None: 549
Eval Time: 32.55334520339966
--epoch: 14100/20000 | ECR: 54.77070 | IS: 16.14795 | WIS: 71.89989 | PDIS: 14.97574 | PDWIS: 6.42607 | DR: 499.35198 --
Pred-> Constructive: 1927, Active: 1654, Passive: 182, None: 1235
Eval Time: 30.504772901535034
--epoch: 14200/20000 | ECR: 54.16268 | IS: 11.11192 | WIS: 66.60315 | PDIS: 10.62573 | PDWIS: 3.15777 | DR: 578.02794 --
Pred-> Constructive: 2115, Active: 1556, Passive: 3, None: 1324
Eval Time: 29.17105793952942
--epoch: 14300/20000 | ECR: 51.53104 | IS: 15.24012 | WIS: 73.75839 | PDIS: 14.38601 | PDWIS: 6.09461 | DR: 489

Eval Time: 28.872628927230835
--epoch: 17800/20000 | ECR: 59.63203 | IS: 13.25516 | WIS: 68.40013 | PDIS: 12.60318 | PDWIS: 3.73860 | DR: 761.61195 --
Pred-> Constructive: 1480, Active: 970, Passive: 1292, None: 1256
Eval Time: 28.515246152877808
--epoch: 17900/20000 | ECR: 64.05931 | IS: 17.64276 | WIS: 77.88802 | PDIS: 16.05489 | PDWIS: 5.98028 | DR: 736.85320 --
Pred-> Constructive: 2748, Active: 243, Passive: 1125, None: 882
Eval Time: 28.320116996765137
--epoch: 18000/20000 | ECR: 61.66707 | IS: 9.35597 | WIS: 15.21560 | PDIS: 8.32045 | PDWIS: 7.50240 | DR: 543.74973 --
Pred-> Constructive: 1937, Active: 794, Passive: 1368, None: 899
Eval Time: 28.066009759902954
--epoch: 18100/20000 | ECR: 58.69909 | IS: 9.78375 | WIS: 78.99995 | PDIS: 8.74822 | PDWIS: 7.56029 | DR: 529.97740 --
Pred-> Constructive: 1974, Active: 558, Passive: 1161, None: 1305
Eval Time: 28.16629409790039
--epoch: 18200/20000 | ECR: 59.59257 | IS: 13.57184 | WIS: 73.69460 | PDIS: 12.32376 | PDWIS: 6.01385 | DR: 5

Eval Time: 28.443772077560425
--epoch: 1500/20000 | ECR: 46.09816 | IS: 25.89296 | WIS: 58.48057 | PDIS: 23.80650 | PDWIS: 5.95308 | DR: 671.66443 --
Pred-> Constructive: 3153, Active: 0, Passive: 0, None: 1845
Eval Time: 28.53163194656372
--epoch: 1600/20000 | ECR: 48.07617 | IS: 14.55593 | WIS: 73.64922 | PDIS: 13.57829 | PDWIS: 2.94144 | DR: 829.76363 --
Pred-> Constructive: 0, Active: 2804, Passive: 0, None: 2194
Eval Time: 28.573652982711792
--epoch: 1700/20000 | ECR: 51.65140 | IS: 0.00000 | WIS: 0.00000 | PDIS: 0.00000 | PDWIS: 0.00000 | DR: 634.83862 --
Pred-> Constructive: 0, Active: 0, Passive: 2603, None: 2395
Eval Time: 28.81318688392639
--epoch: 1800/20000 | ECR: 49.67318 | IS: 14.55593 | WIS: 73.64922 | PDIS: 13.57829 | PDWIS: 2.82608 | DR: 873.60068 --
Pred-> Constructive: 0, Active: 2947, Passive: 0, None: 2051
Eval Time: 29.07265877723694
--epoch: 1900/20000 | ECR: 45.70305 | IS: 20.13019 | WIS: 77.45899 | PDIS: 18.45886 | PDWIS: 6.63827 | DR: 570.31559 --
Pred-> Const

Eval Time: 25.40792179107666
--epoch: 5400/20000 | ECR: 48.16331 | IS: 22.46166 | WIS: 77.27208 | PDIS: 20.69087 | PDWIS: 6.09023 | DR: 693.06061 --
Pred-> Constructive: 2670, Active: 33, Passive: 56, None: 2239
Eval Time: 25.11331796646118
--epoch: 5500/20000 | ECR: 51.76947 | IS: 0.81278 | WIS: 79.16500 | PDIS: 0.81278 | PDWIS: 0.26344 | DR: 670.54962 --
Pred-> Constructive: 574, Active: 0, Passive: 2520, None: 1904
Eval Time: 25.12427592277527
--epoch: 5600/20000 | ECR: 54.79202 | IS: 19.32589 | WIS: 75.67201 | PDIS: 17.97766 | PDWIS: 6.30687 | DR: 603.36898 --
Pred-> Constructive: 1058, Active: 1698, Passive: 396, None: 1846
Eval Time: 25.044169902801514
--epoch: 5700/20000 | ECR: 52.14009 | IS: 10.72796 | WIS: 71.44641 | PDIS: 10.01844 | PDWIS: 2.50284 | DR: 781.81602 --
Pred-> Constructive: 53, Active: 3209, Passive: 185, None: 1551
Eval Time: 25.071693181991577
--epoch: 5800/20000 | ECR: 53.18755 | IS: 22.14382 | WIS: 78.96780 | PDIS: 20.35470 | PDWIS: 5.59392 | DR: 751.36981 --

Eval Time: 25.154462814331055
--epoch: 9300/20000 | ECR: 58.99678 | IS: 25.34274 | WIS: 85.30048 | PDIS: 23.42465 | PDWIS: 8.23695 | DR: 686.11663 --
Pred-> Constructive: 2260, Active: 1508, Passive: 723, None: 507
Eval Time: 25.176556825637817
--epoch: 9400/20000 | ECR: 57.48310 | IS: 21.42793 | WIS: 86.73579 | PDIS: 19.78319 | PDWIS: 5.50498 | DR: 754.64302 --
Pred-> Constructive: 841, Active: 2425, Passive: 1077, None: 655
Eval Time: 25.164530992507935
--epoch: 9500/20000 | ECR: 57.84805 | IS: 39.81486 | WIS: 70.66911 | PDIS: 36.06940 | PDWIS: 14.17369 | DR: 611.62635 --
Pred-> Constructive: 1449, Active: 948, Passive: 496, None: 2105
Eval Time: 25.13826274871826
--epoch: 9600/20000 | ECR: 54.55774 | IS: 12.87334 | WIS: 77.75899 | PDIS: 12.24321 | PDWIS: 6.46129 | DR: 485.73848 --
Pred-> Constructive: 2470, Active: 37, Passive: 2233, None: 258
Eval Time: 25.30037498474121
--epoch: 9700/20000 | ECR: 56.39341 | IS: 48.47387 | WIS: 74.57224 | PDIS: 43.65654 | PDWIS: 10.65600 | DR: 729.

Eval Time: 25.070723056793213
--epoch: 13100/20000 | ECR: 43.17287 | IS: 44.37115 | WIS: 71.95421 | PDIS: 39.83456 | PDWIS: 13.84212 | DR: 436.50340 --
Pred-> Constructive: 2019, Active: 1293, Passive: 986, None: 700
Eval Time: 25.17318296432495
--epoch: 13200/20000 | ECR: 45.15270 | IS: 37.57237 | WIS: 65.78964 | PDIS: 33.94755 | PDWIS: 9.18127 | DR: 567.27112 --
Pred-> Constructive: 1954, Active: 1259, Passive: 857, None: 928
Eval Time: 25.199906826019287
--epoch: 13300/20000 | ECR: 49.85241 | IS: 9.87934 | WIS: 64.14983 | PDIS: 9.54360 | PDWIS: 4.54107 | DR: 431.92586 --
Pred-> Constructive: 829, Active: 857, Passive: 2736, None: 576
Eval Time: 25.25575089454651
--epoch: 13400/20000 | ECR: 50.60845 | IS: 39.34247 | WIS: 68.65768 | PDIS: 35.03454 | PDWIS: 9.17441 | DR: 631.51120 --
Pred-> Constructive: 3001, Active: 156, Passive: 644, None: 1197
Eval Time: 25.318554162979126
--epoch: 13500/20000 | ECR: 50.69798 | IS: 24.15375 | WIS: 82.36586 | PDIS: 22.42123 | PDWIS: 9.19418 | DR: 48

Eval Time: 25.135705947875977
--epoch: 16900/20000 | ECR: 60.13444 | IS: 18.92673 | WIS: 54.52018 | PDIS: 17.93192 | PDWIS: 6.91964 | DR: 555.02401 --
Pred-> Constructive: 1513, Active: 634, Passive: 2354, None: 497
Eval Time: 25.224241018295288
--epoch: 17000/20000 | ECR: 61.60102 | IS: 19.28577 | WIS: 61.84146 | PDIS: 17.82447 | PDWIS: 4.45621 | DR: 746.36044 --
Pred-> Constructive: 2693, Active: 838, Passive: 606, None: 861
Eval Time: 25.14502191543579
--epoch: 17100/20000 | ECR: 61.21146 | IS: 7.35236 | WIS: 34.82650 | PDIS: 7.09592 | PDWIS: 2.54500 | DR: 528.36903 --
Pred-> Constructive: 1576, Active: 986, Passive: 2367, None: 69
Eval Time: 25.075128078460693
--epoch: 17200/20000 | ECR: 58.95983 | IS: 11.90642 | WIS: 47.45514 | PDIS: 11.47351 | PDWIS: 3.62755 | DR: 584.80531 --
Pred-> Constructive: 2005, Active: 2152, Passive: 328, None: 513
Eval Time: 25.49035406112671
--epoch: 17300/20000 | ECR: 54.48439 | IS: 36.60504 | WIS: 60.49343 | PDIS: 32.99819 | PDWIS: 6.88462 | DR: 787.

Eval Time: 26.095462322235107
--epoch: 600/20000 | ECR: 32.68570 | IS: 35.41532 | WIS: 65.08401 | PDIS: 31.57768 | PDWIS: 8.63300 | DR: 610.69517 --
Pred-> Constructive: 1245, Active: 2819, Passive: 934, None: 0
Eval Time: 25.705193996429443
--epoch: 700/20000 | ECR: 36.36755 | IS: 15.06928 | WIS: 74.31633 | PDIS: 14.09163 | PDWIS: 2.55087 | DR: 680.22365 --
Pred-> Constructive: 1767, Active: 3231, Passive: 0, None: 0
Eval Time: 25.742722034454346
--epoch: 800/20000 | ECR: 35.06154 | IS: 141.23433 | WIS: 84.60081 | PDIS: 107.89624 | PDWIS: 11.05197 | DR: 730.10104 --
Pred-> Constructive: 2198, Active: 2800, Passive: 0, None: 0
Eval Time: 25.788870096206665
--epoch: 900/20000 | ECR: 37.58787 | IS: 21.72675 | WIS: 73.44677 | PDIS: 19.82854 | PDWIS: 6.48201 | DR: 413.43007 --
Pred-> Constructive: 3047, Active: 1951, Passive: 0, None: 0
Eval Time: 25.803316116333008
--epoch: 1000/20000 | ECR: 38.71394 | IS: 16.35264 | WIS: 64.35343 | PDIS: 15.31083 | PDWIS: 3.31064 | DR: 656.12107 --
Pred-

Eval Time: 25.781837224960327
--epoch: 4500/20000 | ECR: 49.86387 | IS: 25.89296 | WIS: 67.59060 | PDIS: 23.80650 | PDWIS: 7.36216 | DR: 689.11646 --
Pred-> Constructive: 2520, Active: 0, Passive: 2478, None: 0
Eval Time: 25.827509880065918
--epoch: 4600/20000 | ECR: 49.30114 | IS: 0.29343 | WIS: 7.14500 | PDIS: 0.29343 | PDWIS: 0.12219 | DR: 558.28377 --
Pred-> Constructive: 0, Active: 0, Passive: 4998, None: 0
Eval Time: 25.829063892364502
--epoch: 4700/20000 | ECR: 49.26715 | IS: 15.06928 | WIS: 67.48264 | PDIS: 14.09163 | PDWIS: 3.18599 | DR: 782.88962 --
Pred-> Constructive: 0, Active: 3323, Passive: 1675, None: 0
Eval Time: 26.32573103904724
--epoch: 4800/20000 | ECR: 47.58361 | IS: 28.43836 | WIS: 67.09816 | PDIS: 24.98083 | PDWIS: 5.94907 | DR: 819.21085 --
Pred-> Constructive: 269, Active: 2791, Passive: 1938, None: 0
Eval Time: 26.164997816085815
--epoch: 4900/20000 | ECR: 46.29178 | IS: 2.31627 | WIS: 34.70846 | PDIS: 2.31627 | PDWIS: 0.91965 | DR: 577.36809 --
Pred-> Constr

Eval Time: 26.695120096206665
--epoch: 8400/20000 | ECR: 50.56981 | IS: 1.05570 | WIS: 20.56500 | PDIS: 1.05570 | PDWIS: 0.43431 | DR: 600.83556 --
Pred-> Constructive: 94, Active: 501, Passive: 4371, None: 32
Eval Time: 26.664772748947144
--epoch: 8500/20000 | ECR: 51.39373 | IS: 16.47642 | WIS: 71.52329 | PDIS: 15.00814 | PDWIS: 4.58174 | DR: 634.66592 --
Pred-> Constructive: 1606, Active: 1149, Passive: 2220, None: 23
Eval Time: 26.661784887313843
--epoch: 8600/20000 | ECR: 51.56498 | IS: 4.23418 | WIS: 45.19548 | PDIS: 4.01073 | PDWIS: 1.28753 | DR: 547.21907 --
Pred-> Constructive: 1187, Active: 606, Passive: 3193, None: 12
Eval Time: 26.477787017822266
--epoch: 8700/20000 | ECR: 50.65223 | IS: 22.06056 | WIS: 70.59380 | PDIS: 20.15313 | PDWIS: 5.72732 | DR: 726.56506 --
Pred-> Constructive: 2183, Active: 1366, Passive: 1407, None: 42
Eval Time: 26.73074698448181
--epoch: 8800/20000 | ECR: 50.77847 | IS: 514.93092 | WIS: 62.58512 | PDIS: 348.01994 | PDWIS: 9.47014 | DR: 1276.11093

Eval Time: 24.971224069595337
--epoch: 12300/20000 | ECR: 60.00105 | IS: 8.57304 | WIS: 57.09496 | PDIS: 8.23959 | PDWIS: 2.08463 | DR: 900.68466 --
Pred-> Constructive: 2328, Active: 956, Passive: 1296, None: 418
Eval Time: 25.16976261138916
--epoch: 12400/20000 | ECR: 63.91206 | IS: 499.18220 | WIS: 62.41865 | PDIS: 333.91047 | PDWIS: 7.36798 | DR: 1239.11954 --
Pred-> Constructive: 2965, Active: 798, Passive: 925, None: 310
Eval Time: 25.01572895050049
--epoch: 12500/20000 | ECR: 60.71610 | IS: 5.25887 | WIS: 47.10011 | PDIS: 4.98729 | PDWIS: 2.83152 | DR: 528.23472 --
Pred-> Constructive: 688, Active: 841, Passive: 2930, None: 539
Eval Time: 27.178532123565674
--epoch: 12600/20000 | ECR: 56.29603 | IS: 6.79929 | WIS: 52.45554 | PDIS: 6.46470 | PDWIS: 2.47384 | DR: 522.90642 --
Pred-> Constructive: 1584, Active: 916, Passive: 2106, None: 392
Eval Time: 27.239524126052856
--epoch: 12700/20000 | ECR: 59.12006 | IS: 52.47675 | WIS: 81.13071 | PDIS: 46.79729 | PDWIS: 10.92997 | DR: 1012

Eval Time: 31.35903525352478
--epoch: 16100/20000 | ECR: 58.29347 | IS: 17.88467 | WIS: 64.36829 | PDIS: 16.36445 | PDWIS: 7.35370 | DR: 668.08076 --
Pred-> Constructive: 2264, Active: 603, Passive: 1968, None: 163
Eval Time: 26.019238710403442
--epoch: 16200/20000 | ECR: 55.31383 | IS: 63.11536 | WIS: 79.05081 | PDIS: 52.03147 | PDWIS: 11.74327 | DR: 749.66597 --
Pred-> Constructive: 2386, Active: 887, Passive: 1530, None: 195
Eval Time: 25.817668914794922
--epoch: 16300/20000 | ECR: 58.08501 | IS: 18.57217 | WIS: 64.46073 | PDIS: 17.04966 | PDWIS: 5.49099 | DR: 709.65621 --
Pred-> Constructive: 3188, Active: 742, Passive: 938, None: 130
Eval Time: 26.06474804878235
--epoch: 16400/20000 | ECR: 58.02396 | IS: 17.08951 | WIS: 68.99560 | PDIS: 16.31721 | PDWIS: 4.78955 | DR: 599.89500 --
Pred-> Constructive: 1096, Active: 1795, Passive: 2047, None: 60
Eval Time: 26.00522208213806
--epoch: 16500/20000 | ECR: 56.65546 | IS: 39.74740 | WIS: 79.05853 | PDIS: 34.76216 | PDWIS: 14.82089 | DR: 

Eval Time: 25.753206968307495
--epoch: 19900/20000 | ECR: 54.95991 | IS: 27.77121 | WIS: 74.36194 | PDIS: 24.99870 | PDWIS: 7.39545 | DR: 737.39744 --
Pred-> Constructive: 3271, Active: 626, Passive: 1072, None: 29
Eval Time: 25.691134929656982
--epoch: 20000/20000 | ECR: 58.66978 | IS: 47.88999 | WIS: 80.98942 | PDIS: 42.15344 | PDWIS: 10.91479 | DR: 772.19952 --
Pred-> Constructive: 2466, Active: 1533, Passive: 994, None: 5
True-> Constructive: 2009, Active: 2074, Passive: 445, None: 470
Pred-> Constructive: 2466, Active: 1533, Passive: 994, None: 5
-> True Reward: 54.63407/30.52030, Pred Reward: 58.66978/18.42337


In [5]:
# target = 'ECR'
# step = 1

# y = []
# for i in range(int(len(eval_df)/step)):
#     y.append(eval_df.loc[i*step:i*step+step, target].mean())

# plt.plot(range(len(y)), y)

# OTHER EXPERIMENTAL DETAILS

In [1]:
import pandas as pd
df = pd.read_pickle('../temp/df_all_norm_cluster.pkl')
df.groupby(['action']).count()

df['delayed_reward'] = pd.to_numeric(df['delayed_reward'])
df.groupby(['episode_id']).sum()[['delayed_reward']].describe()

Unnamed: 0,delayed_reward
count,487.0
mean,54.634066
std,30.551682
min,-100.0
25%,36.36
50%,60.0
75%,77.78
max,100.0


In [16]:
df.loc[df['done']==True].describe()

Unnamed: 0,cluster,ns_cluster,action_proba,ns_act_0_proba,ns_act_1_proba,ns_act_2_proba,ns_act_3_proba
count,487.0,487.0,487.0,487.0,487.0,487.0,487.0
mean,2.088296,1.0,0.362423,0.09,0.09,0.42,0.39
std,1.512093,0.0,0.114959,8.335235e-16,8.335235e-16,3.723072e-15,4.112049e-15
min,0.0,1.0,0.08,0.09,0.09,0.42,0.39
25%,1.0,1.0,0.39,0.09,0.09,0.42,0.39
50%,3.0,1.0,0.4,0.09,0.09,0.42,0.39
75%,3.0,1.0,0.42,0.09,0.09,0.42,0.39
max,4.0,1.0,0.44,0.09,0.09,0.42,0.39


In [17]:
df['delayed_reward'].unique()

array([0, 30.0, 88.89, 85.71, 71.43, 62.5, 66.67, 45.45, 40.0, 42.86,
       100.0, 44.44, -12.5, 83.33, 25.0, 72.73, 37.5, 57.14, 20.0, 28.57,
       75.0, 33.33, 27.27, 22.22, 50.0, 80.0, 77.78, 87.5, 55.56, 60.0,
       10.0, 12.5, 70.0, -66.67, 9.09, 90.0, 18.18, 81.82, 63.64, -22.22,
       11.11, 54.55, 14.29, -9.09, -100.0, -50.0, -33.33, 41.67, 16.67,
       36.36, -25.0], dtype=object)