In [1]:
#Proximal Policy Optimzization algorithm
#Split version
import numpy as np
import tensorflow.compat.v1 as tf
import tensorflow_probability as tfp
import random
from collections import deque
from ou_noise import OUNoise
import actor_PPO_net
import critic_PPO_net
import Environment as En
import os
from timeit import default_timer as timer   


reward_path = 'Monitoring/reward/'
if not os.path.exists(reward_path): os.makedirs(reward_path)

Noise_path = 'Monitoring/Noise/'
if not os.path.exists(Noise_path): os.makedirs(Noise_path)
    
Loss_path = 'Monitoring/Loss/'
if not os.path.exists(Loss_path): os.makedirs(Loss_path)


environment = En.env() 
_ = environment.reset()

alpha_critic = 0.9 

#Input & Output
state_nums = environment.state_num() 
action_nums = environment.action_setting() 

input_size_state= state_nums
input_size_action = state_nums
output_size_critic = 1 
output_size_actor =  action_nums


#Reinforcement learning parmeter
dis = 0.99 
buffer_memory = 50000 #Replay memory에 몇개를 넣을 것인가? (Buffer)
alpha_critic = 1.0
exploration_noise = OUNoise(input_size_action)
epsilon = 0.2 # for PPO CLIPPING


def train(main_actor, main_critic, train_batch, batch_size):

    #make empty stack
    state_stack = np.empty(0);  action_stack = np.empty(0)
    Q_target_stack = np.empty(0); Q_old_stack = np.empty(0)
    state_stack = np.reshape(state_stack, (0, input_size_state))
    action_stack = np.reshape(action_stack, (0, output_size_actor))
    Q_target_stack = np.reshape(Q_target_stack, (0, output_size_critic))
    Q_old_stack = np.reshape(Q_old_stack, (0, output_size_critic))    
    
    
    
    #----------------------------- Actor train -----------------------------#
    # Actor train part
    
    Loss_actor = 0.0
    
    for state, action, reward, next_state, done, old_log_prob in train_batch:
        
        #print(np.mean(action))
      
        #Old Q
        Q_old = main_critic.predict(state, action)         
        
        #make V_old
        V_old = Q_old - reward
      
        actor_loss, _  = main_actor.update(main_critic, state, next_state, action, old_log_prob, V_old, reward) #, main_actor   
        
        Loss_actor += actor_loss/len(train_batch)
    

    #----------------------------- Critic train -----------------------------#
    #Apprioxiomate TD error (Advantage)

    
    # Critic train part
    for state, action, reward, next_state, done, old_log_prob in train_batch:

        #Old Q
        Q_old = main_critic.predict(state, action)           
        
        #PPO - on-policy (main_actor)
        next_action, _ = main_actor.predict(next_state) 
        #next_action하고 log_prob반환하니까
        
        #target_Q
        Q_target = Q_old + alpha_critic*(reward + dis*(main_critic.predict(next_state, next_action)) - Q_old)     
 
        
        #Stacking process
        state_stack = np.vstack([state_stack, state])
        Q_target_stack = np.vstack([Q_target_stack, Q_target])
        Q_old_stack = np.vstack([Q_old_stack, Q_old])
        action_stack = np.vstack([action_stack, action])


    #Update critic model 
    Critic_loss, _  = main_critic.update(state_stack, action_stack, Q_target_stack)  

        
    return Critic_loss, Loss_actor




def main():

    st_step = 15 #action을 몇 time-step마다 취할 것인지에 대한 숫자
    state_step = 0
    record_frequency = 1
    step_deadline = 100
    update_freq = 1
    train_loop_epoch = 2 #PPO 
    max_episodes = 1000
    batch_size = 128
    buff_len = batch_size
    Loss_step = 0  

    
    # Replay buffer를 deque로 짠다. 
    buffer = deque() 

    reward_buffer = deque() #maxlen=100
    #reward_buffer또한 deque로 만들어서 마지막 100개까지 기억하도록 한다
    

    with tf.Session() as sess:
             
        #formation of network for actor net
        main_actor = actor_PPO_net.actor(sess, input_size_state, output_size_actor, output_size_critic, name="main_actor") 
       
        #formation of network for critic net (first error NameError - input_size ciritic 등)
        main_critic = critic_PPO_net.critic(sess, input_size_state, input_size_action, output_size_critic, main_actor, name="main_critic") 
        #main_critic에 main_actor를 넣어줘서 연결시킴 Q(s,a) - Objective
        
        _ = main_critic.initialization_c(name ="main_critic")
        _ = main_actor.initialization_a(main_critic.Objective, name ="main_actor")
        
        sess.run(tf.global_variables_initializer()) 
        print("initialization complete")    


        for episode in range(0, max_episodes+1):
            
            print("Episode : {} start ".format(episode))
        
            done = False
            
            ##################### environment로부터 state를 받아온다 (observation) ###############
            state = environment.reset() #envrionment로부터 state를 가져온다. (초기 state)
            exploration_noise.reset()       
            
            reward_graph = 0

            #Noise 그래프 그리기        
            
            reward_record = open(reward_path + "reward.plt" , 'a', encoding='utf-8', newline='') 
            noise_record = open(Noise_path + "noise, episode{}.plt" .format(episode), 'a', encoding='utf-8', newline='')
            state_reward_record = open(reward_path + "state_reward, episode{}.plt" .format(episode), 'a', encoding='utf-8', newline='')
            Loss_record = open(Loss_path + "Loss.plt".format(episode), 'a', encoding='utf-8', newline='')
            
            if episode ==0: 
                noise_record.write('VARIABLES = "state_step", "noise" \n') 
                state_reward_record.write('VARIABLES = "state_step", "avg_reward" \n')  
                reward_record.write('VARIABLES = "Episode", "Reward" \n') 
                Loss_record.write('VARIABLES = "state_step", "Loss" \n')
            
            
            while not done == True:

                Noise = 0.5*exploration_noise.noise() # 매 step마다 Normal distribution에서 임의로 추출한다.
                      
                #------------------------Stochastic policy part ------------------------#

                action, log_prob = main_actor.predict(state)
    
                #action_noise = action + (Noise) #with N(Noise) #이
                
                #noise를 입력한다
                noise_record.write("%d %f \n" %(state_step ,np.mean(Noise)))
                
                #Noise 조절이 좀 필요하다 step loop 안으로 들어와야 계속 변할수 있다. 
                #action_noise = np.reshape(action_noise, (input_size_critic_a))
                                
                # Get new state and reward from environment  
                next_state, reward, done, record = environment.simulation(state, action, st_step, step_deadline, episode)
                
                #한 Episode에서 순간 reward를 기록하기 위함
                state_reward_record.write("%d %f \n" %(state_step ,reward))
                

                #한 step의 reward씩 계속 reward_graph에 쌓는다. summation of reward
                reward_graph = reward + reward_graph

                #----------------------------- Replay buffer -------------------------#
                buffer.append((state, action, reward, next_state, done, log_prob))
                
                if len(buffer) > buffer_memory:
                    buffer.popleft()         
                    
                #--------------------------- Learning part ----------------------------#
                if len(buffer) > buff_len and state_step % update_freq == 0:

                    loss_avg = 0
                    
                    for _ in range(train_loop_epoch): #PPO에서는 train_loop_epoch을 활성화 시킨다.
                        
                        minibatch = random.sample(buffer, batch_size) 
                        minibatch = list(minibatch)
                        
                        #critic update start
                        loss_critic, loss_actor = train(main_actor, main_critic, minibatch, batch_size)
                        
                        loss_avg = loss_critic/train_loop_epoch +loss_avg

                    print("Loss for critic is : {}".format(loss_avg)) 
                    Loss_record.write("%d %f \n" %(Loss_step ,loss_avg))
                               
                #---------------------------------------------------------------------#       
               
                state = next_state
                
                state_step += 1
                Loss_step += 1
                #print("step num : {}".format(step))
                
                # break part
                if done==True or state_step == step_deadline:
                    break
               
            
            reward_graph = reward_graph/state_step
            
            state_step = 0 #state_step zero            
            
            #plt file로 reward graph 저장
            reward_record.write("%d %f \n" %(episode , reward_graph))
            
            noise_record.close()
            state_reward_record.close()
            Loss_record.close()
            reward_record.close()


if __name__ == "__main__":
    
    main()
    
    print("All process is finished!")




main_actor
Actor_net connected
main_critic
Critic_net connected - for action_feed
main_critic
Critic_net connected - for actor_feed
initialization complete
Episode : 0 start 
avg_reward
-0.037918306309335126
avg_reward
-0.08316348346822357
avg_reward
-0.14354868720882416
avg_reward
-0.23282762206568836
avg_reward
-0.2672465615670137
avg_reward
-0.3166624362662385
avg_reward
-0.34754056007871664
avg_reward
-0.39295225898542835
avg_reward
-0.4175366727923777
avg_reward
-0.4206974827878375
avg_reward
-0.37126249567371317
avg_reward
-0.2954937959458634
avg_reward
-0.3823870562199603
avg_reward
-0.3098845950090836
avg_reward
-0.3613051770053212
avg_reward
-0.18756872606172362
avg_reward
-0.275045638899122
avg_reward
-0.2939949797745364
avg_reward
-0.3630565136612709
avg_reward
-0.21645360896398433
avg_reward
-0.23508600929579954
avg_reward
-0.19678724939331105
avg_reward
-0.3366939072843059
avg_reward
-0.27720938689920144
avg_reward
-0.2432588218932416
avg_reward
-0.22769102820545675
avg_re

Loss for critic is : 0.03796885348856449
avg_reward
-0.07719353992810737
Loss for critic is : 0.0374018419533968
avg_reward
-0.0904433018729646
Loss for critic is : 0.03664250671863556
avg_reward
-0.07834511458101644
Loss for critic is : 0.03613210283219814
avg_reward
-0.08585352286994054
Loss for critic is : 0.03492768108844757
avg_reward
-0.0650633258435355
Loss for critic is : 0.034111570566892624
avg_reward
-0.10226551774484605
Loss for critic is : 0.0335321519523859
avg_reward
-0.10789550624426872
Loss for critic is : 0.03291033022105694
avg_reward
-0.113437292600756
Loss for critic is : 0.032458554953336716
avg_reward
-0.12542530195636747
Loss for critic is : 0.03183715231716633
avg_reward
-0.13566751832187846
Loss for critic is : 0.03114527277648449
avg_reward
-0.18348545759454002
Loss for critic is : 0.03080465830862522
avg_reward
-0.18996584578557218
Loss for critic is : 0.030354967340826988
avg_reward
-0.236703441917508
Loss for critic is : 0.030257634818553925
avg_reward
-0.

Loss for critic is : 0.04427293688058853
avg_reward
-1.0312402815390944
Loss for critic is : 0.039208345115184784
avg_reward
-0.8015922231060967
Loss for critic is : 0.04336860030889511
avg_reward
-0.8100689636367541
Loss for critic is : 0.03806553594768047
avg_reward
-0.8837284381981589
Loss for critic is : 0.043846312910318375
Episode : 3 start 
avg_reward
-0.044294640293493025
Loss for critic is : 0.03444484435021877
avg_reward
-0.07719256476518832
Loss for critic is : 0.032490719109773636
avg_reward
-0.10473224573402784
Loss for critic is : 0.04221835546195507
avg_reward
-0.12441547249987531
Loss for critic is : 0.05287611298263073
avg_reward
-0.14151141819641364
Loss for critic is : 0.037539007142186165
avg_reward
-0.1774321999330304
Loss for critic is : 0.024665926583111286
avg_reward
-0.25531796992828176
Loss for critic is : 0.023496929556131363
avg_reward
-0.24654209705898616
Loss for critic is : 0.02123444527387619
avg_reward
-0.3764352368979694
Loss for critic is : 0.02200669

Loss for critic is : 0.08973471820354462
avg_reward
-0.17397984706153685
Loss for critic is : 0.08447854965925217
avg_reward
-0.1811243005432529
Loss for critic is : 0.08686456084251404
avg_reward
-0.18516679814281622
Loss for critic is : 0.08391916751861572
avg_reward
-0.1840243278357267
Loss for critic is : 0.08255432173609734
avg_reward
-0.18957809117209223
Loss for critic is : 0.07789607346057892
avg_reward
-0.1736083544612157
Loss for critic is : 0.08031192421913147
avg_reward
-0.16692880629725812
Loss for critic is : 0.08068451657891273
avg_reward
-0.16200964496882594
Loss for critic is : 0.08026828244328499
avg_reward
-0.1445570368233002
Loss for critic is : 0.07555504888296127
avg_reward
-0.1260094171159455
Loss for critic is : 0.08516799658536911
avg_reward
-0.11021206647958462
Loss for critic is : 0.07858933880925179
avg_reward
-0.10053270423620907
Loss for critic is : 0.07310553640127182
avg_reward
-0.08407384872439017
Loss for critic is : 0.0770106315612793
avg_reward
-0.06

Loss for critic is : 0.07544330507516861
avg_reward
-0.18444382780429805
Loss for critic is : 0.0695846676826477
avg_reward
-0.19680646271000088
Loss for critic is : 0.06363070569932461
avg_reward
-0.20875929010830296
Loss for critic is : 0.06011044234037399
avg_reward
-0.2129417308229751
Loss for critic is : 0.05909576639533043
avg_reward
-0.22228497293069416
Loss for critic is : 0.05798843689262867
avg_reward
-0.25847506648128055
Loss for critic is : 0.0550918523222208
avg_reward
-0.24985601141970593
Loss for critic is : 0.054567232728004456
avg_reward
-0.26675867965733907
Loss for critic is : 0.05795029364526272
avg_reward
-0.2661273937227565
Loss for critic is : 0.05285531468689442
avg_reward
-0.2837471472021107
Loss for critic is : 0.05578869953751564
avg_reward
-0.2671592104980184
Loss for critic is : 0.054700881242752075
avg_reward
-0.32123952330733707
Loss for critic is : 0.050741540268063545
avg_reward
-0.29278405583041944
Loss for critic is : 0.05021017789840698
avg_reward
-0

Loss for critic is : 0.030066242441534996
avg_reward
-0.07078158240006509
Loss for critic is : 0.030350147746503353
avg_reward
-0.07509161515517242
Loss for critic is : 0.029641056433320045
avg_reward
-0.15669067322916852
Loss for critic is : 0.02812537457793951
avg_reward
-0.1520673542923247
Loss for critic is : 0.028975614346563816
avg_reward
-0.16191698240743024
Loss for critic is : 0.03023051656782627
avg_reward
-0.2122196118499284
Loss for critic is : 0.028981071896851063
avg_reward
-0.1728596380148668
Loss for critic is : 0.030302593484520912
avg_reward
-0.17979756056864882
Loss for critic is : 0.028904495760798454
avg_reward
-0.23251369967185012
Loss for critic is : 0.029436307027935982
avg_reward
-0.19534236136425798
Loss for critic is : 0.029212215915322304
avg_reward
-0.23314362065898142
Loss for critic is : 0.029562801122665405
avg_reward
-0.283180332084255
Loss for critic is : 0.029672368429601192
avg_reward
-0.2301222860684033
Loss for critic is : 0.029282878153026104
avg_

Loss for critic is : 0.02924980316311121
avg_reward
-0.05127559418054943
Loss for critic is : 0.030648648738861084
avg_reward
-0.06689959384507363
Loss for critic is : 0.02911990601569414
avg_reward
-0.03892151988175785
Loss for critic is : 0.029645845293998718
avg_reward
-0.06167582838704476
Loss for critic is : 0.030164198018610477
avg_reward
-0.10607472805993011
Loss for critic is : 0.029440658167004585
avg_reward
-0.13890432364207989
Loss for critic is : 0.02912869304418564
avg_reward
-0.13934930044487676
Loss for critic is : 0.029269020073115826
avg_reward
-0.13167076564062113
Loss for critic is : 0.029499476775527
avg_reward
-0.12563237996107887
Loss for critic is : 0.02890863921493292
avg_reward
-0.13343054079942074
Loss for critic is : 0.02858610637485981
avg_reward
-0.145447043630245
Loss for critic is : 0.028761280700564384
avg_reward
-0.14555902078328656
Loss for critic is : 0.02923056297004223
avg_reward
-0.13396736157486397
Loss for critic is : 0.028242837637662888
avg_rew

Loss for critic is : 0.05077633820474148
avg_reward
-0.5019988545420163
Loss for critic is : 0.04738075099885464
avg_reward
-0.5544014829840984
Loss for critic is : 0.041255438700318336
avg_reward
-0.5626325554873122
Loss for critic is : 0.04142356291413307
avg_reward
-0.5538383414629857
Loss for critic is : 0.03771207109093666
avg_reward
-0.5372018197765975
Loss for critic is : 0.03535025380551815
avg_reward
-0.5042168294634307
Loss for critic is : 0.03774537332355976
avg_reward
-0.48745999567941767
Loss for critic is : 0.03606617823243141
avg_reward
-0.4281152917314311
Loss for critic is : 0.036947526037693024
avg_reward
-0.4907637338057713
Loss for critic is : 0.03933228924870491
avg_reward
-0.4655679247249171
Loss for critic is : 0.03680957481265068
avg_reward
-0.44321143729275275
Loss for critic is : 0.03678486868739128
avg_reward
-0.3858117236863142
Loss for critic is : 0.03494746424257755
avg_reward
-0.42973222302846037
Loss for critic is : 0.037343863397836685
avg_reward
-0.329

Loss for critic is : 0.026583130471408367
avg_reward
-0.11612514424299787
Loss for critic is : 0.02626656647771597
avg_reward
-0.10794913278720117
Loss for critic is : 0.025027510710060596
avg_reward
-0.09699644852888575
Loss for critic is : 0.02657389361411333
avg_reward
-0.11937451948740606
Loss for critic is : 0.02618769370019436
avg_reward
-0.14942346056966818
Loss for critic is : 0.025917968712747097
avg_reward
-0.11085341544905211
Loss for critic is : 0.02578841894865036
avg_reward
-0.1273033260749201
Loss for critic is : 0.026658806018531322
avg_reward
-0.11332463450646174
Loss for critic is : 0.02588736731559038
avg_reward
-0.10671607568513754
Loss for critic is : 0.025170202367007732
avg_reward
-0.10771310452911427
Loss for critic is : 0.026283985003829002
avg_reward
-0.11274336809047275
Loss for critic is : 0.025591895915567875
avg_reward
-0.12549062022849675
Loss for critic is : 0.02747371792793274
avg_reward
-0.09862699363729623
Loss for critic is : 0.026723246090114117
avg

Loss for critic is : 0.05358721688389778
avg_reward
-1.3450958255300631
Loss for critic is : 0.06757972575724125
avg_reward
-1.3047001313634243
Loss for critic is : 0.06033547967672348
avg_reward
-1.2948453402273343
Loss for critic is : 0.05214768275618553
avg_reward
-1.2760306551321963
Loss for critic is : 0.0691513903439045
avg_reward
-1.254523097802561
Loss for critic is : 0.0750976949930191
avg_reward
-1.3036707799930343
Loss for critic is : 0.08790384978055954
avg_reward
-1.4517085665169565
Loss for critic is : 0.07604452967643738
avg_reward
-1.5291506378542774
Loss for critic is : 0.081912811845541
avg_reward
-1.3459342439918902
Loss for critic is : 0.0617358572781086
avg_reward
-1.3291260818938246
Loss for critic is : 0.053267380222678185
avg_reward
-1.2956387280151218
Loss for critic is : 0.04404028505086899
avg_reward
-1.3423885329652911
Loss for critic is : 0.05121087282896042
avg_reward
-1.3125780698084288
Loss for critic is : 0.06200469844043255
avg_reward
-1.25308623533197

Loss for critic is : 0.03332132659852505
avg_reward
-0.24606519369517524
Loss for critic is : 0.03277614898979664
avg_reward
-0.22917345272119868
Loss for critic is : 0.03362683579325676
avg_reward
-0.22143040648064088
Loss for critic is : 0.032940469682216644
avg_reward
-0.20315715713680954
Loss for critic is : 0.032532718032598495
Episode : 12 start 
avg_reward
-0.050189195642864795
Loss for critic is : 0.031760433688759804
avg_reward
-0.11994996250358304
Loss for critic is : 0.03177157696336508
avg_reward
-0.1691055663667758
Loss for critic is : 0.032123666256666183
avg_reward
-0.20903898087215295
Loss for critic is : 0.03295671008527279
avg_reward
-0.24673937151262365
Loss for critic is : 0.03330371715128422
avg_reward
-0.2837795500651426
Loss for critic is : 0.03354089707136154
avg_reward
-0.3299383872881268
Loss for critic is : 0.03410894609987736
avg_reward
-0.3657650582765464
Loss for critic is : 0.03467012941837311
avg_reward
-0.39417057656901106
Loss for critic is : 0.0347243

Loss for critic is : 0.03176400810480118
avg_reward
-0.1682096560782876
Loss for critic is : 0.032691458240151405
avg_reward
-0.1710406526929264
Loss for critic is : 0.03140130080282688
avg_reward
-0.17073235851316493
Loss for critic is : 0.03229903057217598
avg_reward
-0.16237749111122066
Loss for critic is : 0.032152602449059486
avg_reward
-0.1517193393339958
Loss for critic is : 0.03141041472554207
avg_reward
-0.13655571233711727
Loss for critic is : 0.031798163428902626
avg_reward
-0.13838109221725636
Loss for critic is : 0.029847215861082077
avg_reward
-0.14620458249106527
Loss for critic is : 0.031153183430433273
avg_reward
-0.12491293093791857
Loss for critic is : 0.03073188103735447
avg_reward
-0.10759748970771539
Loss for critic is : 0.03082476183772087
avg_reward
-0.09484476381842716
Loss for critic is : 0.03224548418074846
avg_reward
-0.0910158801032092
Loss for critic is : 0.03164629824459553
avg_reward
-0.08721093807253674
Loss for critic is : 0.032468393445014954
avg_rewa

Loss for critic is : 0.03400043025612831
avg_reward
-0.1961888784296449
Loss for critic is : 0.0334707647562027
avg_reward
-0.1965923316638938
Loss for critic is : 0.03281305730342865
avg_reward
-0.16086544797167454
Loss for critic is : 0.03392530977725983
avg_reward
-0.16680521971866336
Loss for critic is : 0.033785294741392136
avg_reward
-0.14108990420069129
Loss for critic is : 0.03294669836759567
avg_reward
-0.16052696400645147
Loss for critic is : 0.033191025257110596
avg_reward
-0.1291464665230366
Loss for critic is : 0.03259493410587311
avg_reward
-0.1311766878837976
Loss for critic is : 0.03167896717786789
avg_reward
-0.12982308290207406
Loss for critic is : 0.03244387358427048
avg_reward
-0.14635757702165303
Loss for critic is : 0.033210791647434235
avg_reward
-0.14411159001727297
Loss for critic is : 0.03198149986565113
avg_reward
-0.1611881487426105
Loss for critic is : 0.03250894136726856
avg_reward
-0.16372835735849536
Loss for critic is : 0.03180572111159563
avg_reward
-0

Loss for critic is : 0.030210276134312153
avg_reward
-0.13229169628972803
Loss for critic is : 0.03087820578366518
avg_reward
-0.14388382454282722
Loss for critic is : 0.03172530047595501
avg_reward
-0.14086161829344132
Loss for critic is : 0.03150416165590286
avg_reward
-0.1286601574457924
Loss for critic is : 0.03104286454617977
avg_reward
-0.15007293239832875
Loss for critic is : 0.03076986037194729
avg_reward
-0.14738856849778106
Loss for critic is : 0.031324027106165886
avg_reward
-0.13453825776839012
Loss for critic is : 0.03184148855507374
avg_reward
-0.06729588892574712
Loss for critic is : 0.03217857889831066
avg_reward
-0.07764283826003712
Loss for critic is : 0.031524998135864735
avg_reward
-0.06495988187041837
Loss for critic is : 0.031251116655766964
avg_reward
-0.06762303513230164
Loss for critic is : 0.03160541690886021
avg_reward
-0.06332246546221967
Loss for critic is : 0.034708721563220024
avg_reward
-0.023500034478319026
Loss for critic is : 0.037122692912817
avg_rew

Loss for critic is : 0.04948200471699238
avg_reward
-0.16214473379084074
Loss for critic is : 0.05050436779856682
avg_reward
-0.19301308266049838
Loss for critic is : 0.04724682494997978
avg_reward
-0.19474614755490655
Loss for critic is : 0.04598386026918888
avg_reward
-0.1905370862596967
Loss for critic is : 0.04517462104558945
avg_reward
-0.17892270470342767
Loss for critic is : 0.04306922294199467
avg_reward
-0.19967302119986782
Loss for critic is : 0.04442322626709938
avg_reward
-0.20749577352374154
Loss for critic is : 0.04464838467538357
avg_reward
-0.21173460285364332
Loss for critic is : 0.044638246297836304
avg_reward
-0.1882940996508849
Loss for critic is : 0.04177342541515827
avg_reward
-0.21736722046757598
Loss for critic is : 0.044529229402542114
avg_reward
-0.22207977385369065
Loss for critic is : 0.04534837044775486
avg_reward
-0.21344083455976937
Loss for critic is : 0.044335486367344856
avg_reward
-0.21309122135092995
Loss for critic is : 0.040510326623916626
avg_rewa

Loss for critic is : 0.04226294904947281
avg_reward
-0.1814311254213381
Loss for critic is : 0.0414934940636158
avg_reward
-0.18121075321491847
Loss for critic is : 0.04096522368490696
avg_reward
-0.1824151592399723
Loss for critic is : 0.04103467985987663
avg_reward
-0.14026911738223147
Loss for critic is : 0.04194621928036213
avg_reward
-0.11321467687452141
Loss for critic is : 0.040355145931243896
avg_reward
-0.12198942646107824
Loss for critic is : 0.040862008929252625
avg_reward
-0.07824169943452403
Loss for critic is : 0.041562629863619804
avg_reward
-0.06234146795578438
Loss for critic is : 0.04076242074370384
avg_reward
-0.052799834565719184
Loss for critic is : 0.04098831117153168
avg_reward
-0.022834368127946814
Loss for critic is : 0.03944410942494869
avg_reward
-0.027482613012631357
Loss for critic is : 0.03728814795613289
avg_reward
-0.043848665247240154
Loss for critic is : 0.03910822793841362
avg_reward
-0.06190234291693106
Loss for critic is : 0.039356693625450134
avg_r

Loss for critic is : 0.037551069632172585
avg_reward
-0.19919444021132465
Loss for critic is : 0.039912138134241104
avg_reward
-0.1578554671701689
Loss for critic is : 0.041957393288612366
avg_reward
-0.12326839653623761
Loss for critic is : 0.04424441605806351
avg_reward
-0.10124904686659911
Loss for critic is : 0.0446863379329443
avg_reward
-0.01391100613465332
Loss for critic is : 0.046546511352062225
avg_reward
-0.0359552100416386
Loss for critic is : 0.04867745004594326
avg_reward
-0.04911469536586943
Loss for critic is : 0.0480607096105814
avg_reward
-0.05776309665169288
Loss for critic is : 0.04631071723997593
avg_reward
-0.0790765062203139
Loss for critic is : 0.04286806844174862
avg_reward
-0.09644328643725236
Loss for critic is : 0.04180275276303291
avg_reward
-0.10655162973981896
Loss for critic is : 0.040668290108442307
avg_reward
-0.24132238083406937
Loss for critic is : 0.039945073425769806
avg_reward
-0.21576485526876604
Loss for critic is : 0.039737820625305176
avg_rewa

Loss for critic is : 0.030395308509469032
avg_reward
-0.13297873192976878
Loss for critic is : 0.029901640489697456
avg_reward
-0.1684407934588734
Loss for critic is : 0.02968132682144642
avg_reward
-0.1755562590372871
Loss for critic is : 0.029644752852618694
avg_reward
-0.19507724353969688
Loss for critic is : 0.028821049258112907
avg_reward
-0.2035672354320167
Loss for critic is : 0.02861014474183321
avg_reward
-0.22589960193998354
Loss for critic is : 0.028931611217558384
avg_reward
-0.2432663980612165
Loss for critic is : 0.029644439928233624
avg_reward
-0.25982836521550856
Loss for critic is : 0.028681012801826
avg_reward
-0.2532343064010211
Loss for critic is : 0.02895869966596365
avg_reward
-0.2750579491990715
Loss for critic is : 0.028709099628031254
avg_reward
-0.2581119898135944
Loss for critic is : 0.02904599905014038
avg_reward
-0.2600434557863383
Loss for critic is : 0.029038486070930958
avg_reward
-0.2572453077015993
Loss for critic is : 0.02950134500861168
avg_reward
-0

Loss for critic is : 0.03267689794301987
avg_reward
-0.08011570726624431
Loss for critic is : 0.031080517917871475
avg_reward
-0.0749103850633338
Loss for critic is : 0.03039392363280058
avg_reward
-0.07823377196880474
Loss for critic is : 0.02991234790533781
avg_reward
-0.0830981748795081
Loss for critic is : 0.029851130209863186
Episode : 21 start 
avg_reward
-0.028544641710520124
Loss for critic is : 0.028854708187282085
avg_reward
-0.06700221471841977
Loss for critic is : 0.027706822380423546
avg_reward
-0.08885759189252017
Loss for critic is : 0.028549824841320515
avg_reward
-0.107231421649817
Loss for critic is : 0.02854551002383232
avg_reward
-0.12379563528975684
Loss for critic is : 0.028421766124665737
avg_reward
-0.13617643488420808
Loss for critic is : 0.028188900090754032
avg_reward
-0.13695483141840692
Loss for critic is : 0.028386949561536312
avg_reward
-0.13116288583418081
Loss for critic is : 0.028271623887121677
avg_reward
-0.12273848578779713
Loss for critic is : 0.02

Loss for critic is : 0.025358757004141808
avg_reward
-0.12316033219778738
Loss for critic is : 0.02602399792522192
avg_reward
-0.12114960362376587
Loss for critic is : 0.02627163752913475
avg_reward
-0.12938252981348097
Loss for critic is : 0.026031779125332832
avg_reward
-0.1356214272808555
Loss for critic is : 0.025553260929882526
avg_reward
-0.13837371611525479
Loss for critic is : 0.026550923474133015
avg_reward
-0.13764374707811705
Loss for critic is : 0.026939711533486843
avg_reward
-0.1322442917207784
Loss for critic is : 0.025675400160253048
avg_reward
-0.12418876782208117
Loss for critic is : 0.0258925948292017
avg_reward
-0.11347147873940243
Loss for critic is : 0.025927322916686535
avg_reward
-0.10209173145080902
Loss for critic is : 0.025751782581210136
avg_reward
-0.08836566970496523
Loss for critic is : 0.025465790182352066
avg_reward
-0.08091969418150792
Loss for critic is : 0.025961332954466343
avg_reward
-0.086538489559917
Loss for critic is : 0.02522442303597927
avg_r

Loss for critic is : 0.022770275361835957
avg_reward
-0.03897724712944898
Loss for critic is : 0.022715849801898003
avg_reward
-0.03786404659347011
Loss for critic is : 0.022801357321441174
avg_reward
-0.03824082618828204
Loss for critic is : 0.022495336830615997
avg_reward
-0.036721162013573186
Loss for critic is : 0.022981243208050728
avg_reward
-0.036516382352820016
Loss for critic is : 0.023065481334924698
avg_reward
-0.0514900111144813
Loss for critic is : 0.023542363196611404
avg_reward
-0.06096444767130862
Loss for critic is : 0.023981901817023754
avg_reward
-0.05298256981052639
Loss for critic is : 0.023399746045470238
avg_reward
-0.05096412073260946
Loss for critic is : 0.0239561740309
avg_reward
-0.0788071495398418
Loss for critic is : 0.022507132962346077
avg_reward
-0.05107806248527126
Loss for critic is : 0.02320122253149748
avg_reward
-0.03569610347771928
Loss for critic is : 0.02365580014884472
avg_reward
-0.04482459874811785
Loss for critic is : 0.02314866241067648
avg_

Loss for critic is : 0.020500440150499344
avg_reward
-0.023648778830680894
Loss for critic is : 0.020338342525064945
avg_reward
-0.018644482323061137
Loss for critic is : 0.020754442550241947
avg_reward
-0.022935191999541034
Loss for critic is : 0.020304011180996895
avg_reward
-0.012856744077195316
Loss for critic is : 0.01977095752954483
avg_reward
-0.015838300923449816
Loss for critic is : 0.020014035515487194
avg_reward
-0.028382363326256446
Loss for critic is : 0.02042307425290346
avg_reward
-0.01680403598454265
Loss for critic is : 0.019164257682859898
avg_reward
-0.015869266816910393
Loss for critic is : 0.019578220322728157
avg_reward
-0.027059545341726665
Loss for critic is : 0.019706005230545998
avg_reward
-0.02831742842462979
Loss for critic is : 0.019882014021277428
avg_reward
-0.036225973203519814
Loss for critic is : 0.020100883208215237
avg_reward
-0.021342441717118604
Loss for critic is : 0.019339977763593197
avg_reward
-0.023318054936455945
Loss for critic is : 0.020280

Loss for critic is : 0.01843828335404396
avg_reward
-0.04659379923131772
Loss for critic is : 0.018354099243879318
avg_reward
-0.04197295282154154
Loss for critic is : 0.01860870234668255
avg_reward
-0.04686820589057436
Loss for critic is : 0.018169770017266273
avg_reward
-0.038682041401682896
Loss for critic is : 0.01840980537235737
avg_reward
-0.04711506953219477
Loss for critic is : 0.018505321815609932
avg_reward
-0.04276154860699134
Loss for critic is : 0.018126039765775204
avg_reward
-0.03217661330497728
Loss for critic is : 0.018260945565998554
avg_reward
-0.041921098602240824
Loss for critic is : 0.01838864106684923
avg_reward
-0.05083245913113346
Loss for critic is : 0.017983367666602135
avg_reward
-0.03508576225259462
Loss for critic is : 0.01828252151608467
avg_reward
-0.06120763256153265
Loss for critic is : 0.018126952461898327
avg_reward
-0.03790930214729959
Loss for critic is : 0.018574688583612442
avg_reward
-0.052513524380082846
Loss for critic is : 0.01803395431488752

Loss for critic is : 0.01616533938795328
avg_reward
-0.06665923001391928
Loss for critic is : 0.01651904545724392
avg_reward
-0.04895536983754644
Loss for critic is : 0.01630020420998335
avg_reward
-0.07050002674769662
Loss for critic is : 0.01696815900504589
avg_reward
-0.07542345667253816
Loss for critic is : 0.016697357408702374
avg_reward
-0.062940543488054
Loss for critic is : 0.016212272457778454
avg_reward
-0.03338193349646516
Loss for critic is : 0.016634915955364704
avg_reward
-0.021712741182007216
Loss for critic is : 0.01717633754014969
avg_reward
-0.020159177080357235
Loss for critic is : 0.016737009398639202
avg_reward
-0.013740625294710047
Loss for critic is : 0.017194815911352634
avg_reward
-0.03352075847993355
Loss for critic is : 0.017165818251669407
avg_reward
-0.024170128984831327
Loss for critic is : 0.016539971344172955
avg_reward
-0.04506003980615085
Loss for critic is : 0.01706306543201208
avg_reward
-0.03465982396524414
Loss for critic is : 0.016695905476808548


Loss for critic is : 0.017688537947833538
avg_reward
0.016326862812431515
Loss for critic is : 0.018260576762259007
avg_reward
0.009145056652076523
Loss for critic is : 0.01738786231726408
avg_reward
-0.008873544765003391
Loss for critic is : 0.017619003541767597
avg_reward
-0.020540443650104268
Loss for critic is : 0.017722129821777344
avg_reward
-0.035042085989892904
Loss for critic is : 0.01781519129872322
avg_reward
-0.03987366745636271
Loss for critic is : 0.018441669642925262
avg_reward
-0.036341942762991034
Loss for critic is : 0.017415568232536316
avg_reward
-0.05461124760400362
Loss for critic is : 0.018113136291503906
avg_reward
-0.039796910832905284
Loss for critic is : 0.016921489499509335
avg_reward
-0.040266454670898094
Loss for critic is : 0.017560253851115704
avg_reward
-0.03371351865360052
Loss for critic is : 0.017006595619022846
avg_reward
-0.04403977860418722
Loss for critic is : 0.016165283508598804
avg_reward
-0.035761985025344806
Loss for critic is : 0.0162167893

Loss for critic is : 0.014503578189760447
avg_reward
-0.007173727838937327
Loss for critic is : 0.014553620014339685
avg_reward
0.016811204560891267
Loss for critic is : 0.014933960046619177
avg_reward
0.01327752151395254
Loss for critic is : 0.01524318428710103
avg_reward
0.028954930627953818
Loss for critic is : 0.0148540660738945
avg_reward
0.017289299015960487
Loss for critic is : 0.014622915536165237
avg_reward
0.018593607715483785
Loss for critic is : 0.014363459777086973
avg_reward
-0.0019303558028969207
Loss for critic is : 0.014531244523823261
avg_reward
-0.0023278923482776166
Loss for critic is : 0.014330526813864708
avg_reward
-0.001110319909365387
Loss for critic is : 0.014837601687759161
avg_reward
-0.0063863080674693325
Loss for critic is : 0.01480700634419918
avg_reward
-0.012948029554155284
Loss for critic is : 0.014652397017925978
avg_reward
-0.01665156066108769
Loss for critic is : 0.01450214721262455
avg_reward
-0.01157068410088586
Loss for critic is : 0.015303907450

Loss for critic is : 0.0157599993981421
avg_reward
-0.08177673521811388
Loss for critic is : 0.014112818520516157
avg_reward
-0.06657704241327014
Loss for critic is : 0.01498335599899292
avg_reward
-0.06750373776071171
Loss for critic is : 0.014254406094551086
avg_reward
-0.07033854408373436
Loss for critic is : 0.015113410074263811
avg_reward
-0.07880622644476161
Loss for critic is : 0.014938563108444214
avg_reward
-0.07619312870934584
Loss for critic is : 0.014504286926239729
avg_reward
-0.07937389654695044
Loss for critic is : 0.014697079081088305
avg_reward
-0.07949911995252941
Loss for critic is : 0.014433389529585838
avg_reward
-0.08615972656946155
Loss for critic is : 0.014511732384562492
avg_reward
-0.089803018296152
Loss for critic is : 0.014797857962548733
avg_reward
-0.08647480541860436
Loss for critic is : 0.014718471560627222
avg_reward
-0.09678257828725204
Loss for critic is : 0.014351237565279007
avg_reward
-0.09314416878602481
Loss for critic is : 0.014674812089651823
a

Loss for critic is : 0.01355955796316266
avg_reward
-0.08162099671383433
Loss for critic is : 0.013862262479960918
avg_reward
-0.07989251918148704
Loss for critic is : 0.013910209760069847
avg_reward
-0.09062167775346727
Loss for critic is : 0.014203795697540045
avg_reward
-0.07828884722061756
Loss for critic is : 0.012822753749787807
avg_reward
-0.08021001099563362
Loss for critic is : 0.012900801841169596
Episode : 31 start 
avg_reward
-0.017156862169378652
Loss for critic is : 0.012893157545477152
avg_reward
-0.03763433059570065
Loss for critic is : 0.013069604989141226
avg_reward
-0.0469382698215135
Loss for critic is : 0.013221148401498795
avg_reward
-0.04954243212047095
Loss for critic is : 0.012618890963494778
avg_reward
-0.054272005261731496
Loss for critic is : 0.013081081677228212
avg_reward
-0.05719155033313811
Loss for critic is : 0.01323754945769906
avg_reward
-0.06591149917650108
Loss for critic is : 0.013845077715814114
avg_reward
-0.08381632369928713
Loss for critic is 

Loss for critic is : 0.012614259496331215
avg_reward
-0.027463898855174133
Loss for critic is : 0.012322288006544113
avg_reward
-0.03499247027463817
Loss for critic is : 0.013106546830385923
avg_reward
-0.03546948849574763
Loss for critic is : 0.01263388805091381
avg_reward
-0.04898039182009299
Loss for critic is : 0.012697167228907347
avg_reward
-0.033934129538246094
Loss for critic is : 0.012728549540042877
avg_reward
-0.04013017515867892
Loss for critic is : 0.013003330677747726
avg_reward
-0.04235484990716715
Loss for critic is : 0.012979317456483841
avg_reward
-0.04946251893695546
Loss for critic is : 0.012227419298142195
avg_reward
-0.044359204816641115
Loss for critic is : 0.012772415298968554
avg_reward
-0.05084739679321193
Loss for critic is : 0.012946689035743475
avg_reward
-0.05211229013374594
Loss for critic is : 0.012545180972665548
avg_reward
-0.0488445434095559
Loss for critic is : 0.012515032198280096
avg_reward
-0.04136952739284549
Loss for critic is : 0.01264179497957

Loss for critic is : 0.012679448351264
avg_reward
-0.05103592333203003
Loss for critic is : 0.012259543407708406
avg_reward
-0.0563314415065678
Loss for critic is : 0.012167088221758604
avg_reward
-0.05937515204011692
Loss for critic is : 0.012020064517855644
avg_reward
-0.058747447898764404
Loss for critic is : 0.012456470634788275
avg_reward
-0.0609943088401858
Loss for critic is : 0.01250633830204606
avg_reward
-0.06003226811443928
Loss for critic is : 0.012508992105722427
avg_reward
-0.06451418303654033
Loss for critic is : 0.01225038431584835
avg_reward
-0.058696242361883935
Loss for critic is : 0.01289302110671997
avg_reward
-0.0595413981243388
Loss for critic is : 0.01232516672462225
avg_reward
-0.06997006483274651
Loss for critic is : 0.012061509769409895
avg_reward
-0.06901508110786804
Loss for critic is : 0.012636288069188595
avg_reward
-0.06780807028588509
Loss for critic is : 0.012214182410389185
avg_reward
-0.06460241003475596
Loss for critic is : 0.012465988751500845
avg_

Loss for critic is : 0.016730235889554024
avg_reward
0.0037062555627334556
Loss for critic is : 0.015528053976595402
avg_reward
0.004466331273758346
Loss for critic is : 0.014199107885360718
avg_reward
0.00273138790758716
Loss for critic is : 0.01446098368614912
avg_reward
-0.006388204244984055
Loss for critic is : 0.014445371460169554
avg_reward
-0.005945210253778824
Loss for critic is : 0.015152549371123314
avg_reward
-0.01309500532664841
Loss for critic is : 0.01586171705275774
avg_reward
-0.004291787031749747
Loss for critic is : 0.015297341626137495
avg_reward
-0.013791771376334236
Loss for critic is : 0.016291039995849133
avg_reward
-0.03347571561788121
Loss for critic is : 0.016198044642806053
avg_reward
-0.02152889158753348
Loss for critic is : 0.014416806399822235
avg_reward
-0.026486497556838857
Loss for critic is : 0.01641384232789278
avg_reward
-0.024689571012534287
Loss for critic is : 0.015798179432749748
avg_reward
-0.038695456008110286
Loss for critic is : 0.01596798188

Loss for critic is : 0.014259127900004387
avg_reward
-0.05258090977367978
Loss for critic is : 0.014560075476765633
avg_reward
-0.0485576916989447
Loss for critic is : 0.014270059298723936
avg_reward
-0.07717457546359266
Loss for critic is : 0.013522633351385593
avg_reward
-0.06207291629842003
Loss for critic is : 0.01345535647124052
avg_reward
-0.08087302993359359
Loss for critic is : 0.01244781306013465
avg_reward
-0.06048214139316698
Loss for critic is : 0.012511104810982943
avg_reward
-0.06525805417571347
Loss for critic is : 0.01203132700175047
avg_reward
-0.073409255484897
Loss for critic is : 0.01237071817740798
avg_reward
-0.06487296207809073
Loss for critic is : 0.012498289812356234
avg_reward
-0.06509491312831454
Loss for critic is : 0.011444238480180502
avg_reward
-0.06097702828734705
Loss for critic is : 0.013366724364459515
avg_reward
-0.03753623550136132
Loss for critic is : 0.012383996974676847
avg_reward
-0.028050464260216602
Loss for critic is : 0.011594600044190884
av

Loss for critic is : 0.011408061720430851
avg_reward
-0.02162978117065202
Loss for critic is : 0.011605068109929562
avg_reward
-0.0369552291103914
Loss for critic is : 0.011809490155428648
avg_reward
-0.03377452562341881
Loss for critic is : 0.011438039131462574
avg_reward
-0.04146358251697723
Loss for critic is : 0.012320643290877342
avg_reward
-0.04406824505016842
Loss for critic is : 0.011770641896873713
avg_reward
-0.03801085907177614
Loss for critic is : 0.012391539756208658
avg_reward
-0.045118963172707666
Loss for critic is : 0.011915585491806269
avg_reward
-0.04428281093401747
Loss for critic is : 0.012070843484252691
avg_reward
-0.024684163731539983
Loss for critic is : 0.01213492825627327
avg_reward
-0.03470657643515145
Loss for critic is : 0.011733878403902054
avg_reward
-0.043937386737121185
Loss for critic is : 0.011887505650520325
avg_reward
-0.018197279249302485
Loss for critic is : 0.011860803235322237
avg_reward
-0.02008338128600988
Loss for critic is : 0.0117904152721

Loss for critic is : 0.0163305108435452
avg_reward
-0.1264753661455067
Loss for critic is : 0.017672027461230755
avg_reward
-0.12775980964716901
Loss for critic is : 0.017179292626678944
avg_reward
-0.12716247837840747
Loss for critic is : 0.01676511950790882
avg_reward
-0.12439275866357022
Loss for critic is : 0.014279598370194435
avg_reward
-0.1195603409938426
Loss for critic is : 0.015189401339739561
avg_reward
-0.04556165562776239
Loss for critic is : 0.01607226626947522
avg_reward
-0.031235565933857082
Loss for critic is : 0.01569795049726963
avg_reward
-0.04574426165741225
Loss for critic is : 0.014537271577864885
avg_reward
-0.014003847548336237
Loss for critic is : 0.014149769209325314
avg_reward
-0.011304830910357208
Loss for critic is : 0.013192114420235157
avg_reward
-0.04110911139889661
Loss for critic is : 0.01424935320392251
avg_reward
-0.04051463536463669
Loss for critic is : 0.012486628256738186
avg_reward
-0.09546172330293601
Loss for critic is : 0.013648266438394785
a

Loss for critic is : 0.025771358981728554
avg_reward
-0.015265129788094703
Loss for critic is : 0.029781654477119446
avg_reward
-0.01660724990544901
Loss for critic is : 0.022873591631650925
avg_reward
-0.021274821611391676
Loss for critic is : 0.018754519522190094
avg_reward
-0.026316240428668994
Loss for critic is : 0.0184441190212965
avg_reward
-0.016988118680456236
Loss for critic is : 0.01783254463225603
avg_reward
-0.037447059572888935
Loss for critic is : 0.016797974705696106
avg_reward
-0.053001045268497894
Loss for critic is : 0.01678440533578396
avg_reward
-0.05146059942145029
Loss for critic is : 0.014828249346464872
avg_reward
-0.044252491512477975
Loss for critic is : 0.016874954104423523
avg_reward
-0.04179451743130724
Loss for critic is : 0.01968288328498602
avg_reward
-0.04427062559424352
Loss for critic is : 0.017802308313548565
avg_reward
-0.04487880906376175
Loss for critic is : 0.017359109595417976
avg_reward
-0.059647770805227616
Loss for critic is : 0.016135607846

Loss for critic is : 0.011408710852265358
avg_reward
-0.11466060169366363
Loss for critic is : 0.012013348285108805
avg_reward
-0.09567937970700072
Loss for critic is : 0.01169210346415639
avg_reward
-0.06100038445587053
Loss for critic is : 0.01223228732123971
avg_reward
-0.0647837495583263
Loss for critic is : 0.012127954047173262
avg_reward
-0.10767520614365954
Loss for critic is : 0.011908094864338636
avg_reward
-0.04863894269462418
Loss for critic is : 0.012513850815594196
avg_reward
-0.07194751448600177
Loss for critic is : 0.012224046047776937
avg_reward
-0.10411637699723038
Loss for critic is : 0.012186942622065544
avg_reward
-0.09989261292352693
Loss for critic is : 0.012337547726929188
avg_reward
-0.1533160230162242
Loss for critic is : 0.01209422666579485
avg_reward
-0.0819629205087861
Loss for critic is : 0.011865400709211826
avg_reward
-0.0830963343272014
Loss for critic is : 0.011932808440178633
avg_reward
-0.12745662758975074
Loss for critic is : 0.01215409953147173
avg_

Loss for critic is : 0.010871066711843014
avg_reward
-0.004842083109083357
Loss for critic is : 0.011073952540755272
avg_reward
-0.0017427931385808554
Loss for critic is : 0.01171220513060689
avg_reward
-0.0076785142924233225
Loss for critic is : 0.011421897914260626
avg_reward
-0.007293382196890346
Loss for critic is : 0.011670578271150589
avg_reward
-0.005705859101828924
Loss for critic is : 0.011409963946789503
avg_reward
-0.011544513473122192
Loss for critic is : 0.011128713842481375
avg_reward
-0.012214796516967469
Loss for critic is : 0.010819728020578623
Episode : 41 start 
avg_reward
-0.012753170955956775
Loss for critic is : 0.01137786079198122
avg_reward
-0.022626269747040437
Loss for critic is : 0.011064757592976093
avg_reward
-0.01839767013399379
Loss for critic is : 0.0110219307243824
avg_reward
-0.018548841575989634
Loss for critic is : 0.011502181645482779
avg_reward
-0.012110913713017889
Loss for critic is : 0.010857978835701942
avg_reward
-0.010418659801988294
Loss for

Loss for critic is : 0.010806533973664045
avg_reward
-0.02942670410660797
Loss for critic is : 0.010646708775311708
avg_reward
-0.0348091860225716
Loss for critic is : 0.011080025229603052
avg_reward
-0.039250545240138685
Loss for critic is : 0.010929367039352655
avg_reward
-0.03993107354968737
Loss for critic is : 0.01113879680633545
avg_reward
-0.04636045148982882
Loss for critic is : 0.011003817897289991
avg_reward
-0.037181774638532734
Loss for critic is : 0.011060922406613827
avg_reward
-0.04003314476102049
Loss for critic is : 0.011323060840368271
avg_reward
-0.041746016668890454
Loss for critic is : 0.01099974662065506
avg_reward
-0.0267412036115192
Loss for critic is : 0.01066939765587449
avg_reward
-0.03158364036709556
Loss for critic is : 0.010972158517688513
avg_reward
-0.020958059345607932
Loss for critic is : 0.010894987732172012
avg_reward
-0.024932159360972393
Loss for critic is : 0.010356546379625797
avg_reward
-0.005614453604251203
Loss for critic is : 0.01058026263490

Loss for critic is : 0.010645174887031317
avg_reward
-0.021989649932881196
Loss for critic is : 0.010793902911245823
avg_reward
-0.017495682475930505
Loss for critic is : 0.01000362541526556
avg_reward
-0.02001648233909965
Loss for critic is : 0.010231729131191969
avg_reward
-0.010265368731170631
Loss for critic is : 0.01111864484846592
avg_reward
-0.013135957509374551
Loss for critic is : 0.011181285604834557
avg_reward
-0.014379621981832992
Loss for critic is : 0.011126141995191574
avg_reward
-0.006480902135240826
Loss for critic is : 0.011282485909759998
avg_reward
-0.005794456965039153
Loss for critic is : 0.011522844433784485
avg_reward
-0.007821651066754427
Loss for critic is : 0.01169242337346077
avg_reward
-0.015611329782936303
Loss for critic is : 0.011407651472836733
avg_reward
-0.019898838260972686
Loss for critic is : 0.01143555948510766
avg_reward
-0.016179410954682113
Loss for critic is : 0.01167990081012249
avg_reward
-0.029367926034477868
Loss for critic is : 0.01107253

Loss for critic is : 0.009973964653909206
avg_reward
-0.003718523778399995
Loss for critic is : 0.010770649183541536
avg_reward
-0.0060925863917542225
Loss for critic is : 0.01021051686257124
avg_reward
-0.005554640989730115
Loss for critic is : 0.010996089316904545
avg_reward
-0.005657375415768493
Loss for critic is : 0.010061257984489202
avg_reward
-0.006927062657743006
Loss for critic is : 0.010505330748856068
avg_reward
-0.004526719192643083
Loss for critic is : 0.010117109399288893
avg_reward
-0.006301958290030886
Loss for critic is : 0.009985330980271101
avg_reward
-0.006989807548015563
Loss for critic is : 0.010737108998000622
avg_reward
-0.009936136164332105
Loss for critic is : 0.01048492407426238
avg_reward
-0.007384205177497804
Loss for critic is : 0.010218567214906216
avg_reward
-0.00545407447813286
Loss for critic is : 0.01037242915481329
avg_reward
-0.005967203353322582
Loss for critic is : 0.01059228740632534
avg_reward
-0.0034757898374036473
Loss for critic is : 0.01021

Loss for critic is : 0.010052043478935957
avg_reward
-0.00896719568929032
Loss for critic is : 0.009792298544198275
avg_reward
-0.009640604114177118
Loss for critic is : 0.010371176525950432
avg_reward
-0.010531113413575913
Loss for critic is : 0.009915856178849936
avg_reward
-0.011319481335979747
Loss for critic is : 0.009843392763286829
avg_reward
-0.012372123112086424
Loss for critic is : 0.01040811650454998
avg_reward
-0.010344932900233791
Loss for critic is : 0.010472716763615608
avg_reward
-0.012316376221138797
Loss for critic is : 0.010130534414201975
avg_reward
-0.00991442307960621
Loss for critic is : 0.01063767820596695
avg_reward
-0.006455234995835802
Loss for critic is : 0.010203358251601458
avg_reward
-0.010573858129541548
Loss for critic is : 0.009784549940377474
avg_reward
-0.00942782263696653
Loss for critic is : 0.0098539087921381
avg_reward
-0.005040588734313799
Loss for critic is : 0.009868789464235306
avg_reward
-0.007852593212597472
Loss for critic is : 0.009482302

Loss for critic is : 0.009841097053140402
avg_reward
-0.015492382546802514
Loss for critic is : 0.009467270225286484
avg_reward
-0.014784543469953592
Loss for critic is : 0.01071457751095295
avg_reward
-0.017195679321668517
Loss for critic is : 0.010447554755955935
avg_reward
-0.02017081629832298
Loss for critic is : 0.009954619221389294
avg_reward
-0.017239369171739405
Loss for critic is : 0.009824232198297977
avg_reward
-0.019797638404841018
Loss for critic is : 0.010041287634521723
avg_reward
-0.017409922327942882
Loss for critic is : 0.009782145265489817
avg_reward
-0.031979690867106
Loss for critic is : 0.009351908229291439
avg_reward
-0.018806693063361232
Loss for critic is : 0.01005878672003746
avg_reward
-0.022448899375708386
Loss for critic is : 0.009342629928141832
avg_reward
-0.01121770219225725
Loss for critic is : 0.009984742384403944
avg_reward
-0.012813767625500109
Loss for critic is : 0.010142707731574774
avg_reward
-0.015348286214153649
Loss for critic is : 0.009613648

Loss for critic is : 0.009709032718092203
avg_reward
-0.014743365777540294
Loss for critic is : 0.009649124927818775
avg_reward
-0.014628412066829116
Loss for critic is : 0.009240484796464443
avg_reward
-0.013674023334672394
Loss for critic is : 0.009998905938118696
avg_reward
-0.014431509602987367
Loss for critic is : 0.00973327411338687
avg_reward
-0.02004065775082101
Loss for critic is : 0.010431315284222364
avg_reward
-0.01901157239533116
Loss for critic is : 0.009917642921209335
avg_reward
-0.024625359568949602
Loss for critic is : 0.009910221211612225
avg_reward
-0.028713547466867058
Loss for critic is : 0.009550044313073158
avg_reward
-0.03060441141062233
Loss for critic is : 0.010286693461239338
avg_reward
-0.037543951703529796
Loss for critic is : 0.01032672030851245
avg_reward
-0.019051261958142635
Loss for critic is : 0.00982664991170168
avg_reward
-0.03568756597545862
Loss for critic is : 0.009777503088116646
avg_reward
-0.03979909270528671
Loss for critic is : 0.0098290629

Loss for critic is : 0.010079616215080023
avg_reward
-0.0007674818180101496
Loss for critic is : 0.009135857690125704
avg_reward
-0.01839447221046224
Loss for critic is : 0.009776785504072905
avg_reward
-0.053301440503630365
Loss for critic is : 0.009637226816266775
avg_reward
-0.05251700136115677
Loss for critic is : 0.009393736720085144
avg_reward
-0.07510296124578586
Loss for critic is : 0.01006510341539979
avg_reward
-0.09023654124649108
Loss for critic is : 0.009701655246317387
avg_reward
-0.09741623464605083
Loss for critic is : 0.010373079217970371
avg_reward
-0.09662680765231962
Loss for critic is : 0.009290420450270176
avg_reward
-0.07222576700905142
Loss for critic is : 0.010054109152406454
avg_reward
-0.07575513276316395
Loss for critic is : 0.010446041822433472
avg_reward
-0.07186985648185322
Loss for critic is : 0.009565317071974277
avg_reward
-0.04727167692192419
Loss for critic is : 0.009962289594113827
avg_reward
-0.03884859928517741
Loss for critic is : 0.0096906428225

Loss for critic is : 0.008773691486567259
avg_reward
0.02459148851089505
Loss for critic is : 0.009134437888860703
avg_reward
0.023235363333055124
Loss for critic is : 0.009644825011491776
avg_reward
0.01886563199836469
Loss for critic is : 0.009354573674499989
avg_reward
0.0066268428308909905
Loss for critic is : 0.009379183873534203
avg_reward
0.009376123023327097
Loss for critic is : 0.009002314880490303
avg_reward
0.006523718625694477
Loss for critic is : 0.00940726837143302
avg_reward
0.009098000951661175
Loss for critic is : 0.009701383765786886
avg_reward
0.004952023194918585
Loss for critic is : 0.009585910476744175
avg_reward
0.004154045651547502
Loss for critic is : 0.008949307259172201
avg_reward
0.0011963680916526644
Loss for critic is : 0.009676570538431406
avg_reward
0.001278594740413471
Loss for critic is : 0.009424165356904268
avg_reward
-0.005587416336214736
Loss for critic is : 0.00954960286617279
avg_reward
0.003541257971212318
Loss for critic is : 0.0091335261240601

Loss for critic is : 0.009703253395855427
avg_reward
-0.012712739489230648
Loss for critic is : 0.009927671402692795
avg_reward
-0.022408174407545844
Loss for critic is : 0.009371520485728979
avg_reward
-0.023794247114181225
Loss for critic is : 0.009293763432651758
avg_reward
-0.016366459963400325
Loss for critic is : 0.009321772027760744
avg_reward
-0.023748604338217377
Loss for critic is : 0.008999540004879236
avg_reward
-0.025302687729124564
Loss for critic is : 0.009128163568675518
avg_reward
-0.02060744454876981
Loss for critic is : 0.009306749794632196
avg_reward
-0.01951117450899874
Loss for critic is : 0.009298516437411308
avg_reward
-0.028331667062825555
Loss for critic is : 0.009661891497671604
avg_reward
-0.017879781369814475
Loss for critic is : 0.008648310787975788
avg_reward
-0.018178800911499055
Loss for critic is : 0.009163370355963707
avg_reward
-0.01946854304073232
Loss for critic is : 0.009353763423860073
avg_reward
-0.011842361909101248
Loss for critic is : 0.00918

Loss for critic is : 0.00956576643511653
avg_reward
-0.011846547119477583
Loss for critic is : 0.00917561911046505
avg_reward
-0.019469442395507082
Loss for critic is : 0.010061400476843119
avg_reward
-0.013185264041447705
Loss for critic is : 0.009414175059646368
avg_reward
-0.010887237429963961
Loss for critic is : 0.009063885547220707
avg_reward
-0.008695960079482874
Loss for critic is : 0.009580851998180151
avg_reward
-0.014023982188850006
Loss for critic is : 0.010168681852519512
avg_reward
-0.01528560513678431
Loss for critic is : 0.009582376573234797
Episode : 52 start 
avg_reward
-0.01124668509282742
Loss for critic is : 0.009560457430779934
avg_reward
-0.019288168458295174
Loss for critic is : 0.009263403713703156
avg_reward
-0.019233949933041533
Loss for critic is : 0.009919499978423119
avg_reward
-0.014531878260612299
Loss for critic is : 0.0104361935518682
avg_reward
-0.01281539394460765
Loss for critic is : 0.009642568416893482
avg_reward
-0.00670626406973601
Loss for crit

Loss for critic is : 0.008882335852831602
avg_reward
-0.01096372459634283
Loss for critic is : 0.009529105853289366
avg_reward
-0.008368084764573458
Loss for critic is : 0.008772285655140877
avg_reward
-0.010803214088507974
Loss for critic is : 0.008842326700687408
avg_reward
-0.008896902741602677
Loss for critic is : 0.008624373469501734
avg_reward
-0.005665018427568008
Loss for critic is : 0.008835067506879568
avg_reward
-0.011499574430227394
Loss for critic is : 0.0096581787802279
avg_reward
-0.012098112084266678
Loss for critic is : 0.008569855708628893
avg_reward
-0.012063905077377113
Loss for critic is : 0.009229149669408798
avg_reward
-0.008399043477495063
Loss for critic is : 0.009217433631420135
avg_reward
-0.01479345727687803
Loss for critic is : 0.008794801775366068
avg_reward
-0.015461392048909969
Loss for critic is : 0.009118134621530771
avg_reward
-0.014877929574122936
Loss for critic is : 0.0089405607432127
avg_reward
-0.013731556120119788
Loss for critic is : 0.00909164

Loss for critic is : 0.008580096065998077
avg_reward
-0.018298110942219913
Loss for critic is : 0.009023455437272787
avg_reward
-0.020459622081991355
Loss for critic is : 0.009233913850039244
avg_reward
-0.014811065568511257
Loss for critic is : 0.009501314256340265
avg_reward
-0.010931368593924604
Loss for critic is : 0.0096192448399961
avg_reward
-0.005154138510260557
Loss for critic is : 0.008978422731161118
avg_reward
-0.003395514183419079
Loss for critic is : 0.009361966978758574
avg_reward
-0.0038900223148221006
Loss for critic is : 0.008893052581697702
avg_reward
-0.005016861071344822
Loss for critic is : 0.008778499905019999
avg_reward
-0.006230530587088692
Loss for critic is : 0.009329002350568771
avg_reward
-0.007505764498095513
Loss for critic is : 0.009267054498195648
avg_reward
-0.0085281277811684
Loss for critic is : 0.009423400741070509
avg_reward
-0.010986819850985667
Loss for critic is : 0.009259207174181938
avg_reward
-0.010591219833859192
Loss for critic is : 0.00952

Loss for critic is : 0.008473821450024843
avg_reward
-0.011171901560313446
Loss for critic is : 0.008288573008030653
avg_reward
-0.009387821219460963
Loss for critic is : 0.009625677950680256
avg_reward
-0.011977964262192529
Loss for critic is : 0.008116879034787416
avg_reward
-0.015382173260778644
Loss for critic is : 0.008908262476325035
avg_reward
-0.015483536615785295
Loss for critic is : 0.008722370490431786
avg_reward
-0.012737993508880658
Loss for critic is : 0.00846657482907176
avg_reward
-0.01406654361564131
Loss for critic is : 0.008469874039292336
avg_reward
-0.010985421158938176
Loss for critic is : 0.008508062455803156
avg_reward
-0.012815087585443368
Loss for critic is : 0.008996918797492981
avg_reward
-0.013742012988347823
Loss for critic is : 0.008308899123221636
avg_reward
-0.017650716634024687
Loss for critic is : 0.008434854447841644
avg_reward
-0.013499378526687046
Loss for critic is : 0.00859232060611248
avg_reward
-0.020329220402092417
Loss for critic is : 0.00829

Loss for critic is : 0.008051240351051092
avg_reward
-0.006541681625031076
Loss for critic is : 0.007855358766391873
avg_reward
-0.005256348838766479
Loss for critic is : 0.007699990412220359
avg_reward
-0.005243604299279276
Loss for critic is : 0.008374002994969487
avg_reward
-0.00765873681197518
Loss for critic is : 0.008203819394111633
avg_reward
-0.0036332942347537577
Loss for critic is : 0.007646321319043636
avg_reward
0.00024705508809123346
Loss for critic is : 0.008103736210614443
avg_reward
0.0057442730492781235
Loss for critic is : 0.00803570169955492
avg_reward
0.0036547163465026136
Loss for critic is : 0.008225320372730494
avg_reward
-0.010592193166090294
Loss for critic is : 0.0077770124189555645
avg_reward
-0.004837806146498133
Loss for critic is : 0.008193711750209332
avg_reward
-0.0008312054297041011
Loss for critic is : 0.008308865129947662
avg_reward
-0.008039891362314722
Loss for critic is : 0.007923653582111001
avg_reward
-0.0016146105727378865
Loss for critic is : 0

Loss for critic is : 0.007912715896964073
avg_reward
-0.0178124589312253
Loss for critic is : 0.00813292060047388
avg_reward
-0.021015018562230998
Loss for critic is : 0.007832154165953398
avg_reward
-0.025823343318099826
Loss for critic is : 0.007861942518502474
avg_reward
-0.025104703999263317
Loss for critic is : 0.007692534243687987
avg_reward
-0.02542086755606452
Loss for critic is : 0.007892431691288948
avg_reward
-0.024919533370720504
Loss for critic is : 0.007736510597169399
avg_reward
-0.02828634200598031
Loss for critic is : 0.008014621213078499
avg_reward
-0.029085764257878996
Loss for critic is : 0.007920219097286463
avg_reward
-0.029453314583559934
Loss for critic is : 0.007933909073472023
avg_reward
-0.03573935313406907
Loss for critic is : 0.00752540142275393
avg_reward
-0.03609642097716825
Loss for critic is : 0.008038191590458155
avg_reward
-0.03334163108906862
Loss for critic is : 0.0075450746808201075
avg_reward
-0.03912869383644941
Loss for critic is : 0.00778491655

Loss for critic is : 0.007701131282374263
avg_reward
-0.012880675766936339
Loss for critic is : 0.007573296781629324
avg_reward
-0.01310267307509506
Loss for critic is : 0.008008505683392286
avg_reward
-0.008399584026226942
Loss for critic is : 0.007995417341589928
avg_reward
-0.028645807068790944
Loss for critic is : 0.007947866339236498
avg_reward
-0.027848651514666978
Loss for critic is : 0.007727830205112696
avg_reward
-0.013577660633774268
Loss for critic is : 0.007961493916809559
avg_reward
-0.023075820500617815
Loss for critic is : 0.008796283043920994
avg_reward
-0.02713779862031886
Loss for critic is : 0.00797278736717999
avg_reward
-0.035082159296760035
Loss for critic is : 0.008122147060930729
avg_reward
-0.03548952322780114
Loss for critic is : 0.007743266643956304
avg_reward
-0.03682207428784959
Loss for critic is : 0.00732845370657742
avg_reward
-0.036561217653000945
Loss for critic is : 0.0076696001924574375
avg_reward
-0.0123435347488954
Loss for critic is : 0.008208193

Loss for critic is : 0.007712682243436575
avg_reward
0.0059394006331834025
Loss for critic is : 0.007415569620206952
avg_reward
-0.006292974509360269
Loss for critic is : 0.007515887962654233
avg_reward
0.008845688721977517
Loss for critic is : 0.007414012914523482
avg_reward
-0.0189572980151756
Loss for critic is : 0.007655302062630653
avg_reward
-0.008392050761303785
Loss for critic is : 0.007214172510430217
avg_reward
0.009133579144638138
Loss for critic is : 0.007296885596588254
avg_reward
-0.0001380636744257608
Loss for critic is : 0.007933399640023708
avg_reward
-0.003574675892736085
Loss for critic is : 0.007724459050223231
avg_reward
-0.010469308256657863
Loss for critic is : 0.007359417621046305
avg_reward
-0.020513868773275656
Loss for critic is : 0.007869542576372623
avg_reward
-0.0016123462332419828
Loss for critic is : 0.008034239988774061
avg_reward
-0.012559058713208589
Loss for critic is : 0.007939077913761139
avg_reward
-0.005257276323021963
Loss for critic is : 0.0077

Loss for critic is : 0.007888806983828545
avg_reward
-0.015798802004255916
Loss for critic is : 0.007628771709278226
avg_reward
-0.013084163123241569
Loss for critic is : 0.00819970853626728
avg_reward
-0.011354387838265062
Loss for critic is : 0.007551049580797553
avg_reward
-0.016829216720585256
Loss for critic is : 0.007606374332681298
avg_reward
-0.021749870282744492
Loss for critic is : 0.00767851434648037
avg_reward
-0.018310771773536277
Loss for critic is : 0.0073074225801974535
avg_reward
-0.01737940966018082
Loss for critic is : 0.008336982224136591
avg_reward
-0.023672934686820652
Loss for critic is : 0.007525346241891384
avg_reward
-0.011525033461456164
Loss for critic is : 0.008276941487565637
avg_reward
-0.021310688346353978
Loss for critic is : 0.007285382831469178
avg_reward
-0.009085620468129973
Loss for critic is : 0.007710918318480253
avg_reward
-0.016745099496477324
Loss for critic is : 0.007499353028833866
avg_reward
-0.002642972638141403
Loss for critic is : 0.0076

Loss for critic is : 0.007686716737225652
avg_reward
-0.020252289200619895
Loss for critic is : 0.007397228619083762
avg_reward
-0.017410607839564118
Loss for critic is : 0.007893600733950734
avg_reward
-0.022046839587891484
Loss for critic is : 0.007561573991551995
avg_reward
-0.01632156307826719
Loss for critic is : 0.008348258677870035
avg_reward
-0.017936343169444483
Loss for critic is : 0.008151520043611526
avg_reward
-0.015496227437887082
Loss for critic is : 0.007386579178273678
avg_reward
-0.007575506587276534
Loss for critic is : 0.007819251623004675
avg_reward
-0.012235577480104808
Loss for critic is : 0.007177912164479494
avg_reward
-0.007258748891310939
Loss for critic is : 0.0076378039084374905
avg_reward
-0.004679593983159289
Loss for critic is : 0.00771750183776021
avg_reward
-0.0031920323208904124
Loss for critic is : 0.00785452127456665
avg_reward
0.0003130105833963892
Loss for critic is : 0.008081783540546894
avg_reward
-0.0015412014482910179
Loss for critic is : 0.00

Loss for critic is : 0.007124001160264015
avg_reward
-0.026543091892108406
Loss for critic is : 0.007044672034680843
avg_reward
-0.015712336461823545
Loss for critic is : 0.006999014876782894
avg_reward
-0.02487195624144878
Loss for critic is : 0.006828971905633807
avg_reward
-0.0006036417493789532
Loss for critic is : 0.007916751317679882
avg_reward
-0.014587629953382433
Loss for critic is : 0.00686044292524457
avg_reward
-0.004075470142065614
Loss for critic is : 0.007004840299487114
Episode : 63 start 
avg_reward
-0.01580609210232874
Loss for critic is : 0.0075383170042186975
avg_reward
-0.02279806040635373
Loss for critic is : 0.007020113291218877
avg_reward
-0.027837072445092446
Loss for critic is : 0.007095866138115525
avg_reward
-0.038407434997285335
Loss for critic is : 0.007063120603561401
avg_reward
-0.038569165911540214
Loss for critic is : 0.007161676185205579
avg_reward
-0.04945164233825499
Loss for critic is : 0.007006720639765263
avg_reward
-0.06074519725975145
Loss for 

Loss for critic is : 0.007498677354305983
avg_reward
-0.025980130727760252
Loss for critic is : 0.007880634628236294
avg_reward
-0.031009657210307025
Loss for critic is : 0.0072827625554054976
avg_reward
-0.024395604675382924
Loss for critic is : 0.0075925851706415415
avg_reward
-0.02617047470793517
Loss for critic is : 0.007299897028133273
avg_reward
-0.02267535458614235
Loss for critic is : 0.007272521732375026
avg_reward
-0.01751589735869799
Loss for critic is : 0.007382426410913467
avg_reward
-0.013739101278831793
Loss for critic is : 0.00738006760366261
avg_reward
-0.02125448868776069
Loss for critic is : 0.007325250655412674
avg_reward
-0.01709525324181623
Loss for critic is : 0.007313912734389305
avg_reward
-0.011749167197989
Loss for critic is : 0.007041695527732372
avg_reward
-0.020845645751847955
Loss for critic is : 0.007329561049118638
avg_reward
-0.008814767455201494
Loss for critic is : 0.007886409759521484
avg_reward
-0.015320668031989396
Loss for critic is : 0.007082797

Loss for critic is : 0.0075243988540023565
avg_reward
-0.006019404672791944
Loss for critic is : 0.00746279233135283
avg_reward
-0.0058883518765939806
Loss for critic is : 0.0076250359416007996
avg_reward
-0.004323742492624623
Loss for critic is : 0.007385921897366643
avg_reward
-0.0009676434470158269
Loss for critic is : 0.007017863215878606
avg_reward
-0.00630151804111739
Loss for critic is : 0.007311041466891766
avg_reward
-0.007175781623178194
Loss for critic is : 0.007521273801103234
avg_reward
-0.0036824964329746127
Loss for critic is : 0.0070078091230243444
avg_reward
-0.0018570521329057685
Loss for critic is : 0.0075298810843378305
avg_reward
-0.009149517954033064
Loss for critic is : 0.0071386476047337055
avg_reward
-0.0013482358925198312
Loss for critic is : 0.007315387250855565
avg_reward
-0.007682873435688003
Loss for critic is : 0.00753488065674901
avg_reward
-0.003159343803012169
Loss for critic is : 0.007199516985565424
avg_reward
-0.016124490661995436
Loss for critic is

Loss for critic is : 0.007345261052250862
avg_reward
-0.0017020027695044482
Loss for critic is : 0.007295515388250351
avg_reward
-0.0036259856494867443
Loss for critic is : 0.006778364535421133
avg_reward
-0.008206765212238714
Loss for critic is : 0.0072246454656124115
avg_reward
-0.009867599128130157
Loss for critic is : 0.007088586455211043
avg_reward
-0.005410171849428289
Loss for critic is : 0.007413501152768731
avg_reward
-0.012297337167417804
Loss for critic is : 0.007685597520321608
avg_reward
-0.0031649024956791165
Loss for critic is : 0.0075551713816821575
avg_reward
-0.006292297191132826
Loss for critic is : 0.007004265207797289
avg_reward
-0.007312138192379985
Loss for critic is : 0.007390543352812529
avg_reward
-0.005286478435748073
Loss for critic is : 0.007879314012825489
avg_reward
-0.00300830114266051
Loss for critic is : 0.007093997672200203
avg_reward
-0.0015172631243440763
Loss for critic is : 0.007369280094280839
avg_reward
-0.0002016948507430824
Loss for critic is 

Loss for critic is : 0.008052029181271791
avg_reward
0.0020384569350070745
Loss for critic is : 0.00784690142609179
avg_reward
-0.004459417930960011
Loss for critic is : 0.0076355901546776295
avg_reward
-0.01055929533853632
Loss for critic is : 0.007846600376069546
avg_reward
-0.012842465992686904
Loss for critic is : 0.007841354701668024
avg_reward
-0.003919925187581852
Loss for critic is : 0.007337156683206558
avg_reward
-0.003563907359288265
Loss for critic is : 0.007678375579416752
avg_reward
-0.00869274200756268
Loss for critic is : 0.006955671822652221
avg_reward
-0.007543918737629161
Loss for critic is : 0.007362285163253546
avg_reward
-0.005005241460355853
Loss for critic is : 0.007340739946812391
avg_reward
-0.01014752151985491
Loss for critic is : 0.007562706945464015
avg_reward
-0.004067654650249121
Loss for critic is : 0.007426507072523236
avg_reward
-0.005485663535406935
Loss for critic is : 0.007571525173261762
avg_reward
-0.004577173000255823
Loss for critic is : 0.00740

Loss for critic is : 0.007427041418850422
avg_reward
-0.003243340513029565
Loss for critic is : 0.007417746819555759
avg_reward
-0.002484174404939518
Loss for critic is : 0.007006029365584254
avg_reward
0.002440325801744823
Loss for critic is : 0.007226894609630108
avg_reward
0.0023044860618380717
Loss for critic is : 0.007064904319122434
avg_reward
-0.0012510402398082528
Loss for critic is : 0.007374712266027927
avg_reward
-0.00010562474672773164
Loss for critic is : 0.006852890597656369
avg_reward
-0.0026451573255793517
Loss for critic is : 0.007429741322994232
avg_reward
-0.000612239325771864
Loss for critic is : 0.007237516110762954
avg_reward
-0.0014360976377268552
Loss for critic is : 0.006967602064833045
avg_reward
-0.0029719482084648607
Loss for critic is : 0.007398596033453941
avg_reward
-0.009536065483021421
Loss for critic is : 0.00733733456581831
avg_reward
-0.003578874398887574
Loss for critic is : 0.00741099938750267
avg_reward
-0.006706717522061544
Loss for critic is : 0

Loss for critic is : 0.007166773080825806
avg_reward
-0.016471583224313118
Loss for critic is : 0.00719765480607748
avg_reward
-0.02013378659784583
Loss for critic is : 0.0077599401120096445
avg_reward
-0.006624030115780076
Loss for critic is : 0.007083303760737181
avg_reward
-0.005572201559064511
Loss for critic is : 0.00713430461473763
avg_reward
0.0010089395636260021
Loss for critic is : 0.007716124644502997
avg_reward
0.00230624154460845
Loss for critic is : 0.007075849687680602
avg_reward
0.0022738198498129837
Loss for critic is : 0.007260255282744765
avg_reward
-0.0009243063309400095
Loss for critic is : 0.007564384490251541
avg_reward
-0.0015333722941121642
Loss for critic is : 0.00707338098436594
avg_reward
-0.005975198598528961
Loss for critic is : 0.007288644555956125
avg_reward
-0.003353963334774423
Loss for critic is : 0.007453934755176306
avg_reward
-0.013474738120056161
Loss for critic is : 0.006982835242524743
avg_reward
-0.00975052208150414
Loss for critic is : 0.007123

Loss for critic is : 0.007861750666052103
avg_reward
0.002259359935380559
Loss for critic is : 0.007863373961299658
avg_reward
-0.0010634442944549693
Loss for critic is : 0.0072957652155309916
avg_reward
-0.001917392373650351
Loss for critic is : 0.007028476567938924
avg_reward
-0.00025848284204535183
Loss for critic is : 0.007600352866575122
avg_reward
-0.002905913896301549
Loss for critic is : 0.007168358424678445
avg_reward
-0.0060493251560178145
Loss for critic is : 0.007154060760512948
avg_reward
-0.00010161354163689979
Loss for critic is : 0.007056168047711253
avg_reward
-0.006420113431822899
Loss for critic is : 0.00754065765067935
avg_reward
-0.008386625042866039
Loss for critic is : 0.007287714630365372
avg_reward
-0.002874347456109983
Loss for critic is : 0.0072906664572656155
avg_reward
-0.006911456223866343
Loss for critic is : 0.007083144271746278
avg_reward
-0.009162744087922062
Loss for critic is : 0.0069864545948803425
avg_reward
-0.008450385807691724
Loss for critic is

Loss for critic is : 0.00715751713141799
avg_reward
-0.009830159646976262
Loss for critic is : 0.007412906503304839
avg_reward
-0.006193168058955433
Loss for critic is : 0.007434170693159103
avg_reward
-0.002267576462758804
Loss for critic is : 0.007756355218589306
avg_reward
-0.005685760857819027
Loss for critic is : 0.007842532824724913
avg_reward
-0.007284354773764974
Loss for critic is : 0.007133129518479109
avg_reward
-0.00829911334338125
Loss for critic is : 0.007383027346804738
avg_reward
-0.007912599351429513
Loss for critic is : 0.0074854150880128145
avg_reward
-0.008519409472268397
Loss for critic is : 0.007468705764040351
avg_reward
-0.009692461918170107
Loss for critic is : 0.008024116279557347
avg_reward
-0.010211319235078464
Loss for critic is : 0.007152078906074166
avg_reward
0.0058090727817054905
Loss for critic is : 0.007217020262032747
avg_reward
0.009833224920833376
Loss for critic is : 0.007562268991023302
avg_reward
0.00734163784293647
Loss for critic is : 0.007216

Loss for critic is : 0.00732218474149704
avg_reward
-0.021231196907617052
Loss for critic is : 0.007331873290240765
avg_reward
-0.021110183816181734
Loss for critic is : 0.0072517478838562965
avg_reward
-0.01888635211591975
Loss for critic is : 0.007591160712763667
avg_reward
-0.015637622383005492
Loss for critic is : 0.007598889991641045
avg_reward
-0.02121812319955365
Loss for critic is : 0.007336676120758057
avg_reward
-0.019902993659544874
Loss for critic is : 0.0078053411561995745
avg_reward
-0.015473335416768626
Loss for critic is : 0.00737869250588119
avg_reward
-0.010421577774127624
Loss for critic is : 0.007376473164185882
avg_reward
-0.015853402389352407
Loss for critic is : 0.007672662613913417
avg_reward
-0.008801089392000003
Loss for critic is : 0.0072266533970832825
avg_reward
-0.010241936810504067
Loss for critic is : 0.007212063297629356
avg_reward
-0.0048726387206294575
Loss for critic is : 0.007045702775940299
avg_reward
-0.00015387152290700576
Loss for critic is : 0.

Loss for critic is : 0.006685704691335559
avg_reward
-0.0043654389936838974
Loss for critic is : 0.006876701023429632
avg_reward
-0.0076684376615551286
Loss for critic is : 0.006846112664788961
avg_reward
-0.007687370962972917
Loss for critic is : 0.0070279615465551615
avg_reward
-0.004235901411642703
Loss for critic is : 0.007120907306671143
avg_reward
-0.013243621631624278
Loss for critic is : 0.006752830231562257
avg_reward
-0.01722903454787792
Loss for critic is : 0.006911886390298605
avg_reward
-0.015996550727269472
Loss for critic is : 0.0073156352154910564
avg_reward
-0.009125461702845014
Loss for critic is : 0.006941854488104582
avg_reward
-0.02452575159133269
Loss for critic is : 0.007154962047934532
avg_reward
-0.027296720230484725
Loss for critic is : 0.006849600933492184
avg_reward
-0.024900833613006122
Loss for critic is : 0.007054860005155206
avg_reward
-0.027483249953552814
Loss for critic is : 0.0069329096004366875
avg_reward
-0.025224946309725858
Loss for critic is : 0

Loss for critic is : 0.0070513098035007715
avg_reward
-0.004537345434240038
Loss for critic is : 0.0067604342475533485
avg_reward
-0.010502039386950511
Loss for critic is : 0.006681713508442044
avg_reward
-0.018361748706983268
Loss for critic is : 0.006420913152396679
Episode : 75 start 
avg_reward
-0.016737326850276166
Loss for critic is : 0.006860420573502779
avg_reward
-0.04477367898846094
Loss for critic is : 0.006356747122481465
avg_reward
-0.06738176107360884
Loss for critic is : 0.006418376229703426
avg_reward
-0.07866477039833136
Loss for critic is : 0.006719374796375632
avg_reward
-0.08084575944301857
Loss for critic is : 0.006575361592695117
avg_reward
-0.08197648145947993
Loss for critic is : 0.0070658770855516195
avg_reward
-0.08782680070755756
Loss for critic is : 0.006293681683018804
avg_reward
-0.03622915913091683
Loss for critic is : 0.0066655834671109915
avg_reward
-0.03356902760164006
Loss for critic is : 0.006506302626803517
avg_reward
-0.03723812512178885
Loss for c

Loss for critic is : 0.00683685252442956
avg_reward
-0.05501486909975839
Loss for critic is : 0.006957816192880273
avg_reward
-0.05658482861091832
Loss for critic is : 0.006627902388572693
avg_reward
-0.05087349119298945
Loss for critic is : 0.0065563570242375135
avg_reward
-0.01625831242751765
Loss for critic is : 0.006725717801600695
avg_reward
-0.022024444182119206
Loss for critic is : 0.006719446508213878
avg_reward
-0.016519025048273663
Loss for critic is : 0.006498601520434022
avg_reward
-0.023752302404896283
Loss for critic is : 0.006459059659391642
avg_reward
-0.01710358044119558
Loss for critic is : 0.006792050553485751
avg_reward
-0.017636836162226922
Loss for critic is : 0.006245704600587487
avg_reward
-0.04340232971066228
Loss for critic is : 0.007059237454086542
avg_reward
-0.032943371644711186
Loss for critic is : 0.007859492907300591
avg_reward
-0.0026003237447744347
Loss for critic is : 0.006216487381607294
avg_reward
0.007132134346433141
Loss for critic is : 0.00683934

Loss for critic is : 0.006698701996356249
avg_reward
-0.01424591913707855
Loss for critic is : 0.006474426481872797
avg_reward
-0.016744715665295326
Loss for critic is : 0.006823940435424447
avg_reward
-0.012327015835430282
Loss for critic is : 0.006453042617067695
avg_reward
-0.009608462624970355
Loss for critic is : 0.006671201903373003
avg_reward
-0.007593813415222524
Loss for critic is : 0.0069022460374981165
avg_reward
-0.006765235388458574
Loss for critic is : 0.0063464827835559845
avg_reward
-0.008816700862707253
Loss for critic is : 0.007036010269075632
avg_reward
-0.004985908750870013
Loss for critic is : 0.006848442135378718
avg_reward
0.008105411573616132
Loss for critic is : 0.006873891223222017
avg_reward
0.016796914404861713
Loss for critic is : 0.007406583754345775
avg_reward
0.012797418748009068
Loss for critic is : 0.007116753840819001
avg_reward
0.007689568920991706
Loss for critic is : 0.006565580610185862
avg_reward
-0.005382006808507861
Loss for critic is : 0.00695

Loss for critic is : 0.00673733395524323
avg_reward
0.0020058768764408246
Loss for critic is : 0.007007190957665443
avg_reward
-0.0013693055034399507
Loss for critic is : 0.006644330220296979
avg_reward
-0.002221796833557039
Loss for critic is : 0.007195234997197986
avg_reward
0.0013958037373397597
Loss for critic is : 0.0071078152395784855
avg_reward
-0.0014778500696887697
Loss for critic is : 0.006980923004448414
avg_reward
0.0006650562329865214
Loss for critic is : 0.007019760785624385
avg_reward
0.003277247220468905
Loss for critic is : 0.006981865968555212
avg_reward
-0.007684286778226341
Loss for critic is : 0.006883523426949978
avg_reward
-0.007945687953789096
Loss for critic is : 0.006585350027307868
avg_reward
-0.0048078131623168485
Loss for critic is : 0.0068962539080530405
avg_reward
-0.006519514278694281
Loss for critic is : 0.006934784119948745
avg_reward
-0.012180903253568541
Loss for critic is : 0.0069009000435471535
avg_reward
-0.006697510960660424
Loss for critic is : 

Loss for critic is : 0.0062874178402125835
avg_reward
-0.005683633991063412
Loss for critic is : 0.006676007993519306
avg_reward
-0.008554024195366234
Loss for critic is : 0.007242591120302677
avg_reward
0.0017645750286442753
Loss for critic is : 0.006407652283087373
avg_reward
0.0006622784680591631
Loss for critic is : 0.006399770267307758
avg_reward
-0.0012928006342163949
Loss for critic is : 0.006855177227407694
avg_reward
0.0011483727594961933
Loss for critic is : 0.0073590646497905254
avg_reward
-0.0026943383396772873
Loss for critic is : 0.00644395942799747
avg_reward
0.0005461625053021675
Loss for critic is : 0.006873365957289934
avg_reward
0.0017434162932790827
Loss for critic is : 0.006609879666939378
avg_reward
0.0008375358890911379
Loss for critic is : 0.006345986155793071
avg_reward
-0.004236507793723044
Loss for critic is : 0.0067236279137432575
avg_reward
-0.00861876531421522
Loss for critic is : 0.0069297486916184425
avg_reward
-0.0045735914300070474
Loss for critic is :

Loss for critic is : 0.006753207184374332
avg_reward
-0.010309220902639936
Loss for critic is : 0.00643523083999753
avg_reward
-0.007534395567575384
Loss for critic is : 0.007282655918970704
avg_reward
-0.009639060613005861
Loss for critic is : 0.006357574136927724
avg_reward
-0.005022647052245387
Loss for critic is : 0.00685830251313746
avg_reward
-0.007682978269036094
Loss for critic is : 0.006512179272249341
avg_reward
-0.00978626711025602
Loss for critic is : 0.006143634440377355
avg_reward
-0.008965223531328457
Loss for critic is : 0.006709673209115863
avg_reward
-0.009322861912767983
Loss for critic is : 0.006533927051350474
avg_reward
-0.011071695504231302
Loss for critic is : 0.0068471357226371765
avg_reward
-0.0038961429574204814
Loss for critic is : 0.006776750087738037
avg_reward
-0.004217018591218024
Loss for critic is : 0.006365826586261392
avg_reward
-0.004223308347651834
Loss for critic is : 0.006422108970582485
avg_reward
-0.00412670263403658
Loss for critic is : 0.0067

Loss for critic is : 0.006309382617473602
avg_reward
-0.019079370047747183
Loss for critic is : 0.006051068194210529
avg_reward
-0.011287955015740758
Loss for critic is : 0.006520662689581513
avg_reward
-0.006090545337477264
Loss for critic is : 0.0066626169718801975
avg_reward
-0.006541904778344683
Loss for critic is : 0.00635371427051723
avg_reward
-0.005678551168123374
Loss for critic is : 0.006680456688627601
avg_reward
-0.004341864458976943
Loss for critic is : 0.006376602686941624
avg_reward
-0.019652445166843925
Loss for critic is : 0.007412295788526535
avg_reward
-0.01610164412399861
Loss for critic is : 0.006295622093603015
avg_reward
-0.010146148469018597
Loss for critic is : 0.006366106448695064
avg_reward
-0.018250818393409952
Loss for critic is : 0.00664039421826601
avg_reward
-0.014255769519955478
Loss for critic is : 0.006891491124406457
avg_reward
-0.01192949459013298
Loss for critic is : 0.00646472442895174
avg_reward
0.0035616977576250243
Loss for critic is : 0.006545

Loss for critic is : 0.006854988168925047
avg_reward
-0.03349229451023525
Loss for critic is : 0.0064751270692795515
avg_reward
-0.03229468228770448
Loss for critic is : 0.006625582464039326
avg_reward
-0.033037596295960446
Loss for critic is : 0.00704634259454906
avg_reward
-0.03919193675841556
Loss for critic is : 0.006792833562940359
avg_reward
-0.035234249331280505
Loss for critic is : 0.006444669794291258
avg_reward
-0.033557499308018574
Loss for critic is : 0.007421624846756458
avg_reward
-0.04942856251973198
Loss for critic is : 0.006720221601426601
avg_reward
-0.05282011235173565
Loss for critic is : 0.006978948134928942
avg_reward
-0.05640364762062775
Loss for critic is : 0.006607254734262824
avg_reward
-0.05364482454227069
Loss for critic is : 0.007367843762040138
avg_reward
-0.06617904147306339
Loss for critic is : 0.00661656423471868
avg_reward
-0.06396608398012425
Loss for critic is : 0.006890397518873215
avg_reward
-0.06345523919512622
Loss for critic is : 0.0066890746820

Loss for critic is : 0.006435262272134423
avg_reward
-0.045270323089649955
Loss for critic is : 0.00651092454791069
avg_reward
-0.038220355449498686
Loss for critic is : 0.007222120882943273
avg_reward
-0.0431957746392798
Loss for critic is : 0.006289399694651365
avg_reward
-0.04503799484255266
Loss for critic is : 0.006505668396130204
avg_reward
-0.05089299421942464
Loss for critic is : 0.005990057485178113
avg_reward
-0.061782512405732785
Loss for critic is : 0.0065094465389847755
avg_reward
-0.05170183601208235
Loss for critic is : 0.0062196445651352406
avg_reward
-0.046970092867592614
Loss for critic is : 0.006566133350133896
avg_reward
-0.05301397085742723
Loss for critic is : 0.0068305740132927895
avg_reward
-0.058560183390827115
Loss for critic is : 0.006121453596279025
avg_reward
-0.059728875672398
Loss for critic is : 0.0063999309204518795
avg_reward
-0.04781118301125774
Loss for critic is : 0.0062768300995230675
avg_reward
-0.03784507799175468
Loss for critic is : 0.006411126

Loss for critic is : 0.006675058277323842
avg_reward
-0.03229322231087727
Loss for critic is : 0.006276193540543318
avg_reward
-0.04254205803733267
Loss for critic is : 0.0062409944366663694
avg_reward
-0.03404237927188418
Loss for critic is : 0.006387344095855951
avg_reward
-0.05322245798699315
Loss for critic is : 0.006087437272071838
avg_reward
-0.053180959920849626
Loss for critic is : 0.006516653811559081
avg_reward
-0.035240199548605314
Loss for critic is : 0.006825482007116079
avg_reward
-0.03381586268942886
Loss for critic is : 0.006246026838198304
avg_reward
-0.03367569038450568
Loss for critic is : 0.006662921514362097
avg_reward
-0.029262874789225367
Loss for critic is : 0.006453884765505791
avg_reward
-0.05006508789232067
Loss for critic is : 0.006800261326134205
avg_reward
-0.06733043458310814
Loss for critic is : 0.006637001875787973
avg_reward
-0.04400506777936221
Loss for critic is : 0.007186684524640441
avg_reward
-0.049725963220786866
Loss for critic is : 0.0066472082

Loss for critic is : 0.006653130985796452
avg_reward
-0.015433814828430398
Loss for critic is : 0.006624201079830527
avg_reward
0.00038557399879618134
Loss for critic is : 0.006820466136559844
avg_reward
-0.021275336391776846
Loss for critic is : 0.006895008496940136
avg_reward
-0.02822365451721915
Loss for critic is : 0.006906353170052171
Episode : 86 start 
avg_reward
-0.011969293548000743
Loss for critic is : 0.00666490220464766
avg_reward
-0.022665755290472836
Loss for critic is : 0.007087781559675932
avg_reward
-0.023143562770110065
Loss for critic is : 0.0068002198822796345
avg_reward
-0.02296878570491829
Loss for critic is : 0.006590557750314474
avg_reward
-0.024067955387360056
Loss for critic is : 0.007365488680079579
avg_reward
-0.02462950948434464
Loss for critic is : 0.006413731491193175
avg_reward
-0.026378629629754298
Loss for critic is : 0.006721310317516327
avg_reward
-0.02815143216348019
Loss for critic is : 0.007210042793303728
avg_reward
-0.03207562331356089
Loss for 

Loss for critic is : 0.007655846420675516
avg_reward
-0.039106860830148214
Loss for critic is : 0.0068920080084353685
avg_reward
-0.04130171775653435
Loss for critic is : 0.00675855646841228
avg_reward
-0.041948243084081155
Loss for critic is : 0.006929760333150625
avg_reward
-0.029944393070384313
Loss for critic is : 0.006991965230554342
avg_reward
-0.0006237582542414754
Loss for critic is : 0.007068810053169727
avg_reward
0.001031397267638583
Loss for critic is : 0.006787796970456839
avg_reward
0.021568114792191263
Loss for critic is : 0.006575314328074455
avg_reward
0.008985144725781827
Loss for critic is : 0.006379140540957451
avg_reward
0.00840576259530263
Loss for critic is : 0.006916555110365152
avg_reward
-0.0029286380960984962
Loss for critic is : 0.006489441730082035
avg_reward
-0.004887035359186375
Loss for critic is : 0.007161647314205766
avg_reward
-0.015419657541644483
Loss for critic is : 0.006353548727929592
avg_reward
-0.02121958668270228
Loss for critic is : 0.0068370

Loss for critic is : 0.0066931894980371
avg_reward
0.006242915300922174
Loss for critic is : 0.006901792949065566
avg_reward
0.00030880264576153317
Loss for critic is : 0.006670370465144515
avg_reward
-0.002917424853685048
Loss for critic is : 0.006494760746136308
avg_reward
-0.004211817556822318
Loss for critic is : 0.006383112631738186
avg_reward
-0.018314392369076875
Loss for critic is : 0.00643162103369832
avg_reward
-0.018321422363515874
Loss for critic is : 0.007286226376891136
avg_reward
-0.016458276613417634
Loss for critic is : 0.007104166550561786
avg_reward
-0.019898530135985013
Loss for critic is : 0.006557598477229476
avg_reward
-0.024118096173076253
Loss for critic is : 0.006631038151681423
avg_reward
-0.025884081770013813
Loss for critic is : 0.006469036918133497
avg_reward
-0.01261326098837326
Loss for critic is : 0.006724263308569789
avg_reward
-0.025351433239912943
Loss for critic is : 0.006279038265347481
avg_reward
-0.021726617332906553
Loss for critic is : 0.006992

Loss for critic is : 0.007039838004857302
avg_reward
-0.04316607300767598
Loss for critic is : 0.006944266613572836
avg_reward
-0.04004495894074493
Loss for critic is : 0.006937071913853288
avg_reward
-0.0369558512756266
Loss for critic is : 0.007447775918990374
avg_reward
-0.01552989948877604
Loss for critic is : 0.007188368123024702
avg_reward
0.002517302084751205
Loss for critic is : 0.007255802163854241
avg_reward
-0.003621447242694441
Loss for critic is : 0.0073032379150390625
avg_reward
-0.006795924401114747
Loss for critic is : 0.007456188090145588
avg_reward
-0.007025083239273804
Loss for critic is : 0.008326407987624407
avg_reward
-0.0524463028713221
Loss for critic is : 0.009465677663683891
avg_reward
-0.047162659741840904
Loss for critic is : 0.008486235048621893
avg_reward
-0.09707166169806977
Loss for critic is : 0.0084635722450912
avg_reward
-0.0933274576124899
Loss for critic is : 0.00855909613892436
avg_reward
-0.07697916254679785
Loss for critic is : 0.0082443375140428

Loss for critic is : 0.00705460668541491
avg_reward
-0.02399190419116073
Loss for critic is : 0.007242802530527115
avg_reward
-0.01436631027472874
Loss for critic is : 0.006731181405484676
avg_reward
-0.01090178363114011
Loss for critic is : 0.007081233197823167
avg_reward
-0.018181440556967854
Loss for critic is : 0.0072440956719219685
avg_reward
-0.01784224224161514
Loss for critic is : 0.006579709472134709
avg_reward
-0.026336730862366903
Loss for critic is : 0.0070472408551722765
avg_reward
-0.026866618214490816
Loss for critic is : 0.006729460787028074
avg_reward
-0.02442684967002149
Loss for critic is : 0.00698378193192184
avg_reward
-0.025909568874490967
Loss for critic is : 0.006685126340016723
avg_reward
-0.02586203147809734
Loss for critic is : 0.007000763900578022
avg_reward
-0.02658941950266716
Loss for critic is : 0.00696819182485342
avg_reward
-0.025143268669562676
Loss for critic is : 0.007303352002054453
avg_reward
-0.01984296252369121
Loss for critic is : 0.00659409654

Loss for critic is : 0.007547879358753562
avg_reward
-0.005362909849133278
Loss for critic is : 0.007271744776517153
avg_reward
-0.0068847111524734175
Loss for critic is : 0.006946013309061527
avg_reward
-0.012327346844363395
Loss for critic is : 0.0070221025962382555
avg_reward
-0.01182640319372655
Loss for critic is : 0.007457401370629668
avg_reward
-0.01854756304264063
Loss for critic is : 0.006964586907997727
avg_reward
-0.028739798058723807
Loss for critic is : 0.006877651670947671
avg_reward
-0.026627023206688364
Loss for critic is : 0.0067757852375507355
avg_reward
-0.02304599324243505
Loss for critic is : 0.006969092646613717
avg_reward
-0.01793016318907903
Loss for critic is : 0.0065996283665299416
avg_reward
-0.010457011805821653
Loss for critic is : 0.006359557854011655
avg_reward
0.0063502871955643265
Loss for critic is : 0.006793904816731811
avg_reward
0.004127073752710837
Loss for critic is : 0.006573377875611186
avg_reward
0.004020158424707813
Loss for critic is : 0.0065

Loss for critic is : 0.006605585338547826
avg_reward
-0.015436126333165623
Loss for critic is : 0.007188737392425537
avg_reward
-0.013693460303587392
Loss for critic is : 0.007134323939681053
avg_reward
-0.01065108529537927
Loss for critic is : 0.0069016930647194386
avg_reward
-0.011326617976156876
Loss for critic is : 0.008504926227033138
avg_reward
-0.011223256406485188
Loss for critic is : 0.007348149083554745
avg_reward
-0.00810090265668826
Loss for critic is : 0.006741008255630732
avg_reward
-0.009362977294504441
Loss for critic is : 0.006732092006132007
avg_reward
-0.00980614999905136
Loss for critic is : 0.006834752857685089
avg_reward
-0.009885138418651494
Loss for critic is : 0.006825434509664774
avg_reward
-0.00774726209083886
Loss for critic is : 0.006858787499368191
avg_reward
-0.012308057150201668
Loss for critic is : 0.006856744177639484
avg_reward
-0.004204647937911827
Loss for critic is : 0.007166407769545913
avg_reward
-0.016900159989794727
Loss for critic is : 0.00670

Loss for critic is : 0.00605745567008853
avg_reward
-0.007736021583994553
Loss for critic is : 0.006354507757350802
avg_reward
-0.0055568824776974345
Loss for critic is : 0.00704241287894547
avg_reward
-0.008719636783698519
Loss for critic is : 0.007364446530118585
avg_reward
-0.009216865652084912
Loss for critic is : 0.007026933832094073
avg_reward
-0.008677427779939592
Loss for critic is : 0.006360460072755814
avg_reward
-0.012796777177373164
Loss for critic is : 0.006910207914188504
avg_reward
-0.018320601458845346
Loss for critic is : 0.006406667875126004
avg_reward
-0.021724493733600383
Loss for critic is : 0.006562546361237764
avg_reward
-0.01207431664104204
Loss for critic is : 0.006403915584087372
avg_reward
-0.021816822705411843
Loss for critic is : 0.006173992529511452
avg_reward
-0.016533741221486417
Loss for critic is : 0.006997698452323675
avg_reward
-0.02239892942463964
Loss for critic is : 0.0066125772427767515
avg_reward
-0.025922447826566112
Loss for critic is : 0.0069

Loss for critic is : 0.00743933510966599
avg_reward
-0.022811507158957342
Loss for critic is : 0.007261793129146099
avg_reward
-0.01884173382214926
Loss for critic is : 0.006193871144205332
avg_reward
-0.01528164214302519
Loss for critic is : 0.006843318231403828
avg_reward
-0.004486323203129933
Loss for critic is : 0.00667918031103909
avg_reward
0.0015159436045334828
Loss for critic is : 0.0062000686302781105
avg_reward
-0.000740432874619593
Loss for critic is : 0.006794050103053451
avg_reward
0.005191132029784353
Loss for critic is : 0.006863695569336414
avg_reward
0.0013690108306767847
Loss for critic is : 0.00645719631575048
avg_reward
-0.002045489155416703
Loss for critic is : 0.007415530038997531
avg_reward
-0.004952117687199383
Loss for critic is : 0.006948264315724373
avg_reward
-0.005415881397997625
Loss for critic is : 0.007464970462024212
avg_reward
-0.009083658560008602
Loss for critic is : 0.0060857669450342655
avg_reward
-0.013988086235617734
Loss for critic is : 0.006658

Loss for critic is : 0.006559828529134393
avg_reward
-0.009484928874214897
Loss for critic is : 0.006451404886320233
avg_reward
-0.012806728152218717
Loss for critic is : 0.006552281090989709
avg_reward
-0.0015240583304852728
Loss for critic is : 0.006458798656240106
avg_reward
-0.004588196815791385
Loss for critic is : 0.006684169638901949
avg_reward
-0.0011252894451840544
Loss for critic is : 0.006022643763571978
avg_reward
0.0029284660866246513
Loss for critic is : 0.007001424906775355
avg_reward
-0.004432477000040095
Loss for critic is : 0.007000093348324299
avg_reward
-0.0084670254600618
Loss for critic is : 0.006265423726290464
avg_reward
-0.010578751558664435
Loss for critic is : 0.0063243519980460405
avg_reward
-0.013108840536193636
Loss for critic is : 0.006799622206017375
avg_reward
-0.017510055923152437
Loss for critic is : 0.006998682161793113
avg_reward
-0.004422435656137626
Loss for critic is : 0.006264857482165098
avg_reward
-0.01570890241947107
Loss for critic is : 0.00

Loss for critic is : 0.007015163544565439
avg_reward
-0.014592979449461706
Loss for critic is : 0.0067328400909900665
avg_reward
-0.015103181682265694
Loss for critic is : 0.006734557216987014
avg_reward
-0.013474785275464269
Loss for critic is : 0.006808114005252719
avg_reward
-0.012981897662039067
Loss for critic is : 0.00600661407224834
avg_reward
-0.015796907691548377
Loss for critic is : 0.006358656566590071
Episode : 97 start 
avg_reward
-0.009382641791909622
Loss for critic is : 0.006353959906846285
avg_reward
-0.016492674564702884
Loss for critic is : 0.0062662409618496895
avg_reward
-0.02255421710389071
Loss for critic is : 0.006559888366609812
avg_reward
-0.015708931580241996
Loss for critic is : 0.0059806841891258955
avg_reward
-0.025779322658775108
Loss for critic is : 0.006019810913130641
avg_reward
-0.016916737792764862
Loss for critic is : 0.006704902276396751
avg_reward
-0.017633746674137765
Loss for critic is : 0.007206027861684561
avg_reward
-0.01524812979625659
Loss 

Loss for critic is : 0.006437073461711407
avg_reward
-0.018964083163306382
Loss for critic is : 0.006429280852898955
avg_reward
-0.02116705814049284
Loss for critic is : 0.00626359460875392
avg_reward
-0.01570193599969212
Loss for critic is : 0.006736418465152383
avg_reward
-0.017105144377115536
Loss for critic is : 0.006534544052556157
avg_reward
-0.01736511361559607
Loss for critic is : 0.006108832778409123
avg_reward
-0.015298779086527617
Loss for critic is : 0.006483662640675902
avg_reward
-0.02069177144824462
Loss for critic is : 0.006580141838639975
avg_reward
-0.02055921353709652
Loss for critic is : 0.0064267488196492195
avg_reward
-0.021699627261015597
Loss for critic is : 0.0073451693169772625
avg_reward
-0.019394096722625116
Loss for critic is : 0.007022356381639838
avg_reward
-0.02604631089787393
Loss for critic is : 0.006489879917353392
avg_reward
-0.029584606472489103
Loss for critic is : 0.006393552292138338
avg_reward
-0.026976045691340836
Loss for critic is : 0.0065491

Loss for critic is : 0.006294812774285674
avg_reward
-0.016889509179747336
Loss for critic is : 0.006884399801492691
avg_reward
-0.011650853906767704
Loss for critic is : 0.006390666589140892
avg_reward
-0.01487493242450872
Loss for critic is : 0.006573175545781851
avg_reward
-0.01785736961170696
Loss for critic is : 0.00652065291069448
avg_reward
-0.019282219902876423
Loss for critic is : 0.006317767547443509
avg_reward
-0.018115705664681153
Loss for critic is : 0.006289559183642268
avg_reward
-0.02293884547161927
Loss for critic is : 0.006489145802333951
avg_reward
-0.024840865119737956
Loss for critic is : 0.006111315917223692
avg_reward
-0.022140484460841246
Loss for critic is : 0.006167807849124074
avg_reward
-0.020067904736371617
Loss for critic is : 0.007014100672677159
avg_reward
-0.014400354438833151
Loss for critic is : 0.006812560837715864
avg_reward
-0.012061659523808891
Loss for critic is : 0.00664070644415915
avg_reward
-0.015131680053767112
Loss for critic is : 0.0063543

Loss for critic is : 0.0062292448710650206
avg_reward
-0.009385115854873026
Loss for critic is : 0.006897986866533756
avg_reward
-0.01349972162864866
Loss for critic is : 0.00673944759182632
avg_reward
-0.009438909192607542
Loss for critic is : 0.006653988733887672
avg_reward
-0.005390814397212947
Loss for critic is : 0.0065273328218609095
avg_reward
-0.004421416552573738
Loss for critic is : 0.006690433248877525
avg_reward
-0.0026174394325264166
Loss for critic is : 0.0066118426620960236
avg_reward
0.0014184606899632062
Loss for critic is : 0.006788827478885651
avg_reward
-0.004522403106328872
Loss for critic is : 0.006915516685694456
avg_reward
-0.0010783049805140645
Loss for critic is : 0.006548123434185982
avg_reward
-0.003157646461280448
Loss for critic is : 0.007448915392160416
avg_reward
-0.009141338999755742
Loss for critic is : 0.007046971935778856
avg_reward
-0.007582708380529933
Loss for critic is : 0.006662801373749971
avg_reward
-0.0030264553986153395
Loss for critic is : 

KeyboardInterrupt: 