In [1]:
import tensorflow as tf
import os 
import numpy as np
import pandas as pd
from model import CNN_Model,Params
from Env import Env

  from ._conv import register_converters as _register_converters


In [2]:
class experience_replay_buffer:
    """Fixed-capacity store of (state, action, next_state, reward, done) transitions.

    Each of the five columns is a preallocated NumPy array whose per-item
    shape and dtype are taken from a sample transition handed to the
    constructor. While the buffer is not full, new transitions are appended;
    once it is full, each new transition overwrites a uniformly random slot.
    """

    def __init__(self,buffer_len,sample):
        """Allocate storage for ``buffer_len`` transitions.

        Args:
            buffer_len: maximum number of transitions kept.
            sample: one example transition, ordered as
                (state, action, next_state, reward, done). Only the shape and
                dtype of each element are used; the values are not stored.
        """
        self.column_names=['state','action','next_state','reward','done']
        self.buffer={}
        for col_name,item in zip(self.column_names,sample):
            template=np.array(item)
            # np.empty leaves the memory uninitialized: rows at index
            # >= num_items hold garbage until they are written.
            self.buffer[col_name]=np.empty(shape=[buffer_len,*template.shape],dtype=template.dtype)
        self.num_items=0
        self.capacity=buffer_len

    def add_experience(self,state,action,next_state,reward,done):
        """Store one transition, overwriting a random slot when the buffer is full."""
        if self.num_items<self.capacity:
            ind=self.num_items
            self.num_items+=1
        else:
            # Scalar index (not a size-1 array) so this path writes rows the
            # same way as the fill path above.
            ind=np.random.randint(low=0,high=self.capacity)

        for col_name,value in zip(self.column_names,(state,action,next_state,reward,done)):
            self.buffer[col_name][ind]=value

    def get_batch(self,batch_size):
        """Sample ``batch_size`` stored transitions uniformly, with replacement.

        Returns:
            Tuple ``(states, actions, next_states, rewards, dones)`` of arrays
            whose first dimension is ``batch_size``.

        Raises:
            ValueError: if no transition has been stored yet.
        """
        if self.num_items==0:
            raise ValueError("cannot sample from an empty experience replay buffer")
        # Only rows below num_items have been written; sampling up to
        # capacity would return uninitialized memory from np.empty.
        inds=np.random.randint(low=0,high=self.num_items,size=batch_size,dtype=np.int32)
        return tuple(self.buffer[col_name][inds] for col_name in self.column_names)
    

In [3]:
class Q_Network(CNN_Model):
    """Deep Q-learning head and training loop on top of ``CNN_Model``.

    The parent class (imported from ``model``) is not visible here. This class
    relies on it for ``self.params``, ``self.logits``, ``self.X``,
    ``self.lr_placeholder``, ``self.training_mode``, ``self.step_no``,
    ``self.model_trainable_variables`` and the helpers
    ``build_model_till_logits``, ``form_placeholder``, ``save_model`` and
    ``create_log_directory_if_doesnt_exist``.
    """

    def __init__(self,max_experience_buffer_len=120,param_dict=None,restore_params=False,pickle_file_path=""):
        """Forward the model parameters to ``CNN_Model`` and record the replay capacity.

        Args:
            max_experience_buffer_len: capacity of the replay buffer that is
                created lazily on the first stored transition.
            param_dict: parameter dictionary passed to ``CNN_Model.__init__``;
                ``None`` is replaced by a fresh empty dict.
            restore_params: passed through to ``CNN_Model.__init__``.
            pickle_file_path: passed through to ``CNN_Model.__init__``.
        """
        # A fresh dict per call avoids sharing one mutable default between instances.
        if param_dict is None:
            param_dict={}
        CNN_Model.__init__(self,param_dict,restore_params,pickle_file_path)
        self.max_experience_buffer_len=max_experience_buffer_len

    def form_loss(self,logits,targets):
        """Return ``self.params.loss_fn(labels=targets, logits=logits)``.

        NOTE(review): not called anywhere in this class; ``Build_model``
        defines its own squared-error loss instead.
        """
        entropies=self.params.loss_fn(labels=targets,logits=logits)
        return entropies

    def Build_model(self):
        """Add the Q-learning outputs, placeholders, loss and train op to the graph.

        The network logits are treated as per-action Q-values. The loss is the
        mean squared difference between Q(state, action) and
        ``reward + discount_rate * max_q_next_state * notended``, where the
        next-state maximum is supplied through a placeholder.
        """
        self.build_model_till_logits()
        with tf.variable_scope(self.params.name_scope):
            # Greedy action (index of the largest Q-value) and that largest
            # Q-value, per row of the logits.
            self.max_q_value_actions=tf.squeeze(tf.argmax(self.logits,axis=1))
            self.max_q_values=tf.reduce_max(self.logits,axis=1)

            # Integer actions actually taken; the one-hot mask selects their Q-values.
            # NOTE(review): `(None)` is plain `None`, not a 1-tuple; how that
            # is interpreted is up to form_placeholder - confirm.
            self.actions=self.form_placeholder((None),tf.int32)
            one_hot_actions=tf.one_hot(indices=self.actions,depth=self.params.num_outputs)
            q_vals=tf.reduce_sum(self.logits*one_hot_actions,axis=1)

            # Inputs of the bootstrapped target.
            self.max_q_values_next_state=self.form_placeholder((None),tf.float32)
            self.rewards=self.form_placeholder((None),tf.float32)
            self.notended=self.form_placeholder((None),tf.float32)
            self.discount_rate=self.form_placeholder([],tf.float32)

            # notended zeroes the bootstrap term for transitions that ended the episode.
            target_q_vals=self.rewards+(self.discount_rate*self.max_q_values_next_state*self.notended)
            self.loss=tf.reduce_mean(tf.square(q_vals-target_q_vals))

            # Gradients and the update op.
            optimizer=self.params.optimizer_fn(learning_rate=self.lr_placeholder)
            self.grads_and_vars=optimizer.compute_gradients(loss=self.loss,var_list=self.model_trainable_variables)

            self.train_op=optimizer.apply_gradients(grads_and_vars=self.grads_and_vars,global_step=self.step_no)
            self.model_variables=tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.params.name_scope)
            self.saver=tf.train.Saver(var_list=self.model_variables)

            self.initializer=tf.global_variables_initializer()

    def add_to_experience_replay(self,state,action,next_state,reward,done):
        """Store one transition, creating the replay buffer on first use.

        The first transition doubles as the template from which the buffer
        derives the shape and dtype of every column.
        """
        if not hasattr(self,"experience_replay_buffer"):
            sample=[state,action,next_state,reward,done]
            self.experience_replay_buffer=experience_replay_buffer(buffer_len=self.max_experience_buffer_len,sample=sample)
        self.experience_replay_buffer.add_experience(state,action,next_state,reward,done)

    def train(self,sess,episodes,steps,epsilon,discount_rate,batch_size,env,save_dir,save_every_n_iter,log_every_n_iter,initialize=False,set_logging=True):
        """Run epsilon-greedy Q-learning against ``env``.

        Args:
            sess: TensorFlow session used for every ``run`` call.
            episodes: number of episodes to play.
            steps: maximum number of environment steps per episode.
            epsilon: probability of picking a uniformly random action.
            discount_rate: value fed to the discount-rate placeholder.
            batch_size: number of transitions sampled per training step;
                training starts once the buffer holds more than this many.
            env: object exposing ``reset()`` and ``step(action)``, the latter
                returning ``(next_state, reward, done, info)``.
            save_dir: directory passed to ``save_model`` and to the
                log-directory helper.
            save_every_n_iter: the model is saved whenever the iteration
                counter is a multiple of this value.
            log_every_n_iter: accepted for interface compatibility; unused.
            initialize: when true, run the variable initializer first.
            set_logging: when true, set up the log directory (no summaries
                are written yet).
        """
        if initialize:
            print ("Initializing.....\n")
            sess.run([self.initializer])
        if set_logging:
            print ("Setting up for Logging ...\n")
            log_dir,set_logging=self.create_log_directory_if_doesnt_exist(save_dir)
        if set_logging: # the helper reports whether the directory was created or found
            print("Logging called but no code implemented")
        print ("Retreiveing step no...\n")
        [iter_no]=sess.run([self.step_no])

        for episode in np.arange(episodes):
            state=env.reset()
            step=0
            for step in np.arange(steps):
                # Epsilon-greedy action selection. Both branches produce a
                # plain Python int, so the replay buffer's `action` column has
                # the same shape and dtype whichever branch fires first.
                if np.random.random()<epsilon:
                    action=int(np.random.randint(low=0,high=self.params.num_outputs))
                else:
                    feed_dict={self.X:np.expand_dims(state,axis=0),self.lr_placeholder:self.params.learning_rate,self.training_mode:True}
                    [greedy_action]=sess.run([self.max_q_value_actions],feed_dict=feed_dict)
                    action=int(greedy_action)

                next_state,reward,done,info=env.step(action)

                self.add_to_experience_replay(state,action,next_state,reward,done)

                episode_has_finished=done
                state=next_state

                if self.experience_replay_buffer.num_items>batch_size: # train only once enough experiences exist
                    states,actions,next_states,rewards,dones=self.experience_replay_buffer.get_batch(batch_size=batch_size)

                    # Largest Q-value of each sampled next state, from the same network.
                    feed_dict={self.X:next_states,self.lr_placeholder:self.params.learning_rate,self.training_mode:True}
                    [max_q_vals_next_state]=sess.run([self.max_q_values],feed_dict=feed_dict)

                    # 1.0 where the episode continued, 0.0 where it ended;
                    # float32 matches the placeholder dtype.
                    not_ended=np.logical_not(dones).astype(np.float32)

                    # One optimisation step on the sampled batch.
                    feed_dict={self.X:states,self.actions:actions,self.max_q_values_next_state:max_q_vals_next_state,self.rewards:rewards,self.notended:not_ended,self.discount_rate:discount_rate,self.lr_placeholder:self.params.learning_rate,self.training_mode:True}
                    loss,_=sess.run([self.loss,self.train_op],feed_dict=feed_dict)
                    iter_no+=1
                    if (iter_no)%save_every_n_iter==0:
                        print("^^^^ saving model ^^^^ \n")
                        self.save_model(sess,save_dir,self.step_no)

                    print ("Trainaing Step:\t Iteration no={} Game Step ={} loss={} ".format(iter_no,step,loss))
                if episode_has_finished:
                    break

            # NOTE: `step` is the index of the last step taken, not a count.
            print ("=======>Episode Length={} <=======\n".format(step))

In [4]:
# Game environment. The keyword arguments are forwarded unchanged to the
# project's Env wrapper, which defines what they mean.
env = Env(
    'SpaceInvaders-v0',
    convert_to_grayscale=True,
    crop=True,
    valid_Y=[20, -10],
    valid_X=[10, -10],
    resize=True,
    resize_Y=90,
    resize_X=70,
)

# Network and optimiser configuration handed to Q_Network.
params = {
    # Leading None leaves the batch dimension open.
    'input_shape': [None, *env.image_shape],
    'num_outputs': env.action_space,

    # Layers in order. Batch normalisation after the second conv layer and
    # the first fc layer, and both dropout layers, are currently disabled.
    'layer_hierarchy': [
        {'layer_type': 'conv_layer', 'kernel_size': 5, 'kernel_strides': 1, 'num_filters': 8, 'padding': 'valid'},
        {'layer_type': 'batch_normalization_layer'},
        {'layer_type': 'activation_layer'},
        {'layer_type': 'conv_layer', 'kernel_size': 3, 'kernel_strides': 1, 'num_filters': 16, 'padding': 'valid'},
        {'layer_type': 'activation_layer'},
        {'layer_type': 'flattening_layer'},
        {'layer_type': 'fc_layer', 'num_hidden_units': 100},
        {'layer_type': 'activation_layer'},
        {'layer_type': 'fc_layer', 'num_hidden_units': 50},
        {'layer_type': 'batch_normalization_layer'},
        {'layer_type': 'activation_layer'},
    ],
    'initializer_fn': tf.contrib.layers.variance_scaling_initializer,
    'activation_fn': tf.nn.relu,
    # No 'loss_fn' entry: Q_Network.Build_model defines its own loss.
    'learning_rate': 0.001,
    'optimizer_fn': tf.train.AdamOptimizer,
    'logdir': '/tf_logs_rnn/run/',
    'name_scope': 'q_network',
}
print(params['num_outputs'])



Resetting Environment...

6


In [6]:
# Run configuration.
save_dir = "deep_q_saves"
initialize = False  # False: resume from the saved model; True: start from fresh weights

n_episodes = 50
max_steps = 500
batch_size = 120
max_experience_buffer_len = 360
epsilon = 0.3
discount_rate = 0.99

save_every_n_iter = 10
log_every_n_iter = 5

tf.reset_default_graph()

with tf.Session() as sess:
    # The constructor and graph build are the same in both modes; only
    # restore_params and the follow-up restore differ.
    model = Q_Network(
        max_experience_buffer_len,
        params,
        restore_params=not initialize,
        pickle_file_path="deep_q_saves/q_network/model_object.pkl",
    )
    model.Build_model()
    if not initialize:
        model.restore_model(sess, save_dir)
        # Set the learning rate for this run after restoring.
        model.params.learning_rate = 0.001

    model.train(
        sess=sess,
        episodes=n_episodes,
        steps=max_steps,
        epsilon=epsilon,
        discount_rate=discount_rate,
        batch_size=batch_size,
        env=env,
        save_dir=save_dir,
        save_every_n_iter=save_every_n_iter,
        log_every_n_iter=log_every_n_iter,
        initialize=initialize,
        set_logging=True,
    )

restoring path:D:\dino_game_current\AI_Plays_Dino_Game\deep_q_saves\q_network
INFO:tensorflow:Restoring parameters from D:\dino_game_current\AI_Plays_Dino_Game\deep_q_saves\q_network\model_weights.ckpt-2520
Setting up for Logging ...

Logging called but no code implemented
Retreiveing step no...

Resetting Environment...

Trainaing Step:	 Iteration no=2521.0 Game Step =120 loss=7430.36865234375 
Trainaing Step:	 Iteration no=2522.0 Game Step =121 loss=6892.43505859375 
Trainaing Step:	 Iteration no=2523.0 Game Step =122 loss=7166.556640625 
Trainaing Step:	 Iteration no=2524.0 Game Step =123 loss=6404.75537109375 
Trainaing Step:	 Iteration no=2525.0 Game Step =124 loss=6541.9287109375 
Trainaing Step:	 Iteration no=2526.0 Game Step =125 loss=5802.45947265625 
Trainaing Step:	 Iteration no=2527.0 Game Step =126 loss=5438.58837890625 
Trainaing Step:	 Iteration no=2528.0 Game Step =127 loss=5964.587890625 
Trainaing Step:	 Iteration no=2529.0 Game Step =128 loss=4242.28564453125 
^^^^ s

Trainaing Step:	 Iteration no=2623.0 Game Step =222 loss=4009.674560546875 
Trainaing Step:	 Iteration no=2624.0 Game Step =223 loss=3372.218017578125 
Trainaing Step:	 Iteration no=2625.0 Game Step =224 loss=4422.349609375 
Trainaing Step:	 Iteration no=2626.0 Game Step =225 loss=3932.60107421875 
Trainaing Step:	 Iteration no=2627.0 Game Step =226 loss=3730.5869140625 
Trainaing Step:	 Iteration no=2628.0 Game Step =227 loss=3597.158935546875 
Trainaing Step:	 Iteration no=2629.0 Game Step =228 loss=4207.31103515625 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2630.0 Game Step =229 loss=4489.9150390625 
Trainaing Step:	 Iteration no=2631.0 Game Step =230 loss=3878.526611328125 
Trainaing Step:	 Iteration no=2632.0 Game Step =231 loss=4500.65283203125 
Trainaing Step:	 Iteration no=2633.0 Game Step =232 loss=3292.02294921875 
Trainaing Step:	 Iteration no=2634.0 Game Step =233 loss=4343.5615234375 
Trainaing Step:	 Iteration no=2635.0 Game Step =234 loss=3487.591552734375 


Trainaing Step:	 Iteration no=2729.0 Game Step =328 loss=986.3425903320312 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2730.0 Game Step =329 loss=1393.4921875 
Trainaing Step:	 Iteration no=2731.0 Game Step =330 loss=1294.153076171875 
Trainaing Step:	 Iteration no=2732.0 Game Step =331 loss=2287.39111328125 
Trainaing Step:	 Iteration no=2733.0 Game Step =332 loss=2112.5400390625 
Trainaing Step:	 Iteration no=2734.0 Game Step =333 loss=2059.889892578125 
Trainaing Step:	 Iteration no=2735.0 Game Step =334 loss=2051.19287109375 
Trainaing Step:	 Iteration no=2736.0 Game Step =335 loss=1535.7308349609375 
Trainaing Step:	 Iteration no=2737.0 Game Step =336 loss=2211.89501953125 
Trainaing Step:	 Iteration no=2738.0 Game Step =337 loss=2170.178955078125 
Trainaing Step:	 Iteration no=2739.0 Game Step =338 loss=2352.434814453125 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2740.0 Game Step =339 loss=2870.03857421875 
Trainaing Step:	 Iteration no=2741.0 Game Step =

Trainaing Step:	 Iteration no=2834.0 Game Step =433 loss=6.862826347351074 
Trainaing Step:	 Iteration no=2835.0 Game Step =434 loss=4.143389701843262 
Trainaing Step:	 Iteration no=2836.0 Game Step =435 loss=7.215899467468262 
Trainaing Step:	 Iteration no=2837.0 Game Step =436 loss=10.358907699584961 
Trainaing Step:	 Iteration no=2838.0 Game Step =437 loss=15.178112030029297 
Trainaing Step:	 Iteration no=2839.0 Game Step =438 loss=4.19166898727417 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2840.0 Game Step =439 loss=8.484261512756348 
Trainaing Step:	 Iteration no=2841.0 Game Step =440 loss=6.136191368103027 
Trainaing Step:	 Iteration no=2842.0 Game Step =441 loss=9.01302433013916 
Trainaing Step:	 Iteration no=2843.0 Game Step =442 loss=9.873966217041016 
Trainaing Step:	 Iteration no=2844.0 Game Step =443 loss=9.476608276367188 
Trainaing Step:	 Iteration no=2845.0 Game Step =444 loss=6.921943187713623 
Trainaing Step:	 Iteration no=2846.0 Game Step =445 loss=7.8855

Trainaing Step:	 Iteration no=2938.0 Game Step =37 loss=5.990084171295166 
Trainaing Step:	 Iteration no=2939.0 Game Step =38 loss=7.384382247924805 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2940.0 Game Step =39 loss=5.143544673919678 
Trainaing Step:	 Iteration no=2941.0 Game Step =40 loss=2.4597084522247314 
Trainaing Step:	 Iteration no=2942.0 Game Step =41 loss=8.24842643737793 
Trainaing Step:	 Iteration no=2943.0 Game Step =42 loss=4.9273786544799805 
Trainaing Step:	 Iteration no=2944.0 Game Step =43 loss=6.0941691398620605 
Trainaing Step:	 Iteration no=2945.0 Game Step =44 loss=3.600449562072754 
Trainaing Step:	 Iteration no=2946.0 Game Step =45 loss=3.9024293422698975 
Trainaing Step:	 Iteration no=2947.0 Game Step =46 loss=6.374361515045166 
Trainaing Step:	 Iteration no=2948.0 Game Step =47 loss=4.951531887054443 
Trainaing Step:	 Iteration no=2949.0 Game Step =48 loss=3.083198308944702 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=2950.0 Game Step 

Trainaing Step:	 Iteration no=3043.0 Game Step =142 loss=9.308666229248047 
Trainaing Step:	 Iteration no=3044.0 Game Step =143 loss=2.3102850914001465 
Trainaing Step:	 Iteration no=3045.0 Game Step =144 loss=6.11238431930542 
Trainaing Step:	 Iteration no=3046.0 Game Step =145 loss=12.109393119812012 
Trainaing Step:	 Iteration no=3047.0 Game Step =146 loss=2.9734768867492676 
Trainaing Step:	 Iteration no=3048.0 Game Step =147 loss=5.748318672180176 
Trainaing Step:	 Iteration no=3049.0 Game Step =148 loss=8.60272216796875 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3050.0 Game Step =149 loss=4.724087715148926 
Trainaing Step:	 Iteration no=3051.0 Game Step =150 loss=12.163336753845215 
Trainaing Step:	 Iteration no=3052.0 Game Step =151 loss=8.477714538574219 
Trainaing Step:	 Iteration no=3053.0 Game Step =152 loss=8.076003074645996 
Trainaing Step:	 Iteration no=3054.0 Game Step =153 loss=6.566088676452637 
Trainaing Step:	 Iteration no=3055.0 Game Step =154 loss=5.54

Trainaing Step:	 Iteration no=3148.0 Game Step =247 loss=19.25175666809082 
Trainaing Step:	 Iteration no=3149.0 Game Step =248 loss=11.6796293258667 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3150.0 Game Step =249 loss=6.512792110443115 
Trainaing Step:	 Iteration no=3151.0 Game Step =250 loss=9.590129852294922 
Trainaing Step:	 Iteration no=3152.0 Game Step =251 loss=2.2150068283081055 
Trainaing Step:	 Iteration no=3153.0 Game Step =252 loss=14.69023323059082 
Trainaing Step:	 Iteration no=3154.0 Game Step =253 loss=11.873412132263184 
Trainaing Step:	 Iteration no=3155.0 Game Step =254 loss=4.630378246307373 
Trainaing Step:	 Iteration no=3156.0 Game Step =255 loss=14.08268928527832 
Trainaing Step:	 Iteration no=3157.0 Game Step =256 loss=10.22546100616455 
Trainaing Step:	 Iteration no=3158.0 Game Step =257 loss=5.98724889755249 
Trainaing Step:	 Iteration no=3159.0 Game Step =258 loss=3.123265027999878 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3160.0 G

Trainaing Step:	 Iteration no=3252.0 Game Step =351 loss=2.581843852996826 
Trainaing Step:	 Iteration no=3253.0 Game Step =352 loss=2.8929781913757324 
Trainaing Step:	 Iteration no=3254.0 Game Step =353 loss=1.519008994102478 
Trainaing Step:	 Iteration no=3255.0 Game Step =354 loss=2.923457384109497 
Trainaing Step:	 Iteration no=3256.0 Game Step =355 loss=1.9872891902923584 
Trainaing Step:	 Iteration no=3257.0 Game Step =356 loss=2.002000331878662 
Trainaing Step:	 Iteration no=3258.0 Game Step =357 loss=3.320899724960327 
Trainaing Step:	 Iteration no=3259.0 Game Step =358 loss=3.7304656505584717 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3260.0 Game Step =359 loss=1.425536870956421 
Trainaing Step:	 Iteration no=3261.0 Game Step =360 loss=2.13024640083313 
Trainaing Step:	 Iteration no=3262.0 Game Step =361 loss=1.8295865058898926 
Trainaing Step:	 Iteration no=3263.0 Game Step =362 loss=2.724886655807495 
Trainaing Step:	 Iteration no=3264.0 Game Step =363 loss=1.7

Trainaing Step:	 Iteration no=3356.0 Game Step =455 loss=2.308049201965332 
Trainaing Step:	 Iteration no=3357.0 Game Step =456 loss=4.730136394500732 
Trainaing Step:	 Iteration no=3358.0 Game Step =457 loss=1.9261001348495483 
Trainaing Step:	 Iteration no=3359.0 Game Step =458 loss=3.3730289936065674 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3360.0 Game Step =459 loss=3.542480707168579 
Trainaing Step:	 Iteration no=3361.0 Game Step =460 loss=2.9790830612182617 
Trainaing Step:	 Iteration no=3362.0 Game Step =461 loss=1.5495169162750244 
Trainaing Step:	 Iteration no=3363.0 Game Step =462 loss=2.288194417953491 
Trainaing Step:	 Iteration no=3364.0 Game Step =463 loss=3.939110040664673 
Trainaing Step:	 Iteration no=3365.0 Game Step =464 loss=6.942704200744629 
Trainaing Step:	 Iteration no=3366.0 Game Step =465 loss=3.1087889671325684 
Trainaing Step:	 Iteration no=3367.0 Game Step =466 loss=6.472250461578369 
Trainaing Step:	 Iteration no=3368.0 Game Step =467 loss=1

^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3460.0 Game Step =59 loss=3.6169533729553223 
Trainaing Step:	 Iteration no=3461.0 Game Step =60 loss=4.9005279541015625 
Trainaing Step:	 Iteration no=3462.0 Game Step =61 loss=3.1511001586914062 
Trainaing Step:	 Iteration no=3463.0 Game Step =62 loss=2.2649223804473877 
Trainaing Step:	 Iteration no=3464.0 Game Step =63 loss=3.017514228820801 
Trainaing Step:	 Iteration no=3465.0 Game Step =64 loss=2.240206241607666 
Trainaing Step:	 Iteration no=3466.0 Game Step =65 loss=2.6563405990600586 
Trainaing Step:	 Iteration no=3467.0 Game Step =66 loss=1.0977293252944946 
Trainaing Step:	 Iteration no=3468.0 Game Step =67 loss=3.8650929927825928 
Trainaing Step:	 Iteration no=3469.0 Game Step =68 loss=5.427079200744629 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3470.0 Game Step =69 loss=2.669651985168457 
Trainaing Step:	 Iteration no=3471.0 Game Step =70 loss=3.0826807022094727 
Trainaing Step:	 Iteration no=3472.0 Game 

Trainaing Step:	 Iteration no=3564.0 Game Step =163 loss=8.948101997375488 
Trainaing Step:	 Iteration no=3565.0 Game Step =164 loss=2.6014788150787354 
Trainaing Step:	 Iteration no=3566.0 Game Step =165 loss=9.281671524047852 
Trainaing Step:	 Iteration no=3567.0 Game Step =166 loss=2.52873158454895 
Trainaing Step:	 Iteration no=3568.0 Game Step =167 loss=2.747140645980835 
Trainaing Step:	 Iteration no=3569.0 Game Step =168 loss=2.332892894744873 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3570.0 Game Step =169 loss=7.099794387817383 
Trainaing Step:	 Iteration no=3571.0 Game Step =170 loss=2.9004151821136475 
Trainaing Step:	 Iteration no=3572.0 Game Step =171 loss=1.7243298292160034 
Trainaing Step:	 Iteration no=3573.0 Game Step =172 loss=3.317838430404663 
Trainaing Step:	 Iteration no=3574.0 Game Step =173 loss=2.7695059776306152 
Trainaing Step:	 Iteration no=3575.0 Game Step =174 loss=1.483799695968628 
Trainaing Step:	 Iteration no=3576.0 Game Step =175 loss=6.4

Trainaing Step:	 Iteration no=3669.0 Game Step =268 loss=1.9542840719223022 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3670.0 Game Step =269 loss=2.5869638919830322 
Trainaing Step:	 Iteration no=3671.0 Game Step =270 loss=2.0771048069000244 
Trainaing Step:	 Iteration no=3672.0 Game Step =271 loss=1.72892427444458 
Trainaing Step:	 Iteration no=3673.0 Game Step =272 loss=1.5182794332504272 
Trainaing Step:	 Iteration no=3674.0 Game Step =273 loss=3.8442866802215576 
Trainaing Step:	 Iteration no=3675.0 Game Step =274 loss=1.1716151237487793 
Trainaing Step:	 Iteration no=3676.0 Game Step =275 loss=3.4489595890045166 
Trainaing Step:	 Iteration no=3677.0 Game Step =276 loss=1.9328786134719849 
Trainaing Step:	 Iteration no=3678.0 Game Step =277 loss=1.1462877988815308 
Trainaing Step:	 Iteration no=3679.0 Game Step =278 loss=2.063565254211426 
^^^^ saving model ^^^^ 

Trainaing Step:	 Iteration no=3680.0 Game Step =279 loss=2.4440789222717285 
Trainaing Step:	 Iteration no

KeyboardInterrupt: 

In [None]:
# # a=np.arange(6).reshape(3,2)
# # b=np.empty(6,type(a))
# # print (b)
# # b[1]=a
# # print (b.shape)
# # print(type(b))
# # np.isnan(np.array(b))
# a=np.array([[1,2],[3,4]])
# # a=np.array(a)
# print (1,*a.shape)
# print (a.dtype)

In [None]:
# Show the image shape reported by the environment wrapper.
print(env.image_shape)