In [None]:
import numpy as np
import gym
import tensorflow as tf
import time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, Input
import matplotlib.pyplot as plt
%matplotlib inline

# tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR) # stop displaying warnings that mean nothing!!!

In [None]:
# !pip install autorom
# !autorom
%pip install -U gym>=0.21.0
%pip install -U gym[atari,accept-rom-license]

In [None]:
# Mount Google Drive at /content/drive; the model checkpoint used by later
# cells is loaded from and saved to a folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
env = gym.make("Asteroids-v0") # create the Asteroids environment (play1game below builds its own separate instance)

  


In [None]:
def prepro(I):
    """Reduce a raw RGB frame to a small binary mask.

    The top 16 rows are discarded (the original author's note: the score
    display does not matter for play), every second row and column of colour
    channel 0 is kept, pixel values 144 and 109 are zeroed, and every remaining
    non-zero pixel is set to 1.  For a 210x160x3 frame this yields a 97x80
    array with the same dtype as the input.  The input is not modified.

    NOTE(review): 144 and 109 are described as "background" values by the
    original comments, which also mention paddles and a ball -- confirm they
    are the right background values for Asteroids.
    """
    # Crop, subsample and select channel 0 in one go; copy so the caller's
    # frame is never written to.
    small = I[16:, :, :][::2, ::2, 0].copy()
    background = (small == 144) | (small == 109)
    small[background] = 0
    small[small != 0] = 1
    return np.array(small)

In [None]:
def create_model(height,width,channels):
    """Build and compile the convolutional value network.

    Parameters
    ----------
    height, width, channels : int
        Shape of one network input, ``(height, width, channels)``.

    Returns
    -------
    A compiled Keras ``Model`` with 14 separate single-unit linear outputs
    named ``out0`` ... ``out13``, trained with mean squared error and Adam
    at a learning rate of 1e-4.
    """
    frames_in = Input(shape=(height, width, channels))

    # Shared trunk: two strided convolutions followed by one dense layer.
    hidden = Conv2D(16, (8, 8), strides=4, activation='relu')(frames_in)
    hidden = Conv2D(32, (4, 4), strides=2, activation='relu')(hidden)
    hidden = Flatten()(hidden)
    hidden = Dense(256, activation='relu')(hidden)

    # One scalar head per output, created in index order so the output list
    # lines up with the names out0..out13.
    heads = [Dense(1, activation='linear', name='out%d' % k)(hidden)
             for k in range(14)]

    net = Model(frames_in, heads)
    net.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
                loss='mean_squared_error')
    return net

In [None]:
# Number of consecutive preprocessed frames stacked on the last axis of each
# network input.
frames_to_net = 4
# Action ids passed to env.step(); the position in this list is the index of
# the matching network output.
possible_actions = list(range(14))

# Left commented out: building a fresh network instead of loading a saved one.
# mod2 = create_model(97,80,frames_to_net)
# mod2.call = tf.function(mod2.call,experimental_relax_shapes=True)

# mod2.summary()

In [None]:
def play1game(model,ep):
    """Play one epsilon-greedy episode of Asteroids and record the trajectory.

    Parameters
    ----------
    model : callable
        Invoked as ``model(feed, training=False)`` on a
        ``(1, 97, 80, frames_to_net)`` array; must return one tensor per entry
        of ``possible_actions``, each indexable as ``[0, 0]``.
    ep : float
        Probability of taking a uniformly random action on each step.

    Returns
    -------
    frame_array : list
        The preprocessed frame observed before each action.
    action_array : list of int
        Index into ``possible_actions`` chosen on each step.
    reward_array : list
        Exactly one training reward per step: -1 on the terminal step,
        1 when the environment reward is non-zero, otherwise 0.
    score : number
        Sum of the raw environment rewards over the episode.
    """
    env0 = gym.make("Asteroids-v0")
    try:
        pix = prepro(env0.reset())

        # Most recent frame lives in channel 0, older frames in higher channels.
        feed = np.zeros((1,97,80,frames_to_net))
        feed[0,:,:,0] = pix

        frame_array = []
        action_array = []
        reward_array = []

        score = 0
        done = False
        while not done:
            if np.random.random() < ep:
                # Explore over the whole action set, not just the first three.
                action = np.random.choice(len(possible_actions))
            else:
                # Use the model that was passed in rather than a global.
                q_values = [head[0,0].numpy() for head in model(feed,training=False)]
                action = np.argmax(q_values)

            pix_new, reward, done, info = env0.step(possible_actions[action])

            frame_array.append(pix)
            action_array.append(action)
            # Append exactly one reward per step so the three lists stay
            # index-aligned for the training loop.
            if done:
                reward_array.append(-1)
            elif reward != 0:
                reward_array.append(1)
            else:
                reward_array.append(0)

            pix = prepro(pix_new)

            # Age the stack by one slot, then insert the newest frame.
            feed[0,:,:,1:] = feed[0,:,:,:-1].copy()
            feed[0,:,:,0] = pix
            score += reward
    finally:
        # Release the emulator even if stepping or inference raises.
        env0.close()
    return frame_array, action_array, reward_array, score

In [None]:
# Minibatch size handed to model.fit after every game.
nbatch = 32
# Discount factor applied to the bootstrapped next-state value.
delt = 0.99
# Number of games to play and train on.
ngames = 10000
# Per-game exploration rate, decreasing linearly from 0.135 to 0.05.
epsvec = np.linspace(start=0.135, stop=0.05, num=ngames)

In [None]:
# Resume training from the checkpoint stored on the mounted Google Drive.
# NOTE(review): a saved model must already exist at this path for the cell to
# run on a fresh kernel -- confirm how the first checkpoint is produced.
mod2 = tf.keras.models.load_model("/content/drive/MyDrive/Opti/model2")
# Replace the model's call with a tf.function-wrapped version; presumably to
# speed up the per-step inference in play1game -- confirm.
mod2.call = tf.function(mod2.call, experimental_relax_shapes = True)

In [None]:
# Main training loop: play one game, rebuild the stacked-frame inputs for every
# step, compute a one-step target for the action actually taken, and fit the
# network on that game only.  A checkpoint is written every 50 games.
n_actions = len(possible_actions)

for game in range(ngames):
    start = time.time()
    frames, actions, rewards, score = play1game(mod2,epsvec[game])

    nframes = len(frames)
    current_frames = np.zeros((nframes,97,80,frames_to_net))
    future_frames = np.zeros((nframes,97,80,frames_to_net))

    # Channel f of step `grab` holds the frame from f steps earlier, mirroring
    # the stack play1game fed to the network.  Bounds are inclusive of frame 0
    # and of the last recorded frame; slots with no frame stay zero.
    for grab in range(nframes):
        for f in range(frames_to_net):
            if grab-f >= 0:
                current_frames[grab,:,:,f] = frames[grab-f]
            if 0 <= grab-f+1 < nframes:
                future_frames[grab,:,:,f] = frames[grab-f+1]
    target_vf = mod2.predict(future_frames)

    # targets[a] / weights[a] belong to output head 'out<a>'.  A weight of 1
    # marks the steps on which action a was taken; every other step has
    # weight 0 for that head and therefore does not contribute to its loss.
    targets = np.zeros((n_actions,nframes,1))
    weights = np.zeros((n_actions,nframes))

    for grab in range(nframes):
        rhs = rewards[grab]
        if rhs == 0:
            # No reward on this step: bootstrap from the best next-state value.
            rhs = delt*np.max([head[grab] for head in target_vf])
        # Route the target to the head of the action that was actually taken.
        taken = actions[grab]
        targets[taken,grab,0] = rhs
        weights[taken,grab] = 1

    mod2.fit(current_frames,[targets[a] for a in range(n_actions)],
             epochs=1,batch_size=nbatch,verbose=0,
             sample_weight={'out%d' % a: weights[a] for a in range(n_actions)},
             use_multiprocessing=True)
    stop = time.time()
    if game % 50 == 0:
        mod2.save('/content/drive/MyDrive/Opti/model2')
        print("Games: ",game)
        print("Time: ", stop - start)
        print("Score: ", score)