In [41]:
import numpy as np
from IPython.display import clear_output
import tensorflow as tf
import time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, Input, MaxPool2D
from tensorflow.keras import regularizers

In [2]:
tf.test.gpu_device_name()

''

# board structure
instead of using colors, we'll use +1 for one player and -1 for the other player.

In [3]:
def update_board(board_temp,color,column):
    # this is a function that takes the current board status, a color, and a column and outputs the new board status
    # columns 0 - 6 are for putting a checker on the board: if column is full just return the current board...this should be forbidden by the player
    # columns 7 - 13 are for pulling a checker off the board: this does not check if removing the checker is allowed...
    # 
    # the color input should be either 'plus' or 'minus'
    
    board = board_temp.copy()
    ncol = board.shape[1]
    nrow = board.shape[0]
    if column < ncol: # drop a checker on the board
        row = -1
        # start by assuming you can't add to the column
        # loop through the rows checking if you can go to each row or not
        for check in range(nrow):
            if (board[check,column]!=0):
                break # if this row is occupied, you're done
            else: # otherwise, you can go on this row!
                row += 1

        if row >= 0: # if you can add to the column
            if color == 'plus': # check the color
                board[row,column] = 1
            else:
                board[row,column] = -1
        return board
    else:
        column -= ncol
        if column >= ncol:
            return board # can't play anything bigger than 13...
        board[1:,column] = board[:-1,column].copy()
        board[0,column] = 0
        return board

In [4]:
board = np.zeros((6,7))
board = update_board(board,'plus',3)
print(board)
print('---------------------')
board = update_board(board,'minus',3)
print(board)
print('---------------------')
board = update_board(board,'plus',10)
print(board)

[[0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]]
---------------------
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]
 [ 0.  0.  0.  1.  0.  0.  0.]]
---------------------
[[ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  0. -1.  0.  0.  0.]]


In [5]:
def check_for_win(board):
    # this function checks to see if anyone has won on the given board
    nrow = board.shape[0]
    ncol = board.shape[1]
    winner = 'nobody'
    for row in range(nrow):
        for col in range(ncol):
            # check for vertical winners
            if row <= (nrow-4): # can't have a column go from rows 4-7...
                if np.sum(board[row:(row+4),col])==4:
                    winner = 'v-plus'
                    return winner
                elif np.sum(board[row:(row+4),col])==-4:
                    winner = 'v-minus'
                    return winner
            # check for horizontal winners
            if col <= (ncol-4):
                if np.sum(board[row,col:(col+4)])==4:
                    winner = 'h-plus'
                    return winner
                elif np.sum(board[row,col:(col+4)])==-4:
                    winner = 'h-minus'
                    return winner
            # check for top left to bottom right diagonal winners
            if (row <= (nrow-4)) and (col <= (ncol-4)):
                if np.sum(board[range(row,row+4),range(col,col+4)])==4:
                    winner = 'd-plus'
                    return winner
                elif np.sum(board[range(row,row+4),range(col,col+4)])==-4:
                    winner = 'd-minus'
                    return winner
            # check for top right to bottom left diagonal winners
            if (row <= (nrow-4)) and (col >= 3):
                if np.sum(board[range(row,row+4),range(col,col-4,-1)])==4:
                    winner = 'd-plus'
                    return winner
                elif np.sum(board[range(row,row+4),range(col,col-4,-1)])==-4:
                    winner = 'd-minus'
                    return winner
    return winner

In [6]:
def display_board(board):
    # this function displays the board as ascii using X for +1 and O for -1
    clear_output()
    horizontal_line = '-'*(7*5+8)
    blank_line = '|'+' '*5
    blank_line *= 7
    blank_line += '|'
    print(horizontal_line)
    for row in range(6):
        print(blank_line)
        this_line = '|'
        for col in range(7):
            if board[row,col] == 0:
                this_line += ' '*5 + '|'
            elif board[row,col] == 1:
                this_line += '  X  |'
            else:
                this_line += '  O  |'
        print(this_line)
        print(blank_line)
        print(horizontal_line)

            

In [7]:
[0,1,2][1]

1

In [8]:
def is_legal(board, player, move):
    player = {"plus": 1, "minus": -1}[player]
    if(move < 7):
        return board[0,move] == 0
    return board[-1, move%7] == player

def where_legal(board, player):
    # Return indices of where legal to move
    player = {"plus": 1, "minus": -1}[player]
    answer = np.zeros(14)
    answer[:7] = board[0] == 0
    answer[7:] = board[-1] == player
    return answer

In [9]:
# this is how you can play a game
winner = 'nobody'
board = np.zeros((6,7))
display_board(board)
player = 'plus'
while winner == 'nobody':
    display_board(board)
    legal_move = False
    while(not legal_move):
        move = input('Pick a move (0-13) for player '+player+': ')
        move = int(move)
        legal_move = is_legal(board, player, move)
    board = update_board(board,player,move)
    winner = check_for_win(board)
    if player == 'plus':
        player = 'minus'
    else:
        player = 'plus'
print('The winner is '+winner)
    
    

-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     | 

ValueError: invalid literal for int() with base 10: 'c'

In [None]:
where_legal(board, "plus")

In [10]:
def create_model(height,width,channels):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,channels))
    mid = Conv2D(32, (2,2), activation='tanh')(imp)
    mid = Conv2D(64, (2,2), activation='tanh')(mid)
    mid = Conv2D(64, (2,2), activation='tanh')(mid)
    mid = Flatten()(mid)
    mid = Dense(128,activation='relu')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    mid = Dense(64,activation='relu')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    mid = Dense(64,activation='relu')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    out = Dense(14, activation='linear')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    model = Model(imp,out) 
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
    
    return model

In [11]:
def create_model2(height,width,channels):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,channels))
    mid = Conv2D(64, (4,4), activation='relu')(imp)
    mid = Conv2D(64, (2,2), activation='relu')(mid)
    mid = Conv2D(64, (2,2), activation='relu')(mid)
    mid = Flatten()(mid)
    mid = Dense(100,activation='relu')(mid)
    mid = Dense(64,activation='relu')(mid) 
    out = Dense(14, activation='tanh')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    model = Model(imp,out) 
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
    
    return model

In [12]:
def create_model3(height,width,channels):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,channels))
    mid = Conv2D(50, (4,4), activation='relu')(imp)
    mid = Conv2D(75, (2,2), activation='relu')(mid)
    mid = Conv2D(100, (2,2), activation='relu')(mid)
    mid = Flatten()(mid)
    mid = Dense(64,activation='relu')(mid)
    mid = Dense(64,activation='relu')(mid) 
    out = Dense(14, activation='tanh')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    model = Model(imp,out) 
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
    
    return model

In [44]:
def best_move(board, player):
    # Returns best move for a model
    mod = players[player]
    values = mod(np.expand_dims(board,[0,3]),training=False)
    legal_values = values + 2*where_legal(board, player)
    return np.argmax(legal_values[0])

def best_value(board, player):
    # Returns value for the best move of a model
    mod = players[player]
    values = mod(np.expand_dims(board,[0,3]),training=False)
    legal_values = values + 2*where_legal(board, player)
    return np.max(legal_values[0]) - 2

In [80]:
warmupframes = 10000
max_frames = 10000000+warmupframes
tot_frames = 0
anneal1 = 10000
anneal2 = 100000
batch_size = 32
buffn = 5000

len_buff = 0
ep0 = 0.95
ep1 = 0.1
ep2 = 0.05
ep = ep0
dep1 = (ep0-ep1)/anneal1
dep2 = (ep1-ep2)/anneal2
buffer = {"plus":[],
          "minus":[]}
buffer_nf = {"plus":[],
              "minus":[]}
buffer_ac = {"plus":[],
              "minus":[]}
buffer_rw = {"plus":[],
              "minus":[]}
mse_loss = tf.keras.losses.MeanSquaredError()

game_num = 0

In [56]:
frames_to_net = 1              # how many previous frames will we feed the NN
possible_actions = np.arange(14)
delt = 0.95
player1 = create_model_split(6,7,frames_to_net)
player1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
player1.call = tf.function(player1.call,experimental_relax_shapes=True,reduce_retracing=True)

player2 = create_model_split(6,7,frames_to_net)
player2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
player2.call = tf.function(player2.call,experimental_relax_shapes=True,reduce_retracing=True)

players = {"plus":player1, "minus":player2 }

player1.summary()

NameError: name 'create_model_split' is not defined

In [277]:
while tot_frames < max_frames:
    winner = 'nobody'
    board = np.zeros((6,7))
    player = 'plus'
    frames_this_game = 0
    st = time.time()
    game_num += 1
    feed = np.zeros((6,7,1))
    while winner == 'nobody':
        # Select which model ur looking at
        mod = players[player]
        
        # If random, make random move
        if np.random.random() < ep:
            possible_moves = where_legal(board,player).nonzero()[0]
            move = np.random.choice(possible_moves)
        # Otherwise, pick the best move
        else:
            was_legal = False
            vf = mod(np.array([feed]), training = False)
            vf = np.array(vf[0])
            move = np.argmax(vf)
            was_legal = is_legal(board, player, move)
            # Punish illegal moves
            while(not was_legal):
                # Guarantee illegal move not picked again
                vf[move] = -1
                move = np.argmax(vf)
                
                # Add illegal move to memory buffer, with reward -1
                #buffer[player].append(feed.copy())
                #buffer_nf[player].append(feed.copy())
                #buffer_ac[player].append(move)
                #buffer_rw[player].append(-1)
                
                was_legal = is_legal(board, player, move)
            
        # update frames
        buffer[player].append(feed.copy())
        buffer_nf[player].append(feed.copy())
        
        # Make moves
        board = update_board(board,player,move)
        feed[:,:,0] = board
        
        # Check for winner
        winner = check_for_win(board)
        
        # update new frames
        buffer_nf[player].append(feed.copy())
        buffer_ac[player].append(move)
        buffer_rw[player].append(1 if winner[2:] == player else -1 if winner != "nobody" else 0)
        
        #update framecount
        tot_frames += 1
        
        # update weights and biases
        if tot_frames > warmupframes:
            # anneal epsilon
            if ep > ep1:
                ep -= dep1
            elif ep > ep2:
                ep -= dep2
            for possible_player in ["plus", "minus"]:
                # Select which model ur looking at
                mod = players[possible_player]
                other_player = "plus" if possible_player == "minus" else "minus"
                opp_mod = players[other_player]
                # how many frames are there to choose from
                lbuff = len(buffer[possible_player])
                
                
                # Sample random frames
                which_choose = np.random.choice(lbuff,batch_size,replace=False)
                
                # Grab current frames
                current_frames = np.array(buffer[possible_player])[which_choose]
                next_frames = np.array(buffer_nf[possible_player])[which_choose]
                actions = np.zeros((batch_size,2))
                actions[:,0] = np.arange(batch_size)
                actions[:,1] = np.array(buffer_ac[possible_player])[which_choose]
                rewards = np.array(buffer_rw[possible_player])[which_choose]
                

                # Construct next frames
                # Find opponent's rewards for moves
                op_rewards = opp_mod(next_frames, training = False)
                
                # Check legal moves
                legal_moves = np.array([where_legal(next_frame,other_player) for next_frame in np.squeeze(next_frames,3)])
                
                # Force opponent to only pick from legal moves
                op_rewards = opp_mod(next_frames) + 2 * legal_moves
                
                # Find opponent's best move
                op_moves = tf.argmax(op_rewards, axis = 1)
                # Play opponent's moves
                next_frames = np.array([update_board(next_frames[i], other_player, op_moves[i]) for i in range(batch_size)])
                
                # Check for winner after opponent's moves
                pos_win = [check_for_win(frame)[2:] for frame in np.squeeze(next_frames,3)]
                win = np.array([-1 if won == other_player else 1 if won == possible_player else 0 for won in pos_win])
                
                # Update rewards for if opponent forces win
                rewards = rewards*(rewards != 0) + win*(rewards == 0)
                
                # Return rewards
                target_vals = mod(next_frames)
                # Make it so target only exists where legal moves exist
                where_legal_targets = np.array([where_legal(np.squeeze(next_frame), possible_player) for next_frame in next_frames])
                target_vals = np.array(target_vals) + where_legal_targets * 2
                target = tf.reduce_max(target_vals,axis = 1) - 2
                y = delt*target*(rewards==0) + rewards
                
                
                # gradienttape is pretty cool
                # if you ONLY do tf operations inside the tape, then TF can auto-differentiate!
                with tf.GradientTape() as tape:
                    # make prediction on x data
                    pred = tf.gather_nd(mod(current_frames,training=False),actions.astype('int32'))
                    # tf.gather_nd is the tf operation to evaluate only particular entries in the array, as specified by the actions list
                    loss = mse_loss(y,pred) # calculate mse of truth - prediction
                # calculate the gradient of the stuff inside the tape
                gradient = tape.gradient(loss,mod.trainable_variables)
                # do 1 step of sgd (with the optimizer specified above)
                mod.optimizer.apply_gradients(zip(gradient,mod.trainable_variables))
        frames_this_game += 1
        # swap player
        player = 'plus' if player == 'minus' else 'minus'
        
    # Check if buffer needs to be updated.
    for possible_player in ["plus", "minus"]:
        player_buffer = buffer[possible_player]
        lbuff = len(player_buffer)
        if lbuff > buffn:
            excess = lbuff - buffn
            buffer[possible_player] = player_buffer[excess:].copy()
            buffer_nf[possible_player] = buffer_nf[possible_player][excess:].copy()
            buffer_rw[possible_player] = buffer_rw[possible_player][excess:].copy()
            buffer_ac[possible_player] = buffer_ac[possible_player][excess:].copy()
            len_buff = len(player_buffer)
    print(game_num,winner,ep,tot_frames,time.time()-st)

InvalidArgumentError: Exception encountered when calling layer "model_8" "                 f"(type Functional).

Graph execution error:

Detected at node 'conv2d_19/Relu' defined at (most recent call last):
    File "C:\ProgramData\Anaconda3\lib\runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\ProgramData\Anaconda3\lib\runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\ProgramData\Anaconda3\lib\site-packages\traitlets\config\application.py", line 845, in launch_instance
      app.start()
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 612, in start
      self.io_loop.start()
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 149, in start
      self.asyncio_loop.run_forever()
    File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 570, in run_forever
      self._run_once()
    File "C:\ProgramData\Anaconda3\lib\asyncio\base_events.py", line 1859, in _run_once
      handle._run()
    File "C:\ProgramData\Anaconda3\lib\asyncio\events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
      lambda f: self._run_callback(functools.partial(callback, future))
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
      ret = callback()
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\gen.py", line 787, in inner
      self.run()
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\gen.py", line 748, in run
      yielded = self.gen.send(value)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
      yield gen.maybe_future(dispatch(*args))
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
      yielded = next(result)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
      yield gen.maybe_future(handler(stream, idents, msg))
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
      yielded = next(result)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 543, in execute_request
      self.do_execute(
    File "C:\ProgramData\Anaconda3\lib\site-packages\tornado\gen.py", line 209, in wrapper
      yielded = next(result)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 306, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\ProgramData\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2877, in run_cell
      result = self._run_cell(
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2923, in _run_cell
      return runner(coro)
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3146, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3338, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "<ipython-input-213-cc2cf45fb5f6>", line 1, in <module>
      play_game(player1,opposing_minus, use_filters_plus = False, use_filters_minus = True)
    File "<ipython-input-200-03e7d7861cd1>", line 13, in play_game
      vf = mod(np.expand_dims(board,[0,3]),training=False)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\training.py", line 557, in __call__
      return super().__call__(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\functional.py", line 510, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\functional.py", line 667, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\engine\base_layer.py", line 1097, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\utils\traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\layers\convolutional\base_conv.py", line 314, in call
      return self.activation(outputs)
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\activations.py", line 317, in relu
      return backend.relu(
    File "C:\ProgramData\Anaconda3\lib\site-packages\keras\backend.py", line 5366, in relu
      x = tf.nn.relu(x)
Node: 'conv2d_19/Relu'
input depth must be evenly divisible by filter depth: 1 vs 2
	 [[{{node conv2d_19/Relu}}]] [Op:__inference_call_352839810]

Call arguments received by layer "model_8" "                 f"(type Functional):
  • inputs=tf.Tensor(shape=(1, 6, 7, 1), dtype=float64)
  • training=False
  • mask=None

In [47]:
def create_split_model(height,width,channels):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,channels))
    mid = Conv2D(16, (2,2), activation='relu')(imp)
    mid = Flatten()(mid)
    mid = Dense(128,activation='relu')(mid)
    mid = Dense(128,activation='relu')(mid) 
    mid = Dense(128,activation='relu')(mid) 
    out = Dense(14, activation='tanh')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    model = Model(imp,out) 
    return model

In [81]:
def create_split_model2(height,width,channels):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,channels))
    mid = Conv2D(16, (2,2), activation='relu', kernel_regularizer=regularizers.L1(0.01))(imp)
    mid = Conv2D(32, (2,2), activation='relu', kernel_regularizer=regularizers.L1(0.01))(mid)
    mid = Conv2D(64, (2,2), activation='relu', kernel_regularizer=regularizers.L1(0.01))(mid)
    mid = Flatten()(mid)
    mid = Dropout(.2)(mid)
    mid = Dense(128,activation='relu')(mid)
    mid = Dense(128,activation='relu')(mid)
    out = Dense(14, activation='tanh')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    model = Model(imp,out) 
    return model

In [82]:

possible_actions = np.arange(14)
delt = 0.97
player1 = create_split_model2(6,7,2)
player1.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
player1.call = tf.function(player1.call,experimental_relax_shapes=True,reduce_retracing=True)

player2 = create_split_model2(6,7,2)
player2.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),loss='mean_squared_error')
player2.call = tf.function(player2.call,experimental_relax_shapes=True,reduce_retracing=True)

players = {"plus":player1, "minus":player2 }

player1.summary()

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 6, 7, 2)]         0         
                                                                 
 conv2d_19 (Conv2D)          (None, 5, 6, 16)          144       
                                                                 
 conv2d_20 (Conv2D)          (None, 4, 5, 32)          2080      
                                                                 
 conv2d_21 (Conv2D)          (None, 3, 4, 64)          8256      
                                                                 
 flatten_9 (Flatten)         (None, 768)               0         
                                                                 
 dropout_5 (Dropout)         (None, 768)               0         
                                                                 
 dense_28 (Dense)            (None, 128)               9843

In [78]:
def split_filters(board):
    three_filter_board  = np.zeros((6,7,2))
    three_filter_board[:,:,0] = board == -1
    three_filter_board[:,:,1] = board == 1
    return three_filter_board

In [83]:
ep2 = 0.1

In [283]:
# Identical to previous verision, but works for (6,7,2) input
while tot_frames < max_frames:
    winner = 'nobody'
    board = np.zeros((6,7))
    player = 'plus'
    frames_this_game = 0
    st = time.time()
    game_num += 1
    moves_this_game = 0
    feed = np.zeros((6,7,2))
    while winner == 'nobody':
        # Select which model ur looking at
        mod = players[player]
        
        # If random, make random move
        if np.random.random() < ep:
            possible_moves = where_legal(board,player).nonzero()[0]
            move = np.random.choice(possible_moves)
        # Otherwise, pick the best move
        else:
            was_legal = False
            vf = mod(np.array([feed]), training = False)
            vf = np.array(vf[0])
            move = np.argmax(vf)
            was_legal = is_legal(board, player, move)
            # Punish illegal moves
            while(not was_legal):
                # Guarantee illegal move not picked again
                vf[move] = -1
                move = np.argmax(vf)
                
                # Add illegal move to memory buffer, with reward -1
#                 buffer[player].append(feed.copy())
#                 n_moves[player].append(moves_this_game)
#                 buffer_nf[player].append(feed.copy())
#                 buffer_ac[player].append(move)
#                 buffer_rw[player].append(-1)
                
                was_legal = is_legal(board, player, move)
            
        # update frames
        buffer[player].append(feed.copy())
        n_moves[player].append(moves_this_game)
        
        # Make moves
        board = update_board(board,player,move)
        feed[:,:,:] = split_filters(board)
        moves_this_game += 1
        
        # Check for winner
        winner = check_for_win(board)
        
        # update new frames
        buffer_nf[player].append(feed.copy())
        buffer_ac[player].append(move)
        buffer_rw[player].append(1 if winner[2:] == player else -1 if winner != "nobody" else 0)
        
        #update framecount
        tot_frames += 1
        
        # update weights and biases
        if tot_frames > warmupframes:
            # anneal epsilon
            if ep > ep1:
                ep -= dep1
            elif ep > ep2:
                ep -= dep2
            for possible_player in ["plus", "minus"]:
                # Select which model ur looking at
                mod = players[possible_player]
                other_player = "plus" if possible_player == "minus" else "minus"
                opp_mod = players[other_player]
                # how many frames are there to choose from
                lbuff = len(buffer[possible_player])
                
                # Wait the chances of choosing each sample
                p = (np.array(n_moves[possible_player]) + 20)
                p = p/np.sum(p)
                # Sample random frames
                which_choose = np.random.choice(lbuff,batch_size,replace=False, p = p)
                
                # Grab current frames
                current_frames = np.array(buffer[possible_player])[which_choose]
                next_frames = np.array(buffer_nf[possible_player])[which_choose]
                next_frames_board = next_frames[:,:,:,1]-next_frames[:,:,:,0]
                actions = np.zeros((batch_size,2))
                actions[:,0] = np.arange(batch_size)
                actions[:,1] = np.array(buffer_ac[possible_player])[which_choose]
                rewards = np.array(buffer_rw[possible_player])[which_choose]
                

                # Construct next frames
                # Find opponent's rewards for moves
                op_rewards = opp_mod(next_frames, training = False)
                
                # Check legal moves
                legal_moves = np.array([where_legal(next_frame,other_player) for next_frame in next_frames_board])
                
                # Force opponent to only pick from legal moves
                op_rewards = opp_mod(next_frames) + 2 * legal_moves
                
                # Find opponent's best move
                op_moves = tf.argmax(op_rewards, axis = 1)
                # Play opponent's moves
                next_frames = np.array([update_board(next_frames_board[i], other_player, op_moves[i]) for i in range(batch_size)])
                
                # Check for winner after opponent's moves
                pos_win = [check_for_win(frame)[2:] for frame in next_frames]
                win = np.array([-1 if won == other_player else 1 if won == possible_player else 0 for won in pos_win])
                
                # Update rewards for if opponent forces win
                rewards = rewards*(rewards != 0) + win*(rewards == 0)
                
                # Fix formatting of next_frames
                next_frames = np.array([split_filters(next_frame) for next_frame in next_frames])
                
                # Return rewards
                target_vals = mod(next_frames)
                
                # Make it so target only exists where legal moves exist
                #where_legal_targets = np.array([where_legal(np.squeeze(next_frame), possible_player) for next_frame in next_frames_board])
                #target_vals = np.array(target_vals) + where_legal_targets * 2
                target = tf.reduce_max(target_vals,axis = 1)
                y = delt*target*(rewards==0) + rewards*1.1
                
                
                # gradienttape is pretty cool
                # if you ONLY do tf operations inside the tape, then TF can auto-differentiate!
                with tf.GradientTape() as tape:
                    # make prediction on x data
                    pred = tf.gather_nd(mod(current_frames,training=False),actions.astype('int32'))
                    # tf.gather_nd is the tf operation to evaluate only particular entries in the array, as specified by the actions list
                    loss = mse_loss(y,pred) # calculate mse of truth - prediction
                # calculate the gradient of the stuff inside the tape
                gradient = tape.gradient(loss,mod.trainable_variables)
                # do 1 step of sgd (with the optimizer specified above)
                mod.optimizer.apply_gradients(zip(gradient,mod.trainable_variables))
        frames_this_game += 1
        # swap player
        player = 'plus' if player == 'minus' else 'minus'
        
    # Check if buffer needs to be updated.
    for possible_player in ["plus", "minus"]:
        player_buffer = buffer[possible_player]
        lbuff = len(player_buffer)
        if lbuff > buffn:
            excess = lbuff - buffn
            buffer[possible_player] = player_buffer[excess:].copy()
            buffer_nf[possible_player] = buffer_nf[possible_player][excess:].copy()
            buffer_rw [possible_player] = buffer_rw[possible_player][excess:].copy()
            buffer_ac[possible_player] = buffer_ac[possible_player][excess:].copy()
            
            n_moves[possible_player] = n_moves[possible_player][excess:].copy()
            len_buff = len(player_buffer)
    print(game_num,winner,ep,tot_frames,time.time()-st)

11309 v-minus 0.0986944999999987 224142 4.023935317993164
11310 v-plus 0.09868599999999869 224159 5.464834690093994
11311 h-plus 0.09867549999999868 224180 6.864807367324829
11312 d-minus 0.09866549999999867 224200 6.659410715103149
11313 v-plus 0.09865699999999866 224217 5.617711544036865
11314 d-plus 0.09864549999999865 224240 7.704020261764526
11315 d-plus 0.09863199999999864 224267 8.755953550338745
11316 h-plus 0.09862749999999863 224276 2.898634672164917
11317 v-minus 0.09861849999999862 224294 5.948713541030884
11318 h-plus 0.09860949999999861 224312 5.645173072814941
11319 v-plus 0.09860299999999861 224325 4.270042657852173
11320 d-plus 0.0985914999999986 224348 7.311254262924194
11321 v-plus 0.09858099999999859 224369 6.738154649734497
11322 h-plus 0.09857149999999858 224388 6.269340991973877
11323 v-minus 0.09856349999999857 224404 5.137086629867554
11324 h-plus 0.09855299999999856 224425 7.009932994842529
11325 v-minus 0.09854699999999855 224437 3.9025461673736572
11326 d-mi

11450 h-plus 0.09731899999999732 226893 5.327517032623291
11451 v-minus 0.0972999999999973 226931 12.052744626998901
11452 d-minus 0.0972939999999973 226943 4.0416786670684814
11453 v-plus 0.09728449999999729 226962 6.067598104476929
11454 d-minus 0.09727749999999728 226976 4.772337198257446
11455 h-plus 0.09726949999999727 226992 5.11066460609436
11456 h-plus 0.09725599999999726 227019 8.777503252029419
11457 v-minus 0.09724599999999725 227039 6.374061107635498
11458 h-plus 0.09723549999999724 227060 6.564022064208984
11459 h-minus 0.09722849999999723 227074 4.629206895828247
11460 h-plus 0.09722099999999723 227089 4.735205411911011
11461 v-minus 0.09721199999999722 227107 5.7257239818573
11462 h-plus 0.09720349999999721 227124 5.363943099975586
11463 v-minus 0.0971974999999972 227136 3.7544126510620117
11464 h-minus 0.09718749999999719 227156 6.394442319869995
11465 v-plus 0.09717899999999718 227173 5.337374925613403
11466 d-plus 0.09716549999999717 227200 8.440043210983276
11467 h-p

11591 h-plus 0.09593799999999594 229655 6.583357095718384
11592 v-minus 0.09593299999999594 229665 3.1065099239349365
11593 h-minus 0.09592299999999593 229685 6.2291436195373535
11594 v-plus 0.09591049999999592 229710 7.771546125411987
11595 h-minus 0.09590249999999591 229726 4.995940446853638
11596 d-minus 0.0958904999999959 229750 7.607456207275391
11597 h-plus 0.09588299999999589 229765 4.6473774909973145
11598 d-plus 0.09587099999999588 229789 7.47223687171936
11599 h-plus 0.09586249999999587 229806 5.295980215072632
11600 v-minus 0.09585649999999586 229818 3.903548002243042
11601 h-plus 0.09584499999999585 229841 7.399169206619263
11602 v-plus 0.09583449999999584 229862 6.801931858062744
11603 h-plus 0.09583099999999584 229869 2.288097858428955
11604 v-minus 0.09581999999999583 229891 7.179627418518066
11605 v-minus 0.09581199999999582 229907 5.119643211364746
11606 h-minus 0.09580199999999581 229927 6.42252516746521
11607 h-minus 0.0957909999999958 229949 7.226770639419556
11608 

11732 h-plus 0.09468549999999469 232160 5.4959728717803955
11733 h-plus 0.09467999999999469 232171 3.4186313152313232
11734 v-plus 0.09466749999999467 232196 8.098928451538086
11735 d-minus 0.09465149999999466 232228 10.405846357345581
11736 h-plus 0.09464399999999465 232243 4.862723112106323
11737 v-plus 0.09463549999999464 232260 5.464966058731079
11738 h-plus 0.09462999999999464 232271 3.5813846588134766
11739 v-plus 0.09461849999999462 232294 7.397638559341431
11740 v-plus 0.09460499999999461 232321 8.755792617797852
11741 h-minus 0.0945929999999946 232345 7.756410837173462
11742 v-minus 0.09457899999999458 232373 9.037628889083862
11743 v-plus 0.09456849999999457 232394 6.818660497665405
11744 h-plus 0.09456099999999457 232409 4.802364349365234
11745 d-minus 0.09455299999999456 232425 5.123836517333984
11746 h-plus 0.09454349999999455 232444 6.042617559432983
11747 h-plus 0.09453299999999454 232465 6.79603910446167
11748 h-plus 0.09452349999999453 232484 6.1091859340667725
11749 h

11873 d-minus 0.09322149999999323 235088 8.357129335403442
11874 v-minus 0.09320649999999321 235118 9.913708686828613
11875 d-plus 0.0931899999999932 235151 10.62094759941101
11876 v-minus 0.09317899999999318 235173 6.936594247817993
11877 v-plus 0.09316749999999317 235196 7.378098249435425
11878 h-minus 0.09315799999999316 235215 6.131496906280518
11879 d-plus 0.09314749999999315 235236 6.786617994308472
11880 h-plus 0.09313699999999314 235257 6.868234395980835
11881 v-minus 0.09312899999999313 235273 5.201469898223877
11882 h-plus 0.09311849999999312 235294 6.824519872665405
11883 d-minus 0.09311249999999312 235306 3.955125331878662
11884 v-minus 0.09310449999999311 235322 5.078536748886108
11885 v-minus 0.0930934999999931 235344 7.035140037536621
11886 d-minus 0.09308199999999309 235367 7.3869147300720215
11887 h-plus 0.09307149999999308 235388 6.6724324226379395
11888 h-plus 0.09305899999999306 235413 8.068602323532104
11889 d-minus 0.09304499999999305 235441 9.353332042694092
1189

KeyboardInterrupt: 

In [281]:
ep = 0.1

In [144]:
def num_moves(board):
    return np.sum(board == 1) + np.sum(board == -1) 

display_board(board)
print(player_buffer[0].sum())

-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |  X  |  X  |  O  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |  O  |  X  |  X  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |     |     |  O  |  O  |  X  |  O  |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |     |  X  |  X  |  X  | 

In [163]:
n_moves = {'plus': [position.sum() for position in buffer["plus"]],
          'minus': [position.sum() for position in buffer["minus"]]}


In [152]:
n_moves[0] = "banana"
n_moves["plus"] = [position.sum() for position in player_buffer["plus"]]
n_moves["mius"] = [position.sum() for position in player_buffer["minus"]]
n_moves["plus"]

TypeError: list indices must be integers or slices, not str

In [110]:
buffn

10000

In [122]:
batch_size = 100

In [120]:
ep2 = 0.05


best_value(player, new_board)

In [284]:
player1.save("plus.h5")

In [285]:
player2.save("minus.h5")

In [38]:
player1 = tf.keras.models.load_model("player1_reg.h5")
player1.call = tf.function(player1.call,experimental_relax_shapes=True,reduce_retracing=True)
player2 = tf.keras.models.load_model("player2_final_finalish.h5")
player2.call = tf.function(player2.call,experimental_relax_shapes=True,reduce_retracing=True)
players = {"plus":player1, "minus":player2 }

In [None]:
def play_game(model1, model2, use_filters_plus = False, use_filters_minus = False, random_plus = False, random_minus = False, wait = False):
    # this is how you can play a game
    winner = 'nobody'
    board = np.zeros((6,7))
    display_board(board)
    player = 'plus'
    mod = model1
    while winner == 'nobody':
        if((player == 'plus' and use_filters_plus) or (player == 'minus' and use_filters_minus)):
            vf = mod(np.array([split_filters(board)]), training = False)
        else:
            print(player,use_filters_minus)
            vf = mod(np.expand_dims(board,[0,3]),training=False)
        vf = np.array(vf)
        if((player == 'plus' and random_plus) or (player == 'minus' and random_minus)):
            was_legal = False
            while(not was_legal):
                move = np.random.choice(np.arange(14), p = vf[0])
                was_legal = is_legal(board, player,move)
                vf[0,move] = 0
                vf = vf/vf.sum()
        else:
            move = move = np.argmax(vf + 2.1*where_legal(board,player))
        if(wait):
            time.sleep(2)
        board = update_board(board,player,move)
        display_board(board)
        print(player, move)
        print(vf)
        winner = check_for_win(board)
        if player == 'plus':
            player = 'minus'
            mod = model2
        else:
            player = 'plus'
            mod = model1
    print(winner)

In [30]:
def play_model(user, opposing_model, use_filters = False):
    # this is how you can play a game
    winner = 'nobody'
    board = np.zeros((6,7))
    display_board(board)
    player = 'plus'
    vf = 0
    while winner == 'nobody':
        if(user == player):
            move = int(input("what move:"))
        else:
            if(use_filters):
                vf = opposing_model(np.array([split_filters(board)]), training = False)
            else:
                vf = opposing_model(np.expand_dims(board,[0,3]),training=False)
            vf = np.array(vf)
            move = np.argmax(vf + 2.1*where_legal(board,player)) 
        board = update_board(board, player, move)
        display_board(board)
        if(user != player):
            print(move)
        winner = check_for_win(board)
        if player == 'plus':
            player = 'minus'
        else:
            player = 'plus'
    print(winner)

In [276]:
play_model("plus",player2, use_filters = True)

-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |  X  |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |  O  |  X  |  O  |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |  O  |  O  |  O  |  X  |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |  X  |  O  |  X  |  X  |  X  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |  O  |  X  |  O  |  X  | 

In [175]:
oldplayer1 = tf.keras.models.load_model("player1_final_finalish.h5")
oldplayer1.call = tf.function(oldplayer1.call,experimental_relax_shapes=True,reduce_retracing=True)
oldplayer2 = tf.keras.models.load_model("player2_final_finalish.h5")
oldplayer2.call = tf.function(oldplayer2.call,experimental_relax_shapes=True,reduce_retracing=True)

In [273]:
player1.summary()

Model: "model_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_10 (InputLayer)       [(None, 6, 7, 2)]         0         
                                                                 
 conv2d_19 (Conv2D)          (None, 5, 6, 16)          144       
                                                                 
 conv2d_20 (Conv2D)          (None, 4, 5, 32)          2080      
                                                                 
 conv2d_21 (Conv2D)          (None, 3, 4, 64)          8256      
                                                                 
 flatten_9 (Flatten)         (None, 768)               0         
                                                                 
 dropout_5 (Dropout)         (None, 768)               0         
                                                                 
 dense_28 (Dense)            (None, 128)               9843

In [177]:
play_game(player1, oldplayer2, use_filters_minus = True, use_filters_plus = True)

-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |  O  |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |  O  |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |     |  X  |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |  X  |  X  |  X  | 

In [293]:
play_game(player1,player2, use_filters_plus = True, use_filters_minus = True)

-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |  X  |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|     |     |     |  O  |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |  O  |  O  |  O  |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |  X  |  X  |  O  |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |  X  |  O  |  X  |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |  O  |  X  |  X  |     | 

In [93]:
def play_against_random(model, model_plays, wait = False, display = False, new_method = False):
    # this is how you can play a game
    winner = 'nobody'
    board = np.zeros((6,7))
    feed = np.zeros((1,6,7,1))
    if(display):
        display_board(board)
    player = 'plus'
    while winner == 'nobody':
        legal_move = False
        if(new_method):
            feed = np.array([split_filters(board)])
        else:    
            feed[0,:,:,0] = board
        if(wait):
            time.sleep(1)
        while(not legal_move):
            if(player == model_plays):
                vf = model(feed,training=False)
                vf = np.array(vf)
            else:
                vf = np.random.rand(1,14)
            while(not legal_move):
                move = np.argmax(vf)
                legal_move = is_legal(board, player, move)
                vf[:,move] = -100
        board = update_board(board,player,move)
        if(display):
            display_board(board)
        winner = check_for_win(board)
        if player == 'plus':
            player = 'minus'
        else:
            player = 'plus'
    return winner[2:] == model_plays

In [94]:
oldplayer2.summary()

Model: "model_57"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_58 (InputLayer)       [(None, 6, 7, 1)]         0         
                                                                 
 conv2d_171 (Conv2D)         (None, 3, 4, 50)          850       
                                                                 
 conv2d_172 (Conv2D)         (None, 2, 3, 75)          15075     
                                                                 
 conv2d_173 (Conv2D)         (None, 1, 2, 100)         30100     
                                                                 
 flatten_57 (Flatten)        (None, 200)               0         
                                                                 
 dense_171 (Dense)           (None, 64)                12864     
                                                                 
 dense_172 (Dense)           (None, 64)                416

In [286]:
np.mean([play_against_random(player2, 'minus', new_method = True) for i in range(1000)])

0.973

In [287]:
np.mean([play_against_random(player1, 'plus', new_method = True) for i in range(1000)])

0.989

In [288]:
player3 = tf.keras.models.load_model("player1_final_final_final.h5")
player3.call = tf.function(player3.call,experimental_relax_shapes=True,reduce_retracing=True)
player4 = tf.keras.models.load_model("player2_final_final_final.h5")
player4.call = tf.function(player3.call,experimental_relax_shapes=True,reduce_retracing=True)
players = {"plus":player1, "minus":player2 }

In [212]:
opposing_plus =  tf.keras.models.load_model("pgpopoutv2.h5", compile = False)
opposing_plus.call = tf.function(opposing_plus.call,experimental_relax_shapes=True,reduce_retracing=True)
opposing_minus =  tf.keras.models.load_model("pgpopoutv2_minus.h5", compile = False)
opposing_minus.call = tf.function(opposing_minus.call,experimental_relax_shapes=True,reduce_retracing=True)

In [292]:

play_game(player3,player2, use_filters_plus = True, use_filters_minus = True, wait = True)

-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |     |     |     |     |     |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |     |     |     |     |  O  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |     |     |     |     |  X  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |     |     |  O  |  X  |  X  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  O  |     |     |  O  |  O  |  X  |     |
|     |     |     |     |     |     |     |
-------------------------------------------
|     |     |     |     |     |     |     |
|  X  |  O  |     |  O  |  X  | 