In [1]:
import numpy as np
import tensorflow as tf
import time
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, Input,MaxPool2D
from IPython.display import clear_output
from tensorflow.keras.models import load_model

# board structure
instead of using colors, we'll use +1 for one player and -1 for the other player.

In [2]:
def update_board(board_temp,color,column):
    # this is a function that takes the current board status, a color, and a column and outputs the new board status
    # columns 0 - 6 are for putting a checker on the board: if column is full just return the current board...this should be forbidden by the player
    # columns 7 - 13 are for pulling a checker off the board: this does not check if removing the checker is allowed...
    # 
    # the color input should be either 'plus' or 'minus'
    
    board = board_temp.copy()
    ncol = board.shape[1]
    nrow = board.shape[0]
    if column < ncol: # drop a checker on the board
        row = -1
        # start by assuming you can't add to the column
        # loop through the rows checking if you can go to each row or not
        for check in range(nrow):
            if (board[check,column]!=0):
                break # if this row is occupied, you're done
            else: # otherwise, you can go on this row!
                row += 1

        if row >= 0: # if you can add to the column
            if color == 'plus': # check the color
                board[row,column] = 1
            else:
                board[row,column] = -1
        return board
    else:
        column -= ncol
        if column >= ncol:
            return board # can't play anything bigger than 13...
        board[1:,column] = board[:-1,column].copy()
        return board

In [3]:
def check_for_win(board):
    # this function checks to see if anyone has won on the given board
    nrow = board.shape[0]
    ncol = board.shape[1]
    winner = 'nobody'
    for row in range(nrow):
        for col in range(ncol):
            # check for vertical winners
            if row <= (nrow-4): # can't have a column go from rows 4-7...
                if np.sum(board[row:(row+4),col])==4:
                    winner = 'v-plus'
                    return winner
                elif np.sum(board[row:(row+4),col])==-4:
                    winner = 'v-minus'
                    return winner
            # check for horizontal winners
            if col <= (ncol-4):
                if np.sum(board[row,col:(col+4)])==4:
                    winner = 'h-plus'
                    return winner
                elif np.sum(board[row,col:(col+4)])==-4:
                    winner = 'h-minus'
                    return winner
            # check for top left to bottom right diagonal winners
            if (row <= (nrow-4)) and (col <= (ncol-4)):
                if np.sum(board[range(row,row+4),range(col,col+4)])==4:
                    winner = 'd-plus'
                    return winner
                elif np.sum(board[range(row,row+4),range(col,col+4)])==-4:
                    winner = 'd-minus'
                    return winner
            # check for top right to bottom left diagonal winners
            if (row <= (nrow-4)) and (col >= 3):
                if np.sum(board[range(row,row+4),range(col,col-4,-1)])==4:
                    winner = 'd-plus'
                    return winner
                elif np.sum(board[range(row,row+4),range(col,col-4,-1)])==-4:
                    winner = 'd-minus'
                    return winner
    return winner

In [4]:
def legal_moves(board, player):
    legal_moves = []
    nrow = board.shape[0]
    ncol = board.shape[1]
    if player == 'plus':
        multiplier = 1
    elif player == 'minus':
        multiplier = -1
    for i in range(ncol):
        if board[0][i] == 0:
            legal_moves.append(i)
        if board[nrow-1][i] == multiplier*1:
            legal_moves.append(i+ncol)
    return legal_moves

In [15]:
mod_plus = load_model("model_plus.h5")
mod_minus = load_model("model_minus.h5")

In [6]:
def create_model(height,width):
    # we cannot simply have 3 output nodes because we want to put a weight on each node's impact to the objective
    # that is different for each data point.  the only way to achieve this is to have 3 output layers, each having 1 node
    # the effect is the same, just the way TF/keras handles weights is different
    imp = Input(shape=(height,width,1))
    mid = Conv2D(40,4,strides=1,activation='relu')(imp)
    mid = Conv2D(20,2,strides=1,activation='relu')(mid)
    mid = Conv2D(10,2,strides=1,activation='relu')(mid)
    mid = Flatten()(mid)
    mid = Dense(256,activation='relu')(mid)
    mid = Dropout(0.25)(mid)
    mid = Dense(128,activation='relu')(mid)
    mid = Dropout(0.25)(mid)
    mid = Dense(64,activation='relu')(mid)
    mid = Dropout(0.25)(mid)
    mid = Dense(32,activation='relu')(mid)
    out0 = Dense(1,activation='linear',name='out0')(mid) # could also use a tanh activation function...all VFs are between -1 to 1
    out1 = Dense(1,activation='linear',name='out1')(mid)
    out2 = Dense(1,activation='linear',name='out2')(mid)
    out3 = Dense(1,activation='linear',name='out3')(mid)
    out4 = Dense(1,activation='linear',name='out4')(mid)
    out5 = Dense(1,activation='linear',name='out5')(mid)
    out6 = Dense(1,activation='linear',name='out6')(mid)
    out7 = Dense(1,activation='linear',name='out7')(mid)
    out8 = Dense(1,activation='linear',name='out8')(mid)
    out9 = Dense(1,activation='linear',name='out9')(mid)
    out10 = Dense(1,activation='linear',name='out10')(mid)
    out11 = Dense(1,activation='linear',name='out11')(mid)
    out12 = Dense(1,activation='linear',name='out12')(mid)
    out13 = Dense(1,activation='linear',name='out13')(mid)
    model = Model(imp,[out0,out1,out2,out3,out4,out5,out6,out7,out8,out9,out10,out11,out12,out13]) 
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)
    model.compile(optimizer=optimizer,loss={'out0':'mean_squared_error','out1':'mean_squared_error','out2':'mean_squared_error',\
                                           'out3':'mean_squared_error','out4':'mean_squared_error','out5':'mean_squared_error',\
                                           'out6':'mean_squared_error','out7':'mean_squared_error','out8':'mean_squared_error',\
                                           'out9':'mean_squared_error','out10':'mean_squared_error','out11':'mean_squared_error',\
                                           'out12':'mean_squared_error','out13':'mean_squared_error'})
    
    return model

In [7]:
mod_plus = create_model(6,7)
mod_plus.call = tf.function(mod_plus.call,experimental_relax_shapes=True)

mod_plus.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 6, 7, 1)]    0           []                               
                                                                                                  
 conv2d (Conv2D)                (None, 3, 4, 40)     680         ['input_1[0][0]']                
                                                                                                  
 conv2d_1 (Conv2D)              (None, 2, 3, 20)     3220        ['conv2d[0][0]']                 
                                                                                                  
 conv2d_2 (Conv2D)              (None, 1, 2, 10)     810         ['conv2d_1[0][0]']               
                                                                                              

In [8]:
mod_minus = create_model(6,7)
mod_minus.call = tf.function(mod_minus.call,experimental_relax_shapes=True)

mod_minus.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 6, 7, 1)]    0           []                               
                                                                                                  
 conv2d_3 (Conv2D)              (None, 3, 4, 40)     680         ['input_2[0][0]']                
                                                                                                  
 conv2d_4 (Conv2D)              (None, 2, 3, 20)     3220        ['conv2d_3[0][0]']               
                                                                                                  
 conv2d_5 (Conv2D)              (None, 1, 2, 10)     810         ['conv2d_4[0][0]']               
                                                                                            

In [16]:
def play1game(ep):
    
    board_array = []
    action_array = []
    board = np.zeros((6,7))
    player = 'plus'
    winner = 'nobody'
    while winner == 'nobody':
        feed = np.zeros((1,6,7))
        feed[0,:,:] = board.copy()
        legal_moves_list = legal_moves(board, player)
        if np.random.random() < ep:
            action = np.random.choice(legal_moves_list)
        else:
            if player == 'plus':
                vf = mod_plus(feed,training=False)
            else:
                vf = mod_minus(feed,training=False)
            vf_dict = {}
            for i in range(14):
                vf_dict[i] = vf[i][0,0].numpy()
            #vf = [vf[i][0,0].numpy() for i in range(14)]
            legal_vf_dict = {key: vf_dict[key] for key in legal_moves_list}
            res = [key for key in legal_vf_dict if legal_vf_dict[key] == max(legal_vf_dict.values())]
            if len(res) == 1:
                action = res[0]
            else:
                action = np.random.choice(res)
        
        board_array.append(board)
        board = update_board(board,player,action)
        #print(player +" played")
        winner = check_for_win(board)
        
        if player == 'plus':
            player = 'minus'
        else:
            player = 'plus'
        
    
        final_board = board
        action_array.append(action)
        

    return board_array, action_array, winner,final_board

In [86]:
def discount_rewards(board_array,player,winner):
    
    extra = 1
    delt = 0.75 # discount factor
    if winner[:1] == 'd' and winner[2:] == player:
        extra = 1000
    elif winner[:1] == 'h' and winner[2:] != player:
        extra = 100
    elif winner[:1] == 'v' and winner[2:] != player:
        extra = 1000
    
    nr = len(board_array)
    
    discounted_r = np.zeros(nr)
    if player == 'plus':
        sign = 1
    elif player == 'minus':
        sign = -1
    for t in range(nr):
        discounted_r[t] = extra*sign*multiplier*(delt**(nr-1-t))
        
    return discounted_r

In [22]:
ngames = 3000

In [None]:
winner_list = []
final_board_list = []
ep = 0.05
for game in range(ngames):
    start = time.time()
    #frames, actions, rewards, score = play1game(mod,epsvec[game])
    board_array, action_array, winner,final_board = play1game(ep)
    board_array_plus = []
    board_array_minus = []
    action_array_plus = []
    action_array_minus = []
    for i in range(len(board_array)):
        if i%2 == 0:
            board_array_plus.append(board_array[i])
            action_array_plus.append(action_array[i])
        else:
            board_array_minus.append(board_array[i])
            action_array_minus.append(action_array[i])

    if winner[2:] == 'plus':
        multiplier = 1
    elif winner[2:] == 'minus':
        multiplier = -1
    
    
    rewards_plus = discount_rewards(board_array_plus,'plus',winner)
    rewards_minus = discount_rewards(board_array_minus,'minus',winner) 
    
    
    nframes_plus = len(board_array_plus)
    nframes_minus = len(board_array_minus)
    current_frames_plus = np.zeros((nframes_plus,6,7,1))
    current_frames_minus = np.zeros((nframes_minus,6,7,1))
  
    for grab in range(nframes_plus):
        current_frames_plus[grab,:,:,0] = board_array_plus[grab].copy()
    
    for grab in range(nframes_minus):
        current_frames_minus[grab,:,:,0] = board_array_minus[grab].copy()

#### outputs and weights for the plus player NN

    y0_plus = np.zeros((nframes_plus,1))
    y1_plus = np.zeros((nframes_plus,1))
    y2_plus = np.zeros((nframes_plus,1))
    y3_plus = np.zeros((nframes_plus,1))
    y4_plus = np.zeros((nframes_plus,1))
    y5_plus = np.zeros((nframes_plus,1))
    y6_plus = np.zeros((nframes_plus,1))
    y7_plus = np.zeros((nframes_plus,1))
    y8_plus = np.zeros((nframes_plus,1))
    y9_plus = np.zeros((nframes_plus,1))
    y10_plus = np.zeros((nframes_plus,1))
    y11_plus = np.zeros((nframes_plus,1))
    y12_plus = np.zeros((nframes_plus,1))
    y13_plus = np.zeros((nframes_plus,1))
    
    weight0_plus = np.zeros(nframes_plus)
    weight1_plus = np.zeros(nframes_plus)
    weight2_plus = np.zeros(nframes_plus)
    weight3_plus = np.zeros(nframes_plus)
    weight4_plus = np.zeros(nframes_plus)
    weight5_plus = np.zeros(nframes_plus)
    weight6_plus = np.zeros(nframes_plus)
    weight7_plus = np.zeros(nframes_plus)
    weight8_plus = np.zeros(nframes_plus)
    weight9_plus = np.zeros(nframes_plus)
    weight10_plus = np.zeros(nframes_plus)
    weight11_plus = np.zeros(nframes_plus)
    weight12_plus = np.zeros(nframes_plus)
    weight13_plus = np.zeros(nframes_plus)
  
    for grab in range(nframes_plus):
        rhs = rewards_plus[grab]
        if action_array_plus[grab] == 0:
            y0_plus[grab,0] = rhs
            weight0_plus[grab] = 1
        elif action_array_plus[grab] == 1:
            y1_plus[grab,0] = rhs
            weight1_plus[grab] = 1
        elif action_array_plus[grab] == 2:
            y2_plus[grab,0] = rhs
            weight2_plus[grab] = 1
        elif action_array_plus[grab] == 3:
            y3_plus[grab,0] = rhs
            weight3_plus[grab] = 1
        elif action_array_plus[grab] == 4:
            y4_plus[grab,0] = rhs
            weight4_plus[grab] = 1
        elif action_array_plus[grab] == 5:
            y5_plus[grab,0] = rhs
            weight5_plus[grab] = 1
        elif action_array_plus[grab] == 6:
            y6_plus[grab,0] = rhs
            weight6_plus[grab] = 1
        elif action_array_plus[grab] == 7:
            y7_plus[grab,0] = rhs
            weight7_plus[grab] = 1
        elif action_array_plus[grab] == 8:
            y8_plus[grab,0] = rhs
            weight8_plus[grab] = 1
        elif action_array_plus[grab] == 9:
            y9_plus[grab,0] = rhs
            weight9_plus[grab] = 1
        elif action_array_plus[grab] == 10:
            y10_plus[grab,0] = rhs
            weight10_plus[grab] = 1
        elif action_array_plus[grab] == 11:
            y11_plus[grab,0] = rhs
            weight11_plus[grab] = 1
        elif action_array_plus[grab] == 12:
            y12_plus[grab,0] = rhs
            weight12_plus[grab] = 1
        else:
            y13_plus[grab,0] = rhs
            weight13_plus[grab] = 1
        

        
#### outputs and weights for the minus player NN
    y0_minus = np.zeros((nframes_minus,1))
    y1_minus = np.zeros((nframes_minus,1))
    y2_minus = np.zeros((nframes_minus,1))
    y3_minus = np.zeros((nframes_minus,1))
    y4_minus = np.zeros((nframes_minus,1))
    y5_minus = np.zeros((nframes_minus,1))
    y6_minus = np.zeros((nframes_minus,1))
    y7_minus = np.zeros((nframes_minus,1))
    y8_minus = np.zeros((nframes_minus,1))
    y9_minus = np.zeros((nframes_minus,1))
    y10_minus = np.zeros((nframes_minus,1))
    y11_minus = np.zeros((nframes_minus,1))
    y12_minus = np.zeros((nframes_minus,1))
    y13_minus = np.zeros((nframes_minus,1))
    
    weight0_minus = np.zeros(nframes_minus)
    weight1_minus = np.zeros(nframes_minus)
    weight2_minus = np.zeros(nframes_minus)
    weight3_minus = np.zeros(nframes_minus)
    weight4_minus = np.zeros(nframes_minus)
    weight5_minus = np.zeros(nframes_minus)
    weight6_minus = np.zeros(nframes_minus)
    weight7_minus = np.zeros(nframes_minus)
    weight8_minus = np.zeros(nframes_minus)
    weight9_minus = np.zeros(nframes_minus)
    weight10_minus = np.zeros(nframes_minus)
    weight11_minus = np.zeros(nframes_minus)
    weight12_minus = np.zeros(nframes_minus)
    weight13_minus = np.zeros(nframes_minus)
  
    for grab in range(nframes_minus):
        rhs = rewards_minus[grab]
        if action_array_minus[grab] == 0:
            y0_minus[grab,0] = rhs
            weight0_minus[grab] = 1
        elif action_array_minus[grab] == 1:
            y1_minus[grab,0] = rhs
            weight1_minus[grab] = 1
        elif action_array_minus[grab] == 2:
            y2_minus[grab,0] = rhs
            weight2_minus[grab] = 1
        elif action_array_minus[grab] == 3:
            y3_minus[grab,0] = rhs
            weight3_minus[grab] = 1
        elif action_array_minus[grab] == 4:
            y4_minus[grab,0] = rhs
            weight4_minus[grab] = 1
        elif action_array_minus[grab] == 5:
            y5_minus[grab,0] = rhs
            weight5_minus[grab] = 1
        elif action_array_minus[grab] == 6:
            y6_minus[grab,0] = rhs
            weight6_minus[grab] = 1
        elif action_array_minus[grab] == 7:
            y7_minus[grab,0] = rhs
            weight7_minus[grab] = 1
        elif action_array_minus[grab] == 8:
            y8_minus[grab,0] = rhs
            weight8_minus[grab] = 1
        elif action_array_minus[grab] == 9:
            y9_minus[grab,0] = rhs
            weight9_minus[grab] = 1
        elif action_array_minus[grab] == 10:
            y10_minus[grab,0] = rhs
            weight10_minus[grab] = 1
        elif action_array_minus[grab] == 11:
            y11_minus[grab,0] = rhs
            weight11_minus[grab] = 1
        elif action_array_minus[grab] == 12:
            y12_minus[grab,0] = rhs
            weight12_minus[grab] = 1
        else:
            y13_minus[grab,0] = rhs
            weight13_minus[grab] = 1
    
### fitting the plus player NN
    mod_plus.fit(current_frames_plus,{'out0':y0_plus,'out1':y1_plus,'out2':y2_plus,'out3':y3_plus,'out4':y4_plus,\
                                      'out5':y5_plus,'out6':y6_plus,'out7':y7_plus,'out8':y8_plus,'out9':y9_plus,\
                                     'out10':y10_plus,'out11':y11_plus,'out12':y12_plus,'out13':y13_plus},\
            epochs=1,verbose=0,sample_weight={'out0':weight0_plus,'out1':weight1_plus\
            ,'out2':weight2_plus,'out3':weight3_plus,'out4':weight4_plus,'out5':weight5_plus\
            ,'out6':weight6_plus,'out7':weight7_plus,'out8':weight8_plus,'out9':weight9_plus\
            ,'out10':weight10_plus,'out11':weight11_plus,'out12':weight12_plus,'out13':weight13_plus},\
            use_multiprocessing=True)

    
### fitting the minus player NN
    mod_minus.fit(current_frames_minus,{'out0':y0_minus,'out1':y1_minus,'out2':y2_minus,'out3':y3_minus,'out4':y4_minus,\
                                      'out5':y5_minus,'out6':y6_minus,'out7':y7_minus,'out8':y8_minus,'out9':y9_minus,\
                                     'out10':y10_minus,'out11':y11_minus,'out12':y12_minus,'out13':y13_minus},\
            epochs=1,verbose=0,sample_weight={'out0':weight0_minus,'out1':weight1_minus\
            ,'out2':weight2_minus,'out3':weight3_minus,'out4':weight4_minus,'out5':weight5_minus\
            ,'out6':weight6_minus,'out7':weight7_minus,'out8':weight8_minus,'out9':weight9_minus\
            ,'out10':weight10_minus,'out11':weight11_minus,'out12':weight12_minus,'out13':weight13_minus},\
            use_multiprocessing=True)

    stop = time.time()
    if game > 1999 and ep>0.05:
        ep = ep-0.05
    
    winner_list.append(winner)
    final_board_list.append(final_board)
    
    print([game,ep,winner,stop-start])

[0, 0.05, 'v-minus', 0.6613831520080566]
[1, 0.05, 'v-minus', 0.40718889236450195]
[2, 0.05, 'v-minus', 0.3844161033630371]
[3, 0.05, 'v-minus', 0.37754392623901367]
[4, 0.05, 'v-minus', 0.5664150714874268]
[5, 0.05, 'd-minus', 0.512819766998291]
[6, 0.05, 'v-minus', 0.43100690841674805]
[7, 0.05, 'v-minus', 0.4234049320220947]
[8, 0.05, 'v-minus', 0.4414350986480713]
[9, 0.05, 'v-minus', 0.4024667739868164]
[10, 0.05, 'd-plus', 0.42571496963500977]
[11, 0.05, 'v-minus', 0.40971803665161133]
[12, 0.05, 'v-minus', 0.35972094535827637]
[13, 0.05, 'v-minus', 0.45465087890625]
[14, 0.05, 'v-minus', 0.32701683044433594]
[15, 0.05, 'v-minus', 0.40175628662109375]
[16, 0.05, 'v-minus', 0.3485090732574463]
[17, 0.05, 'v-minus', 0.5382537841796875]
[18, 0.05, 'v-minus', 0.43805480003356934]
[19, 0.05, 'v-plus', 0.36974477767944336]
[20, 0.05, 'v-minus', 0.42267799377441406]
[21, 0.05, 'v-minus', 0.3686490058898926]
[22, 0.05, 'v-minus', 0.3171970844268799]
[23, 0.05, 'v-minus', 0.37091207504272

[193, 0.05, 'h-minus', 0.4244191646575928]
[194, 0.05, 'h-minus', 0.4106271266937256]
[195, 0.05, 'v-minus', 0.39015817642211914]
[196, 0.05, 'h-plus', 0.43792104721069336]
[197, 0.05, 'v-minus', 0.38018202781677246]
[198, 0.05, 'v-minus', 0.3950212001800537]
[199, 0.05, 'v-minus', 0.38155102729797363]
[200, 0.05, 'v-minus', 0.37837910652160645]
[201, 0.05, 'v-minus', 0.38676023483276367]
[202, 0.05, 'v-minus', 0.36921000480651855]
[203, 0.05, 'h-minus', 0.3826310634613037]
[204, 0.05, 'v-minus', 0.3781592845916748]
[205, 0.05, 'd-plus', 0.40788984298706055]
[206, 0.05, 'd-minus', 0.33478498458862305]
[207, 0.05, 'v-minus', 0.3854179382324219]
[208, 0.05, 'v-minus', 0.6278269290924072]
[209, 0.05, 'v-minus', 0.38506388664245605]
[210, 0.05, 'h-minus', 0.37271976470947266]
[211, 0.05, 'v-minus', 0.4036400318145752]
[212, 0.05, 'd-minus', 0.35532593727111816]
[213, 0.05, 'd-minus', 0.3782839775085449]
[214, 0.05, 'v-minus', 0.3694941997528076]
[215, 0.05, 'h-minus', 0.3904128074645996]
[

[383, 0.05, 'v-minus', 0.3819699287414551]
[384, 0.05, 'd-minus', 0.3499791622161865]
[385, 0.05, 'h-minus', 0.3283371925354004]
[386, 0.05, 'h-minus', 0.4053220748901367]
[387, 0.05, 'h-plus', 0.42215514183044434]
[388, 0.05, 'h-plus', 0.42897605895996094]
[389, 0.05, 'h-minus', 0.32138991355895996]
[390, 0.05, 'v-minus', 0.35549163818359375]
[391, 0.05, 'h-minus', 0.4039289951324463]
[392, 0.05, 'v-minus', 0.3549480438232422]
[393, 0.05, 'd-plus', 0.3814430236816406]
[394, 0.05, 'v-plus', 0.3346590995788574]
[395, 0.05, 'v-minus', 0.3446500301361084]
[396, 0.05, 'd-plus', 0.43903589248657227]
[397, 0.05, 'h-minus', 0.4035811424255371]
[398, 0.05, 'h-minus', 0.40622830390930176]
[399, 0.05, 'v-minus', 0.44939231872558594]
[400, 0.05, 'd-minus', 0.3816678524017334]
[401, 0.05, 'd-plus', 0.3896961212158203]
[402, 0.05, 'h-minus', 0.45845603942871094]
[403, 0.05, 'h-minus', 0.4088308811187744]
[404, 0.05, 'd-plus', 0.3902859687805176]
[405, 0.05, 'h-minus', 0.389739990234375]
[406, 0.05,

[574, 0.05, 'h-minus', 0.419083833694458]
[575, 0.05, 'v-minus', 0.4550797939300537]
[576, 0.05, 'h-plus', 0.38773417472839355]
[577, 0.05, 'h-plus', 0.3873758316040039]
[578, 0.05, 'h-plus', 0.3781247138977051]
[579, 0.05, 'h-plus', 0.38614606857299805]
[580, 0.05, 'v-plus', 0.488753080368042]
[581, 0.05, 'h-plus', 0.43320679664611816]
[582, 0.05, 'h-plus', 0.38307881355285645]
[583, 0.05, 'h-plus', 0.38170289993286133]
[584, 0.05, 'h-plus', 0.4435722827911377]
[585, 0.05, 'h-plus', 0.3842508792877197]
[586, 0.05, 'h-plus', 0.37191200256347656]
[587, 0.05, 'v-minus', 0.45853209495544434]
[588, 0.05, 'h-plus', 0.3788599967956543]
[589, 0.05, 'v-plus', 0.42929577827453613]
[590, 0.05, 'h-plus', 0.38980913162231445]
[591, 0.05, 'h-plus', 0.3868091106414795]
[592, 0.05, 'h-plus', 0.3725621700286865]
[593, 0.05, 'h-plus', 0.3993501663208008]
[594, 0.05, 'h-plus', 0.41815900802612305]
[595, 0.05, 'h-plus', 0.39282894134521484]
[596, 0.05, 'h-plus', 0.3798048496246338]
[597, 0.05, 'h-plus', 

In [78]:
final_board_list[0]

array([[ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  0.,  1.,  0.,  0.],
       [ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  0.,  1.,  0.,  0.],
       [ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  1.,  1.,  0.,  0.]])

In [79]:
final_board_list[2999]

array([[ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  0.,  1.,  0.,  0.],
       [ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  0.,  1.,  0.,  0.],
       [ 0., -1., -1.,  0., -1.,  0.,  0.],
       [ 0.,  1.,  1.,  1.,  1.,  0.,  0.]])

In [69]:
mod_plus.save('model_plus_latest.h5')
mod_minus.save('model_minus_latest.h5')

In [92]:
winner = 'nobody'
board = np.zeros((6,7))
player = 'plus'
while winner == 'nobody':
    if player == 'minus':
        move = input('Pick a move (0-13) for player '+player+': ')
    else:
        feed = np.zeros((1,6,7))
        feed[0,:,:] = board.copy()
        legal_moves_list = legal_moves(board, player)
        vf = mod_plus(feed,training=False)
        vf_dict = {}
        for i in range(14):
            vf_dict[i] = vf[i][0,0].numpy()
        #vf = [vf[i][0,0].numpy() for i in range(14)]
        print(vf_dict)
        legal_vf_dict = {key: vf_dict[key] for key in legal_moves_list}
        print(legal_vf_dict)
        res = [key for key in legal_vf_dict if legal_vf_dict[key] == max(legal_vf_dict.values())]
        print(res)
        if len(res) == 1:
            move = res[0]
        else:
            move = np.random.choice(res)
    move = int(move)
    board = update_board(board,player,move)
    clear_output()
    print(board)
    winner = check_for_win(board)
    if player == 'plus':
        player = 'minus'
    else:
        player = 'plus'
print('The winner is '+winner)

[[ 0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.  0.  0.]
 [ 0.  0. -1. -1.  0.  0.  0.]
 [ 0.  0.  1. -1.  0.  0.  0.]
 [ 0.  0.  1. -1.  0.  0.  0.]
 [ 0.  0.  1. -1.  0.  0.  0.]]
The winner is v-minus


In [94]:
ngames_random = 1000
winner_list_random = []
for i in range(ngames_random):
    winner = 'nobody'
    board = np.zeros((6,7))
    player = 'plus'
    while winner == 'nobody':
        if player == 'minus':
            legal_moves_list = legal_moves(board, player)
            move = np.random.choice(legal_moves_list)
        else:
            feed = np.zeros((1,6,7))
            feed[0,:,:] = board.copy()
            legal_moves_list = legal_moves(board, player)
            vf = mod_plus(feed,training=False)
            vf_dict = {}
            for i in range(14):
                vf_dict[i] = vf[i][0,0].numpy()
            #vf = [vf[i][0,0].numpy() for i in range(14)]
            #print(vf_dict)
            legal_vf_dict = {key: vf_dict[key] for key in legal_moves_list}
            #print(legal_vf_dict)
            res = [key for key in legal_vf_dict if legal_vf_dict[key] == max(legal_vf_dict.values())]
            #print(res)
            if len(res) == 1:
                move = res[0]
            else:
                move = np.random.choice(res)
        move = int(move)
        board = update_board(board,player,move)
        winner = check_for_win(board)
        if player == 'plus':
            player = 'minus'
        else:
            player = 'plus'

    if winner[2:] == 'minus':
        winner = 'random'
    else:
        winner = 'NN'
    print(winner)
    winner_list_random.append(winner)

NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
random
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
random
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
random
NN
NN
random
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
random
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN
NN

In [95]:
winner_list_random.count("NN")

982

In [None]:
winner = 'nobody'
board = np.zeros((6,7))
player = 'plus'
while winner == 'nobody':
    if player == 'plus':
        move = input('Pick a move (0-13) for player '+player+': ')
    else:
        feed = np.zeros((1,6,7))
        feed[0,:,:] = board.copy()
        legal_moves_list = legal_moves(board, player)
        vf = mod_minus(feed,training=False)
        vf_dict = {}
        for i in range(14):
            vf_dict[i] = vf[i][0,0].numpy()
        #vf = [vf[i][0,0].numpy() for i in range(14)]
        print(vf_dict)
        legal_vf_dict = {key: vf_dict[key] for key in legal_moves_list}
        print(legal_vf_dict)
        res = [key for key in legal_vf_dict if legal_vf_dict[key] == max(legal_vf_dict.values())]
        print(res)
        if len(res) == 1:
            move = res[0]
        else:
            move = np.random.choice(res)
    move = int(move)
    board = update_board(board,player,move)
    clear_output()
    print(board)
    winner = check_for_win(board)
    if player == 'plus':
        player = 'minus'
    else:
        player = 'plus'
print('The winner is '+winner)