In [40]:
import backgammon
import random
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import clear_output
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout, LeakyReLU, BatchNormalization

# if GPU is available this code will state 1
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

class BGBoard:
    """Wrapper around ``backgammon.Backgammon`` used by the self-play code.

    Attributes:
        board: the underlying ``backgammon.Backgammon`` game (already started).
        first_turn: the ``match.player`` object read right after ``start()``.
        winner: -1 while undecided, otherwise 0 or 1 (set by ``is_over``).
        state_players: one entry per ``make_move`` call; 0 when the player on
            roll equals ``first_turn``, 1 otherwise.
    """

    def __init__(self):
        self.board = backgammon.Backgammon()
        self.board.start()
        self.first_turn = self.board.match.player
        self.winner = -1
        self.state_players = []

    def get_moves_and_positions(self, swap=False):
        """Enumerate the legal plays for the current dice.

        Args:
            swap: when truthy, every resulting position is passed through
                ``swap_position`` before being encoded.

        Returns:
            ``(possible_moves_positions, possible_moves)`` where
            ``possible_moves`` is a list of plays, each a tuple of
            ``(source, destination)`` pairs, and ``possible_moves_positions``
            maps each play to the encoded state reached by it.
        """
        # The resulting positions are tagged with the *other* side's flag
        # (turn 0 -> 1, anything else -> 0). This does not depend on the
        # play, so it is computed once instead of once per play.
        current_player = 1 if self.board.match.turn.value == 0 else 0

        possible_moves_positions = {}
        possible_moves = []
        for play in self.board.generate_plays():
            pair = tuple((move.source, move.destination) for move in play.moves)
            position = swap_position(play.position) if swap else play.position
            possible_moves_positions[pair] = self.state(position, current_player)
            possible_moves.append(pair)
        return possible_moves_positions, possible_moves

    def state(self, position, current_player):
        """Encode a position as a flat feature vector.

        The vector is: all board points, player bar, player off, opponent bar,
        opponent off, both dice currently stored on the match, and
        ``current_player``. Every entry is divided by 15.

        Args:
            position: object exposing ``board_points``, ``player_bar``,
                ``player_off``, ``opponent_bar`` and ``opponent_off``.
            current_player: flag appended as the last feature.

        Returns:
            1-D ``numpy`` float array (31 entries for a 24-point board).
        """
        dice = self.board.match.dice
        features = [
            *position.board_points,
            position.player_bar,
            position.player_off,
            position.opponent_bar,
            position.opponent_off,
            dice[0],
            dice[1],
            current_player,
        ]
        return np.array(features) / 15

    def make_move(self, move):
        '''
        Apply a play (or skip the turn) and roll for the next player if needed.

        move is tuple of tuples. e.g ((22,19),(23,21),...), or None to skip.

        Returns the winner (0 or 1) when the move ended the game, else None.
        '''
        # Compare player objects with each other. The previous check compared
        # ``match.player.value`` (an int) with the stored player object, which
        # is only correct if that type happens to compare equal to ints.
        mover_is_first = self.board.match.player == self.first_turn
        self.state_players.append(0 if mover_is_first else 1)

        if move is None:
            self.board.skip()
        else:
            self.board.play(move)

        if self.is_over():
            return self.winner

        # Dice of (0, 0) mean nothing is rolled yet for the next turn.
        if self.board.match.dice == (0, 0):
            self.board.roll()
        return None

    def is_over(self):
        """Return True once one side leads on score, recording it in ``winner``."""
        p0_score = self.board.match.player_0_score
        p1_score = self.board.match.player_1_score

        if p0_score > p1_score:
            self.winner = 0
            return True
        elif p0_score < p1_score:
            self.winner = 1
            return True
        else:
            return False

    # def get_reward(self):
    #     player = self.board.match.player
    #     if winner != -1:
    #         if winner == player:
    #             return torch.tensor(1,device=device, dtype=torch.float32)
    #         elif winner != player:
    #             return torch.tensor(-1,device=device, dtype=torch.float32)
        

Num GPUs Available:  1


In [41]:
# Value network: takes the 31-feature vector built by BGBoard.state and
# outputs a single tanh-squashed value estimate for that state.
model = Sequential([
    Dense(64, activation=tf.keras.layers.LeakyReLU(alpha=0.3), input_shape=(31,)), # 31 features for board state
    BatchNormalization(),
    Dropout(0.5),
    # Dense(512, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
    # BatchNormalization(),
    # Dropout(0.5),
    Dense(64, activation=tf.keras.layers.LeakyReLU(alpha=0.3)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(1, activation="tanh")  # Output: Estimated value of the state
])
# NOTE: this value is also captured as the default `alpha` of td_lambda_update
# when that function is defined below; rebinding `learning_rate` later does
# not change that default.
learning_rate = 0.1
optimizer = Adam(learning_rate=learning_rate)

# Compile the model
# NOTE(review): in the visible cells the weights are only ever updated by hand
# inside td_lambda_update (assign_add); nothing calls model.fit, so this
# optimizer/loss pair does not appear to drive training - confirm.
model.compile(optimizer=optimizer,
              loss='mean_squared_error')



        
    
losses = []

def td_lambda_update(states, rewards, model, eligibility_traces, alpha=learning_rate, gamma=0.99, lambda_=0.8):
    """
    Perform TD(λ) updates with eligibility traces over one finished episode.

    Every state in `states` produces one update. For all but the last state
    the target bootstraps from the value of the following state. The last
    state is treated as the final step of the episode: its target is just
    `rewards[-1]` (no bootstrap). Previously the loop stopped one state early,
    so the final reward - the only non-zero reward produced by `play_game` -
    was never used.

    Args:
        states: Sequence of game states (numpy arrays), in play order.
        rewards: Sequence of rewards, one per state (same length as `states`).
        model: The neural network model; its trainable variables are updated
            in place with `assign_add`.
        eligibility_traces: List with one trace tensor per trainable variable.
            It is mutated in place.
        alpha: Learning rate.
        gamma: Discount factor.
        lambda_: Eligibility trace decay factor.

    Returns:
        Mean squared TD error over the episode as a Python float
        (0.0 for an empty episode).
    """
    num_states = len(states)
    if num_states == 0:
        # Nothing to learn from; also avoids a division by zero below.
        return 0.0

    variables = model.trainable_variables
    total_loss = 0.0

    for t in range(num_states):
        state_t = tf.convert_to_tensor([states[t]], dtype=tf.float32)

        # Only V(s_t) has to be recorded: the traces accumulate dV(s_t)/dw.
        with tf.GradientTape() as tape:
            V_t = model(state_t)  # Shape: (1, 1)

        if t + 1 < num_states:
            state_next = tf.convert_to_tensor([states[t + 1]], dtype=tf.float32)
            V_next = tf.squeeze(model(state_next))
        else:
            # Last recorded state: the episode ends after it, so there is no
            # successor value to bootstrap from.
            V_next = tf.constant(0.0, dtype=tf.float32)

        # TD error (delta_t)
        delta_t = rewards[t] + gamma * V_next - tf.squeeze(V_t)

        # Gradients of the current state's value prediction
        gradients = tape.gradient(V_t, variables)

        # Decay/accumulate the traces and move the weights along them
        for i, gradient in enumerate(gradients):
            if gradient is not None:
                eligibility_traces[i] = gamma * lambda_ * eligibility_traces[i] + gradient
                variables[i].assign_add(alpha * delta_t * eligibility_traces[i])

        total_loss += float(delta_t) ** 2

    return total_loss / num_states


In [42]:
from backgammon.position import Position

def swap_position(position) -> "Position":
    """Build the same position with the two sides exchanged.

    The board points are reversed and negated, and the player's bar/off
    counts trade places with the opponent's.
    """
    mirrored_points = tuple(-count for count in reversed(position.board_points))
    return Position(
        mirrored_points,
        position.opponent_bar,
        position.opponent_off,
        position.player_bar,
        position.player_off,
    )

In [83]:
def play_game(model,random_threshold=0.75):
    """Self-play one game and record the states seen before each move.

    On every turn the current position is encoded (through `swap_position`
    when the side to move is not the side that moved first) and appended to
    the trajectory. A play is then chosen uniformly at random with
    probability `random_threshold`, otherwise the play whose resulting state
    gets the highest `model.predict` value is taken. With no legal play the
    turn is skipped.

    Args:
        model: value network exposing `predict`.
        random_threshold: probability of picking a random play.

    Returns:
        `(states, rewards)`: `states` is a numpy array with one encoded state
        per turn; `rewards` is a list of the same length, all zeros except the
        last entry, which is 1 when the final turn belonged to the side that
        moved first and -1 otherwise.
    """
    states = []
    b = BGBoard()
    first_player_turn = b.board.match.turn.value
    # Turn value of the most recent move. Tracked as an integer so the final
    # reward does not depend on recovering it from the /15-scaled float
    # feature with an exact equality test.
    last_turn = None

    while not b.is_over():
        turn = b.board.match.turn.value
        last_turn = turn
        swap = turn != first_player_turn

        position = b.board.position
        if swap:
            position = swap_position(position)
        # State as seen before the current player moves
        states.append(b.state(position, turn))

        moves_positions, possible_moves = b.get_moves_and_positions(swap=swap)

        if not possible_moves:
            best_move = None
        elif random.uniform(0,1)<random_threshold:
            best_move = random.choice(possible_moves)
        else:
            # Score every reachable state in a single batch
            next_states = np.array([moves_positions[move] for move in possible_moves])
            move_values = model.predict(next_states).flatten()
            best_move = possible_moves[np.argmax(move_values)]

        b.make_move(best_move)

    states = np.array(states)

    rewards = [0] * len(b.state_players)
    if rewards:
        rewards[-1] = 1 if last_turn == first_player_turn else -1

    return states, rewards

In [91]:
# Self-play training: one game per epoch, followed by a TD(lambda) update.
last_epoch = 0
epochs = 40000
random_threshold = 0.8
learning_rate = 0.005
for epoch in range(last_epoch, epochs):  # Number of training games
    # Generate a game through self-play
    states,rewards = play_game(model,random_threshold)

    # NOTE(review): this *raises* the share of random moves over time
    # (0.8 -> 0.85); confirm that increasing exploration is intended.
    if random_threshold < 0.85:
        random_threshold += 0.00002

    # Traces are reset at the start of every episode
    eligibility_traces = [tf.zeros_like(var) for var in model.trainable_variables]

    # Perform TD(λ) updates
    loss = td_lambda_update(states, rewards, model,eligibility_traces, alpha=learning_rate)
    losses.append(loss)

    # Checkpoint the model every 100 games
    if epoch % 100 == 0:
        model.save(f"backgammon_RLmodel06_64_64_{epoch}.h5")

    # Decay the TD step size every 1000 games (this also fires at epoch 0)
    if epoch % 1000 == 0:
        learning_rate *= 0.95

    if len(losses) % 10 == 0:
        clear_output(wait=True)  # Clear the output of the cell

        # Plot the losses
        plt.plot(losses, label='TD Error (Loss)')

        # Rolling mean over a window of 100 games (needs at least 100 values)
        if len(losses) >= 100:
            rolling_mean = np.convolve(losses, np.ones(100)/100, mode='valid')
            plt.plot(range(99, len(losses)), rolling_mean, label='Mean of last 100', color='orange', linestyle='--')

        plt.xlabel('Games')
        plt.ylabel('TD Error (Loss)')
        # plt.legend() returns a Legend object; it must not be called again.
        plt.legend()
        plt.show()  # Display the updated plot
    

first player is 1
[-2.  0.  0.  0.  0.  5.  0.  3.  0.  0.  0. -5.  5.  0.  0.  0. -3.  0.
 -5.  0.  0.  0.  0.  2.  0.  0.  0.  0.  3.  4.  1.]
[-2.  0.  0.  0.  0.  6.  0.  3.  0.  0.  0. -5.  4.  0.  0.  0. -3.  0.
 -5.  0.  0.  0.  0.  2.  0.  0.  0.  0.  2.  1.  0.]
[-2.  0.  0.  0.  0.  6.  0.  3.  0.  0.  0. -5.  4.  0.  0.  0. -2.  0.
 -5. -1.  0.  0.  0.  2.  0.  0.  0.  0.  5.  2.  1.]
[-2.  0.  1.  1.  0.  5.  0.  2.  0.  0.  0. -5.  4.  0.  0.  0. -2.  0.
 -5. -1.  0.  0.  0.  2.  0.  0.  0.  0.  1.  1.  0.]
[-2.  0.  1.  1.  0.  5.  0.  2.  0.  0.  0. -5.  4.  0.  0.  0. -1.  0.
 -6.  0.  0. -1.  0.  2.  0.  0.  0.  0.  2.  1.  1.]
[-2.  0.  1.  1.  0.  6.  1.  0.  0.  0.  0. -5.  4.  0.  0.  0. -1.  0.
 -6.  0.  0. -1.  0.  2.  0.  0.  0.  0.  2.  3.  0.]
[-2.  0.  1.  1.  0.  6.  1.  0.  0.  0.  0. -4.  4. -1.  0.  0.  0.  0.
 -6. -1.  0. -1.  0.  2.  0.  0.  0.  0.  3.  4.  1.]
[-2.  0.  2.  1.  0.  6.  0.  0.  0.  1.  0. -4.  3. -1.  0.  0.  0.  0.
 -6. -1.  0. -1.  0.

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 10]

In [26]:
def evaluate_play_game(model_dumb,model_smart):
    """Play one full game between two value networks, always moving greedily.

    `model_dumb` picks the play when `match.turn.value` is 0, `model_smart`
    otherwise. Each picks the play whose resulting state gets the highest
    predicted value. A turn with no legal play is skipped.

    Args:
        model_dumb: value network exposing `predict`.
        model_smart: value network exposing `predict`.

    Returns:
        `(player_0_score, player_1_score)` read from the match once the game
        is over.
    """
    b = BGBoard()

    while not b.is_over():
        moves_positions, possible_moves = b.get_moves_and_positions()

        if possible_moves:
            # Score every reachable state in a single batch
            next_states = np.array([moves_positions[move] for move in possible_moves])
            active_model = model_dumb if b.board.match.turn.value == 0 else model_smart
            move_values = active_model.predict(next_states).flatten()  # Flatten to 1D array
            best_move = possible_moves[np.argmax(move_values)]
        else:
            best_move = None
        b.make_move(best_move)

    p0_score = b.board.match.player_0_score
    p1_score = b.board.match.player_1_score
    return p0_score,p1_score




In [34]:
# Load two saved checkpoints to play against each other; the file names end
# in 100 and 12000 (presumably the training epoch at which each was saved).
model_dumb = tf.keras.models.load_model('./backgammon_RLmodel05_64_64_100.h5')
model_smart = tf.keras.models.load_model('./backgammon_RLmodel05_64_64_12000.h5')
# Per-game scores returned by evaluate_play_game, one entry per game played.
p0_scores = []
p1_scores = []
def calculate_wins(p):
    """Return how many entries of `p` are strictly positive."""
    return sum(1 for score in p if score > 0)
# Play 400 evaluation games and keep a running "wins/wins" counter on one
# line (end='\r' rewrites it in place). A game counts as a win for a side when
# that side's returned score is positive.
for i in range(400):
    p0,p1 = evaluate_play_game(model_dumb,model_smart)
    # print(f'{p0}/{p1}')
    p0_scores.append(p0)
    p1_scores.append(p1)
    print(f"{calculate_wins(p0_scores)}/{calculate_wins(p1_scores)}", end='\r')
    
    


206/194

In [31]:
print(sum(p0_scores),sum(p1_scores))
all_wins_0 = []
all_wins_1 =  []
def calculate_wins(p):
    """Count the strictly positive scores in `p`."""
    positive_scores = [score for score in p if score > 0]
    return len(positive_scores)

102 140


In [68]:
model_dumb = None
model_smart = None 
del model_dumb,model_smart

In [33]:
print(calculate_wins(p0_scores))
print(calculate_wins(p1_scores))


49
64


In [12]:
b = BGBoard()
# b.start()


In [13]:
print(b.board)

                 Position ID: 4HPwATDgc/ABMA
                 Match ID   : cIl0AAAAAAAA
 +13-14-15-16-17-18------19-20-21-22-23-24-+
 | X           O    |   | O              X |
 | X           O    |   | O              X |
 | X           O    |   | O                |
 | X                |   | O                |
 | X                |   | O                |
v|                  |BAR|                  |
 | O                |   | X                |
 | O                |   | X                |
 | O           X    |   | X                |
 | O           X    |   | X              O |
 | O           X    |   | X              O |
 +12-11-10--9--8--7-------6--5--4--3--2--1-+



In [14]:
b.board.match.turn.value

1

In [28]:
p = b.board.position
print(p)

Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0)


In [29]:
# The helper defined in this notebook is `swap_position`; `swap_players` is
# not defined in any visible cell and would raise NameError on a fresh kernel.
swap_position(p)

Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0)

In [6]:
moves_positions,possible_moves = b.get_moves_and_positions()

In [12]:
for play in b.board.generate_plays():
    print(play)
    break

Play(moves=(Move(pips=5, source=7, destination=2), Move(pips=1, source=5, destination=4)), position=Position(board_points=(-2, 0, 1, 0, 1, 4, 0, 2, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))


In [13]:
b.board.position

Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0)

In [42]:
# Scratch cell: score every legal play for the side to move with `model_smart`.
# Expects `b` to be a BGBoard and `model_smart` to be loaded.
position = b.board.position
first_player = b.board.match.turn.value
# BGBoard.state requires the side-to-move flag as its second argument.
current_state = b.state(position, first_player)
# Append the current state (for the current player)
# NOTE(review): `states` must already exist as a list when this cell runs.
states.append(current_state)

# get_moves_and_positions returns both the {play: encoded state} mapping and
# the list of plays, so no separate get_possible_moves call is needed.
moves_positions, possible_moves = b.get_moves_and_positions()
# if i == 1:
    # print(f'{first_player} is player_dumb')
move_values = []

if moves_positions != {}:

    # Create a batch of next states for all possible moves
    next_states = np.array([moves_positions[move] for move in possible_moves])

    # Predict values for all possible moves in a single batch
    move_values = model_smart.predict(next_states).flatten()  # Flatten to 1D array

    best_move = possible_moves[np.argmax(move_values)]
else:
    best_move = None

In [47]:
print(possible_moves)
print(move_values)

[((7, 5), (12, 9)), ((12, 10), (10, 7)), ((12, 10), (23, 20)), ((7, 5), (23, 20)), ((23, 21), (7, 4)), ((23, 21), (12, 9)), ((23, 21), (5, 2)), ((5, 3), (23, 20)), ((5, 3), (7, 4)), ((12, 10), (5, 2)), ((5, 3), (12, 9)), ((5, 3), (5, 2)), ((23, 21), (23, 20)), ((7, 5), (7, 4)), ((7, 5), (5, 2)), ((12, 10), (12, 9)), ((12, 10), (7, 4))]
[-0.06178284 -0.08350936 -0.0698778  -0.06428883 -0.04922668 -0.04423356
 -0.05701607 -0.10789806 -0.1091629  -0.07325294 -0.1095074  -0.11514501
 -0.06106399 -0.06847084 -0.06826864 -0.06854022 -0.07058718]


In [20]:
k = 0.5
for i in range(15000):
    k += 0.00002

In [21]:
print(k)

0.8000000000003


In [None]:
# Scratch cell: play uniformly random legal moves until one side's score
# exceeds 1.
# NOTE(review): this cell calls roll()/generate_plays()/play()/skip() directly
# on `b`, so `b` has to be a raw backgammon.Backgammon game here, not a
# BGBoard wrapper.
counter = 0
roll = False  # no roll before the first play; roll before every later one
winner = None
while winner is None:

    possible_moves = []
    if roll:
        b.roll()

    # Explicit loops are kept on purpose: they leave `play`, `pair` and `move`
    # behind as cell globals.
    for play in b.generate_plays():
        pair = []
        for move in play.moves:
            pair.append((move.source,move.destination))
        possible_moves.append(tuple(pair))
    if not possible_moves:
        # No legal play: pass the turn
        b.skip()
        counter +=1
        print(counter, end='\r')
        continue
    move = random.choice(possible_moves)
    print(move)

    b.play(move)

    roll = True
    counter +=1

    print(b.position.player_off)
    # NOTE(review): the threshold is `> 1`, whereas BGBoard.is_over treats any
    # score lead as a finished game - confirm a score of exactly 1 cannot
    # leave this loop running.
    if b.match.player_1_score > 1:
        print(f"player 1 won")
        winner = 1
        break
    elif b.match.player_0_score > 1:
        print(f"player 0 won")
        winner = 0
        break

In [121]:
print(b.match.dice)

(0, 0)


In [195]:
# print(b.match.player_1_score)
for play in b.generate_plays():
    print(play)
    print("#")

Play(moves=(Move(pips=2, source=23, destination=21), Move(pips=5, source=7, destination=2)), position=Position(board_points=(-2, 0, 1, 0, 0, 5, 0, 2, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 1, 0, 1), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=23, destination=21), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 4, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 1, 0, 1), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=7, destination=5), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 0, 0, 6, 0, 3, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=5, destination=3), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 1, 0, 4, 0, 4, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), play

In [181]:
# Scratch cell: build the position-only feature vector (board points plus the
# four bar/off counters) for a freshly constructed game and print it.
# Unlike BGBoard.state, no dice or player flag is appended and nothing is
# divided by 15.
b = backgammon.Backgammon()
position = b.position
board_points = list(position.board_points)
p_bar = [position.player_bar]
p_off = [position.player_off]
op_bar = [position.opponent_bar]
op_off = [position.opponent_off]




# Concatenate in the order: board points, player bar/off, opponent bar/off
state_list = board_points+p_bar+p_off+op_bar+op_off
state_array = np.array(state_list)
print(state_array)

[-2  0  0  0  0  5  0  3  0  0  0 -5  5  0  0  0 -3  0 -5  0  0  0  0  2
  0  0  0  0]


In [162]:
b.end_game(1)

backgammon.backgammon.Backgammon('dwJRSwEAAAAAAA', 'cIoEAAAAIAAA')

In [182]:
len(state_array)

28

In [184]:
# Scratch cell: start a new game, make the opening roll and list every legal
# play as a tuple of (source, destination) pairs.
b = backgammon.Backgammon()
b.first_roll()
possible_moves = []
for play in b.generate_plays():
    # One play can consist of several checker moves
    pair = []
    for move in play.moves:
        pair.append((move.source,move.destination))
    possible_moves.append(tuple(pair))

In [185]:
possible_moves

[((23, 21), (7, 2)),
 ((23, 21), (12, 7)),
 ((7, 5), (12, 7)),
 ((5, 3), (12, 7)),
 ((5, 3), (7, 2)),
 ((12, 10), (7, 2)),
 ((7, 5), (7, 2)),
 ((12, 10), (12, 7))]

In [186]:
print(b)

                 Position ID: 4HPwATDgc/ABMA
                 Match ID   : cAgVAAAAAAAA
 +13-14-15-16-17-18------19-20-21-22-23-24-+
 | X           O    |   | O              X |
 | X           O    |   | O              X |
 | X           O    |   | O                |
 | X                |   | O                |
 | X                |   | O                |
v|                  |BAR|                  |
 | O                |   | X                |
 | O                |   | X                |
 | O           X    |   | X                |
 | O           X    |   | X              O |
 | O           X    |   | X              O |
 +12-11-10--9--8--7-------6--5--4--3--2--1-+



In [188]:
print(b.match.dice)

(2, 5)


In [88]:
possible_moves = []
for play in b.generate_plays():
    # print(play.moves)
    pair = []
    for move in play.moves:
        pair.append((move.source,move.destination))
    possible_moves.append(tuple(pair))

    
    # play = play[0]
    # first = play[0]
    # second = play [1]
    # pair_1 = (first.source,first.destination)
    # pair_2 = (second.source,second.destination)
    # possible_moves.append((pair_1,pair_2))

        
    
    # print(play.position)
    


###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###


In [89]:
print(possible_moves)

[((None, 21), (4, 1), (4, 1), (12, 9)), ((None, 21), (4, 1), (5, 2), (7, 4)), ((None, 21), (12, 9), (9, 6), (12, 9)), ((None, 21), (5, 2), (5, 2), (7, 4)), ((None, 21), (12, 9), (12, 9), (12, 9)), ((None, 21), (5, 2), (7, 4), (7, 4)), ((None, 21), (7, 4), (12, 9), (9, 6)), ((None, 21), (5, 2), (5, 2), (5, 2)), ((None, 21), (4, 1), (4, 1), (5, 2)), ((None, 21), (12, 9), (9, 6), (6, 3)), ((None, 21), (7, 4), (12, 9), (12, 9)), ((None, 21), (4, 1), (12, 9), (12, 9)), ((None, 21), (5, 2), (7, 4), (12, 9)), ((None, 21), (4, 1), (5, 2), (12, 9)), ((None, 21), (7, 4), (7, 4), (12, 9)), ((None, 21), (4, 1), (7, 4), (7, 4)), ((None, 21), (4, 1), (4, 1), (7, 4)), ((None, 21), (4, 1), (5, 2), (5, 2)), ((None, 21), (4, 1), (7, 4), (12, 9)), ((None, 21), (5, 2), (5, 2), (12, 9)), ((None, 21), (5, 2), (12, 9), (9, 6)), ((None, 21), (4, 1), (12, 9), (9, 6)), ((None, 21), (5, 2), (12, 9), (12, 9))]


In [78]:
# b.skip()
pos = ((None, 22), (12, 9))
b.play(pos)
    
    


BackgammonError: Invalid move: sV3CQQRiZ3AARw:cIgNAAAAAAAA ((None, 22), (12, 9))

In [67]:
new_pos= b.position.apply_move(None, 21).apply_move(12, 9)


In [70]:
new_pos.encode()

'sV3CQQRiZ2IADw'

In [70]:
# Scratch cell: label every recorded state with +1 when it belongs to the
# winner and -1 otherwise.
winner = 0
state_players = [0,1,0,1,0,1,0,1,0,1,0,1]
filter_array = np.array(state_players)
# A single comparison against `winner` replaces the two duplicated branches
# and keeps `final_rewards` defined even if `winner` is neither 0 nor 1.
final_rewards = [1 if state_player == winner else -1 for state_player in state_players]

In [71]:
final_rewards

[1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1]

In [73]:
final_rewards = np.array(final_rewards)
final_rewards[filter_array == 1]

array([-1, -1, -1, -1, -1, -1])

In [68]:
state_players

[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 10]

In [2]:
b = backgammon.Backgammon()
b.first_roll()

(4, 1)

AttributeError: 'Backgammon' object has no attribute 'board'

In [34]:
b.first_roll()

(1, 4)

In [3]:
print(b)
print(b.position)


                 Position ID: 4HPwATDgc/ABMA
                 Match ID   : MAAGAAAAAAAA
 +12-11-10--9--8--7-------6--5--4--3--2--1-+
 | X           O    |   | O              X |
 | X           O    |   | O              X |
 | X           O    |   | O                |
 | X                |   | O                |
 | X                |   | O                |
^|                  |BAR|                  |
 | O                |   | X                |
 | O                |   | X                |
 | O           X    |   | X                |
 | O           X    |   | X              O |
 | O           X    |   | X              O |
 +13-14-15-16-17-18------19-20-21-22-23-24-+

Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0)


In [13]:


possible_moves_positions = {}
for play in b.generate_plays():
    print(play.moves[0])
    # pair = []
    # for move in play.moves:
    #     pair.append((move.source,move.destination))
    # print(tuple(pair))
    break
    # possible_moves_positions[tuple(pair)] = self.state(play.position)


        
    # def get_moves_and_positions(self):
    #     possible_moves_positions = {}
    #     for play in self.board.generate_plays():
    #         pair = []
    #         for move in play.moves:
    #             pair.append((move.source,move.destination))
    #         possible_moves_positions[tuple(pair)] = self.state(play.position)
    #     return possible_moves_positions


Move(pips=4, source=5, destination=1)


In [6]:
pair

[(7, 3), (3, 2)]

In [58]:
print(b.position.swap_players())

Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 3, 0, 0, 0, -5, 5, 0, 0, 0, -2, -1, -4, 0, 0, 0, -1, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0)


In [61]:
1 % 2

1

In [None]:
for state in states:
    print(state*15)

In [33]:
learning_rate = 0.015

In [34]:
for i in range(10):
    learning_rate *= 0.90
    print(learning_rate)

0.0135
0.01215
0.010935
0.009841500000000001
0.008857350000000002
0.007971615000000001
0.007174453500000001
0.006457008150000001
0.005811307335000002
0.005230176601500001


In [39]:
model1 = tf.keras.models.load_model('./backgammon_RLmodel05_64_64_100.h5')
model2 = tf.keras.models.load_model('./backgammon_RLmodel05_64_64_10000.h5')

In [40]:
model1.trainable_variables

[<tf.Variable 'dense/kernel:0' shape=(31, 64) dtype=float32, numpy=
 array([[ 0.24865772, -0.10203935, -0.15886983, ..., -0.22225152,
          0.02141946, -0.11422437],
        [-0.12545873,  0.02884449,  0.22662452, ..., -0.24045987,
          0.19724675, -0.20846383],
        [-0.05151866, -0.03881101, -0.08221499, ..., -0.14624432,
          0.20926365, -0.14613411],
        ...,
        [-0.16588624, -0.1387194 ,  0.07627302, ..., -0.12613666,
         -0.15953946,  0.06095667],
        [ 0.14565748,  0.00089765, -0.0970078 , ...,  0.06033377,
          0.24657752, -0.05173407],
        [ 0.14460222, -0.01814933, -0.04799673, ...,  0.07900107,
          0.2433995 , -0.10517941]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32, numpy=
 array([ 0.02782262,  0.01611478,  0.05063552,  0.02329272, -0.00166099,
         0.00719492, -0.00584468, -0.01495863, -0.01518282,  0.01080851,
         0.0078951 ,  0.03142129, -0.01899794,  0.06092185,  0.0067733 ,
       

In [41]:
model2.trainable_variables

[<tf.Variable 'dense/kernel:0' shape=(31, 64) dtype=float32, numpy=
 array([[ 0.25598252, -0.10503063, -0.05014001, ..., -0.21671061,
          0.05244816, -0.11172025],
        [-0.07616533,  0.0381564 ,  0.2235378 , ..., -0.236818  ,
          0.21805777, -0.21494205],
        [-0.01008032, -0.04090825, -0.04120066, ..., -0.15100454,
          0.2351648 , -0.14327295],
        ...,
        [-0.16425456, -0.12189455,  0.06000654, ..., -0.1337059 ,
         -0.16070904,  0.05958709],
        [ 0.13544248,  0.00185266, -0.07016875, ...,  0.05273537,
          0.25023785, -0.04626461],
        [ 0.13141398, -0.03379642, -0.06835079, ...,  0.08885832,
          0.20393403, -0.09868057]], dtype=float32)>,
 <tf.Variable 'dense/bias:0' shape=(64,) dtype=float32, numpy=
 array([ 1.6066642e-04,  6.9072351e-02,  1.5786183e-01,  2.0008404e-02,
        -1.4154663e-02, -9.2420364e-03, -2.3906535e-02, -2.8751912e-02,
        -4.7398051e-03,  9.7969763e-02,  2.2650191e-03, -1.1017655e-02,
        -5