In [1]:
import backgammon
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout

# Print how many GPU devices TensorFlow detects; 1 or more means a GPU is available.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

class BGBoard:
    """Self-play wrapper around a ``backgammon.Backgammon`` game.

    Exposes the legal plays as plain tuples, encodes positions as flat
    numeric vectors, and keeps track of who moved and who won.
    """

    def __init__(self):
        # Game engine, with start() called on it.
        self.board = backgammon.Backgammon()
        self.board.start()
        # match.player as read once right after start(); this class never refreshes it.
        self.turn = self.board.match.player
        # -1 until is_over() sees unequal scores, then 0 or 1.
        self.winner = -1
        # match.turn.value captured at the start of every make_move() call, in call order.
        self.state_players = []

    def get_moves_and_positions(self):
        """Map every play offered by the engine to the encoded resulting position.

        Returns:
            dict: each key is a tuple of ``(source, destination)`` pairs, one
            pair per checker move of the play; each value is
            ``self.state(play.position)`` for that play.
        """
        return {
            tuple((move.source, move.destination) for move in play.moves): self.state(play.position)
            for play in self.board.generate_plays()
        }

    def get_possible_moves(self, possible_moves_positions:dict):
        """Return the keys of ``possible_moves_positions`` as a list (insertion order)."""
        return list(possible_moves_positions)

    def state(self,position):
        """Encode ``position`` plus the current dice and player as a 1-D array.

        The vector is the board points, followed by player bar, player off,
        opponent bar, opponent off, the two dice and ``match.player.value``.
        Every entry is divided by 15.

        Args:
            position: object exposing ``board_points``, ``player_bar``,
                ``player_off``, ``opponent_bar`` and ``opponent_off``.

        Returns:
            numpy.ndarray: the scaled feature vector.
        """
        match = self.board.match
        features = list(position.board_points)
        features += [
            position.player_bar,
            position.player_off,
            position.opponent_bar,
            position.opponent_off,
            match.dice[0],
            match.dice[1],
            match.player.value,
        ]
        return np.array(features) / 15

    def make_move(self, move):
        """Apply ``move`` (or skip the turn) and roll for the next turn if needed.

        Args:
            move: tuple of ``(source, destination)`` tuples,
                e.g. ``((22, 19), (23, 21))``, or ``None`` to skip the turn.

        Returns:
            The winner (0 or 1) when the game is over after this move,
            otherwise ``None``.
        """
        # Remember who is on turn before the engine state changes.
        self.state_players.append(int(self.board.match.turn.value))

        # Identity test: `== None` would go through __eq__, which misbehaves
        # for array-like moves (element-wise comparison, ambiguous truth value).
        if move is None:
            self.board.skip()
        else:
            self.board.play(move)

        if self.is_over():
            return self.winner

        # Dice reading (0, 0) means nothing has been rolled for the next turn yet.
        if self.board.match.dice == (0, 0):
            self.board.roll()
        return None

    def is_over(self):
        """Return True once the two scores differ, recording the leader in ``self.winner``."""
        p0_score = self.board.match.player_0_score
        p1_score = self.board.match.player_1_score

        if p0_score == p1_score:
            return False
        self.winner = 0 if p0_score > p1_score else 1
        return True

    # def get_reward(self):
    #     player = self.board.match.player
    #     if winner != -1:
    #         if winner == player:
    #             return torch.tensor(1,device=device, dtype=torch.float32)
    #         elif winner != player:
    #             return torch.tensor(-1,device=device, dtype=torch.float32)
        

Num GPUs Available:  1


In [2]:
# Value network: 31 board-state features in, a single tanh-bounded value out.
model = Sequential()
model.add(Dense(96, activation='relu', input_shape=(31,)))  # 31 features for board state
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation="tanh"))  # Output: estimated value of the state

optimizer = Adam(learning_rate=0.00002)

# Compile with a mean-squared-error loss
model.compile(loss='mean_squared_error', optimizer=optimizer)

# TD(lambda) hyper-parameters
lambda_ = 0.7  # Eligibility trace decay
alpha = 0.1    # Learning rate
gamma = 0.99   # Discount factor

        
    


def td_lambda_update(states, rewards, model, alpha=0.00002, gamma=gamma, lambda_=lambda_):
    """
    Perform online TD(λ) updates on the model over one trajectory.

    For every state the TD error
    ``delta_t = rewards[t] + gamma * V(states[t + 1]) - V(states[t])``
    is computed, the eligibility traces are decayed by ``gamma * lambda_`` and
    incremented with the gradient of ``V(states[t])``, and each trainable
    variable is moved by ``alpha * delta_t * trace``.

    The last state has no successor in ``states``; it is treated as leading to
    a terminal state of value 0, so its TD error is ``rewards[-1] - V(states[-1])``.
    Without this step the final reward would never influence the weights.

    Args:
        states: Sequence of game states (numpy arrays), in visiting order.
        rewards: Sequence of rewards, ``rewards[t]`` belonging to ``states[t]``;
            must be at least as long as ``states``.
        model: The neural network model; called directly and updated in place.
        alpha: Learning rate.
        gamma: Discount factor.
        lambda_: Eligibility trace decay factor.

    Returns:
        None. The model's trainable variables are modified in place.
    """
    weights = model.trainable_variables
    # One trace per trainable variable, same shape as the variable.
    eligibility_traces = [tf.zeros_like(var) for var in weights]
    last_index = len(states) - 1

    for t in range(len(states)):
        with tf.GradientTape() as tape:
            # Only V(s_t) has to be recorded: the traces use its gradient alone.
            V_t = model(tf.convert_to_tensor([states[t]], dtype=tf.float32))  # Shape: (1, 1)

        if t < last_index:
            V_next = tf.squeeze(
                model(tf.convert_to_tensor([states[t + 1]], dtype=tf.float32))
            )
        else:
            # Terminal transition: nothing follows the last recorded state.
            V_next = 0.0

        # TD error for this step
        delta_t = rewards[t] + gamma * V_next - tf.squeeze(V_t)

        gradients = tape.gradient(V_t, weights)

        for i, grad in enumerate(gradients):
            if grad is None:  # Variable did not contribute to V_t
                continue
            eligibility_traces[i] = gamma * lambda_ * eligibility_traces[i] + grad
            weights[i].assign_add(alpha * delta_t * eligibility_traces[i])


In [3]:

def play_game(model,random_threshold=0.75):
    """Play one self-play game and split the visited states by outcome.

    On every turn the current position is encoded and stored. When plays are
    available, a random one is taken with probability ``random_threshold``;
    otherwise the play whose resulting state gets the highest ``model``
    prediction is taken. When no play is available the turn is skipped.

    Args:
        model: object with a ``predict`` method applied to a batch of states.
        random_threshold: probability of choosing a random play.

    Returns:
        tuple: ``(states_winner, states_loser, final_rewards_winner,
        final_rewards_loser)``. The state arrays hold the states recorded on
        the winner's / loser's turns; the reward lists are all zeros except
        for the last entry, which is 1 for the winner and -1 for the loser.
    """
    game = BGBoard()
    visited = []

    while not game.is_over():
        # Record the position faced by the side to move before it acts.
        visited.append(game.state(game.board.position))

        candidates = game.get_moves_and_positions()
        legal_plays = game.get_possible_moves(candidates)

        if candidates == {}:
            chosen = None
        elif random.uniform(0,1)<random_threshold:
            chosen = random.choice(legal_plays)
        else:
            # Score every resulting state in one batch and take the best.
            after_states = np.array([candidates[play] for play in legal_plays])
            scores = model.predict(after_states).flatten()
            chosen = legal_plays[np.argmax(scores)]
        game.make_move(chosen)

    movers = np.array(game.state_players)
    visited = np.array(visited)
    states_winner = visited[movers == game.winner]
    states_loser = visited[movers != game.winner]

    winner_turns = game.state_players.count(game.winner)
    loser_turns = len(game.state_players) - winner_turns
    final_rewards_winner = [0] * winner_turns
    final_rewards_loser = [0] * loser_turns
    # Only the last state of each side carries the outcome.
    final_rewards_winner[-1] = 1
    final_rewards_loser[-1] = -1
    return states_winner, states_loser, final_rewards_winner, final_rewards_loser

    

In [5]:
# Run training games numbered from `last_epoch` up to (but excluding) `epochs`.
# NOTE(review): nothing in this cell reloads weights, so `model` is assumed to already
# hold the state reached at `last_epoch` — confirm before resuming.
last_epoch = 909
epochs = 2500
# Probability with which play_game picks a random play instead of the model's top-rated one.
random_threshold = 0.3
for epoch in range(last_epoch, epochs):  # Number of training games
    print(f'progress: {epoch}/{epochs}, threshold: {random_threshold} ',end='\r')
    # Generate a game through self-play
    states_winner,states_loser, rewards_winner, rewards_loser = play_game(model,random_threshold)
    # The random-play probability grows by 0.0002 after every game.
    # NOTE(review): this makes play more random over time — confirm that is intended.
    random_threshold += 0.0002
    
    # Perform TD(λ) updates, once on the winner's trajectory and once on the loser's
    td_lambda_update(states_winner, rewards_winner, model)
    td_lambda_update(states_loser,rewards_loser,model)

    # Save a checkpoint every 100 games (no evaluation happens here)
    if epoch % 100 == 0:
        model.save(f"backgammon_RLmodel_64_02_{epoch}.h5")
    

progress: 909/2500, threshold: 0.48179999999997997 


KeyboardInterrupt



In [4]:
def evaluate_play_game(model_dumb,model_smart):
    """Play one game between two value networks, both choosing greedily.

    ``model_dumb`` always plays as player 0 and ``model_smart`` as player 1,
    so the returned scores map directly onto the two models. The model is
    selected from ``match.turn.value`` — the same field ``BGBoard.make_move``
    records as the mover — rather than from loop parity, because the side
    that moves first is not necessarily player 0.

    Args:
        model_dumb: model with a ``predict`` method; controls player 0.
        model_smart: model with a ``predict`` method; controls player 1.

    Returns:
        tuple: ``(p0_score, p1_score)`` read from the match once the game is
        over, i.e. (``model_dumb``'s score, ``model_smart``'s score).
    """
    b = BGBoard()
    while not b.is_over():
        on_turn = int(b.board.match.turn.value)
        model = model_dumb if on_turn == 0 else model_smart

        moves_positions = b.get_moves_and_positions()
        possible_moves = b.get_possible_moves(moves_positions)

        if moves_positions:
            # Create a batch of next states for all possible moves
            next_states = np.array([moves_positions[move] for move in possible_moves])

            # Predict values for all possible moves in a single batch
            move_values = model.predict(next_states).flatten()  # Flatten to 1D array

            best_move = possible_moves[np.argmax(move_values)]
        else:
            # No legal play: make_move(None) skips the turn.
            best_move = None
        b.make_move(best_move)

    p0_score = b.board.match.player_0_score
    p1_score = b.board.match.player_1_score
    return p0_score,p1_score
    

In [6]:
# Load two saved checkpoints of the value network (file names ending in 100 and 900).
model_dumb = tf.keras.models.load_model('./backgammon_RLmodel_64_02_100.h5')
model_smart = tf.keras.models.load_model('./backgammon_RLmodel_64_02_900.h5')
# Per-game scores returned by evaluate_play_game; one entry is appended per game below.
p0_scores = []
p1_scores = []
def calculate_wins(p):
    """Return how many entries of ``p`` are strictly greater than zero."""
    return sum(1 for score in p if score > 0)
# Play up to 400 evaluation games, storing both scores of each game and
# rewriting a running "<p0 wins>/<p1 wins>" tally on a single output line.
for i in range(400):
    p0,p1 = evaluate_play_game(model_dumb,model_smart)
    p0_scores.append(p0)
    p1_scores.append(p1)
    print(f"{calculate_wins(p0_scores)}/{calculate_wins(p1_scores)}", end='\r')
    


179/166


KeyboardInterrupt



In [31]:
# Total score accumulated by each side over the recorded games.
print(sum(p0_scores),sum(p1_scores))
# NOTE(review): these two lists are not read by any code visible in this notebook.
all_wins_0 = []
all_wins_1 =  []
def calculate_wins(p):
    """Count the scores in ``p`` that are positive (strictly above zero)."""
    wins = 0
    for score in p:
        wins += 1 if score > 0 else 0
    return wins

102 140


In [68]:
# Rebind both model names to None, then delete the names from the namespace.
model_dumb = None
model_smart = None 
del model_dumb,model_smart

In [33]:
# Number of recorded games in which each side's score was above zero.
print(calculate_wins(p0_scores))
print(calculate_wins(p1_scores))


49
64


In [39]:
# Fresh BGBoard for manual inspection; BGBoard.__init__ already calls start() on the engine.
b = BGBoard()
# b.start()


In [41]:
# Print the text rendering of the wrapped engine object.
print(b.board)

                 Position ID: 4HPwATDgc/ABMA
                 Match ID   : cAltAAAAAAAA
 +13-14-15-16-17-18------19-20-21-22-23-24-+
 | X           O    |   | O              X |
 | X           O    |   | O              X |
 | X           O    |   | O                |
 | X                |   | O                |
 | X                |   | O                |
v|                  |BAR|                  |
 | O                |   | X                |
 | O                |   | X                |
 | O           X    |   | X                |
 | O           X    |   | X              O |
 | O           X    |   | X              O |
 +12-11-10--9--8--7-------6--5--4--3--2--1-+



In [42]:
# Manual single-turn probe: rate every legal play on board `b` with `model_smart`
# and pick the top-rated one.
# NOTE(review): `states` is only ever defined inside functions in the cells shown, and
# `model_smart` is deleted in another cell — this cell depends on leftover kernel state.
position = b.board.position
current_state = b.state(position)
# Append the current state (for the current player)
states.append(current_state)

moves_positions = b.get_moves_and_positions()
possible_moves = b.get_possible_moves(moves_positions)
# Value of match.turn at this point; only referenced by the commented-out print below.
first_player = b.board.match.turn.value
# if i == 1:
    # print(f'{first_player} is player_dumb')
move_values = []

if moves_positions != {}:

    # Create a batch of next states for all possible moves
    next_states = np.array([moves_positions[move] for move in possible_moves])
    
    # Predict values for all possible moves in a single batch
    move_values = model_smart.predict(next_states).flatten()  # Flatten to 1D array

    # Highest predicted value wins
    best_move = possible_moves[np.argmax(move_values)]
else:
    # No legal play available
    best_move = None

In [47]:
# Show the candidate plays and the model's predicted value for each (same order).
print(possible_moves)
print(move_values)

[((7, 5), (12, 9)), ((12, 10), (10, 7)), ((12, 10), (23, 20)), ((7, 5), (23, 20)), ((23, 21), (7, 4)), ((23, 21), (12, 9)), ((23, 21), (5, 2)), ((5, 3), (23, 20)), ((5, 3), (7, 4)), ((12, 10), (5, 2)), ((5, 3), (12, 9)), ((5, 3), (5, 2)), ((23, 21), (23, 20)), ((7, 5), (7, 4)), ((7, 5), (5, 2)), ((12, 10), (12, 9)), ((12, 10), (7, 4))]
[-0.06178284 -0.08350936 -0.0698778  -0.06428883 -0.04922668 -0.04423356
 -0.05701607 -0.10789806 -0.1091629  -0.07325294 -0.1095074  -0.11514501
 -0.06106399 -0.06847084 -0.06826864 -0.06854022 -0.07058718]


In [11]:
# Add 0.0002 to 0.7 one thousand times (the same step the training loop adds to random_threshold).
k = 0.7
for i in range(1000):
    k += 0.0002

In [12]:
# Show the accumulated value (repeated float addition, so not exactly 0.9).
print(k)

0.8999999999999779


In [None]:
# Play random moves on `b` until one side's score exceeds 1.
# NOTE(review): this cell calls roll/generate_plays/skip/play directly on `b`, which the
# BGBoard class in this notebook does not define — presumably `b` is a
# backgammon.Backgammon instance here; confirm which cell created it.
counter = 0
roll = False
winner = None
while winner == None:
    
    possible_moves = []
    # Roll only after at least one play has been made in this loop.
    if roll == True:
        b.roll()
        
    # Collect every legal play as a tuple of (source, destination) pairs.
    for play in b.generate_plays():
        pair = []
        for move in play.moves:
            pair.append((move.source,move.destination))
        possible_moves.append(tuple(pair))
    # No legal play: skip the turn and go around again.
    # NOTE(review): `roll` is not changed on this path — confirm the loop cannot spin here.
    if len(possible_moves) < 1:
        b.skip()
        counter +=1
        print(counter, end='\r')
        continue
    move = random.choice(possible_moves)
    print(move)
    
    b.play(move)
    
    roll = True
    counter +=1
    
    print(b.position.player_off)
    # NOTE(review): the loop ends only when a score is strictly greater than 1, whereas
    # BGBoard.is_over treats any score difference as game over — confirm the threshold.
    if b.match.player_1_score > 1:
        print(f"player 1 won")
        winner = 1
        break
    elif b.match.player_0_score > 1:
        print(f"player 0 won")
        winner = 0
        break

In [121]:
# Show the dice currently stored on the match.
print(b.match.dice)

(0, 0)


In [195]:
# print(b.match.player_1_score)
for play in b.generate_plays():
    print(play)
    print("#")

Play(moves=(Move(pips=2, source=23, destination=21), Move(pips=5, source=7, destination=2)), position=Position(board_points=(-2, 0, 1, 0, 0, 5, 0, 2, 0, 0, 0, -5, 5, 0, 0, 0, -3, 0, -5, 0, 0, 1, 0, 1), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=23, destination=21), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 0, 0, 5, 0, 4, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 1, 0, 1), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=7, destination=5), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 0, 0, 6, 0, 3, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), player_bar=0, player_off=0, opponent_bar=0, opponent_off=0))
#
Play(moves=(Move(pips=2, source=5, destination=3), Move(pips=5, source=12, destination=7)), position=Position(board_points=(-2, 0, 0, 1, 0, 4, 0, 4, 0, 0, 0, -5, 4, 0, 0, 0, -3, 0, -5, 0, 0, 0, 0, 2), play

In [181]:
# Build the position part of the state vector by hand from a new Backgammon object:
# board points followed by player bar, player off, opponent bar, opponent off
# (no dice, no current player, no division by 15).
b = backgammon.Backgammon()
position = b.position
board_points = list(position.board_points)
p_bar = [position.player_bar]
p_off = [position.player_off]
op_bar = [position.opponent_bar]
op_off = [position.opponent_off]




state_list = board_points+p_bar+p_off+op_bar+op_off
state_array = np.array(state_list)
print(state_array)

[-2  0  0  0  0  5  0  3  0  0  0 -5  5  0  0  0 -3  0 -5  0  0  0  0  2
  0  0  0  0]


In [162]:
# Calls the engine's end_game with argument 1.
# NOTE(review): presumably this forces the game to end — confirm the argument's meaning
# against the backgammon package.
b.end_game(1)

backgammon.backgammon.Backgammon('dwJRSwEAAAAAAA', 'cIoEAAAAIAAA')

In [182]:
# Length of the hand-built vector: the board points plus the four bar/off counters.
len(state_array)

28

In [184]:
# New game: call first_roll(), then list every legal play as a tuple of
# (source, destination) pairs.
b = backgammon.Backgammon()
b.first_roll()
possible_moves = []
for play in b.generate_plays():
    pair = []
    for move in play.moves:
        pair.append((move.source,move.destination))
    possible_moves.append(tuple(pair))

In [185]:
# Display the plays collected in the previous cell.
possible_moves

[((23, 21), (7, 2)),
 ((23, 21), (12, 7)),
 ((7, 5), (12, 7)),
 ((5, 3), (12, 7)),
 ((5, 3), (7, 2)),
 ((12, 10), (7, 2)),
 ((7, 5), (7, 2)),
 ((12, 10), (12, 7))]

In [186]:
# Print the text rendering of the game object.
print(b)

                 Position ID: 4HPwATDgc/ABMA
                 Match ID   : cAgVAAAAAAAA
 +13-14-15-16-17-18------19-20-21-22-23-24-+
 | X           O    |   | O              X |
 | X           O    |   | O              X |
 | X           O    |   | O                |
 | X                |   | O                |
 | X                |   | O                |
v|                  |BAR|                  |
 | O                |   | X                |
 | O                |   | X                |
 | O           X    |   | X                |
 | O           X    |   | X              O |
 | O           X    |   | X              O |
 +12-11-10--9--8--7-------6--5--4--3--2--1-+



In [188]:
# Show the dice currently stored on the match.
print(b.match.dice)

(2, 5)


In [88]:
# Rebuild the list of legal plays for the current state of `b`, each play
# as a tuple of (source, destination) pairs.
possible_moves = []
for play in b.generate_plays():
    # print(play.moves)
    pair = []
    for move in play.moves:
        pair.append((move.source,move.destination))
    possible_moves.append(tuple(pair))

    
    # play = play[0]
    # first = play[0]
    # second = play [1]
    # pair_1 = (first.source,first.destination)
    # pair_2 = (second.source,second.destination)
    # possible_moves.append((pair_1,pair_2))

        
    
    # print(play.position)
    


###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###
###


In [89]:
# Print the collected plays; in the recorded output some moves have source None.
print(possible_moves)

[((None, 21), (4, 1), (4, 1), (12, 9)), ((None, 21), (4, 1), (5, 2), (7, 4)), ((None, 21), (12, 9), (9, 6), (12, 9)), ((None, 21), (5, 2), (5, 2), (7, 4)), ((None, 21), (12, 9), (12, 9), (12, 9)), ((None, 21), (5, 2), (7, 4), (7, 4)), ((None, 21), (7, 4), (12, 9), (9, 6)), ((None, 21), (5, 2), (5, 2), (5, 2)), ((None, 21), (4, 1), (4, 1), (5, 2)), ((None, 21), (12, 9), (9, 6), (6, 3)), ((None, 21), (7, 4), (12, 9), (12, 9)), ((None, 21), (4, 1), (12, 9), (12, 9)), ((None, 21), (5, 2), (7, 4), (12, 9)), ((None, 21), (4, 1), (5, 2), (12, 9)), ((None, 21), (7, 4), (7, 4), (12, 9)), ((None, 21), (4, 1), (7, 4), (7, 4)), ((None, 21), (4, 1), (4, 1), (7, 4)), ((None, 21), (4, 1), (5, 2), (5, 2)), ((None, 21), (4, 1), (7, 4), (12, 9)), ((None, 21), (5, 2), (5, 2), (12, 9)), ((None, 21), (5, 2), (12, 9), (9, 6)), ((None, 21), (4, 1), (12, 9), (9, 6)), ((None, 21), (5, 2), (12, 9), (12, 9))]


In [78]:
# b.skip()
# Try a two-move play whose first move has source None.
# The recorded run of this cell raised BackgammonError: Invalid move.
pos = ((None, 22), (12, 9))
b.play(pos)
    
    


BackgammonError: Invalid move: sV3CQQRiZ3AARw:cIgNAAAAAAAA ((None, 22), (12, 9))

In [67]:
# Chain two apply_move calls on the current position: (None, 21) and then (12, 9).
new_pos= b.position.apply_move(None, 21).apply_move(12, 9)


In [70]:
# Display the encoded string form of the resulting position.
new_pos.encode()

'sV3CQQRiZ2IADw'

In [70]:
# Experiment with a different reward scheme: every state gets +1 if it belongs
# to the winner and -1 otherwise (play_game instead rewards only the last state).
winner = 0
state_players = [0,1,0,1,0,1,0,1,0,1,0,1]
filter_array = np.array(state_players)
if winner == 0:
    final_rewards = [+1 if state_player == 0 else -1 for state_player in state_players]
elif winner == 1:
    final_rewards = [+1 if state_player == 1 else -1 for state_player in state_players]

In [71]:
# Display the per-state rewards built in the previous cell.
final_rewards

[1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1]

In [73]:
# Convert to an array and keep only the rewards of the states played by player 1.
final_rewards = np.array(final_rewards)
final_rewards[filter_array == 1]

array([-1, -1, -1, -1, -1, -1])

In [68]:
# Display the list of players per state.
state_players

[0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 10]