In [1]:
import yut.engine
from example_player import ExamplePlayer
import math
import random
import numpy as np
import random

### Minimax YeThu Player

In [29]:
distance_to_goal = np.zeros( yut.rule.FINISHED+1 )
outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=5)

for _ in range(10):
	for s in range(yut.rule.FINISHED):
		weighted_sum = 0.0
		for outcome, prob in zip( outcomes, probs ):
			pos = s
			for ys in outcome:
				pos = yut.rule.next_position( pos, ys, True )
			weighted_sum += ( 1 + distance_to_goal[pos] ) * prob 
		distance_to_goal[s] = weighted_sum

class MinimaxYethuPlayer(yut.engine.Player):
    def __init__(self, max_depth=2):
        """
        Initialize the Minimax player with configurable search depth
        
        Args:
            max_depth (int): Maximum depth to search in the game tree
        """
        self.max_depth = max_depth
    
    def _evaluate_board_state(self, my_positions, enemy_positions):
        my_duplicates = [ sum(np == p for np in my_positions) for p in my_positions ]
        enemy_duplicates = [ sum(np == p for np in enemy_positions) for p in enemy_positions ]
        # multipliers = [ 1, 0.85, 0.7, 0.55, 0.4 ] # best aginst random player
        multipliers = [ 1, 1, 0.7, 0.4, 0.3 ] # best aginst example player
     
        return - sum( distance_to_goal[p] * (multipliers[np] if p != 0 else 1) for p,np in zip(my_positions,my_duplicates) ) \
                + sum( distance_to_goal[p] * (multipliers[np] if p != 0 else 1) for p,np in zip(enemy_positions,enemy_duplicates) ) \
    
    def _is_shortcut_possible(self, position):
        shortcut_positions = {15, 22, 29} 
        return position in shortcut_positions
    
    def minimax(self, my_positions, enemy_positions, available_yutscores, depth, is_maximizing):
        """
        Minimax algorithm with robust move validation
        
        Args:
            my_positions (list): Current player's piece positions
            enemy_positions (list): Opponent's piece positions
            available_yutscores (list): Available move distances
            depth (int): Current search depth
            is_maximizing (bool): Whether it's maximizing player's turn
        
        Returns:
            tuple: (best_score, best_move)
        """
        # Terminal conditions
        if depth == 0:
            return self._evaluate_board_state(my_positions, enemy_positions), None
        
        # Determine current player's positions based on turn
        current_positions = my_positions if is_maximizing else enemy_positions
        # other_positions = enemy_positions if is_maximizing else my_positions
        
        best_score = float('-inf') if is_maximizing else float('inf')
        best_move = None
        
        # Filter out invalid move combinations beforehand
        for mal_index, mal_pos in enumerate(current_positions):
            if mal_pos == yut.rule.FINISHED:
                continue
            
    
            for ys in available_yutscores:
                shortcuts = [True, False] if self._is_shortcut_possible(mal_pos) else [False]
                
                for shortcut in shortcuts:        
                    # Simulate move with error handling
                    if is_maximizing:
                        legal_move, next_my_positions, next_enemy_positions, mal_caught = yut.rule.make_move(
                            my_positions, enemy_positions, mal_index, ys, shortcut
                        )
                    else:
                        legal_move, next_enemy_positions, next_my_positions, mal_caught = yut.rule.make_move(
                            enemy_positions, my_positions, mal_index, ys, shortcut
                        )
                    
                    if not legal_move:
                        continue
            
                    # Recursive minimax call
                    scores = []
                    outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=1)
                    for outcome, prob in zip(outcomes, probs):
                        for next_ys in outcome:
                            score, _ = self.minimax(next_my_positions, next_enemy_positions, [next_ys], depth - 1, not is_maximizing)
                      
                            scores.append(score) 
                            
                          
                    # Update best score and move
                    score = np.max(scores) if is_maximizing else np.min(scores)
                    
                    if is_maximizing:
                        if score > best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)
                    else:
                        if score < best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)
        
        return best_score, best_move

    def action(self, state):
        """
        Determine the best action for the current game state
        
        Args:
            state (tuple): Current game state 
                           (turn, my_positions, enemy_positions, available_yutscores)
        
        Returns:
            tuple: Move details (mal_index, yut_score, shortcut, extra_info)
        """
        _, my_positions, enemy_positions, available_yutscores = state
        # Run minimax to find best move
        _, best_move = self.minimax(my_positions, enemy_positions, available_yutscores, depth=self.max_depth, is_maximizing=True)
        
        # If no valid move found, return a default move
        if best_move is None:
            # Try to find any legal move
            for mal_index, mal_pos in enumerate(my_positions):
                if mal_pos == yut.rule.FINISHED:
                    continue
                for ys in available_yutscores:
                    for shortcut in [True, False]:
                        legal_move, _, _, _ = yut.rule.make_move(my_positions, enemy_positions, mal_index, ys, shortcut)
                        if legal_move:
                            return mal_index, ys, shortcut, ""
            
            # Absolute fallback
            return 0, available_yutscores[0], True, ""
        
        return best_move[0], best_move[1], best_move[2], ""
    
    
    def name(self):
        return "Robust Minimax Player"
    
    def on_my_action(self, state, my_action, result):
        pass

In [93]:
# create a game engine
engine = yut.engine.GameEngine()

# create two game players
player1 = MinimaxYethuPlayer()
player2 = ExamplePlayer()

# simulate a game between two players with a given random seed
num_trial = 100
win_rate = 0
# random_numbers = [random.randint(1, 10000) for _ in range(num_trial)]
random_numbers = [x for x in range(100)]
for i, random_seed in enumerate(random_numbers):
	winner = engine.play( player1, player2, seed=random_seed )
	if winner == 0:
		win_rate += 1
		print( str(i) + ": Player 1 won!" )
	else:
		print( str(i) + ": Player 2 won!" )

print("Player 1 wins " + str(win_rate) + " times, with winrate of " + str(win_rate/num_trial) )

0: Player 2 won!
1: Player 2 won!
2: Player 2 won!
3: Player 1 won!
4: Player 2 won!
5: Player 2 won!
6: Player 2 won!
7: Player 2 won!
8: Player 2 won!
9: Player 2 won!
10: Player 1 won!
11: Player 1 won!
12: Player 2 won!
13: Player 2 won!
14: Player 2 won!
15: Player 2 won!
16: Player 1 won!
17: Player 2 won!
18: Player 2 won!
19: Player 1 won!
20: Player 1 won!
21: Player 2 won!
22: Player 1 won!
23: Player 1 won!
24: Player 2 won!
25: Player 1 won!
26: Player 1 won!
27: Player 2 won!
28: Player 1 won!
29: Player 1 won!
30: Player 2 won!
31: Player 2 won!
32: Player 1 won!
33: Player 2 won!
34: Player 2 won!
35: Player 2 won!
36: Player 2 won!
37: Player 2 won!
38: Player 1 won!
39: Player 1 won!
40: Player 2 won!
41: Player 1 won!
42: Player 2 won!
43: Player 1 won!
44: Player 2 won!
45: Player 1 won!
46: Player 1 won!
47: Player 2 won!
48: Player 1 won!
49: Player 2 won!
50: Player 2 won!
51: Player 1 won!
52: Player 2 won!
53: Player 1 won!
54: Player 1 won!
55: Player 2 won!
56

### Minimax Akira's Player

In [2]:
import numpy as np
class RandomPlayer(yut.engine.Player):
	def name(self):
		return "Random"

	def action(self, state):
		turn, my_positions, enemy_positions, available_yutscores = state
		yutscore_to_use = np.random.choice( available_yutscores )
		available_mals = []
		if yutscore_to_use == -1:
			available_mals = [ mal_index for mal_index,mal_position in enumerate(my_positions) if mal_position != yut.rule.FINISHED and mal_position != 0 ]
		if len(available_mals) == 0:
			available_mals = [ mal_index for mal_index,mal_position in enumerate(my_positions) if mal_position != yut.rule.FINISHED ]
		mal_to_move = np.random.choice( available_mals )
		shortcut = True
		debug_msg = ""

		return mal_to_move, yutscore_to_use, shortcut, debug_msg

In [46]:
distance_to_goal = np.zeros( yut.rule.FINISHED+1 )

outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=2)

for _ in range(10):
	for s in range(yut.rule.FINISHED):
		weighted_sum = 0.0
		for outcome, prob in zip( outcomes, probs ):
			pos = s
			for ys in outcome:
				pos = yut.rule.next_position( pos, ys, True )
			weighted_sum += ( 0.05 + distance_to_goal[pos] ) * prob 
		distance_to_goal[s] = weighted_sum

class MinimaxAkiraPlayer(yut.engine.Player):
    def __init__(self, max_depth=2):
        """
        Initialize the Minimax player with configurable search depth
        
        Args:
            max_depth (int): Maximum depth to search in the game tree
        """
        self.max_depth = max_depth
    
    def _evaluate_board_state(self, my_positions, enemy_positions, mal_caught):
        my_duplicates = [sum(np == p for np in my_positions) for p in my_positions]
        enemy_duplicates = [sum(np == p for np in enemy_positions) for p in enemy_positions]
        multipliers = [1, 1, 0.7, 0.4, 0.3]  # For evaluating based on the distance

        vulnerable_map = {0: [], 1: [], 2: [0], 3:[0, 1], 4: [1, 2], 5: [2, 3], 6:[3, 4], 7:[4], 8:[6], 9: [6,7], 10: [7,8], 
                          11: [], 12:[10], 13:[], 14:[5], 15:[10,11,5,13], 16: [13,14], 17:[14], 18:[8,9], 19:[9], 20:[18], 
                          21:[18,19], 22:[19,20,16], 23:[11,12], 24:[12,15], 25:[20,21,16,17], 26:[17,21,22], 27:[22,25], 28:[25,26], 29:[26,27], 30:[]}

        capture_bonus = 0.05  # Bonus for capturing an enemy's mal
        landing_penalty = -0.01


        # Uncomment for default
        # capture_bonus = 0  # Bonus for capturing an enemy's mal
        # shortcut_bonus = 0  # Bonus for landing on a shortcut position
        # landing_penalty = 0
    
        

        # Initialize evaluation score
        evaluation = 0

        # Evaluate the player's positions
        for p, np in zip(my_positions, my_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation -= distance_to_goal[p] * multiplier
            
            # Add bonus for capturing an enemy's mal
            if p in enemy_positions:
                evaluation += capture_bonus
            # Penalize if the player's mal is 2 or 3 tiles ahead of any enemy piece
            for enemy_pos in enemy_positions:
                if enemy_pos in vulnerable_map[p]:
                    evaluation += landing_penalty*len(vulnerable_map[p])
        
        # Evaluate the enemy's positions
        for p, np in zip(enemy_positions, enemy_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation += distance_to_goal[p] * multiplier

            # Add penalty for the enemy capturing a mal
            if p in my_positions:
                evaluation += capture_bonus
            # Penalize if the enemy's mal is 2 or 3 tiles ahead of our piece
            for my_pos in my_positions:
                if my_pos in vulnerable_map[p]:
                    evaluation -= landing_penalty*len(vulnerable_map[p])

        return evaluation

    
    def _is_shortcut_possible(self, position):
        shortcut_positions = {15, 22, 29} 
        return position in shortcut_positions
    

    def minimax(self, my_positions, enemy_positions, available_yutscores, depth, alpha, beta, is_maximizing, mal_caught):

        if depth == 0:
            return self._evaluate_board_state(my_positions, enemy_positions, mal_caught), None

        current_positions = my_positions if is_maximizing else enemy_positions

        best_score = float('-inf') if is_maximizing else float('inf')
        best_move = None

        for mal_index, mal_pos in enumerate(current_positions):
            # Skip finished pieces
            if mal_pos == yut.rule.FINISHED:
                continue

            for ys in available_yutscores:
                # shortcut possibility
                shortcuts = [True, False] if self._is_shortcut_possible(mal_pos) else [False]

                for shortcut in shortcuts:
                    # Simulate
                    if is_maximizing:
                        legal_move, next_my_positions, next_enemy_positions, mal_caught = yut.rule.make_move(
                            my_positions, enemy_positions, mal_index, ys, shortcut
                        )
                    else:
                        legal_move, next_enemy_positions, next_my_positions, mal_caught = yut.rule.make_move(
                            enemy_positions, my_positions, mal_index, ys, shortcut
                        )

                    if not legal_move:
                        continue

                    # Recursive call to minimax with Alpha-Beta Pruning
                    scores = []
                    outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=1)

                    for outcome, prob in zip(outcomes, probs):
                        for next_ys in outcome:
                            score, _ = self.minimax(
                                next_my_positions, 
                                next_enemy_positions, 
                                [next_ys], 
                                depth - 1, 
                                alpha, 
                                beta, 
                                not is_maximizing,
                                mal_caught
                            )

                            
                            scores.append(score)
                    # Calculate the best score based on the player's role
                    score = np.max(scores) if is_maximizing else np.min(scores)

                    # Update best score and best move
                    if is_maximizing:
                        if score > best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)

                        # Alpha-Beta Pruning
                        alpha = max(alpha, best_score)
                        if alpha >= beta:
                            return best_score, best_move  # Prune remaining branches
                    else:
                        if score < best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)

                        # Alpha-Beta Pruning
                        beta = min(beta, best_score)
                        if beta <= alpha:
                            return best_score, best_move  # Prune remaining branches

        return best_score, best_move

    def action(self, state):
        
        _, my_positions, enemy_positions, available_yutscores = state
        # Run minimax with Alpha-Beta Pruning to find the best move
        _, best_move = self.minimax(
            my_positions, enemy_positions, available_yutscores, 
            depth=self.max_depth, 
            alpha=float('-inf'), 
            beta=float('inf'), 
            is_maximizing=True,
            mal_caught=0
        )

        # If no valid move found, return a default move
        if best_move is None:
            # Try to find any legal move
            for mal_index, mal_pos in enumerate(my_positions):
                if mal_pos == yut.rule.FINISHED:
                    continue
                for ys in available_yutscores:
                    for shortcut in [True, False]:
                        legal_move, _, _, _ = yut.rule.make_move(my_positions, enemy_positions, mal_index, ys, shortcut)
                        if legal_move:
                            return mal_index, ys, shortcut, ""

            # Absolute fallback
            return 0, available_yutscores[0], True, ""

        return best_move[0], best_move[1], best_move[2], ""

    
    
    def name(self):
        return "Minimax Player"

In [47]:
# create a game engine
engine = yut.engine.GameEngine()

# create two game players
player2 = MinimaxAkiraPlayer()
player1 = ExamplePlayer()

# simulate a game between two players with a given random seed
num_trial = 100
win_rate = 0
# random_numbers = [random.randint(1, 10000) for _ in range(num_trial)]
random_numbers = [x for x in range(100)]
for i, random_seed in enumerate(random_numbers):
	winner = engine.play( player1, player2, seed=random_seed )
	if winner == 0:
		win_rate += 1
		print( str(i) + ": Player 1 won!" )
	else:
		
		print( str(i) + ": Player 2 won!" )

print("Player 1 wins " + str(win_rate) + " times, with win rate of " + str(win_rate/num_trial) )

0: Player 2 won!
1: Player 1 won!
2: Player 2 won!
3: Player 2 won!
4: Player 2 won!
5: Player 1 won!
6: Player 1 won!
7: Player 2 won!
8: Player 1 won!
9: Player 2 won!
10: Player 2 won!
11: Player 1 won!
12: Player 2 won!
13: Player 1 won!
14: Player 2 won!
15: Player 2 won!
16: Player 2 won!
17: Player 2 won!
18: Player 2 won!
19: Player 1 won!
20: Player 2 won!
21: Player 2 won!
22: Player 1 won!
23: Player 1 won!
24: Player 2 won!
25: Player 2 won!
26: Player 2 won!
27: Player 2 won!
28: Player 1 won!
29: Player 2 won!
30: Player 2 won!
31: Player 2 won!
32: Player 2 won!
33: Player 2 won!
34: Player 2 won!
35: Player 1 won!
36: Player 1 won!
37: Player 2 won!
38: Player 1 won!
39: Player 1 won!
40: Player 2 won!
41: Player 2 won!


KeyboardInterrupt: 

### Minimax Akira Player (version 2)

In [51]:
distance_to_goal = np.zeros(yut.rule.FINISHED + 1)

proximity_weight = 0.5  # Increased weight for positions closer to the goal

outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=2)

for _ in range(10):
    for s in range(yut.rule.FINISHED):
        weighted_sum = 0.0
        for outcome, prob in zip(outcomes, probs):
            pos = s
            for ys in outcome:
                pos = yut.rule.next_position(pos, ys, True)

            # Base distance value
            base_value = 0.05 + distance_to_goal[pos]

            # Apply bonuses and penalties
            weighted_sum += base_value * prob
        distance_to_goal[s] = weighted_sum


class MinimaxAkiraPlayer2(yut.engine.Player):
    def __init__(self, max_depth=2): # Change depth does not really affect performance since it is mostly random dice outcomes

        self.max_depth = max_depth
    
    def _evaluate_board_state(self, my_positions, enemy_positions, mal_caught):
        my_duplicates = [sum(np == p for np in my_positions) for p in my_positions]
        enemy_duplicates = [sum(np == p for np in enemy_positions) for p in enemy_positions]
        multipliers = [1, 1, 0.7, 0.4, 0.3]  # For evaluating based on the distance

        #vulnerable_map (based on statistics, gae and gol appear almost 70% of the time, making 2 or 3 tiles ahead of opponent's mal a risky poition)
        vulnerable_map = {0: [], 1: [], 2: [0], 3:[0, 1], 4: [1, 2], 5: [2, 3], 6:[3, 4], 7:[4], 8:[6], 9: [6,7], 10: [7,8], 
                          11: [], 12:[10], 13:[], 14:[5], 15:[10,11,5,13], 16: [13,14], 17:[14], 18:[8,9], 19:[9], 20:[18], 
                          21:[18,19], 22:[19,20,16], 23:[11,12], 24:[12,15], 25:[20,21,16,17], 26:[17,21,22], 27:[22,25], 28:[25,26], 29:[26,27], 30:[]}

        capture_bonus = 0.07  # Bonus for capturing an enemy's mal (Second priority after finishing the furthest mal first)
        landing_penalty = -0.01

        evaluation = 0

        for p, np in zip(my_positions, my_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation -= distance_to_goal[p] * multiplier
            
            # Bonus for capturing mal
            if p in enemy_positions:
                evaluation += capture_bonus
            # Penalty for high risk squre
            for enemy_pos in enemy_positions:
                if enemy_pos in vulnerable_map[p]:
                    evaluation += landing_penalty*len(vulnerable_map[p]) # add penalty proportional to the riskiness
        
        # Evaluate the enemy's positions
        for p, np in zip(enemy_positions, enemy_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation += distance_to_goal[p] * multiplier

            # Penalty for capturing mal
            if p in my_positions:
                evaluation -= capture_bonus
            # Bonus for high risk square
            for my_pos in my_positions:
                if my_pos in vulnerable_map[p]:
                    evaluation -= landing_penalty*len(vulnerable_map[p])

        return evaluation

    
    def _is_shortcut_possible(self, position):
        shortcut_positions = {15, 22, 29} 
        return position in shortcut_positions
    

    def minimax(self, my_positions, enemy_positions, available_yutscores, depth, alpha, beta, is_maximizing, mal_caught):

        # Base case
        if depth == 0:
            return self._evaluate_board_state(my_positions, enemy_positions, mal_caught), None

        current_positions = my_positions if is_maximizing else enemy_positions
        best_score = float('-inf') if is_maximizing else float('inf')
        best_move = None

        for mal_index, mal_pos in enumerate(current_positions):

            if mal_pos == yut.rule.FINISHED:
                continue

            for ys in available_yutscores:
                shortcuts = [True, False] if self._is_shortcut_possible(mal_pos) else [False]

                for shortcut in shortcuts:
                    # Simulate the move
                    if is_maximizing:
                        legal_move, next_my_positions, next_enemy_positions, mal_caught = yut.rule.make_move(
                            my_positions, enemy_positions, mal_index, ys, shortcut
                        )
                    else:
                        legal_move, next_enemy_positions, next_my_positions, mal_caught = yut.rule.make_move(
                            enemy_positions, my_positions, mal_index, ys, shortcut
                        )

                    if not legal_move:
                        continue

                    # Recursive call to minimax with Alpha-Beta Pruning
                    scores = []
                    outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=1) # can be changed, depth 3 would be the best but it takes too long. Depth 1 and 2 show no significnt difference

                    for outcome, prob in zip(outcomes, probs):
                        for next_ys in outcome:
                            score, _ = self.minimax(
                                next_my_positions, 
                                next_enemy_positions, 
                                [next_ys], 
                                depth - 1, 
                                alpha, 
                                beta, 
                                not is_maximizing,
                                mal_caught
                            )

                            
                            scores.append(score)
                    score = np.max(scores) if is_maximizing else np.min(scores)

                    # Update best score and best move
                    if is_maximizing:
                        if score > best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)

                        # Alpha-Beta Pruning
                        alpha = max(alpha, best_score)
                        if alpha >= beta:
                            return best_score, best_move  # Prune branches
                    else:
                        if score < best_score:
                            best_score = score
                            best_move = (mal_index, ys, shortcut)

                        # Alpha-Beta Pruning
                        beta = min(beta, best_score)
                        if beta <= alpha:
                            return best_score, best_move  # Prune branches

        return best_score, best_move

    def action(self, state):
        """
        Determine the best action for the current game state.
        
        Args:
            state (tuple): Current game state 
                        (turn, my_positions, enemy_positions, available_yutscores)
        
        Returns:
            tuple: Move details (mal_index, yut_score, shortcut, extra_info)
        """
        _, my_positions, enemy_positions, available_yutscores = state
        # Run minimax with Alpha-Beta Pruning to find the best move
        _, best_move = self.minimax(
            my_positions, enemy_positions, available_yutscores, 
            depth=self.max_depth, 
            alpha=float('-inf'), 
            beta=float('inf'), 
            is_maximizing=True,
            mal_caught=0
        )

        # If no valid move found, return a default move
        if best_move is None:
            # Try to find any legal move
            for mal_index, mal_pos in enumerate(my_positions):
                if mal_pos == yut.rule.FINISHED:
                    continue
                for ys in available_yutscores:
                    for shortcut in [True, False]:
                        legal_move, _, _, _ = yut.rule.make_move(my_positions, enemy_positions, mal_index, ys, shortcut)
                        if legal_move:
                            return mal_index, ys, shortcut, ""

            # Absolute fallback
            return 0, available_yutscores[0], True, ""

        return best_move[0], best_move[1], best_move[2], ""

    
    
    def name(self):
        return "Minimax Player"

In [35]:
# create a game engine
engine = yut.engine.GameEngine()

# create two game players
player1 = MinimaxAkiraPlayer()
player2 = ExamplePlayer()

# simulate a game between two players with a given random seed
num_trial = 100
win_rate = 0
# random_numbers = [random.randint(1, 10000) for _ in range(num_trial)]
random_numbers = [x for x in range(100)]
for i, random_seed in enumerate(random_numbers):
	winner = engine.play( player1, player2, seed=random_seed )
	if winner == 0:
		win_rate += 1
		print( str(i) + ": Player 1 won!" )
	else:
		
		print( str(i) + ": Player 2 won!" )

print("Player 1 wins " + str(win_rate) + " times, with win rate of " + str(win_rate/num_trial) )

0: Player 2 won!
1: Player 2 won!
2: Player 2 won!
3: Player 2 won!
4: Player 2 won!
5: Player 2 won!
6: Player 2 won!
7: Player 2 won!
8: Player 2 won!
9: Player 1 won!
10: Player 1 won!
11: Player 1 won!
12: Player 2 won!
13: Player 2 won!
14: Player 2 won!
15: Player 2 won!
16: Player 1 won!
17: Player 2 won!
18: Player 1 won!
19: Player 1 won!
20: Player 1 won!
21: Player 2 won!
22: Player 1 won!
23: Player 2 won!
24: Player 2 won!
25: Player 1 won!
26: Player 1 won!
27: Player 2 won!
28: Player 1 won!
29: Player 1 won!
30: Player 2 won!
31: Player 2 won!
32: Player 1 won!
33: Player 2 won!
34: Player 2 won!
35: Player 2 won!
36: Player 2 won!
37: Player 2 won!
38: Player 1 won!
39: Player 1 won!
40: Player 2 won!
41: Player 1 won!
42: Player 2 won!
43: Player 1 won!
44: Player 2 won!
45: Player 1 won!
46: Player 1 won!
47: Player 2 won!
48: Player 1 won!
49: Player 2 won!
50: Player 2 won!
51: Player 1 won!
52: Player 1 won!
53: Player 1 won!
54: Player 1 won!
55: Player 2 won!
56

In [19]:
distance_to_goal = np.zeros(yut.rule.FINISHED + 1)

proximity_weight = 0.5  # Increased weight for positions closer to the goal

outcomes, probs = yut.rule.enumerate_all_cast_outcomes(depth=2)

for _ in range(10):
    for s in range(yut.rule.FINISHED):
        weighted_sum = 0.0
        for outcome, prob in zip(outcomes, probs):
            pos = s
            for ys in outcome:
                pos = yut.rule.next_position(pos, ys, True)

            # Base distance value
            base_value = 0.05 + distance_to_goal[pos]

            # Apply bonuses and penalties
            weighted_sum += base_value * prob
        distance_to_goal[s] = weighted_sum


class HeuristicPlayer(yut.engine.Player):
    def name(self):
        return "HeuristicImproved"
    
    def evaluate_score(self, my_positions, enemy_positions, throw_again):
        my_duplicates = [sum(np == p for np in my_positions) for p in my_positions]
        enemy_duplicates = [sum(np == p for np in enemy_positions) for p in enemy_positions]
        multipliers = [1, 1, 0.7, 0.4, 0.3]

        vulnerable_map = {0: [], 1: [], 2: [0], 3:[0, 1], 4: [1, 2], 5: [2, 3], 6:[3, 4], 7:[4], 8:[6], 9: [6,7], 10: [7,8], 
                          11: [], 12:[10], 13:[], 14:[5], 15:[10,11,5,13], 16: [13,14], 17:[14], 18:[8,9], 19:[9], 20:[18], 
                          21:[18,19], 22:[19,20,16], 23:[11,12], 24:[12,15], 25:[20,21,16,17], 26:[17,21,22], 27:[22,25], 28:[25,26], 29:[26,27], 30:[]}
        # Indicates which tile is risky towards other tiles

        capture_bonus = 0.07  # Bonus for capturing an enemy's mal
        shortcut_bonus = 0  # Bonus for landing on a shortcut position
        landing_penalty = -0.01

    
        evaluation = 0

        # Evaluate the player's positions
        for p, np in zip(my_positions, my_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation -= distance_to_goal[p] * multiplier
            
            # Add bonus for capturing an enemy's mal
            if throw_again>0:
                evaluation += capture_bonus
            # Penalize risky tiles
            for enemy_pos in enemy_positions:
                if enemy_pos in vulnerable_map[p]:
                    evaluation += landing_penalty*len(vulnerable_map[p])
        
        # Evaluate the enemy's positions
        for p, np in zip(enemy_positions, enemy_duplicates):
            multiplier = multipliers[np] if p != 0 else 1
            evaluation += distance_to_goal[p] * multiplier

            # Add penalty for the enemy capturing our mal
            if throw_again>0:
                evaluation -= capture_bonus
            # Penalize risky tiles
            for my_pos in my_positions:
                if my_pos in vulnerable_map[p]:
                    evaluation -= landing_penalty*len(vulnerable_map[p])

        return evaluation
    
    def _is_shortcut_possible(self, position):
        shortcut_positions = {15, 22, 29} 
        return position in shortcut_positions

    def action(self, state):
        turn, my_positions, enemy_positions, available_yutscores = state

        # Evaluate all possible moves and choose the best one
        best_score = -float('inf')
        best_action = None

        for mi, mp in enumerate(my_positions):
            if mp == yut.rule.FINISHED:
                continue
            for ys in available_yutscores:
                shortcuts = [True, False] if self._is_shortcut_possible(mi) else [False]

                for shortcut in shortcuts:
                    legal_move, next_my_positions, next_enemy_positions, num_mals_caught = yut.rule.make_move(
                        my_positions, enemy_positions, mi, ys, shortcut
                    )
                    if legal_move:
                        # Calculate the score for this move
                        throw_again = num_mals_caught > 0
                        score = self.evaluate_score(next_my_positions, next_enemy_positions, throw_again)

                        # Update best move
                        if score > best_score:
                            best_score = score
                            best_action = (mi, ys, shortcut)

        # Return the best action
        if best_action is not None:
            return best_action[0], best_action[1], best_action[2], ""

        # Fallback (no legal moves)
        return 0, available_yutscores[0], False, ""

In [66]:
# create a game engine
engine = yut.engine.GameEngine()

# create two game players
player2 = MinimaxAkiraPlayer()
player1 = MinimaxAkiraPlayer2()

# simulate a game between two players with a given random seed
num_trial = 100
win_rate = 0
# random_numbers = [random.randint(1, 10000) for _ in range(num_trial)]
random_numbers = [x for x in range(num_trial)]
for i, random_seed in enumerate(random_numbers):
	winner = engine.play( player1, player2, seed=random_seed )
	if winner == 0:
		win_rate += 1
		print( str(i) + ": Player 1 won!" )
	else:
		print( str(i) + ": Player 2 won!" )

print("Player 1 wins " + str(win_rate) + " times, with winrate of " + str(win_rate/num_trial) )

0: Player 1 won!
1: Player 1 won!
2: Player 2 won!
3: Player 2 won!
4: Player 2 won!
5: Player 1 won!
6: Player 1 won!
7: Player 2 won!
8: Player 2 won!
9: Player 2 won!
10: Player 1 won!
11: Player 1 won!
12: Player 2 won!
13: Player 1 won!
14: Player 2 won!
15: Player 2 won!
16: Player 2 won!
17: Player 2 won!
18: Player 2 won!
19: Player 1 won!
20: Player 2 won!
21: Player 2 won!
22: Player 2 won!
23: Player 1 won!
24: Player 2 won!
25: Player 1 won!
26: Player 2 won!
27: Player 2 won!
28: Player 1 won!
29: Player 1 won!
30: Player 2 won!
31: Player 2 won!
32: Player 2 won!
33: Player 2 won!
34: Player 2 won!
35: Player 1 won!
36: Player 1 won!
37: Player 2 won!
38: Player 1 won!
39: Player 1 won!
40: Player 2 won!
41: Player 1 won!
42: Player 2 won!
43: Player 1 won!
44: Player 2 won!
45: Player 1 won!
46: Player 1 won!
47: Player 2 won!
48: Player 1 won!
49: Player 2 won!
50: Player 2 won!
51: Player 2 won!
52: Player 1 won!
53: Player 2 won!
54: Player 1 won!
55: Player 2 won!
56