-
Notifications
You must be signed in to change notification settings - Fork 0
/
agents.py
401 lines (308 loc) · 13.2 KB
/
agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
from abc import ABC, abstractmethod
from os import environ
import random
import time
environ['PYGAME_HIDE_SUPPORT_PROMPT'] = '1'
from pygame import MOUSEBUTTONDOWN
from heuristics import HEURISTICS
from bitboard_utils import get_moves_index, possible_moves, get_player_board
from constants import CELL_SIZE, INT16_NEGINF, INT16_POSINF, MCTS_BATCH_SIZE
import minmax
import search_tree
class Player(ABC):
"""
Abstract base class for a player in the Othello game.
Attributes:
id (int): The identifier for the player, either 1 or 2.
"""
def __init__(self):
self.id = None
def __repr__(self):
"""
Returns a string representation of the Player object.
Returns:
str: A string representation including the player's class name and ID.
"""
class_name = self.__class__.__name__
return f"{class_name}(id={self.id})"
def set_id(self, player_id):
"""
Sets the player's ID.
Parameters:
player_id (int): The ID to be set for the player.
"""
if player_id != 1 and player_id != 2:
raise ValueError("Player ID must be either 1 or 2.")
self.id = player_id
@abstractmethod
def get_move(self, board, events):
"""
Abstract method to be implemented by subclasses to get the player's move.
Parameters:
board (UniTuple(uint64, 2)): The current state of the game board.
events (list of pygame.event.Event): A list of Pygame events.
Returns:
int: The chosen move as a bitboard index, or None if no valid move is chosen.
"""
raise NotImplementedError('You must implement get_move()')
class Agent(Player):
"""
Class for a AI Agent in the Othello game.
"""
@abstractmethod
def copy(self):
"""
Returns a new instance of the Player with the same parameters as the current instance.
Used for simulations.
Returns:
Agent: A new instance with the same parameters.
"""
raise NotImplementedError('You must implement copy()')
class HumanPlayer(Player):
"""
Class for a human player in the Othello game.
"""
def get_move(self, board, events):
"""
Gets the move from the human player based on mouse input.
Parameters:
board (UniTuple(uint64, 2)): The current state of the game board.
events (list of pygame.event.Event): A list of Pygame events.
Returns:
int: The chosen move as a bitboard index, or None if no valid move is chosen.
"""
player_bb, opponent_bb = get_player_board(board, self.id)
empty_squares = (player_bb | opponent_bb) ^ 0xFFFFFFFFFFFFFFFF
possible_moves_bb_player = possible_moves(player_bb, opponent_bb, empty_squares)
valid_moves = get_moves_index(possible_moves_bb_player)
for event in events:
if event.type == MOUSEBUTTONDOWN and event.button == 1:
x, y = event.pos
row = y // CELL_SIZE
col = x // CELL_SIZE
bb_indice = row * 8 + col
if bb_indice in valid_moves:
return bb_indice
return None
class RandomAgent(Agent):
"""
Class for a Random Agent in the Othello game.
"""
def __init__(self, verbose=False):
self.verbose = verbose
def get_move(self, board, events):
"""
Gets a random move from the possible moves.
Parameters:
board (UniTuple(uint64, 2)): The current state of the game board.
events (list of pygame.event.Event): A list of Pygame events.
Returns:
int: The chosen move as a bitboard index, or None if no valid move is chosen.
"""
player_bb, opponent_bb = get_player_board(board, self.id)
empty_squares = (player_bb | opponent_bb) ^ 0xFFFFFFFFFFFFFFFF
possible_moves_bb_player = possible_moves(player_bb, opponent_bb, empty_squares)
valid_moves = get_moves_index(possible_moves_bb_player)
move = None
if valid_moves.shape[0] > 0:
move = random.choice(valid_moves)
if self.verbose == True:
print(f"Player {self.id} --> {move}")
return move
def copy(self):
"""
Returns a new instance of RandomAgent.
Returns:
RandomAgent: A new instance with the same parameters.
"""
return RandomAgent()
class MinmaxAgent(Agent):
"""
Class for a MinMax Agent in the Othello game.
"""
def __init__(self, depth = 5, time_limit = None, heuristic=HEURISTICS.HYBRID, verbose=False):
"""
Initializes the MinimaxAgent with a specified search depth.
Parameters:
depth (int): The depth to which the Minimax algorithm will search.
time_limit (float or int): If defined, Iterative Deepening will be applied with this as time constraint.
verbose (bool): If True, prints debug information during search.
"""
self.depth = depth
self.time_limit = time_limit
self.heuristic = heuristic
self.verbose = verbose
self.bot = None
def get_move(self, board, events):
"""
Determines the best move for the player using the Minimax algorithm.
Parameters:
board (UniTuple(uint64, 2)): The current game board.
events (list): A list of game events.
Returns:
int: The best move for the player.
"""
if self.bot is None:
self.bot = minmax.Minmax(self.id, self.heuristic)
if self.time_limit:
best_score, best_move = self.iterative_deepening_timed(board)
else:
best_score, best_move = self.bot.negamax(board, self.depth, INT16_NEGINF, INT16_POSINF, 1)
if self.verbose:
print(f"Player {self.id} --> {best_move}/{best_score:<6}")
return best_move
def iterative_deepening_timed(self, board):
"""
Adds the Iterative Deepening Process to the Negamax algorithm with time constraint.
Parameters:
board (UniTuple(uint64, 2)): The current game board.
Returns:
tuple: A tuple containing the evaluation score and the best move.
"""
start_time = time.perf_counter()
best_move = None
best_score = 0
reached_depth = -1
times = []
for depth in range(1, 20):
iteration_start_time = time.perf_counter()
best_score, best_move = self.mtdf(board, best_score, depth)
reached_depth = depth
iteration_end_time = time.perf_counter()
iteration_time = iteration_end_time - iteration_start_time
times.append(iteration_time)
elapsed_time = iteration_end_time - start_time
remaining_time = self.time_limit - elapsed_time
# Copmute the geometric progression to estimate the time for the next iteration based on the times of previous iterations.
if len(times) > 1:
ratios = [times[i] / times[i - 1] for i in range(1, len(times))]
avg_ratio = sum(ratios) / len(ratios)
else:
avg_ratio = 2 # Assume an initial doubling if we don't have enough data yet
estimated_next_iteration_time = times[-1] * avg_ratio
if remaining_time < estimated_next_iteration_time + 0.1:
break
if self.verbose:
print(f"Player {self.id} --> {best_move}/{best_score:<6} (time: {time.perf_counter() - start_time:<5.2f}, reached depth: {reached_depth:<2})")
return best_score, best_move
def mtdf(self, board, f, depth):
"""
Implements the MTD(f) algorithm to determine the best move.
Parameters:
board (UniTuple(uint64, 2)): The current game board.
f (int16): The first guess for the best value.
depth (int16): The maximum search depth.
Returns:
tuple: A tuple containing the evaluation score and the best move.
"""
g = f
best_move = -1
upperBound = INT16_POSINF
lowerBound = INT16_NEGINF
while lowerBound < upperBound:
beta = max(g, lowerBound + 1)
g, move = self.bot.negamax(board, depth, beta - 1, beta, 1)
if g < beta:
upperBound = g
else:
lowerBound = g
best_move = move
return g, best_move
def copy(self):
"""
Returns a new instance of MinimaxAgent with the same parameters as the current instance.
Returns:
MinimaxPlayer: A new instance with the same parameters.
"""
player_copy = MinmaxAgent(
depth=self.depth,
time_limit=self.time_limit,
heuristic = self.heuristic,
verbose=self.verbose
)
player_copy.set_id(self.id)
return player_copy
class MCTSAgent(Agent):
"""
Class for a Monte Carlo Tree Search (MCTS) Agent.
"""
def __init__(self, time_limit = None, nb_iterations = 100000, nb_rollouts = 1, c_param = 1.4, verbose = False):
"""
Initializes the MCTSAgent with specified parameters.
Parameters:
time_limit (float): The time limit for the MCTS search in seconds.
nb_iterations (int): The number of iterations for the MCTS search.
nb_rollouts (int): The number of rollouts for the MCTS search.
c_param (float): The exploration parameter for the MCTS search.
verbose (bool): If True, prints debug information during search.
"""
self.time_limit = time_limit
self.nb_iterations = nb_iterations
self.verbose = verbose
self.c_param = c_param
self.nb_rollouts = nb_rollouts
self.tree = None
def copy(self):
"""
Returns a new instance of MCTSAgent with the same parameters as the current instance.
Returns:
MCTSAgent: A new instance with the same parameters.
"""
player_copy = MCTSAgent(
time_limit = self.time_limit,
nb_iterations=self.nb_iterations,
c_param=self.c_param,
nb_rollouts=self.nb_rollouts,
verbose=self.verbose
)
player_copy.set_id(self.id)
return player_copy
def timed_search(self, board):
"""
Conducts a timed MCTS search to determine the best move.
Parameters:
board (UniTuple(uint64, 2)): The current game board.
Returns:
int: The best move for the player.
"""
player_board, opponent_board = get_player_board(board, self.id)
best_move = -1
nb_loops = 0
avg_exec_time = 0
root = search_tree.define_root(self.tree, player_board, opponent_board)
global_start_time = time.perf_counter()
last_loop_end = global_start_time
while last_loop_end-global_start_time < self.time_limit-avg_exec_time:
search_tree.search_batch(self.tree, root, nb_rollouts=self.nb_rollouts, c_param=self.c_param)
nb_loops += 1
loop_end = time.perf_counter()
loop_time = loop_end - last_loop_end
last_loop_end = loop_end
if avg_exec_time == 0:
avg_exec_time = loop_time
else:
avg_exec_time = (avg_exec_time*(nb_loops-1) + loop_time)/nb_loops
best_move = self.tree.moves[search_tree.best_child(self.tree, root, c_param=0)]
if self.verbose:
print(f"Player {self.id} --> move:{best_move:<2} ({time.perf_counter()-global_start_time:>6.2f} sec, {nb_loops*MCTS_BATCH_SIZE:<7} iterations, {self.nb_rollouts} rollouts)")
return best_move
def get_move(self, board, events):
"""
Determines the best move for the player using the MCTS algorithm.
Parameters:
board (UniTuple(uint64, 2)): The current game board.
events (list): A list of game events.
Returns:
int: The best move for the player.
"""
if self.tree is None:
self.tree = search_tree.SearchTree()
player_board, opponent_board = get_player_board(board, self.id)
start_time = time.perf_counter()
if self.time_limit is not None:
best_move = self.timed_search(board)
else:
best_move = search_tree.search(self.tree, player_board, opponent_board, self.nb_iterations, self.nb_rollouts, self.c_param)
if self.verbose:
print(f"Player {self.id} --> move:{best_move:<2} ({time.perf_counter()-start_time:>6.2f} sec, {self.nb_iterations} iterations, {self.nb_rollouts} rollouts)")
return best_move