-
Notifications
You must be signed in to change notification settings - Fork 1
/
solver_on_solver.py
138 lines (116 loc) · 3.85 KB
/
solver_on_solver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""
Play matches between two connect four solvers with different temperatures.
See solver implementation at 'solver_bot.py'
"""
from absl import app
import numpy as np
from itertools import combinations
import os
from shutil import copyfile
from open_spiel.python.utils import spawn
import AZ_helper_lib as AZh
import utils
# Specify the game (this name is used for the log directory and the model path):
game = 'pentago'
# Save all logs to:
path_logs = '/path/to/logs/directory/' + game + '_solver/'
# Create the directory, then copy the config.json file into it: (otherwise crashes)
# NOTE(review): the copy is assumed to run only when the directory is first
# created (i.e. nested under the `if`) -- confirm against the original layout.
if not os.path.exists(path_logs):
    os.makedirs(path_logs)
    copyfile('./config.json', path_logs + '/config.json')
def mat2str(matrix):
    """Return `matrix` as a comma-separated string with no line-width limit."""
    # An infinite width stops array2string from wrapping long rows.
    format_options = {'separator': ',', 'max_line_width': np.inf}
    return np.array2string(matrix, **format_options)
def set_config(model, temp1=None, temp2=None):
    """Build the AZh.Config for a match between two solvers.

    Args:
        model: Name of the model sub-directory under './models/<game>/'. The
            same directory is passed as both path_model_1 and path_model_2.
        temp1: Passed through as solver_1_temp.
        temp2: Passed through as solver_2_temp.

    Returns:
        The object returned by AZh.Config for these settings.
    """
    model_dir = './models/' + game + '/' + model + '/'
    # NOTE(review): the 'game' entry below is a literal and is independent of
    # the module-level `game` used for the model path above -- confirm that
    # the two are meant to agree.
    settings = {
        'game': "connect_four",  # <====== change game here
        'MC_matches': False,
        'path': path_logs,
        'path_model_1': model_dir,
        'path_model_2': model_dir,
        'checkpoint_number_1': None,
        'checkpoint_number_2': None,
        'use_solver': False,
        'use_two_solvers': True,
        'solver_1_temp': temp1,
        'solver_2_temp': temp2,
        'logfile': 'matches',
        'learning_rate': 0,
        'weight_decay': 0,
        'temperature': 0.25,
        'evaluators': 80,
        'uct_c': 2,
        'max_simulations': 300,
        'policy_alpha': 0.5,  # was 0
        'evaluation_games': 10,
        'evaluation_window': 10,
        'nn_model': None,
        'nn_width': None,
        'nn_depth': None,
        'observation_shape': None,
        'output_size': None,
        'quiet': True,
    }
    return AZh.Config(**settings)
def _mean_evaluator_score(config):
    """Return the mean of the scores logged by each evaluator.

    For every k in range(config.evaluators) this reads the file
    config.path + 'log-' + config.logfile + '-' + str(k) + '.txt' and parses
    one float from its second-to-last line.

    Raises:
        OSError: if a log file cannot be opened.
        IndexError / ValueError: if a log is shorter than two lines or the
            sliced text is not a number.
    """
    total = 0
    for ev in range(config.evaluators):
        log_name = config.path + 'log-' + config.logfile + '-' + str(ev) + '.txt'
        with open(log_name) as f:
            lines = f.readlines()
        # The score is taken from fixed character positions of the
        # second-to-last line.
        # NOTE(review): this relies on the exact log layout -- confirm it
        # against whatever writes these logs.
        total += float(lines[-2][-7:-2])
    return total / config.evaluators


def main(unused_argv):
    """Play every pair of solver temperatures and save the result matrix.

    Writes the temperature list to 'temperatures.txt' under path_logs, runs
    AZh.run_matches once per unordered pair of temperatures, and stores the
    resulting n x n matrix in 'matrix.npy' under path_logs.

    Args:
        unused_argv: Command-line arguments supplied by absl; ignored.
    """
    # List of temperatures used, mostly exponential but with a higher density between 0.25-3
    # because performance becomes much more sensitive to temperature change at that region.
    temperatures = np.array([
        0,
        0.1,
        0.16218100973589297,
        0.26302679918953814,
        0.33496543915782767,
        0.42657951880159267,
        0.5432503314924332,
        0.6918309709189364,
        0.8810488730080143,
        1.1220184543019636,
        1.428893958511103,
        1.8197008586099834,
        2.3173946499684783,
        2.9512092266663856,
        4.786300923226383,
        7.76247116628692,
        12.589254117941675,
        20.417379446695296,
        33.11311214825911,
        53.70317963702527,
        87.09635899560806,
        141.2537544622754,
        229.08676527677724,
        371.5352290971724,
        602.5595860743581,
        977.2372209558112]
    )
    n = len(temperatures)
    matches = np.zeros([n, n])
    timer = utils.Timer()
    timer.go()

    # Record the temperatures, one per line. The context manager closes the
    # file even if a write fails.
    with open(path_logs + "temperatures.txt", "w") as textfile:
        textfile.writelines(str(t) + "\n" for t in temperatures)

    # Each unordered pair (i, j) with i < j is played once; the result is
    # stored in the upper triangle of `matches`.
    for pair in combinations(range(n), 2):
        temp_1 = temperatures[pair[0]]
        temp_2 = temperatures[pair[1]]
        # Loading q_0_0 but not using it.
        config = set_config('q_0_0', temp_1, temp_2)
        AZh.run_matches(config)
        score = _mean_evaluator_score(config)
        # Shift and halve the mean score (presumably mapping [-1, 1] onto
        # [0, 1] -- confirm the score range).
        matches[pair] += (float(score) + 1) / 2
    timer.stop()

    # Fill the strict lower triangle with 1 - matches[j, i]; the diagonal
    # stays at zero.
    matches = matches + np.tril(np.ones([n, n]) - matches.transpose(), -1)
    print('Matches:', matches)
    # Save matrix to file. Not as text since it's too big.
    with open(path_logs + "/matrix.npy", 'wb') as f:
        np.save(f, matches)
# Script entry point: hand `main` to absl's app runner inside the
# open_spiel spawn main handler.
if __name__ == "__main__":
    with spawn.main_handler():
        app.run(main)