/
_11_submission.py
91 lines (72 loc) · 2.8 KB
/
_11_submission.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import shutil
import subprocess
import sys

import numpy as np

try:
    import tensorflow as tf
except ImportError:
    # Install with the interpreter that is running this file, so the package
    # ends up in the environment we are about to import from (a bare `pip`
    # found on PATH may belong to a different Python).
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'tensorflow'], check=False
    )
    import tensorflow as tf
from tensorflow.keras import models

# gsutil is only ever used as a command-line tool (agent() shells out to
# `gsutil cp ...`), so probe for the executable rather than trying to import a
# Python module: the previous `import gsutils` does not appear to match any
# module shipped by the `gsutil` package, which made the install run on every
# import.
if shutil.which('gsutil') is None:
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'gsutil'], check=False
    )

# Placeholder for the lazily loaded Keras model; agent() replaces it with a
# tf.keras.Model instance on its first call.
MODEL_Q = 0
# original agent
def act(observation, configuration):
    """Baseline agent: play the leftmost column whose top cell is empty.

    Args:
        observation: object exposing ``board`` as a flat, indexable sequence;
            the first seven entries are read as the top cell of each column.
        configuration: unused; the board width is fixed at 7 columns.

    Returns:
        Index of the first column whose top cell equals 0.

    Raises:
        IndexError: if none of the seven top cells is empty.
    """
    column_count = 7  # configuration.columns
    cells = observation.board
    open_columns = []
    for col in range(column_count):
        if cells[col] == 0:
            open_columns.append(col)
    return open_columns[0]
def _ensure_model_file():
    """Fetch the trained model into the working directory if it is missing.

    Raises:
        FileNotFoundError: if the file is still absent after the gsutil copy.
    """
    if os.path.exists('./model_action_predictor.h5'):
        return
    # os.system() only returns once gsutil has exited, so there is nothing to
    # wait for afterwards: either the file is there or the copy failed.
    # Busy-waiting on the path (as before) would spin forever on failure.
    os.system("gsutil cp gs://bert-pl/ConnectX/model_action_predictor.h5 ./")
    if not os.path.exists('./model_action_predictor.h5'):
        raise FileNotFoundError(
            "model_action_predictor.h5 could not be downloaded with gsutil"
        )


def agent(observation, configuration):
    """Pick a column using the Q-values predicted by the trained model.

    The model is loaded once and cached in the module-level ``MODEL_Q``.

    Args:
        observation: mapping with ``'board'`` and ``'mark'`` entries, as
            consumed by ``states_converter``.
        configuration: unused; the board size is fixed at 6 rows x 7 columns.

    Returns:
        Column index as a native Python int. If the column with the highest
        predicted value is full, ``get_available_action`` substitutes a free
        one.

    Raises:
        FileNotFoundError: if the model file is missing and cannot be
            downloaded.
    """
    global MODEL_Q
    rows = 6  # configuration['rows']
    columns = 7  # configuration['columns']

    # Convert the flat board into the (1, rows, columns, 1) model input.
    state = states_converter(
        observation, rows=rows, columns=columns, convolution=True
    )
    if state.shape != (1, rows, columns, 1):
        print("State array shape error, state shape =", state.shape)

    # Load the model lazily; the file is only needed for this first load.
    if not isinstance(MODEL_Q, tf.keras.Model):
        _ensure_model_file()
        MODEL_Q = models.load_model('./model_action_predictor.h5')

    # Predict Q-values for the current state and take the best-scoring column.
    prediction = MODEL_Q.predict(state)[0]
    action = int(np.argmax(prediction))  # native int, not a NumPy scalar

    # Fall back to a free column if the preferred one is already full.
    return get_available_action(state, action, rows)
# This function converts an observation of the form:
# {'board': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 2, 1, 0, 0, 2, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 1, 1, 1, 2], 'mark': 1}
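# into a NumPy array of shape (1, rows, columns) (plus a trailing channel axis when convolution=True) where 1 = own marks, -1 = opponent marks, 0 = empty cells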
def states_converter(observation, rows, columns, convolution=False):
    """Turn a flat board observation into a signed array for the model.

    Cells holding the player's own mark become 1.0, cells holding the other
    mark become -1.0 and empty cells stay 0.0. The relabelling is applied
    only when ``mark`` is 1 or 2; for any other value the raw cell values are
    just divided by 5.

    Args:
        observation: mapping with ``'board'`` (flat sequence of cell values)
            and ``'mark'`` (the current player's mark).
        rows: number of board rows.
        columns: number of board columns.
        convolution: when truthy, append a trailing channel axis.

    Returns:
        Float array of shape ``(1, rows, columns)``, or
        ``(1, rows, columns, 1)`` when ``convolution`` is truthy.
    """
    cells = np.array(observation['board'])
    mark = observation['mark']

    if mark == 1 or mark == 2:
        # Take both masks before writing so the two relabellings cannot
        # interfere; +/-5 are placeholders scaled down to +/-1 below.
        first_player = cells == 1
        second_player = cells == 2
        own_sign = 1 if mark == 1 else -1
        cells[first_player] = 5 * own_sign
        cells[second_player] = -5 * own_sign

    grid = (cells / 5).reshape((1, rows, columns))
    if convolution:
        return grid[..., np.newaxis]
    return grid
# check if column is full
def get_available_action(state, action, rows=6):
    """Return ``action`` if its column has room, otherwise a random free column.

    Args:
        state: board array of shape ``(1, rows, columns, 1)`` in which empty
            cells are 0 and occupied cells are non-zero.
        action: preferred column index.
        rows: board height; a column with this many non-zero cells is full.

    Returns:
        Column index as a native Python int.

    Raises:
        ValueError: if every column is full.
    """
    # Drop the batch and channel axes -> (rows, columns).
    single_board = np.squeeze(state, axis=(0, 3))

    # Number of occupied cells per column.
    filled = np.count_nonzero(single_board, axis=0)

    # flatnonzero always returns a 1-D index array. The previous
    # argwhere(...).squeeze() collapsed to a 0-d array when exactly one column
    # was free, and np.random.choice then treated it as an integer n and
    # sampled from range(n) -- i.e. from columns that are already full.
    free_columns = np.flatnonzero(filled < rows)
    if free_columns.size == 0:
        raise ValueError("no free column left on the board")

    if action not in free_columns:
        action = np.random.choice(free_columns)
    return int(action)  # native Python int, not a NumPy scalar