In [1]:
import numpy as np
import tensorflow as tf
import random
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPool2D, Dropout, Input
from tensorflow.keras import Model
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.regularizers import l2
from sklearn.utils import shuffle

import Dataset_pb2

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Side length of the square board; states and policies are reshaped to
# BOARD_SIZE x BOARD_SIZE grids further down.
BOARD_SIZE = 9
# Number of planes in each serialized board state.
CHANNELS = 2

In [3]:
# Parse the serialized DataSet protobuf message from disk.
# The `with` block guarantees the file handle is closed even if reading or
# parsing raises.
data_set = Dataset_pb2.DataSet()
with open('../resources/dataset_mcts400_forced3_all.small.bin', "rb") as f:
    data_set.ParseFromString(f.read())

In [4]:
def toHWC(board_state):
    """Return `board_state` with its leading axis moved to position 2.

    For a 3-D channels-first array of shape (C, H, W) the result has shape
    (H, W, C). The remaining axes keep their relative order.
    """
    return np.moveaxis(board_state, source=0, destination=2)

In [5]:
# Split the parsed records into test and train sets by game id: every record
# whose game_id is below NUMBER_OF_TEST_GAMES goes to the test set, the rest
# to the training set.
NUMBER_OF_TEST_GAMES = 100
test_data = []
train_data = []
# Highest game_id seen + 1. Initialised up front so it is defined (as 0) even
# when the dataset is empty, and tracked with max() so it does not depend on
# the order of the records.
total_games = 0
for data_instance in data_set.data:
    instance = {
        'game_id': data_instance.game_id,
        # Stored flat; reshape to (CHANNELS, BOARD_SIZE, BOARD_SIZE), then move
        # the channel axis last for the channels_last convolution layers.
        'state': toHWC(np.array(data_instance.state).reshape(CHANNELS, BOARD_SIZE, BOARD_SIZE)),
        'policy': np.array(data_instance.policy).reshape(BOARD_SIZE, BOARD_SIZE),
        'value': data_instance.value
    }
    if instance['game_id'] < NUMBER_OF_TEST_GAMES:
        test_data.append(instance)
    else:
        train_data.append(instance)
    total_games = max(total_games, instance['game_id'] + 1)

In [6]:
# Show the game count computed while splitting the dataset.
total_games

1000

In [7]:
def transform_data(data):
    """Expand each instance into its 8 flip/rotation variants and stack them.

    For every instance the state and policy are taken both as-is and flipped
    along axis 0 (`np.flipud`); each of those is rotated by 0, 1, 2 and 3
    quarter turns (`np.rot90`). The instance's value is repeated for every
    variant.

    Args:
        data: iterable of dicts with the keys 'state', 'value' and 'policy'.

    Returns:
        Tuple `(states, values, policies)` of numpy arrays. `policies` is
        flattened to shape (num_variants, BOARD_SIZE * BOARD_SIZE).
    """
    aug_states = []
    aug_values = []
    aug_policies = []

    for sample in data:
        sample_state = sample['state']
        sample_value = sample['value']
        sample_policy = sample['policy']

        # Base orientations in output order: untouched first, then flipped.
        for flipped in (False, True):
            if flipped:
                base_state = np.flipud(sample_state)
                base_policy = np.flipud(sample_policy)
            else:
                base_state = sample_state
                base_policy = sample_policy

            # State and policy get the same rotation so they stay aligned.
            for quarter_turns in range(4):
                aug_states.append(np.rot90(base_state, k=quarter_turns))
                aug_policies.append(np.rot90(base_policy, k=quarter_turns))
                aug_values.append(sample_value)

    states = np.array(aug_states)
    values = np.array(aug_values)
    policies = np.array(aug_policies).reshape(len(aug_policies), BOARD_SIZE * BOARD_SIZE)

    return states, values, policies

In [8]:
states_test, values_test, policies_test = transform_data(test_data)
states_train, values_train, policies_train = transform_data(train_data)

In [12]:
# train_ds = tf.data.Dataset.from_tensor_slices((states_3d, values)).shuffle(10000).batch(32)

In [14]:
# class CNNModel(Model):
#     def __init__(self):
#         super(Model, self).__init__()
#         self.conv1 = Conv2D(32, 3, padding='same', activation='relu')
#         self.pool1 = MaxPool2D((2,2))
#         self.conv2 = Conv2D(64, 3, padding='same', activation='relu')
#         self.pool2 = MaxPool2D((2,2))
#         self.flatten = Flatten()
#         self.d1 = Dense(512, activation='relu')
#         self.dropout1 = Dropout(0.4)
#         self.d2 = Dense(128, activation='relu')
#         self.dropout2 = Dropout(0.4)
#         self.d3 = Dense(43, activation='softmax')

#     def call(self, x):
#         x = self.conv1(x)
#         x = self.pool1(x)
#         x = self.conv2(x)
#         x = self.pool2(x)
#         x = self.flatten(x)
#         x = self.d1(x)
#         x = self.dropout1(x)
#         x = self.d2(x)
#         x = self.dropout2(x)
#         x = self.d3(x)
#         return x
# model = CNNModel()
# L2 penalty coefficient. Not referenced by the network built below.
L2_VALUE = 1e-4

# Value network: two conv/conv/pool stages followed by a small dense head
# that ends in a single tanh unit.
model = models.Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters, then 2x2 max pooling.
    layers.Conv2D(32,
                  kernel_size=(3, 3),
                  activation="relu",
                  padding='same',
                  data_format="channels_last",
                  input_shape=(BOARD_SIZE, BOARD_SIZE, CHANNELS)),
    layers.Conv2D(32,
                  kernel_size=(3, 3),
                  activation="relu",
                  padding='same',
                  data_format="channels_last"),
    layers.MaxPooling2D((2, 2), data_format="channels_last"),
    # Stage 2: two 3x3 convolutions with 128 filters, then 2x2 max pooling.
    layers.Conv2D(128,
                  kernel_size=(3, 3),
                  activation="relu",
                  padding='same',
                  data_format="channels_last"),
    layers.Conv2D(128,
                  kernel_size=(3, 3),
                  activation="relu",
                  padding='same',
                  data_format="channels_last"),
    layers.MaxPooling2D((2, 2), data_format="channels_last"),
    # Head: flatten, one hidden dense layer, one tanh output unit.
    layers.Flatten(),
    layers.Dense(32, activation='relu'),
    layers.Dense(1, activation='tanh'),
])

# in_x = network = Input((BOARD_SIZE, BOARD_SIZE, CHANNELS),)
# # conv layers
# network = Conv2D(filters=32, kernel_size=(3, 3), padding="same", data_format="channels_last", activation="relu", kernel_regularizer=l2(L2_VALUE))(network)
# network = Conv2D(filters=64, kernel_size=(3, 3), padding="same", data_format="channels_last", activation="relu", kernel_regularizer=l2(L2_VALUE))(network)
# network = Conv2D(filters=128, kernel_size=(3, 3), padding="same", data_format="channels_last", activation="relu", kernel_regularizer=l2(L2_VALUE))(network)
# # action policy layers
# policy_net = Conv2D(filters=4, kernel_size=(1, 1), data_format="channels_last", activation="relu", kernel_regularizer=l2(L2_VALUE))(network)
# policy_net = Flatten()(policy_net)
# policy_net = Dense(BOARD_SIZE*BOARD_SIZE, activation="softmax", name="policy_output", kernel_regularizer=l2(L2_VALUE))(policy_net)
# # state value layers
# value_net = Conv2D(filters=2, kernel_size=(1, 1), data_format="channels_last", activation="relu", kernel_regularizer=l2(L2_VALUE))(network)
# value_net = Flatten()(value_net)
# value_net = Dense(64, kernel_regularizer=l2(L2_VALUE))(value_net)
# value_net = Dense(1, activation="tanh", kernel_regularizer=l2(L2_VALUE), name="value_output")(value_net)

# model = Model(in_x, [policy_net, value_net])

# Checkpointing: a model file is written at the end of an epoch only when
# `val_loss` has improved on the best value seen so far. `{epoch}` in the file
# name is replaced with the epoch number, so each improving epoch gets its own
# file.
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        filepath='model_mcts400_forced3_all_{epoch}.h5',
        monitor='val_loss',
        save_best_only=True,
        verbose=1,
    ),
]


# Regress the value target with mean squared error; no extra metrics are tracked.
model.compile(loss="mean_squared_error", optimizer='adam')
print('# Fit model on training data')

# Train on the augmented training positions and validate on the augmented
# test positions after every epoch (this drives the checkpoint callback).
history = model.fit(
    x=states_train,
    y=values_train,
    validation_data=(states_test, values_test),
    epochs=3,
    batch_size=256,
    shuffle=True,
    callbacks=callbacks,
)


# model.compile(optimizer='adam',
#                   loss = {
#                       "policy_output": "categorical_crossentropy",
#                       "value_output": "mean_squared_error",
#                   },
#                   loss_weights = {"policy_output": 1.0, "value_output": 1.0},
# #                 metrics=['accuracy','mae']
#              )

# Train the model by slicing the data into "batches"
# of size "batch_size", and repeatedly iterating over
# the entire dataset for a given number of "epochs"
# print('# Fit model on training data')
# history = model.fit(states_train, [policies_train, values_train],
#                     shuffle=True,
#                     batch_size=256,
#                     epochs=3,
#                     callbacks=callbacks,
#                     validation_data=(states_test, [policies_test, values_test])
#                     )

# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='relu'))

# Fit model on training data
Train on 335808 samples, validate on 39608 samples
Epoch 1/3
Epoch 00001: val_loss improved from inf to 0.21229, saving model to model_mcts400_forced3_all_1.h5
Epoch 2/3
Epoch 00002: val_loss improved from 0.21229 to 0.18577, saving model to model_mcts400_forced3_all_2.h5
Epoch 3/3
Epoch 00003: val_loss improved from 0.18577 to 0.18353, saving model to model_mcts400_forced3_all_3.h5


In [None]:
# The returned "history" object holds a record
# of the loss values and metric values during training
# print('\nhistory dict:', history.history)

# Evaluate the model on the test data using `evaluate`.
# The trained model has a single output (the tanh value unit), so the only
# target is `values_test`. It was compiled without metrics, so `evaluate`
# returns just the loss (mean squared error).
print('\n# Evaluate on test data')
results = model.evaluate(states_test, values_test, batch_size=128)
print('test loss:', results)

In [None]:
# Generate value predictions (the tanh output of the last layer, one number
# per position) for the first three test positions using `predict`.
print('\n# Generate predictions for 3 samples')
predictions = model.predict(states_test[:3])
# The model has a single output, so `predictions` is one array with a row per
# input sample.
print('predictions shape:', predictions.shape)

In [None]:
# Save the model as it stands after the final epoch to a single HDF5 file,
# separate from the per-epoch checkpoint files written during training.
model.save('model_mcts400_forced3_all.h5')