# Setup

In [0]:
# Connect to your own Google Drive: mount it at /content/gdrive so that the
# project folder used by this notebook is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
# Work from the project folder on the mounted Drive; relative file names used
# in this notebook (e.g. the .h5 checkpoints) resolve against this directory.
%cd "/content/gdrive/My Drive/DeepLearningProject/"

In [0]:
# Install pybind11, then print the compiler include flags it provides.
# NOTE(review): presumably required by the compiled `golois` extension that
# this notebook imports - confirm, and consider pinning the pybind11 version.

!pip install pybind11
!python3 -m pybind11 --includes

In [0]:
import numpy as np
import golois
import tensorflow as tf
import tensorflow.keras as keras

# Dimensions of the training buffers.
planes = 8      # feature planes per 19x19 board position
moves = 361     # number of policy classes (19 * 19 intersections)
N = 100000      # number of positions held in memory at once

# The arrays below are filled with random values only to obtain buffers of the
# right shape and dtype before they are handed to golois.getBatch.
# NOTE(review): getBatch presumably overwrites them in place with real
# positions - confirm against the golois module.
input_data = np.random.randint(2, size=(N, 19, 19, planes)).astype('float32')

# num_classes is passed explicitly: without it to_categorical sizes the one-hot
# axis from the largest index actually drawn, so the width would be smaller
# than `moves` whenever index 360 happened not to be sampled.
policy = np.random.randint(moves, size=(N,))
policy = keras.utils.to_categorical(policy, num_classes=moves)

value = np.random.randint(2, size=(N,)).astype('float32')

end = np.random.randint(2, size=(N, 19, 19, 2)).astype('float32')

golois.getBatch(input_data, policy, value, end)

# Model definition 

In [0]:
import tensorflow.keras as keras
from tensorflow.keras import layers 
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.regularizers import l2

# Neural network: one convolutional stem, one residual block, then a policy
# head (softmax over 361 moves) and a value head (single tanh unit).
#
# Fix: the ReLU activations and the residual addition now consume the
# BatchNormalization outputs. Previously they were fed the raw convolution
# outputs, so BatchNorm-1/2/3 were created but never became part of the model.
# Checkpoints saved from the earlier graph do not contain those layers.

planes = 8

# Settings shared by every BatchNormalization layer in this network.
batch_norm_kwargs = dict(
    axis=-1,
    momentum=0.99,
    epsilon=0.001,
    center=True,
    scale=True,
    beta_initializer='zeros',
    gamma_initializer='ones',
    moving_mean_initializer='zeros',
    moving_variance_initializer='ones',
    beta_regularizer=None,
    gamma_regularizer=None,
    beta_constraint=None,
    gamma_constraint=None,
)

input_layer = keras.Input(
    shape=(19, 19, planes),
    name='GoGrids'
)

# CONVOLUTIONAL LAYER
# NOTE: the layer names say "256" but the layers have 194 filters; the names
# are left untouched so that anything referring to layers by name still works.
conv_layer_1 = keras.layers.Conv2D(
    filters=194,
    kernel_size=(3, 3),
    padding='same',
    use_bias=False,  # the bias is redundant with the BatchNorm beta that follows
    name='256ConvLayer-1'
)(input_layer)

batch_norm1 = keras.layers.BatchNormalization(
    name='BatchNorm-1',
    **batch_norm_kwargs
)(conv_layer_1)

conv_layer_1_activation = keras.layers.Activation(
    'relu'
)(batch_norm1)


# RESIDUAL LAYER
conv_layer_2 = keras.layers.Conv2D(
    filters=194,
    kernel_size=(3, 3),
    padding='same',
    use_bias=False,
    name='256ConvLayer-2'
)(conv_layer_1_activation)

batch_norm2 = keras.layers.BatchNormalization(
    name='BatchNorm-2',
    **batch_norm_kwargs
)(conv_layer_2)

conv_layer_2_activation = keras.layers.Activation(
    'relu'
)(batch_norm2)

conv_layer_3 = keras.layers.Conv2D(
    filters=194,
    kernel_size=(3, 3),
    padding='same',
    use_bias=False,
    name='256ConvLayer-3'
)(conv_layer_2_activation)

batch_norm3 = keras.layers.BatchNormalization(
    name='BatchNorm-3',
    **batch_norm_kwargs
)(conv_layer_3)

# Skip connection: block input + normalised output of the second convolution.
residual_layer_1 = layers.add(
    [conv_layer_1_activation, batch_norm3],
    name='FirstResidualLayer'
)

residual_layer_activation = keras.layers.Activation(
    'relu'
)(residual_layer_1)

# POLICY HEAD
policy_pred = keras.layers.Conv2D(
    filters=2,
    kernel_size=(1, 1),
    padding='same',
    use_bias=False,
    name='PolicyConv'
)(residual_layer_activation)

batch_norm_policy_head = keras.layers.BatchNormalization(
    name='BatchNorm-policyHead',
    **batch_norm_kwargs
)(policy_pred)

policy_pred_activation = keras.layers.Activation(
    'relu'
)(batch_norm_policy_head)

flatten_policy = keras.layers.Flatten(
    name='PolicyFlat'
)(policy_pred_activation)

policy_pred_output = keras.layers.Dense(
    units=361,
    activation='softmax',
    name='PolicyPrediction'
)(flatten_policy)


# VALUE HEAD
value_pred = keras.layers.Conv2D(
    filters=1,
    kernel_size=(1, 1),
    padding='same',
    use_bias=False,
    name='ValueConv'
)(residual_layer_activation)

batch_norm_value_head = keras.layers.BatchNormalization(
    name='BatchNorm-valueHead',
    **batch_norm_kwargs
)(value_pred)

value_pred_activation = keras.layers.Activation(
    'relu'
)(batch_norm_value_head)

flatten_value = keras.layers.Flatten(
    name='ValueFlat'
)(value_pred_activation)

hidden_value_head = keras.layers.Dense(
    units=128,
    activation='relu',
    name='ValueHidden'
)(flatten_value)

value_pred_output = keras.layers.Dense(
    units=1,
    activation='tanh',
    name='ValuePrediction'
)(hidden_value_head)

# Output order is (policy, value); the loss list passed to compile() and the
# output names used in fit()/evaluate() rely on it.
model = keras.Model(inputs=[input_layer], outputs=[policy_pred_output, value_pred_output])
keras.utils.plot_model(model, show_shapes=True)
model.summary()


In [0]:
# Render the architecture diagram to model_Dumbo.png (layer names shown,
# tensor shapes hidden, top-to-bottom layout).
keras.utils.plot_model(
    model,
    to_file='model_Dumbo.png',
    show_shapes=False,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=False,
    dpi=96,
)

In [0]:
# If a saved model already exists: load it from the .h5 checkpoint.
# This rebinds `model`, replacing any network built earlier in the session.
from tensorflow.keras.models import load_model
model = load_model('AZORIN_COHEN_Dumbo.h5')

In [0]:
# Compilation
# SGD with Nesterov momentum. `learning_rate` is the supported argument name;
# `lr` is only a deprecated alias of it.
# NOTE(review): `decay` is kept as-is to preserve the training schedule, but
# recent TensorFlow releases only accept it on legacy optimizers - confirm for
# the installed version.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

# One loss per output, in model output order: cross-entropy for the policy
# head, mean squared error for the value head, equally weighted.
model.compile(optimizer=sgd, loss=['categorical_crossentropy', 'mse'], loss_weights=[1, 1], metrics=['accuracy'])

---------

In [0]:
# Accumulator for the per-epoch Keras metrics gathered over all training
# rounds: one independent, initially empty list per metric name.
total_history_dict = {
    metric_name: []
    for metric_name in (
        'PolicyPrediction_acc',
        'PolicyPrediction_loss',
        'ValuePrediction_acc',
        'ValuePrediction_loss',
        'loss',
        'val_PolicyPrediction_acc',
        'val_PolicyPrediction_loss',
        'val_ValuePrediction_acc',
        'val_ValuePrediction_loss',
        'val_loss',
    )
}

In [0]:
# Only if the model has already been trained: reload the saved history.
# SECURITY: pickle.load can execute arbitrary code, so only open history files
# that you created yourself.
import pickle

# The context manager closes the file even if unpickling fails.
with open("file_temp.pkl", "rb") as pickle_in:  # existing history dict
  total_history_dict = pickle.load(pickle_in)

In [0]:
def append_dict(total_dict, new_dict):
  """Append the per-epoch values of `new_dict` to the running `total_dict`.

  Both arguments map a metric name to a list of values. For every metric in
  `new_dict` the values are concatenated after those already stored in
  `total_dict`. A metric that `total_dict` does not know yet starts from an
  empty list, and a metric absent from `new_dict` is left untouched, so a
  difference in metric names (for example when validation is disabled, or when
  the Keras version names the metrics differently) no longer raises KeyError.

  Args:
    total_dict: accumulated history; updated in place.
    new_dict: history of the latest training round.

  Returns:
    The same `total_dict` object, after the update.
  """
  for key, values in new_dict.items():
    # A new list is built rather than extended in place, so lists previously
    # read out of total_dict by the caller are not modified behind its back.
    total_dict[key] = list(total_dict.get(key, [])) + list(values)
  return total_dict

# Train

In [0]:
# Each round draws a new batch into the shared arrays, trains for one epoch
# (5% of the batch held out for validation) and accumulates the metrics.
# NOTE(review): golois.getBatch presumably refills the arrays in place - confirm.
for i in range (500):
  golois.getBatch (input_data, policy, value, end)
  history = model.fit(input_data, {'PolicyPrediction': policy, 'ValuePrediction': value}, epochs=1, batch_size=60, validation_split=0.05)
  new_dict = history.history
  total_history_dict = append_dict(total_history_dict, new_dict)
  # Periodic checkpoint every 20 rounds.
  if (i%20 == 0):
    model.save('AZORIN_COHEN_Dumbo.h5')

# Final checkpoint: the last periodic save happens at i == 480, so without
# this the weights learned during rounds 481-499 would never reach disk.
model.save('AZORIN_COHEN_Dumbo.h5')

# Plots

### Policy loss

In [0]:
import matplotlib.pyplot as plt

# Policy head: training vs validation loss, one point per training round.
loss_values = total_history_dict['PolicyPrediction_loss']
val_loss_values = total_history_dict['val_PolicyPrediction_loss']

epochs = range(1, len(loss_values) + 1)

plt.plot(epochs, loss_values, 'b', label='Training loss')
# The colour is given once, through the format string. Passing both 'b' and
# color="red" defined it twice and relied on the keyword silently winning.
plt.plot(epochs, val_loss_values, 'r', label='Validation loss')

plt.title('Policy training and validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

### Policy accuracy

In [0]:
# Policy head: training vs validation accuracy, one point per training round.
acc_values = total_history_dict['PolicyPrediction_acc']
val_acc_values = total_history_dict['val_PolicyPrediction_acc']

# The x axis is sized from the series plotted here, not from `loss_values`
# left over from another cell, so this cell also runs on its own.
epochs = range(1, len(acc_values) + 1)

plt.plot(epochs, acc_values, 'b', label='Training acc')
# Colour given once via the format string (was 'b' plus color="red").
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')

plt.title('Policy training and validation Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

### Value loss

In [0]:
# Value head: training vs validation loss, one point per training round.
loss_values = total_history_dict['ValuePrediction_loss']
val_loss_values = total_history_dict['val_ValuePrediction_loss']

epochs = range(1, len(loss_values) + 1)

plt.plot(epochs, loss_values, 'b', label='Training loss')
# Colour given once via the format string (was 'b' plus color="red").
plt.plot(epochs, val_loss_values, 'r', label='Validation loss')

plt.title('Value training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()

### Value accuracy

In [0]:
# Value head: training vs validation accuracy, one point per training round.
acc_values = total_history_dict['ValuePrediction_acc']
val_acc_values = total_history_dict['val_ValuePrediction_acc']

# The x axis is sized from the series plotted here, not from `loss_values`
# left over from another cell, so this cell also runs on its own.
epochs = range(1, len(acc_values) + 1)

plt.plot(epochs, acc_values, 'b', label='Training acc')
# Colour given once via the format string (was 'b' plus color="red").
plt.plot(epochs, val_acc_values, 'r', label='Validation acc')

plt.title('Value training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Acc')
plt.legend()
plt.show()

# Evaluation

In [0]:
# Test
import golois
# Si modèle existant
from tensorflow.keras.models import load_model

def compute_metrics(models_names) :
  """Evaluate several saved models on one freshly drawn batch.

  A single call to golois.getBatch refreshes the module-level arrays
  `input_data`, `policy`, `value` and `end`, so every model is scored on the
  same positions. Each model is then loaded from disk and evaluated.

  Args:
    models_names: iterable of paths accepted by `load_model`.

  Returns:
    A pair `(accuracies_pol, accuracies_val)`. Each is a list with one inner
    list per model, in the order of `models_names`, holding that model's
    policy accuracy and value accuracy respectively.
  """
  golois.getBatch(input_data, policy, value, end)
  accuracies_pol = [[] for _ in models_names]
  accuracies_val = [[] for _ in models_names]

  for i, model_name in enumerate(models_names):
    model = load_model(model_name)
    test = model.evaluate(x=input_data, y={'PolicyPrediction': policy, 'ValuePrediction': value}, verbose=0)
    # NOTE(review): indices 3 and 4 assume evaluate() returns
    # [loss, policy loss, value loss, policy acc, value acc], i.e. that each
    # model was compiled with two losses and a single 'accuracy' metric.
    # append, not +=: the entries are scalars, and `list += float` raises
    # TypeError because a float is not iterable.
    accuracies_pol[i].append(test[3])
    accuracies_val[i].append(test[4])

  return accuracies_pol, accuracies_val

def print_metrics(model_name, model_metrics):
  """Print a short report for one model.

  Args:
    model_name: label shown on the "Model" line.
    model_metrics: indexable whose item 0 is the policy accuracy and whose
      item 1 is the value accuracy.
  """
  print("======================")
  print("Model: ", model_name)
  # Items are looked up one at a time, immediately before being printed.
  for label, position in (("accuracy_pol: ", 0), ("accuracy_val: ", 1)):
    print(label, model_metrics[position])


### Tests

# NOTE(review): the last entry has no .h5 extension, unlike the others -
# confirm that this is the intended path.
models_names = ['dumbo_26022020.h5', 'Mowgli_residual_750.h5','Mowgli_residual_720.h5', 'Mowgli_residual_735.h5', 'AZORIN_COHEN_Dumbo.h5', 'AZORIN_COHEN_Dumber.h5', 'Mowgli_795.h5', 'AZORIN_COHEN_Cheetah']

model_metrics = compute_metrics(models_names)

# compute_metrics returns two parallel lists (policy accuracies, value
# accuracies), each indexed by model. Indexing the returned pair directly by
# model index mixed the two up and ran out of range from the third model on.
accuracies_pol, accuracies_val = model_metrics

for i, model_name in enumerate(models_names):
  print_metrics(model_name, (accuracies_pol[i], accuracies_val[i]))
