# Reading in MCTS (Monte Carlo Tree Search) data
### The goal is to train two RNNs.
 - Predict Probability of each of the moves being successful
 - Evaluate the value of the current board position

In [15]:
# uninstall numpy and then reinstall/update tensorflow
import pandas as pd
import numpy as np
print(np.__version__) # must not be the most recent. 1.26.4 works.
import tensorflow as tf
print(tf.__version__)
  
from board import Board
import math
import pickle


1.26.4
2.17.0


In [23]:
def loadall(filename, n=math.inf):
    data = []
    with open(filename, "rb") as f:
        while len(data) < n:
            try:
                df = pickle.load(f)
                data.append(df)
            except EOFError:
                break
            
            print(len(data))
            
    return data

In [94]:
import pandas as pd 
print(pd.__version__) # should work with 2.2.2
from board import Board

df_total = pd.DataFrame()

items = loadall('Data/playing_data.pkl')
for value in items:
    df_total = pd.concat([df_total, value])
print(df_total.shape, ' shape')

# items = loadall('Data/playing_data_saved.pkl')
# for value in items:
#     df_total = pd.concat([df_total, value])    
# print(df_total.shape, ' shape')

# items = loadall('Data/playing_data_saved1.pkl')
# for value in items:
#     df_total = pd.concat([df_total, value])
# print(df_total.shape, ' shape')

# items = loadall('Data/playing_data.pkl')
# for value in items:
#     df_total = pd.concat([df_total, value])


print(df_total.shape, ' shape')
print(df_total.size, ' size')

2.2.2
1
(182672, 10)  shape
(182672, 10)  shape
1826720  size


In [93]:
# pd.to_pickle(df_total, "Data/playing_data.pkl")

In [48]:
# checking out to make the data is stored correctly
  
row = 14
print(df.iloc[row])
print(df.iloc[row]['inv_left'])
x = df.iloc[row]['Board']
board = Board(14)
board.show_dots = False
board.board = x
print(board)
print(df.iloc[row]['Move_Probs'])
print(df.iloc[row]['Moves'])

print(df.iloc[row]['Board'])


Board         [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [...
Moves         [[2, 3, 6, 0, 3, 0], [0, 2, 5, 5, 18, 3], [13,...
Move_Probs    [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...
Reward                                                       -1
Weights       [13.282673596136965, 47.828967673088606, 42.32...
Num_Sims                                                     51
mc_type                                       random_monteCarlo
inv_left                       [4, 6, 8, 9, 13, 14, 15, 19, 20]
Name: 14, dtype: object
[4, 6, 8, 9, 13, 14, 15, 19, 20]
   A B C D E F G H I J K L M N
 1 □ [96m▣ [0m□ □ □ □ □ □ □ □ □ □ □ □ 
 2 □ □ [96m▣ [0m[96m▣ [0m[96m▣ [0m□ [96m▣ [0m□ [96m▣ [0m[96m▣ [0m□ □ □ □ 
 3 □ □ □ □ □ [96m▣ [0m[96m▣ [0m[96m▣ [0m□ [96m▣ [0m□ □ □ □ 
 4 □ □ □ □ □ [96m▣ [0m□ □ [96m▣ [0m[96m▣ [0m□ □ □ □ 
 5 □ □ □ □ [96m▣ [0m□ □ □ □ □ [96m▣ [0m[96m▣ [0m[96m▣ [0m□ 
 6 □ □ □ □ [96m▣ [0m[96m▣ [0m[96m▣ [0m□ □ □ □ [96m▣ [0m[96m▣ [0m□ 
 

# Working on the DCNNs
### References:
 - https://medium.com/aiguys/deep-convolutional-neural-networks-dcnns-explained-in-layman-terms-b990b2818061
  - https://nikcheerla.github.io/deeplearningschool/2018/01/01/AlphaZero-Explained/


In [71]:
def preprocess_data(games, train_test_ratio=.8):
    # Convert game records into input-output pairs
    games = games.sample(frac = 1)
    split_index = math.floor(train_test_ratio*(games.shape[0]))
    sameation_num = 10
    print("Amount of training batches: ", split_index)
    train_df = games.iloc[:split_index-sameation_num]
    test_df = games.iloc[split_index:]
    sameation_df = games.iloc[split_index-sameation_num:split_index]

    train_boards = np.array(tuple(train_df['Board'].values))
    train_rewards = np.array(tuple(train_df['Reward'].values))

    test_boards = np.array(tuple(test_df['Board'].values))
    test_rewards = np.array(tuple(test_df['Reward'].values))
    
    x_val = np.array(tuple(sameation_df['Board'].values))
    y_val = np.array(tuple(sameation_df['Reward'].values))
    
    train_boards = np.reshape(train_boards, (train_boards.shape[0], train_boards.shape[1], train_boards.shape[2], 1))

    
    processed_data = {
        "train boards": train_boards,
        "train rewards": train_rewards,
        "test boards": test_boards,
        "test rewards": test_rewards,
        "x val": x_val,
        "y val": y_val
    }
    
    # Normalize and format the data for training
    return processed_data

In [82]:
# Defining the evaluation snd predictive networks
def create_policy_network(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, kernel_size=3, activation='relu', input_shape=input_shape),
        tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(input_shape[0] * input_shape[1], activation='softmax')
    ])
    return model

def create_value_network(input_shape):
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, kernel_size=6, activation='relu', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(), 
        tf.keras.layers.Conv2D(64, kernel_size=3, activation='relu'),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dense(1, activation='tanh')
    ])
    return model


def create_value_network1(input_shape):
    model = tf.keras.Sequential([

    tf.keras.layers.Conv2D(filters = 32, input_shape = input_shape,  
                kernel_size = (5, 5),  
                padding = 'same'),
    tf.keras.layers.Activation('relu'),
    # Max-Pooling  
    tf.keras.layers.MaxPooling2D(pool_size = (2, 2), 
                strides = (2, 2), padding = 'same'),
    # # Batch Normalisation 
    tf.keras.layers.BatchNormalization(), 
    
    # 2nd Convolutional Layer 
    tf.keras.layers.Conv2D(filters = 30, kernel_size = (6, 6),  
                strides = (1, 1), padding = 'same'), 
    tf.keras.layers.Activation('relu'), 
    # Max-Pooling 
    tf.keras.layers.MaxPooling2D(pool_size = (2, 2),  
                padding = 'same'),
    # Batch Normalisation 
    tf.keras.layers.BatchNormalization(),
    
    # 3rd Convolutional Layer 
    tf.keras.layers.Conv2D(filters = 384, kernel_size = (3, 3),  
                strides = (1, 1), padding = 'same'),
    tf.keras.layers.Activation('relu'),
    # Batch Normalisation 
    tf.keras.layers.BatchNormalization(),
    
    # # 4th Convolutional Layer 
    # tf.keras.layers.Conv2D(filters = 384, kernel_size = (3, 3),  
    #             strides = (1, 1), padding = 'same'),
    # tf.keras.layers.Activation('relu'),
    # # Batch Normalisation 
    # tf.keras.layers.BatchNormalization(),
    
    # # 5th Convolutional Layer 
    # tf.keras.layers.Conv2D(filters = 256, kernel_size = (3, 3),  
    #             strides = (1, 1), padding = 'same'),
    # tf.keras.layers.Activation('relu'),
    # # Max-Pooling 
    # tf.keras.layers.MaxPooling2D(pool_size = (2, 2), strides = (2, 2),  
    #             padding = 'same'),
    # # Batch Normalisation 
    # tf.keras.layers.BatchNormalization(),
    
    # Flattening 
    tf.keras.layers.Flatten(),
    
    # 1st Dense Layer 
    tf.keras.layers.Dense(4096, input_shape = (14*14*1, )),
    tf.keras.layers.Activation('relu'),
    # Add Dropout to prevent overfitting 
    tf.keras.layers.Dropout(0.4),
    # Batch Normalisation 
    tf.keras.layers.BatchNormalization(),
    
    # 2nd Dense Layer 
    tf.keras.layers.Dense(4096),
    tf.keras.layers.Activation('relu'),
    # Add Dropout 
    tf.keras.layers.Dropout(0.5),
    # Batch Normalisation 
    tf.keras.layers.BatchNormalization(),
    
    # tf.keras.layers.Dense(2048),
    # tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(1024),
    tf.keras.layers.Activation('relu'),
    tf.keras.layers.Dense(1, activation='tanh')

    ])
    return model

In [88]:
# Code snippet for training the neural networks
def train_networks(policy_network, value_network, data, epochs=10, batch_size=32):
    # Define loss functions and optimizer
    policy_loss = tf.keras.losses.CategoricalCrossentropy()
    value_loss = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam()
    metrics = [tf.keras.metrics.SparseCategoricalAccuracy()]

    # Compile the models
    policy_network.compile(optimizer=optimizer, loss=policy_loss, metrics=metrics)
    value_network.compile(optimizer=optimizer, loss=value_loss, metrics=metrics)

    # Train the models
    # policy_network.fit(processed_data['input'], processed_data['policy_target'], epochs=epochs, batch_size=batch_size, sameation_data=(x_val, y_val))
    value_network.fit(data['train boards'], data['train rewards'], epochs=epochs, batch_size=batch_size, validation_data=(data['x val'], data['y val']))

In [87]:
board_size = 14
print(np.shape(df_total))
data = preprocess_data(df_total, train_test_ratio=.8)
print(np.shape(data["train boards"])[0])

input_shape = (board_size, board_size, 1)

policy_network = create_policy_network(input_shape)
value_network = create_value_network1(input_shape)

train_networks(policy_network, value_network, data, epochs=10, batch_size=32)

(59926, 10)
Amount of training batches:  47940
47930
Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m182s[0m 119ms/step - loss: 1.9847 - sparse_categorical_accuracy: 0.0074 - val_loss: 3.6000 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 2/10
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m221s[0m 148ms/step - loss: 1.9961 - sparse_categorical_accuracy: 0.0069 - val_loss: 2.4000 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 3/10
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m188s[0m 126ms/step - loss: 2.0054 - sparse_categorical_accuracy: 0.0083 - val_loss: 3.2000 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 4/10
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 123ms/step - loss: 1.9932 - sparse_categorical_accuracy: 0.0076 - val_loss: 2.8000 - val_sparse_categorical_accuracy: 0.0000e+00
Epoch 5/10
[1m1498/1498[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 133ms/step - loss: 1.9822 - sparse_categorical_accuracy: 0.0080 - val_loss: 3.6000 - val_s

In [89]:
to_pred = np.array((tuple(df_total.iloc[14]['Board'])))
to_pred = to_pred[np.newaxis, :, :, np.newaxis]
print(np.shape(to_pred))
value_network.predict(to_pred)

(1, 14, 14, 1)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step


array([[1.]], dtype=float32)

In [90]:
# Evaluate the model on the test data using `evaluate`
print("Evaluate on test data")
results = value_network.evaluate(data['test boards'], data['test rewards'], batch_size=12)
print("test loss, test acc:", results)

# Generate predictions (probabilities -- the output of the last layer)
# on new data using `predict`
for i in range(20):
    # print("Generate predictions for 3 samples")
    to_pred = np.array(tuple(data['test boards'][i]))
    print(to_pred)
    print(data['test rewards'][i])
    to_pred = to_pred[np.newaxis, :, :, np.newaxis]

    predictions = value_network.predict(to_pred)
    print("predictions shape:", predictions.shape)
    print("prediction:", predictions, "correct reward: ", data['test rewards'][i])

Evaluate on test data
[1m999/999[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11ms/step - loss: 1.9908 - sparse_categorical_accuracy: 0.0080
test loss, test acc: [1.9754713773727417, 0.008843651041388512]
[[ 0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0  0  0  1  1  1  0  0]
 [ 0  0  0  0  1  0  0  0  0  0  0  1  1  0]
 [ 0  1  0  1  1  1  0  0  0  1  1  0  0  0]
 [ 1  1  1  0  1  0  1  1  1  0  1  0  0  0]
 [ 1 -1  0  1  0  0  0  1  0 -1  1  1  0  0]
 [-1 -1  0  1 -1  0  0  1 -1 -1 -1  0  1  0]
 [-1  0  0  1 -1 -1  0  0  0 -1  0  1  1  0]
 [-1  0  1  1  0 -1  0  0 -1  0  1  1  0  0]
 [ 0 -1 -1 -1 -1  0 -1 -1 -1  0  0  0  0  0]
 [ 0 -1  0  0  0  0  0 -1  0 -1  0 -1  0  0]
 [ 0  0  0  0  0  0  0  0  0 -1 -1 -1  0  0]]
-1
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 40ms/step
predictions shape: (1, 1)
prediction: [[1.]] correct reward:  -1
[[ 0  0  0  0