In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [None]:
# Load the training set and preview the first rows.
# NOTE(review): this CSV path is the one written by the "Data Cleaning" cells
# further down, so on a fresh kernel those cells have to run before this one.
df = pd.read_csv('/content/mcts7500_pool.csv')
df.head()

Unnamed: 0,board_moves,play_y
0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5
3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",3
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6


In [None]:
# Give the raw CSV columns the names used by the rest of the notebook.
# Columns that are not present are ignored, so re-running this cell is harmless.
df = df.rename(columns={'board_moves': 'Board', 'play_y': 'Best_Move'})

In [None]:

import ast


def _parse_board(cell):
    """Return one 'Board' cell as a NumPy array.

    The CSV stores each board as the text of a Python list. ast.literal_eval
    only accepts Python literals, so unlike eval() it cannot run code that
    happens to be embedded in the file. Cells that are already parsed (for
    example when this notebook cell is re-run) are passed through unchanged.
    """
    if isinstance(cell, str):
        cell = ast.literal_eval(cell)
    return np.asarray(cell)


# Convert 'Board' column from string to NumPy arrays
df['Board'] = df['Board'].apply(_parse_board)

# Convert to NumPy format
X = np.stack(df['Board'].values)  # One row per board; np.stack raises if board lengths differ
y = df['Best_Move'].values  # Target column

print("Sample board:", X[0])  # Example board state
print("Best move:", y[0])

Sample board: [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  0
  0  0  0  0  0  0  0  0  1 -1  1 -1  1  0  0  1 -1 -1]
Best move: 4


## Transformer model trials

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Constants (module-level; later cells redefine some of these names)
BOARD_SIZE = 42  # 6x7 grid
NUM_COLUMNS = 7  # Moves are in range 0-6
EMBEDDING_DIM = 64  # Width of each cell embedding; also the width kept by the residual paths

# Define Positional Encoding layer
class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal position signal to a sequence of embeddings.

    The table is computed once in the constructor with shape
    (1, max_len, d_model). Sines of the even-indexed angle columns come first
    on the feature axis, followed by cosines of the odd-indexed columns (the
    two groups are concatenated, not interleaved).

    Args:
        max_len: Number of positions to precompute.
        d_model: Feature width of the embeddings the signal is added to.
        **kwargs: Standard keras Layer arguments (name, dtype, trainable, ...).
            Accepting them is what lets Keras rebuild the layer from a saved
            model config.
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.max_len = max_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(max_len, d_model)

    def positional_encoding(self, max_len, d_model):
        """Build the (1, max_len, d_model) float32 encoding table."""
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis],
                                     np.arange(d_model)[np.newaxis, :],
                                     d_model)
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]  # Add batch dimension
        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, position, i, d_model):
        """Return position * 1 / 10000^(2*(i//2)/d_model), broadcast over both inputs."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return position * angle_rates

    def call(self, inputs):
        """Add the encoding for the first seq_length positions to `inputs`."""
        seq_length = tf.shape(inputs)[1]  # Get sequence length dynamically
        return inputs + self.pos_encoding[:, :seq_length, :self.d_model]  # Ensure shape matches

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config

# Define Transformer Block
def transformer_block(x, num_heads=4, key_dim=64, ff_dim=128, dropout_rate=0.2):
    """Apply one encoder block: self-attention and a feed-forward net, each with a residual.

    LayerNormalization is applied after each residual addition.

    Args:
        x: Symbolic tensor whose last dimension is the model width.
        num_heads: Number of attention heads.
        key_dim: Per-head size of the query/key projections.
        ff_dim: Hidden width of the feed-forward sub-layer.
        dropout_rate: Dropout applied to each sub-layer output before the residual add.

    Returns:
        A tensor with the same last dimension as `x`.

    Raises:
        ValueError: If the last dimension of `x` is not statically known.
    """
    # Take the width from the input instead of a module-level constant, so the
    # residual additions line up for whatever embedding size the caller used.
    model_dim = x.shape[-1]
    if model_dim is None:
        raise ValueError("transformer_block needs an input with a known last dimension")

    attn_output = layers.MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    attn_output = layers.Dropout(dropout_rate)(attn_output)
    x = layers.LayerNormalization()(x + attn_output)  # Add residual connection

    ffn_output = layers.Dense(ff_dim, activation="gelu")(x)
    ffn_output = layers.Dense(model_dim)(ffn_output)  # Project back to the input width
    ffn_output = layers.Dropout(dropout_rate)(ffn_output)

    return layers.LayerNormalization()(x + ffn_output)  # Another residual connection

# Define Transformer Model with Positional Encoding
def create_transformer_model():
    """Build and compile the single-block transformer move classifier.

    The model takes a flat board of BOARD_SIZE cells holding -1, 0 or 1 and
    returns a softmax distribution over the NUM_COLUMNS columns.

    Returns:
        A keras.Model compiled with Adam, sparse categorical cross-entropy
        and an accuracy metric.
    """
    inputs = keras.Input(shape=(BOARD_SIZE,))

    # Embedding expects non-negative indices in [0, input_dim). Board cells are
    # -1/0/1, so shift them to 0/1/2 inside the model; callers keep passing the
    # raw board. (Embedding casts the float result back to integers.)
    token_ids = layers.Rescaling(scale=1.0, offset=1.0)(inputs)

    # `input_length` is omitted: it is deprecated in current Keras and the
    # sequence length is already fixed by the Input shape.
    x = layers.Embedding(input_dim=3, output_dim=EMBEDDING_DIM)(token_ids)

    # Add Positional Encoding
    x = PositionalEncoding(BOARD_SIZE, EMBEDDING_DIM)(x)

    # Add transformer blocks (a single block in this trial)
    for _ in range(1):
        x = transformer_block(x, num_heads=4, key_dim=EMBEDDING_DIM)

    # Output layer (7 columns)
    x = layers.GlobalAveragePooling1D()(x)  # Average over the board positions
    x = layers.Dense(NUM_COLUMNS, activation="softmax")(x)

    model = keras.Model(inputs, x)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# Create model and print its layer summary.
# This binds the global `model`; later cells that build another model rebind it.
model = create_transformer_model()
model.summary()




In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Constants for this trial (they overwrite same-named globals from earlier cells)
BOARD_SIZE = 42  # 6x7 grid
NUM_COLUMNS = 7  # Moves are in range 0-6
D_MODEL = 128  # Embedding / model width. NOTE(review): the previous comment mentioned 256; the value in effect is 128
NUM_HEADS = 4  # Attention heads per MultiHeadAttention layer
FF_DIM = 512  # Hidden width of the feed-forward sub-layer
NUM_LAYERS = 2  # Number of stacked transformer blocks

# Define Positional Encoding layer
class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal position signal to a sequence of embeddings.

    The table is computed once in the constructor with shape
    (1, max_len, d_model). Sines of the even-indexed angle columns come first
    on the feature axis, followed by cosines of the odd-indexed columns (the
    two groups are concatenated, not interleaved).

    Args:
        max_len: Number of positions to precompute.
        d_model: Feature width of the embeddings the signal is added to.
        **kwargs: Standard keras Layer arguments (name, dtype, trainable, ...).
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Remember the actual constructor arguments; get_config() must report
        # these rather than whatever the module-level constants currently hold.
        self.max_len = max_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(max_len, d_model)

    def positional_encoding(self, max_len, d_model):
        """Build the (1, max_len, d_model) float32 encoding table."""
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]  # Add batch dimension
        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, position, i, d_model):
        """Return position * 1 / 10000^(2*(i//2)/d_model), broadcast over both inputs."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return position * angle_rates

    def call(self, inputs):
        """Add the encoding for the first `inputs.shape[1]` positions to `inputs`."""
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config

# Transformer Block
def transformer_block(x):
    """Run one encoder block on `x` and return a tensor of width D_MODEL.

    Two sub-layers are applied in sequence: multi-head self-attention and a
    two-layer feed-forward net. Each sub-layer output goes through dropout, is
    added back to its input, and the sum is layer-normalised.
    """
    # Self-attention sub-layer: query and value are both `x`.
    self_attention = layers.MultiHeadAttention(num_heads=NUM_HEADS, key_dim=D_MODEL)
    attended = layers.Dropout(0.3)(self_attention(x, x))
    normed_attention = layers.LayerNormalization()(attended + x)

    # Feed-forward sub-layer: expand to FF_DIM, then project back to D_MODEL
    # so the residual addition below has matching widths.
    hidden = layers.Dense(FF_DIM, activation="gelu")(normed_attention)
    projected = layers.Dense(D_MODEL)(hidden)
    projected = layers.Dropout(0.3)(projected)

    block_output = layers.LayerNormalization()(projected + normed_attention)
    return block_output

# Define Transformer Model with Positional Encoding
def create_transformer_model():
    """Build and compile the NUM_LAYERS-block transformer move classifier.

    The model takes a flat board of BOARD_SIZE cells holding -1, 0 or 1 and
    returns a softmax distribution over the NUM_COLUMNS columns.

    Returns:
        A keras.Model compiled with Adam, sparse categorical cross-entropy
        and an accuracy metric.
    """
    inputs = keras.Input(shape=(BOARD_SIZE,))

    # Embedding expects non-negative indices in [0, input_dim). Board cells are
    # -1/0/1, so shift them to 0/1/2 inside the model; callers keep passing the
    # raw board. (Embedding casts the float result back to integers.)
    token_ids = layers.Rescaling(scale=1.0, offset=1.0)(inputs)

    # Three distinct cell values -> three embedding rows. Boards always have
    # BOARD_SIZE cells, so no extra padding row is needed. `input_length` is
    # omitted because it is deprecated in current Keras.
    x = layers.Embedding(input_dim=3, output_dim=D_MODEL)(token_ids)

    # Positional Encoding
    x = PositionalEncoding(BOARD_SIZE, D_MODEL)(x)

    # Apply multiple Transformer Blocks
    for _ in range(NUM_LAYERS):
        x = transformer_block(x)

    # Global Pooling: average over the board positions
    x = layers.GlobalAveragePooling1D()(x)

    # Dense layers
    x = layers.Dense(512, activation="gelu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(256, activation="gelu")(x)
    x = layers.Dropout(0.3)(x)

    # Output layer (softmax over columns)
    outputs = layers.Dense(NUM_COLUMNS, activation="softmax")(x)

    model = keras.Model(inputs, outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    return model

# Create and summarize model.
# This rebinds the global `model`, replacing any model built by an earlier cell.
model = create_transformer_model()
model.summary()

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Constants for this trial (they overwrite same-named globals from earlier cells)
BOARD_SIZE = 42  # 6x7 grid
NUM_COLUMNS = 7  # Moves are in range 0-6
D_MODEL = 64  # Embedding / model width used by every layer in this trial

# Define Positional Encoding layer
class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal position signal to a sequence of embeddings.

    The table is computed once in the constructor with shape
    (1, max_len, d_model). Sines of the even-indexed angle columns come first
    on the feature axis, followed by cosines of the odd-indexed columns (the
    two groups are concatenated, not interleaved).

    Args:
        max_len: Number of positions to precompute.
        d_model: Feature width of the embeddings the signal is added to.
        **kwargs: Standard keras Layer arguments (name, dtype, trainable, ...).
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Remember the actual constructor arguments; get_config() must report
        # these rather than whatever the module-level constants currently hold.
        self.max_len = max_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(max_len, d_model)

    def positional_encoding(self, max_len, d_model):
        """Build the (1, max_len, d_model) float32 encoding table."""
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
        sines = np.sin(angle_rads[:, 0::2])
        cosines = np.cos(angle_rads[:, 1::2])
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]  # Add batch dimension
        return tf.cast(pos_encoding, dtype=tf.float32)

    def get_angles(self, position, i, d_model):
        """Return position * 1 / 10000^(2*(i//2)/d_model), broadcast over both inputs."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return position * angle_rates

    def call(self, inputs):
        """Add the encoding for the first `inputs.shape[1]` positions to `inputs`."""
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config

# Transformer Block
def transformer_block(x):
    """Run one encoder block on `x` and return a tensor of width D_MODEL.

    Two sub-layers are applied in sequence: 8-head self-attention and a
    two-layer feed-forward net. Each sub-layer output goes through dropout, is
    added back to its input, and the sum is layer-normalised.
    """
    # Self-attention sub-layer: query and value are both `x`.
    self_attention = layers.MultiHeadAttention(num_heads=8, key_dim=D_MODEL)
    attended = layers.Dropout(0.3)(self_attention(x, x))
    normed_attention = layers.LayerNormalization()(attended + x)

    # Feed-forward sub-layer: expand to 256 units, then project back to
    # D_MODEL so the residual addition below has matching widths.
    hidden = layers.Dense(256, activation="gelu")(normed_attention)
    projected = layers.Dense(D_MODEL)(hidden)
    projected = layers.Dropout(0.3)(projected)

    block_output = layers.LayerNormalization()(projected + normed_attention)
    return block_output

# Define Transformer Model with Positional Encoding
def create_transformer_model():
    """Build and compile the three-block transformer move classifier.

    The model takes a flat board of BOARD_SIZE cells holding -1, 0 or 1 and
    returns a softmax distribution over the NUM_COLUMNS columns.

    Returns:
        A keras.Model compiled with Adam, sparse categorical cross-entropy
        and an accuracy metric.
    """
    inputs = keras.Input(shape=(BOARD_SIZE,))

    # Embedding expects non-negative indices in [0, input_dim). Board cells are
    # -1/0/1, so shift them to 0/1/2 inside the model; callers keep passing the
    # raw board. (Embedding casts the float result back to integers.)
    token_ids = layers.Rescaling(scale=1.0, offset=1.0)(inputs)

    # `input_length` is omitted: it is deprecated in current Keras and the
    # sequence length is already fixed by the Input shape.
    x = layers.Embedding(input_dim=3, output_dim=D_MODEL)(token_ids)

    # Positional Encoding
    x = PositionalEncoding(BOARD_SIZE, D_MODEL)(x)

    # Apply multiple Transformer Blocks
    for _ in range(3):
        x = transformer_block(x)

    # Global Pooling: average over the board positions
    x = layers.GlobalAveragePooling1D()(x)

    # Dense layers
    x = layers.Dense(256, activation="gelu")(x)
    x = layers.Dropout(0.3)(x)
    x = layers.Dense(128, activation="gelu")(x)
    x = layers.Dropout(0.3)(x)

    # Output layer (softmax over columns)
    outputs = layers.Dense(NUM_COLUMNS, activation="softmax")(x)

    model = keras.Model(inputs, outputs)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

    return model

# Create and summarize model.
# This rebinds the global `model`, replacing any model built by an earlier cell.
model = create_transformer_model()
model.summary()

## Final Transformer Model (30k params)

In [None]:

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# Constants (they overwrite same-named globals from earlier cells)
BOARD_SIZE = 42  # 6x7 grid
NUM_COLUMNS = 7  # Moves are in range 0-6
# Define Positional Encoding layer
class PositionalEncoding(layers.Layer):
    """Adds a fixed sinusoidal position signal to a sequence of embeddings.

    The table is computed once in the constructor with shape
    (1, max_len, d_model) and added to the inputs in call().

    Args:
        max_len: Number of positions to precompute.
        d_model: Feature width of the embeddings the signal is added to.
        **kwargs: Standard keras Layer arguments (name, dtype, trainable, ...).
            Accepting them, together with get_config(), is what lets Keras
            rebuild this layer when a saved model is loaded with
            custom_objects={"PositionalEncoding": PositionalEncoding}.
    """
    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.max_len = max_len
        self.d_model = d_model
        self.pos_encoding = self.positional_encoding(max_len, d_model)
    def positional_encoding(self, max_len, d_model):
        """Build the (1, max_len, d_model) float32 encoding table."""
        # Create a matrix of shape (max_len, d_model)
        angle_rads = self.get_angles(np.arange(max_len)[:, np.newaxis], np.arange(d_model)[np.newaxis, :], d_model)
        # Apply sine to the even-indexed angle columns; 2i
        sines = np.sin(angle_rads[:, 0::2])
        # Apply cosine to the odd-indexed angle columns; 2i+1
        cosines = np.cos(angle_rads[:, 1::2])
        # Concatenate (sines first, then cosines) to get the positional encoding matrix
        pos_encoding = np.concatenate([sines, cosines], axis=-1)
        pos_encoding = pos_encoding[np.newaxis, ...]  # Add batch dimension
        return tf.cast(pos_encoding, dtype=tf.float32)
    def get_angles(self, position, i, d_model):
        """Return position * 1 / 10000^(2*(i//2)/d_model), broadcast over both inputs."""
        angle_rates = 1 / np.power(10000, (2 * (i // 2)) / np.float32(d_model))
        return position * angle_rates
    def call(self, inputs):
        """Add the encoding for the first `inputs.shape[1]` positions to `inputs`."""
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]
    def get_config(self):
        """Return the constructor arguments so the layer can be re-created on load."""
        config = super().get_config()
        config.update({"max_len": self.max_len, "d_model": self.d_model})
        return config

# Define Transformer Model with Positional Encoding
def create_transformer_model():
    """Build and compile the final, small transformer move classifier.

    The model takes a flat board of BOARD_SIZE cells holding -1, 0 or 1 and
    returns a probability distribution over the NUM_COLUMNS columns.

    Returns:
        A keras.Model compiled with Adam, sparse categorical cross-entropy
        and an accuracy metric.
    """
    inputs = keras.Input(shape=(BOARD_SIZE,))
    # Embedding expects non-negative indices in [0, input_dim). Board cells are
    # -1/0/1, so shift them to 0/1/2 inside the model; callers keep passing the
    # raw board. (Embedding casts the float result back to integers.)
    token_ids = layers.Rescaling(scale=1.0, offset=1.0)(inputs)
    # `input_length` is omitted: it is deprecated in current Keras and the
    # sequence length is already fixed by the Input shape.
    x = layers.Embedding(input_dim=3, output_dim=32)(token_ids)
    # Add Positional Encoding
    x = PositionalEncoding(BOARD_SIZE, 32)(x)
    # Attention + feed-forward stack.
    # NOTE(review): unlike the trial blocks above, there are no residual
    # additions here and the feed-forward width stays at 128; kept as is so the
    # architecture is unchanged.
    x = layers.MultiHeadAttention(num_heads=4, key_dim=32)(x, x)
    x = layers.LayerNormalization()(x)
    x = layers.Dense(128, activation="gelu")(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(128)(x)
    x = layers.Dropout(0.2)(x)
    # Output layer (7 columns): softmax is applied per board position, then the
    # per-position distributions are averaged into a single prediction.
    x = layers.Dense(NUM_COLUMNS, activation="softmax")(x)
    x = layers.GlobalAveragePooling1D()(x)
    model = keras.Model(inputs, x)
    model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model
# Create model and print its layer summary.
# This rebinds the global `model`, replacing any model built by an earlier cell.
model = create_transformer_model()
model.summary()



In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

# Split data: hold out 10% for validation; the fixed seed keeps the split reproducible
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42)

# Define checkpoint callback.
# With save_best_only=True a file is written only on epochs where the monitored
# value improves; because the filename contains the epoch number, each
# improvement produces a new file instead of overwriting the previous best.
checkpoint_callback = ModelCheckpoint(
    "connect4_transformer_epoch_{epoch:02d}.h5",  # Epoch number is filled into the filename
    monitor="val_loss",  # Stated explicitly; this is also the Keras default
    mode="min",  # Lower validation loss is better
    save_best_only=True,  # Skip epochs whose val_loss did not improve
    save_weights_only=False,  # Save the full model, not just the weights
    verbose=1
)

# Train the model with callback. Keep the returned History so the per-epoch
# loss/accuracy values (history.history) can be plotted or inspected afterwards.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=40,
    batch_size=32,
    callbacks=[checkpoint_callback],
)

Epoch 1/40
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.2446 - loss: 1.7914
Epoch 1: val_loss improved from inf to 1.63241, saving model to connect4_transformer_epoch_01.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 5ms/step - accuracy: 0.2446 - loss: 1.7914 - val_accuracy: 0.3150 - val_loss: 1.6324
Epoch 2/40
[1m7459/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3151 - loss: 1.6088
Epoch 2: val_loss improved from 1.63241 to 1.59852, saving model to connect4_transformer_epoch_02.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.3151 - loss: 1.6088 - val_accuracy: 0.3202 - val_loss: 1.5985
Epoch 3/40
[1m7467/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3332 - loss: 1.5734
Epoch 3: val_loss improved from 1.59852 to 1.55333, saving model to connect4_transformer_epoch_03.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.3332 - loss: 1.5734 - val_accuracy: 0.3479 - val_loss: 1.5533
Epoch 4/40
[1m7459/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3470 - loss: 1.5512
Epoch 4: val_loss improved from 1.55333 to 1.55331, saving model to connect4_transformer_epoch_04.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.3470 - loss: 1.5512 - val_accuracy: 0.3517 - val_loss: 1.5533
Epoch 5/40
[1m7468/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3570 - loss: 1.5368
Epoch 5: val_loss improved from 1.55331 to 1.52611, saving model to connect4_transformer_epoch_05.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3570 - loss: 1.5367 - val_accuracy: 0.3702 - val_loss: 1.5261
Epoch 6/40
[1m7466/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3682 - loss: 1.5177
Epoch 6: val_loss improved from 1.52611 to 1.52506, saving model to connect4_transformer_epoch_06.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.3682 - loss: 1.5177 - val_accuracy: 0.3615 - val_loss: 1.5251
Epoch 7/40
[1m7470/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3751 - loss: 1.5080
Epoch 7: val_loss improved from 1.52506 to 1.49129, saving model to connect4_transformer_epoch_07.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3751 - loss: 1.5080 - val_accuracy: 0.3835 - val_loss: 1.4913
Epoch 8/40
[1m7467/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3806 - loss: 1.4939
Epoch 8: val_loss did not improve from 1.49129
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.3806 - loss: 1.4939 - val_accuracy: 0.3763 - val_loss: 1.4929
Epoch 9/40
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3827 - loss: 1.4865
Epoch 9: val_loss improved from 1.49129 to 1.48822, saving model to connect4_transformer_epoch_09.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.3827 - loss: 1.4865 - val_accuracy: 0.3792 - val_loss: 1.4882
Epoch 10/40
[1m7462/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3854 - loss: 1.4803
Epoch 10: val_loss did not improve from 1.48822
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3854 - loss: 1.4803 - val_accuracy: 0.3808 - val_loss: 1.4925
Epoch 11/40
[1m7465/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3878 - loss: 1.4742
Epoch 11: val_loss improved from 1.48822 to 1.48365, saving model to connect4_transformer_epoch_11.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.3878 - loss: 1.4742 - val_accuracy: 0.3886 - val_loss: 1.4837
Epoch 12/40
[1m7460/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3906 - loss: 1.4698
Epoch 12: val_loss improved from 1.48365 to 1.47674, saving model to connect4_transformer_epoch_12.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3906 - loss: 1.4698 - val_accuracy: 0.3872 - val_loss: 1.4767
Epoch 13/40
[1m7465/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3940 - loss: 1.4649
Epoch 13: val_loss improved from 1.47674 to 1.47382, saving model to connect4_transformer_epoch_13.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3940 - loss: 1.4649 - val_accuracy: 0.3855 - val_loss: 1.4738
Epoch 14/40
[1m7464/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3938 - loss: 1.4639
Epoch 14: val_loss improved from 1.47382 to 1.46491, saving model to connect4_transformer_epoch_14.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3938 - loss: 1.4639 - val_accuracy: 0.3936 - val_loss: 1.4649
Epoch 15/40
[1m7469/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3990 - loss: 1.4579
Epoch 15: val_loss did not improve from 1.46491
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 4ms/step - accuracy: 0.3990 - loss: 1.4579 - val_accuracy: 0.3900 - val_loss: 1.4684
Epoch 16/40
[1m7464/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3992 - loss: 1.4561
Epoch 16: val_loss improved from 1.46491 to 1.46012, saving model to connect4_transformer_epoch_16.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 4ms/step - accuracy: 0.3992 - loss: 1.4561 - val_accuracy: 0.3947 - val_loss: 1.4601
Epoch 17/40
[1m7464/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3981 - loss: 1.4546
Epoch 17: val_loss improved from 1.46012 to 1.45873, saving model to connect4_transformer_epoch_17.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.3981 - loss: 1.4546 - val_accuracy: 0.3968 - val_loss: 1.4587
Epoch 18/40
[1m7465/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.3999 - loss: 1.4506
Epoch 18: val_loss improved from 1.45873 to 1.45546, saving model to connect4_transformer_epoch_18.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.3999 - loss: 1.4506 - val_accuracy: 0.3965 - val_loss: 1.4555
Epoch 19/40
[1m7469/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4020 - loss: 1.4488
Epoch 19: val_loss improved from 1.45546 to 1.44964, saving model to connect4_transformer_epoch_19.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 4ms/step - accuracy: 0.4020 - loss: 1.4488 - val_accuracy: 0.3994 - val_loss: 1.4496
Epoch 20/40
[1m7464/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4035 - loss: 1.4486
Epoch 20: val_loss improved from 1.44964 to 1.44292, saving model to connect4_transformer_epoch_20.h5




[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.4035 - loss: 1.4486 - val_accuracy: 0.4040 - val_loss: 1.4429
Epoch 21/40
[1m7465/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4036 - loss: 1.4466
Epoch 21: val_loss did not improve from 1.44292
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 4ms/step - accuracy: 0.4036 - loss: 1.4466 - val_accuracy: 0.3994 - val_loss: 1.4545
Epoch 22/40
[1m7459/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4033 - loss: 1.4449
Epoch 22: val_loss did not improve from 1.44292
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.4033 - loss: 1.4449 - val_accuracy: 0.4056 - val_loss: 1.4485
Epoch 23/40
[1m7460/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4046 - loss: 1.4



[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 4ms/step - accuracy: 0.4093 - loss: 1.4330 - val_accuracy: 0.4057 - val_loss: 1.4358
Epoch 31/40
[1m7463/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4098 - loss: 1.4319
Epoch 31: val_loss did not improve from 1.43581
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.4098 - loss: 1.4319 - val_accuracy: 0.4037 - val_loss: 1.4448
Epoch 32/40
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4103 - loss: 1.4300
Epoch 32: val_loss did not improve from 1.43581
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.4103 - loss: 1.4300 - val_accuracy: 0.4089 - val_loss: 1.4366
Epoch 33/40
[1m7464/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4121 - loss: 1.4



[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 4ms/step - accuracy: 0.4118 - loss: 1.4256 - val_accuracy: 0.4119 - val_loss: 1.4331
Epoch 40/40
[1m7462/7471[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 4ms/step - accuracy: 0.4136 - loss: 1.4246
Epoch 40: val_loss did not improve from 1.43305
[1m7471/7471[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 4ms/step - accuracy: 0.4136 - loss: 1.4246 - val_accuracy: 0.4071 - val_loss: 1.4333


<keras.src.callbacks.history.History at 0x7bb2895de5d0>

## Data Cleaning

In [None]:
import pickle
import pandas as pd

# Load the pickle file
# NOTE(security): pickle.load can execute arbitrary code while deserialising;
# only use it on files that come from a trusted source.
with open('/content/mcts7500_pool.pickle', 'rb') as file:
    my_dict = pickle.load(file)

# Convert the dictionary to a DataFrame
df2 = pd.DataFrame(my_dict)

# Display the DataFrame
df2.head()

Unnamed: 0,board_x,play_y,README
0,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",4,this is with MCTS7500 - but it looks for wins ...
1,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0], [...",4,this is with MCTS7500 - but it looks for wins ...
2,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",5,this is with MCTS7500 - but it looks for wins ...
3,"[[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0], [...",3,this is with MCTS7500 - but it looks for wins ...
4,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....",6,this is with MCTS7500 - but it looks for wins ...


In [None]:
def _flatten_board(grid):
    """Flatten a nested board (rows of cells) into one flat list of ints, row by row."""
    flat_cells = []
    for row in grid:
        for cell in row:
            flat_cells.append(int(cell))  # int() also turns -0.0 into 0
    return flat_cells


df2['board_moves'] = df2['board_x'].apply(_flatten_board)

# Display the updated DataFrame
print(df2.head())

                                             board_x  play_y  \
0  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....       4   
1  [[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0], [...       4   
2  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....       5   
3  [[-0.0, -0.0, -0.0, -0.0, -0.0, -0.0, -0.0], [...       3   
4  [[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], [0.0, 0....       6   

                                              README  \
0  this is with MCTS7500 - but it looks for wins ...   
1  this is with MCTS7500 - but it looks for wins ...   
2  this is with MCTS7500 - but it looks for wins ...   
3  this is with MCTS7500 - but it looks for wins ...   
4  this is with MCTS7500 - but it looks for wins ...   

                                         board_moves  
0  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
1  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
2  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
3  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...  
4  

In [None]:
# Keep only the model input and label columns. `.copy()` makes df_final an
# independent frame, so modifying it later does not raise pandas'
# SettingWithCopyWarning or touch df2.
df_final = df2[['board_moves', 'play_y']].copy()

# Write the cleaned data. The list in each 'board_moves' cell is stored as
# text in the CSV, which is why the loading cell has to parse it back.
df_final.to_csv('/content/mcts7500_pool.csv', index=False)

In [None]:
# Rename by assignment instead of inplace=True: df_final was created as a
# column slice of df2, and mutating such a slice in place triggers pandas'
# SettingWithCopyWarning. rename() returns a new frame, so no warning is raised.
df_final = df_final.rename(columns={'board_moves': 'Board', 'play_y': 'Best_Move'})

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_final.rename(columns={'board_moves':'Board','play_y':'Best_Move'},inplace=True)


In [None]:
# Preview `df`.
# NOTE(review): the rows displayed here differ from the head() of the frame
# loaded at the top of the notebook, so `df` was presumably re-loaded from a
# different file in a cell that is no longer present — confirm its source.
df.head()

Unnamed: 0,Board,Best_Move
0,"[0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",3
1,"[0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",2
2,"[0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",4
3,"[0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0...",4
4,"[0, 0, 1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0...",2


In [None]:
# Preview the cleaned frame to confirm the Board / Best_Move column names.
df_final.head()

Unnamed: 0,Board,Best_Move
0,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4
1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",4
2,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",5
3,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",3
4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",6


In [None]:
# Use pd.concat() to append the dataframes; ignore_index=True renumbers the rows 0..n-1.
# NOTE(review): `df` is whatever frame is currently bound in the kernel. Its
# preview shows rows that differ from the CSV loaded at the top of the
# notebook, so confirm which dataset it holds before relying on a fresh
# Restart & Run All.
final_df = pd.concat([df, df_final], ignore_index=True)

In [None]:
# Preview the concatenated frame.
final_df.head()

Unnamed: 0,Board,Best_Move
0,"[0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",3
1,"[0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",2
2,"[0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",4
3,"[0, 0, 0, -1, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0, 0...",4
4,"[0, 0, 1, -1, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0...",2


In [None]:
# Persist the combined dataset; index=False leaves the row index out of the file.
final_df.to_csv('/content/final_df.csv',index=False)