<a href="https://colab.research.google.com/github/MichaelCrosson/Connect4AI/blob/main/opti_transform.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pickle
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import KFold
from collections import Counter


In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [2]:
# Load the dataset
save_path = "mcts7500_pool.pickle"
with open(save_path, "rb") as f:
    dataset_original = pickle.load(f)


print(f"Loaded dataset from {save_path} with {len(dataset_original)} entries.")

Loaded dataset from mcts7500_pool.pickle with 3 entries.


In [3]:
dataset_original.keys()

dict_keys(['board_x', 'play_y', 'README'])

In [4]:
dataset_original['board_x'][0]

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1., -1.,  1.],
       [-1.,  1.,  0.,  0.,  1., -1., -1.]])

In [5]:
len(dataset_original['play_y'])

265620

In [6]:
x = np.array(dataset_original['board_x'])
y = np.array(dataset_original['play_y'])

In [7]:
x.shape

(265620, 6, 7)

In [8]:
import numpy as np

# Load original dataset
# x = np.load("x.npy")  # Shape: (num_samples, 6, 7)
# y = np.load("y.npy")  # Shape: (num_samples,)

# Function to mirror board positions
def mirror_board(board):
    return np.flip(board, axis=1)  # Flip along columns

# Function to mirror move positions
def mirror_move(move):
    return 6 - move  # Since columns are indexed 0-6

# Create mirrored dataset
x_mirrored = np.array([mirror_board(board) for board in x])
y_mirrored = np.array([mirror_move(move) for move in y])

# Concatenate original and mirrored data
x_augmented = np.concatenate([x, x_mirrored], axis=0)
y_augmented = np.concatenate([y, y_mirrored], axis=0)

# Save the augmented dataset
np.save("x_augmented.npy", x_augmented)
np.save("y_augmented.npy", y_augmented)

print("Dataset has been successfully augmented. New size:", x_augmented.shape[0])


Dataset has been successfully augmented. New size: 531240


In [10]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Function to compute sinusoidal positional encoding
def positional_encoding(seq_length, model_dim):
    positions = np.arange(seq_length)[:, np.newaxis]
    div_term = np.exp(np.arange(0, model_dim, 2) * (-np.log(10000.0) / model_dim))

    pos_enc = np.zeros((seq_length, model_dim))
    pos_enc[:, 0::2] = np.sin(positions * div_term)  # Apply sin to even indices
    pos_enc[:, 1::2] = np.cos(positions * div_term)  # Apply cos to odd indices

    return tf.convert_to_tensor(pos_enc, dtype=tf.float32)

# Apply positional encoding
def add_positional_encoding(x, model_dim=64):
    pos_encoding = positional_encoding(x.shape[1], model_dim)
    return x + pos_encoding


In [11]:
def build_transformer_model():
    inputs = keras.Input(shape=(6, 7, 1))  # Input shape: (6,7)

    # Flatten the board into a sequence
    x = layers.Reshape((42, 1))(inputs)

    # Embedding layer to map to higher dimensions
    x = layers.Dense(64, activation="relu")(x)

    # Add positional encoding
    x = add_positional_encoding(x, model_dim=64)

    # Transformer block 1
    attn_output = layers.MultiHeadAttention(num_heads=4, key_dim=32)(x, x)
    x = layers.Add()([x, attn_output])
    x = layers.LayerNormalization(epsilon=1e-6)(x)

    # Transformer block 2
    attn_output = layers.MultiHeadAttention(num_heads=4, key_dim=32)(x, x)
    x = layers.Add()([x, attn_output])
    x = layers.LayerNormalization(epsilon=1e-6)(x)

    # Feedforward network
    x = layers.Dense(128, activation="relu")(x)
    x = layers.Dense(64, activation="relu")(x)

    # Output layer for move prediction (7 classes, one for each column)
    outputs = layers.Dense(7, activation="softmax")(x[:, 0, :])  # Predict from the first token

    model = keras.Model(inputs, outputs)
    return model


In [12]:
# Load augmented dataset
x_train = np.load("x_augmented.npy")  # Shape: (num_samples, 6, 7)
y_train = np.load("y_augmented.npy")  # Shape: (num_samples,)

# Reshape input to (num_samples, 6, 7, 1)
x_train = x_train[..., np.newaxis]

# Compile and train the model
model = build_transformer_model()
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

# Train the model
model.fit(x_train, y_train, epochs=30, batch_size=32, validation_split=0.1)

# Save the trained model
model.save("connect4_transformer_positional.h5")

print("Training completed with positional encoding and model saved.")


Epoch 1/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m79s[0m 5ms/step - accuracy: 0.2587 - loss: 1.7707 - val_accuracy: 0.3541 - val_loss: 1.5516
Epoch 2/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m77s[0m 5ms/step - accuracy: 0.3570 - loss: 1.5308 - val_accuracy: 0.3749 - val_loss: 1.4924
Epoch 3/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.3788 - loss: 1.4852 - val_accuracy: 0.3965 - val_loss: 1.4525
Epoch 4/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.3982 - loss: 1.4452 - val_accuracy: 0.4080 - val_loss: 1.4282
Epoch 5/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m70s[0m 5ms/step - accuracy: 0.4098 - loss: 1.4255 - val_accuracy: 0.4104 - val_loss: 1.4138
Epoch 6/30
[1m14942/14942[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 5ms/step - accuracy: 0.4177 - loss: 1.4073 - val_accuracy: 0.4201 - val_loss: 1.404



Training completed with positional encoding and model saved.
