In [1]:
# %pip install tensorflow

In [15]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
# from tensorflow.keras import callbacks
 

import copy
import random
from json.encoder import INFINITY
import pandas as pd
import math
from ayo_game import play, is_illegal_move, assign_reward, print_game_play, end_game
from agents import random_agent as ra
from agents import minimax_agent as ma
from agents import mcts_agent as mctsa
seed = 37

randint = random.randint
random.seed(seed)

In [2]:
random_agent = ra.agent
minimax_agent = ma.agent
minimax_value = ma.get_action_value

In [3]:
def custom_train_test_split(data, test_size=0.2, shuffle=True, random_state=None):
    """
    Custom function to split data into training and test sets.

    Parameters:
    - X: Input features (numpy array or list)
    - y: Target labels (numpy array or list)
    - test_size: Proportion of the data to include in the test split (default 0.2)
    - shuffle: Whether to shuffle the data before splitting (default True)
    - random_state: Seed for the random number generator (optional, for reproducibility)

    Returns:
    - X_train, X_test, y_train, y_test: Split training and testing data
    """
    X = np.array([d[0] for d in data])
    y = np.array([d[1] for d in data])

    # Set random seed if provided (to ensure reproducibility)
    if random_state is not None:
        np.random.seed(random_state)

    # Get the number of samples
    num_samples = len(X)

    # Shuffle the data if requested
    if shuffle:
        indices = np.random.permutation(num_samples)
        X = X[indices]
        y = y[indices]

    # Compute the split index
    split_index = int(num_samples * (1 - test_size))

    # Split the data into training and testing sets
    X_train, X_test = X[:split_index], X[split_index:]
    y_train, y_test = y[:split_index], y[split_index:]

    return X_train, X_test, y_train, y_test


In [4]:
class PolicyModel:
    def __init__(self):
        self.num_actions=12
        self.X_shape=14
        self.model = Sequential([
            Dense(64, input_dim=self.X_shape, activation='relu'),  # First hidden layer
            Dense(32, activation='relu'),  # Second hidden layer
            Dense(self.num_actions, activation='softmax')  # Output layer for 12 possible actions
        ])

        # Compile the model
        self.model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'], )

    def train(self, training_examples):
                # Train the model (Assume more training data is available)
        # For example purposes, we'll use the same sample data multiple times
        X_train, X_test, y_train, y_test = custom_train_test_split(training_examples, test_size=0.3, shuffle=True, random_state=None)

        # Convert target (y) to categorical (for classification)
        num_actions = self.num_actions # Assuming 12 possible actions (0 to 11)
        y_train = tf.keras.utils.to_categorical(y_train, num_classes=num_actions)
        y_test = tf.keras.utils.to_categorical(y_test, num_classes=num_actions)

        callbacks = [EarlyStopping(patience=20, monitor='loss', verbose=0),
             ReduceLROnPlateau(monitor='val_accuracy',factor=0.01, min_Ir=0.00001, verbose=0),
             ModelCheckpoint('latest_policy_model.keras', verbose=0, save_best_only=True, save_weights_only=False)]

        # Train the model (Assume more training data is available)
        # For example purposes, we'll use the same sample data multiple times
        self.model.fit(X_train, y_train, epochs=200, batch_size=64,  callbacks=callbacks, validation_data=(X_test,y_test))


    def predict(self,data):
        data = np.array(data)
        prediction = self.model.predict(data)
        return prediction
    

class ValueModel:
    def __init__(self):
        self.X_shape= 14
        self.model = Sequential([
            Dense(64, input_dim=self.X_shape, activation='relu'),  # First hidden layer
            Dense(32, activation='relu'),  # Second hidden layer
            Dense(1)  # Output layer for 12 possible actions
        ])

        # Compile the model
        self.model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])

    def train(self, training_examples):
        # Train the model (Assume more training data is available)
        # For example purposes, we'll use the same sample data multiple times
        X_train, X_test, y_train, y_test = custom_train_test_split(training_examples, test_size=0.3, shuffle=True, random_state=None)

        callbacks = [EarlyStopping(patience=20, monitor='loss', verbose=0),
             ReduceLROnPlateau(monitor='val_accuracy',factor=0.01, min_Ir=0.00001, verbose=0),
             ModelCheckpoint('latest_value_model.keras', verbose=0, save_best_only=True, save_weights_only=False)]

        self.model.fit(X_train, y_train, epochs=200, batch_size=64,  callbacks=callbacks, validation_data=(X_test,y_test))


    def predict(self,data):
        # Predict action for a new data point
        data = np.array(data)
        prediction = self.model.predict(data)
        return prediction


In [5]:
policy_model = PolicyModel()
value_model = ValueModel()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [6]:
def format_state(state):
    x = []
    x.extend(state['board'])
    x.append(state['current_player'])
    x.append(state['player_territory'][1])

    return x

In [7]:

def remove_duplicates(arr):
    # Convert the list of lists to a NumPy array
    np_arr = np.array(arr, dtype=object)
    
    # Create a set to keep track of unique first elements (list of numbers)
    seen = set()
    unique_arr = []

    for item in np_arr:
        # Convert the list of numbers (first element) to a tuple so it can be added to a set
        num_tuple = tuple(item[0])
        if num_tuple not in seen:
            unique_arr.append(item)
            seen.add(num_tuple)
    
    return np.array(unique_arr, dtype=object)

In [8]:
def generate_action(state,agent_1, agent_2):
   if state['current_player'] == 0:
      func = agent_1['func']
      arg = agent_1['arg']
      return func(state, arg)

   if state['current_player'] == 1:
      func = agent_2['func']
      arg = agent_2['arg']
      return func(state, arg)

In [9]:
def execute_episode(state,  agent_1, agent_2,show=False):
    state = copy.deepcopy(state)
    reward = [0,0,0]
    path = []
    train_example_policy = []
    train_example_value = []

    while True:
        action = generate_action(state, agent_1, agent_2)
        
        if is_illegal_move(state, action):
            continue

        best_action, value = minimax_value(state, agent_2['arg'])
        train_example_value.append([format_state(state), value])
        train_example_policy.append([format_state(state), best_action])
        state, new_reward = play(state, action)
        reward = assign_reward(reward, new_reward)
        if show:
            print_game_play(state, reward, action)

        if end_game(state):
            break
        path.append(action)

    return  train_example_value, train_example_policy

In [12]:
import pandas as pd
import os

def add_data_to_csv(file_name, new_data, y_column=None):
    columns = ['Pit_1', 'Pit_2', 'Pit_3','Pit_4', 'Pit_5', 'Pit_6', 'Pit_7', 'Pit_8', 'Pit_9', 'Pit_10', 'Pit_11', 'Pit_12', 'current_player', 'player_territory', y_column]  # Define columns (used only if file doesn't exist)
    """
    Adds new data to a CSV file by first converting the new data and the existing data to a DataFrame.

    Parameters:
    - file_name: The name of the CSV file (e.g., 'data.csv').
    - new_data: A list or tuple representing a new row (or multiple rows) to be added.
    - columns: A list of column names for the CSV file (used when the file does not already exist).
    """
    # Check if the CSV file already exists
    if os.path.exists(file_name):
        # Load existing CSV into a DataFrame
        df_existing = pd.read_csv(file_name)
    else:
        # If the file doesn't exist, create an empty DataFrame with the specified columns
        df_existing = pd.DataFrame(columns=columns)

    # Create a DataFrame from the new data
    # Check if new_data is a list of lists (multiple rows) or a single row
    if isinstance(new_data[0], (list, tuple)):
        df_new = pd.DataFrame(new_data, columns=columns)
    else:
        df_new = pd.DataFrame([new_data], columns=columns)

    # Append the new data to the existing DataFrame
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)

    # Write the combined DataFrame back to the CSV file
    df_combined.to_csv(file_name, index=False)


In [13]:
def generate_training_data():
    num_of_eps = 100
    value_data = []
    policy_data = []
    state = {
   # 'board' :[6, 6, 2, 7, 1, 6, 1, 6, 6, 6, 0, 1],
   'board' :[4,4,4,4,4,4,4,4,4,4,4,4],
   'current_player': 0,
   'player_territory': (0,6)
    }
    
    agent_1 = {
    'func': random_agent,
    'arg': {},
    'name': 'random_agent',
    'elo': 1200
    }
        
    agent_2 = {
    'func': minimax_agent,
    'arg': {
        'max_dept': 9,
    },
    'name': 'minimax_agent_dept_3',
    'elo': 1200
    }

    for i in range(num_of_eps):
        state['current_player'] = i%2
        state_value_data, state_policy_data = execute_episode(state,  agent_1, agent_2)
        # Example usage
        add_data_to_csv(file_name='value_data.csv', new_data=state_value_data, y_column='value')
        add_data_to_csv(file_name='policy_data.csv', new_data=state_policy_data, y_column='policy')
        value_data.extend(state_value_data)
        policy_data.extend(state_policy_data)
        print((i/num_of_eps) * 100,'%')
        
    return remove_duplicates(value_data), remove_duplicates(policy_data)


In [227]:
value_data, policy_data = generate_training_data()
len(value_data)

state_value_data:  [[[4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 6], 0], [[6, 0, 1, 6, 6, 2, 7, 1, 6, 1, 6, 6, 1, 6], -12], [[10, 0, 0, 1, 9, 2, 10, 0, 0, 3, 0, 1, 0, 6], -8], [[11, 1, 1, 0, 0, 3, 11, 1, 1, 0, 1, 2, 1, 6], -8], [[12, 2, 2, 1, 1, 0, 0, 2, 2, 1, 2, 3, 0, 6], -4], [[12, 2, 0, 2, 0, 1, 1, 2, 2, 1, 2, 3, 1, 6], -12], [[1, 0, 1, 3, 1, 2, 2, 3, 3, 2, 1, 1, 0, 6], -4], [[1, 0, 0, 0, 1, 2, 2, 3, 3, 2, 1, 1, 1, 6], -8], [[1, 0, 0, 0, 1, 2, 0, 0, 0, 2, 1, 1, 0, 6], 0], [[1, 0, 0, 0, 0, 0, 1, 1, 1, 2, 1, 1, 1, 6], -8], [[0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 2, 2, 0, 6], -8], [[0, 0, 0, 1, 1, 0, 0, 0, 2, 0, 2, 2, 1, 6], -8], [[1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 3, 0, 6], -8], [[1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 3, 1, 6], -8], [[1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 3, 0, 6], -8], [[1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 3, 1, 6], -8], [[1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 3, 0, 6], -8], [[1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 3, 1, 6], -8], [[1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 3, 0, 6], -8], [[0, 0, 1, 1, 1, 0, 0, 0

1455

In [228]:
value_model.train(value_data)

Epoch 1/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.0754 - loss: 53.1424 - val_accuracy: 0.2929 - val_loss: 31.5358 - learning_rate: 0.0010
Epoch 2/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2147 - loss: 32.9959 - val_accuracy: 0.2975 - val_loss: 26.8407 - learning_rate: 0.0010
Epoch 3/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2026 - loss: 30.8113 - val_accuracy: 0.2975 - val_loss: 24.8409 - learning_rate: 0.0010
Epoch 4/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.2069 - loss: 27.5998 - val_accuracy: 0.2975 - val_loss: 22.4870 - learning_rate: 0.0010
Epoch 5/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.2091 - loss: 25.7180 - val_accuracy: 0.2975 - val_loss: 20.8843 - learning_rate: 0.0010
Epoch 6/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [229]:
policy_model.train(policy_data)

Epoch 1/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.2139 - loss: 2.4220 - val_accuracy: 0.2586 - val_loss: 2.2734 - learning_rate: 0.0010
Epoch 2/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3133 - loss: 2.1527 - val_accuracy: 0.3249 - val_loss: 2.1544 - learning_rate: 0.0010
Epoch 3/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3852 - loss: 2.0512 - val_accuracy: 0.3295 - val_loss: 2.0877 - learning_rate: 0.0010
Epoch 4/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3698 - loss: 1.9958 - val_accuracy: 0.3455 - val_loss: 2.0323 - learning_rate: 0.0010
Epoch 5/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.3990 - loss: 1.9245 - val_accuracy: 0.3730 - val_loss: 1.9711 - learning_rate: 0.0010
Epoch 6/200
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

In [None]:
value_model_file = 'latest_value_model.keras'
policy_model_file = 'latest_policy_model.keras'

In [237]:
# play games
# store outcome of games
# train model
data = [
    [6, 6, 0, 1, 6, 6, 2, 7, 1, 6, 1, 6, 0, 6],# 4
    [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 6],# 0
    [3, 1, 3, 0, 2, 1, 0, 2, 2, 0, 2, 0, 0, 6],# 0
    [0, 0, 0, 0, 1, 1, 0, 1, 3, 1, 0, 1, 1, 6],# -8
    [2, 0, 3, 1, 3, 1, 2, 2, 1, 2, 3, 12, 1, 6],# -8
]
# data = [[0, 0, 3, 11, 1, 1, 0, 1, 2, 11, 1, 1, 1, 6]]
value_model.predict(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


array([[-0.5171886 ],
       [-0.60279393],
       [-0.45811522],
       [ 0.17267704],
       [-2.2415743 ]], dtype=float32)

In [14]:
value_model_file = 'latest_value_model.keras'
policy_model_file = 'latest_policy_model.keras'

In [238]:
# play games
# store outcome of games
# train model
model = load_model(policy_model_file)
data = [
    [6, 6, 0, 1, 6, 6, 2, 7, 1, 6, 1, 6, 0, 6],# 5
    [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 1, 6],# 0
    [3, 1, 3, 0, 2, 1, 0, 2, 2, 0, 2, 0, 0, 6],# 0
    [0, 0, 0, 0, 1, 1, 0, 1, 3, 1, 0, 1, 1, 6],# 
    [2, 0, 3, 1, 3, 1, 2, 2, 1, 2, 3, 12, 1, 6],# 
]
# data = [[0, 0, 3, 11, 1, 1, 0, 1, 2, 11, 1, 1, 1, 6]]
model.predict(data)
policy_model.predict(data)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step


array([[5.2679342e-01, 7.1213566e-02, 6.4253244e-03, 1.2586103e-01,
        4.5541286e-02, 3.1446680e-02, 2.6536989e-04, 8.5751287e-04,
        6.3383617e-02, 2.0111579e-02, 8.8617474e-02, 1.9483132e-02],
       [1.7417115e-01, 1.3089302e-01, 2.0366447e-02, 1.3953571e-01,
        7.4816070e-02, 1.3296317e-01, 4.3573100e-03, 2.2769095e-03,
        3.4483958e-02, 9.6534640e-02, 1.6738093e-01, 2.2220707e-02],
       [2.2504026e-01, 9.8554231e-02, 5.9721295e-02, 1.2214665e-01,
        6.1137985e-02, 1.2365016e-01, 5.4713435e-02, 2.4853293e-02,
        4.9331594e-02, 7.9850055e-02, 6.2100124e-02, 3.8900811e-02],
       [1.8599790e-01, 4.2891502e-02, 7.8129895e-02, 1.6894448e-01,
        7.0407063e-02, 9.9496409e-02, 1.7387049e-02, 3.2284297e-02,
        3.3723067e-02, 1.1094923e-01, 1.2614696e-01, 3.3642218e-02],
       [3.4366849e-01, 1.5593900e-02, 6.8957299e-02, 8.6609103e-02,
        1.4749469e-02, 3.8453758e-02, 3.6670384e-03, 1.1586434e-02,
        5.8926877e-02, 3.0489638e-02, 3.1972