# New Game Play

In [1]:
import copy
import random
from json.encoder import INFINITY
import pandas as pd
import math
from ayo_game import play, is_illegal_move, assign_reward, print_game_play, end_game
from agents import random_agent as ra
from agents import minimax_agent as ma
# Fixed seed so that runs using the `random` module are repeatable.
seed = 37

# Short alias for random.randint.
randint = random.randint
# Seed the global `random` generator once, at import/cell-execution time.
random.seed(seed)

## Agents

In [2]:
# Bind the agent entry points exported by the agents package to local names.
# They are later invoked as func(state, arg) by generate_action.
random_agent = ra.agent
minimax_agent = ma.agent
# minimax_agent = ma.agent

## Simulation

In [3]:
def generate_action(state,agent_1, agent_2):
    """Ask the agent whose turn it is for its next action.

    ``agent_1`` plays when ``state['current_player']`` is 0 and ``agent_2``
    when it is 1. Each agent is a dict holding a callable under ``'func'``
    and its extra argument under ``'arg'``; the callable is invoked as
    ``func(state, arg)`` and its result is returned unchanged.

    Returns ``None`` when ``current_player`` is neither 0 nor 1.
    """
    for seat, agent in enumerate((agent_1, agent_2)):
        if state['current_player'] == seat:
            return agent['func'](state, agent['arg'])
    return None


In [13]:
def simulate_game(state,  agent_1, agent_2,show=False):
    """Play one full game between two agents and return its outcome.

    The incoming ``state`` is deep-copied, so the caller's state is never
    modified. On every turn the agent to move is asked for an action; an
    illegal action is discarded and the agent is asked again.

    Args:
        state: initial game state dict (at least ``'current_player'``).
        agent_1: agent dict (``'func'``, ``'arg'``) playing as player 0.
        agent_2: agent dict (``'func'``, ``'arg'``) playing as player 1.
        show: when true, print the game after each legal move.

    Returns:
        ``(reward, path)`` where ``reward`` is the accumulated reward list
        produced by ``assign_reward`` and ``path`` is the list of every legal
        action that was played, in order, including the final one.
    """
    state = copy.deepcopy(state)
    reward = [0, 0, 0]
    path = []

    while True:
        action = generate_action(state, agent_1, agent_2)

        # NOTE(review): an agent that deterministically keeps proposing the
        # same illegal action would spin here forever.
        if is_illegal_move(state, action):
            continue

        state, new_reward = play(state, action)
        reward = assign_reward(reward, new_reward)
        # Record the move as soon as it is played so the game-ending move is
        # part of the path too (it used to be dropped by the break below).
        path.append(action)
        if show:
            print_game_play(state, reward, action)

        if end_game(state):
            break

    return (reward, path)

In [14]:
def get_valid_actions_mct(state):
    """List the pit indices the player to move may pick.

    A pit qualifies when it is non-empty and lies on the mover's side of the
    board: indices below ``state['player_territory'][1]`` for player 0,
    indices at or above it for player 1. Any other ``current_player`` value
    yields an empty list.
    """
    pits = state['board']
    boundary = state['player_territory'][1]
    mover = state['current_player']

    def on_movers_side(index):
        # Player 0 owns the low indices, player 1 the high ones.
        if mover == 0:
            return index < boundary
        if mover == 1:
            return index >= boundary
        return False

    return [
        index
        for index, seeds in enumerate(pits)
        if on_movers_side(index) and seeds != 0
    ]

In [None]:
def format_state(state, action):
    """Flatten a game state and the chosen action into one labelled record.

    The record has one ``'pit <i>'`` entry per board position, plus
    ``'current_player'``, ``'player_territory'`` (the second element of the
    state's territory tuple) and ``'action'``.

    Returns:
        A single-element list containing the record dict.
    """
    # The record must be a dict: it is keyed by column name, which a list
    # cannot support (the previous list raised TypeError on first insert).
    data = {f'pit {i}': pit for i, pit in enumerate(state['board'])}
    data['current_player'] = state['current_player']
    data['player_territory'] = state['player_territory'][1]
    data['action'] = action
    return [data]

In [None]:
# Sample opening position used to try out format_state: twelve pits holding
# four seeds each, player 0 to move, territory tuple (0, 6).
state = {
   'board' :[4,4,4,4,4,4,4,4,4,4,4,4],
   'current_player': 0,
   'player_territory': (0,6)
}

# Arbitrary action to pair with the sample state.
action = 0
# Bare expression: the notebook displays the formatted record as cell output.
format_state(state, action)

## Elo

In [None]:
def update_score(actual_result, expected_result, k=20):
    """Return the Elo rating change for one game.

    Args:
        actual_result: score actually obtained (1 win, 0.5 tie, 0 loss).
        expected_result: score predicted by ``expected_score``.
        k: K-factor scaling how far a single game moves the rating.
            Defaults to 20, the value previously hard-coded here.

    Returns:
        ``k * (actual_result - expected_result)``; positive when the player
        did better than expected.
    """
    return k * (actual_result - expected_result)

def expected_score(elo_1,elo_2):
    """Return the expected score of the player rated ``elo_1`` against ``elo_2``.

    Uses the logistic Elo curve ``1 / (1 + 10 ** ((elo_2 - elo_1) / 400))``,
    so equal ratings give 0.5.
    """
    return 1 / (1 + 10 ** ((elo_2 - elo_1) / 400))

def calculate_elo(elo_0,elo_1,result):
    """Return both players' ratings after one game.

    ``result`` is the score of the player rated ``elo_0``. The rating change
    computed for that player is added to ``elo_0`` and subtracted from
    ``elo_1``, so the sum of the two ratings is preserved.

    Returns:
        ``(new_elo_0, new_elo_1)``.
    """
    delta = update_score(result, expected_score(elo_0, elo_1))
    return (elo_0 + delta, elo_1 - delta)

## Deep Reinforcement Learning (DRL)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
# from tensorflow.keras.models import
from tensorflow.keras.layers import Dense

In [44]:
def format_state(state):
    """Flatten a game state into a feature list for the network.

    The list holds every board entry in order, followed by
    ``current_player`` and the second element of ``player_territory``.
    """
    return [
        *state['board'],
        state['current_player'],
        state['player_territory'][1],
    ]

In [80]:
class ayo_drl_model:
    """Dense softmax classifier that scores the 12 candidate actions."""

    def __init__(self, X_shape):
        """Build and compile the network.

        X_shape is forwarded to the first Dense layer as ``input_dim``, i.e.
        the number of features per example.
        """
        self.num_actions = 12
        layers = [
            Dense(64, input_dim=X_shape, activation='relu'),
            Dense(32, activation='relu'),
            # One probability per possible action.
            Dense(self.num_actions, activation='softmax'),
        ]
        self.model = Sequential(layers)
        self.model.compile(
            loss='categorical_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )

    def train_policy(self, training_examples):
        """Fit the network on ``(features, action)`` pairs.

        Actions are one-hot encoded over ``num_actions`` classes before
        fitting for 10 epochs with a batch size of 1.
        """
        features = np.array([example[0] for example in training_examples])
        actions = np.array([example[1] for example in training_examples])
        targets = tf.keras.utils.to_categorical(
            actions, num_classes=self.num_actions
        )
        self.model.fit(features, targets, epochs=10, batch_size=1)

    def predict(self,data):
        """Return the network output for one game state.

        ``data`` is a raw state dict; it is flattened with ``format_state``
        and wrapped in a batch of one before being passed to the model.
        """
        batch = np.array([format_state(data)])
        return self.model.predict(batch)


In [None]:
def ayo_drl_agent(state,arg):
    """Agent entry point that plays the action the policy model rates highest.

    Args:
        state: current game state, passed unchanged to ``model.predict``.
        arg: agent argument dict; ``arg['model']`` must expose ``predict``.

    Returns:
        Index of the largest entry of the model's prediction (``np.argmax``).
    """
    model = arg['model']

    # Keep the raw model output under one name; the earlier code stored it as
    # `action` but then read an undefined `prediction`.
    prediction = model.predict(state)
    print(prediction)
    predicted_action = np.argmax(prediction)
    print(f"Predicted action: {predicted_action}")
    # NOTE(review): the argmax is not filtered for legality; a caller that
    # re-asks on illegal moves would get the same answer again - confirm.
    return predicted_action

## Test

### Agents Details

In [81]:
# Agent descriptors used by generate_action / match_up. Each one carries:
#   'func' - the agent callable, invoked as func(state, arg)
#   'arg'  - the agent-specific argument dict handed to that callable
#   'name' - label reported in match results
#   'elo'  - current rating; every agent starts at 1200

# Baseline agent; takes no configuration.
random_agent_details = {
   'func': random_agent,
   'arg': {},
   'name': 'random_agent',
   'elo': 1200
}

# NOTE(review): mcts_agent is not defined or imported in the visible cells -
# confirm it is provided elsewhere before running this cell.
# The three MCTS variants differ only in 'max_iterations'.
mcts_agent_details_10 = {
   'func': mcts_agent,
   'arg': {
      'max_iterations': 10,
   },
   'name': 'mcts_agent_10',
   'elo': 1200
}

mcts_agent_details_100 = {
   'func': mcts_agent,
   'arg': {
      'max_iterations': 100,
   },
   'name': 'mcts_agent_100',
   'elo': 1200
}

mcts_agent_details_1000 = {
   'func': mcts_agent,
   'arg': {
      'max_iterations': 1000,
   },
   'name': 'mcts_agent_1000',
   'elo': 1200
}

# The three minimax variants differ only in 'max_dept'
# (presumably the search depth - verify against the minimax agent).
minimax_agent_details_3 = {
   'func': minimax_agent,
   'arg': {
      'max_dept': 3,
   },
   'name': 'minimax_agent_dept_3',
   'elo': 1200
}

minimax_agent_details_6 = {
   'func': minimax_agent,
   'arg': {
      'max_dept': 6,
   },
   'name': 'minimax_agent_dept_6',
   'elo': 1200
}

minimax_agent_details_9 = {
   'func': minimax_agent,
   'arg': {
      'max_dept': 9,
   },
   'name': 'minimax_agent_dept_9',
   'elo': 1200
}

# Descriptor for the neural-network agent.
# 'model' must be an *instance*: ayo_drl_agent calls model.predict(state),
# which cannot work on the bare class. The input width is 14 because the
# single-argument format_state yields the 12 board pits plus current_player
# and the territory boundary.
# NOTE(review): this instance is untrained; call train_policy on
# dlr_agent_details['arg']['model'] before using it in a match.
dlr_agent_details = {
   'func': ayo_drl_agent,
   'arg': {
      'model': ayo_drl_model(14),
   },
   'name': 'dlr_agent_details',
   'elo': 1200
}

NameError: name 'random_agent' is not defined

In [None]:
def match_up(origin_state,number_of_games, player_1, player_2 ):
    """Play a series of games between two agents and summarise the results.

    Each game starts from ``origin_state``. After every game both players'
    ``'elo'`` entries are updated in place via ``calculate_elo``, so the
    rating changes persist on the descriptor dicts passed in.

    Args:
        origin_state: starting state handed to ``simulate_game`` every game.
        number_of_games: how many games to play.
        player_1: agent descriptor playing as player 0.
        player_2: agent descriptor playing as player 1.

    Returns:
        Dict with both names, ``'player_1_wins'``, ``'player_2_wins'`` and
        ``'ties'`` as percentages of the games played, and both final Elo
        ratings. With no games played every percentage is 0.0.
    """
    tally = {'player_1_wins': 0, 'player_2_wins': 0, 'ties': 0}

    for _ in range(number_of_games):
        reward, _path = simulate_game(origin_state, player_1, player_2)

        # Map the outcome to the counter to bump and to player 1's Elo score.
        if reward[0] > reward[1]:
            outcome, score = 'player_1_wins', 1
        elif reward[0] < reward[1]:
            outcome, score = 'player_2_wins', 0
        else:
            outcome, score = 'ties', 0.5

        tally[outcome] += 1
        player_1['elo'], player_2['elo'] = calculate_elo(
            player_1['elo'], player_2['elo'], score
        )

    def as_percent(count):
        # Guard the empty series, which used to raise ZeroDivisionError.
        if number_of_games <= 0:
            return 0.0
        return count / number_of_games * 100

    return {
        'player_1_name': player_1['name'],
        'player_2_name': player_2['name'],
        'player_1_wins': as_percent(tally['player_1_wins']),
        'player_2_wins': as_percent(tally['player_2_wins']),
        'ties': as_percent(tally['ties']),
        'player_1_elo': player_1['elo'],
        'player_2_elo': player_2['elo'],
    }

In [None]:
# Bare expression: the notebook displays the value as cell output.
# NOTE(review): no module-level train_examples_policy is assigned in the
# visible cells (it only appears as a local of exceute_episode) - confirm.
train_examples_policy

random_agent wins: 0.0
minimax_agent_dept_3 wins: 98.0
ties:  2.0
random_agent elo:  940.7576240420559
minimax_agent_dept_3 elo:  1459.2423759579453
-----------------------------------------
random_agent wins: 0.0
minimax_agent_dept_6 wins: 98.0
ties:  2.0
random_agent elo:  803.6745837740348
minimax_agent_dept_6 elo:  1337.0830402680217
-----------------------------------------
random_agent wins: 0.0
minimax_agent_dept_9 wins: 100.0
ties:  0.0
random_agent elo:  701.4395706632646
minimax_agent_dept_9 elo:  1302.23501311077
-----------------------------------------
random_agent wins: 15.0
mcts_agent_10 wins: 65.0
ties:  20.0
random_agent elo:  825.023880098059
mcts_agent_10 elo:  1076.4156905652064
-----------------------------------------
random_agent wins: 16.0
mcts_agent_100 wins: 62.0
ties:  22.0
random_agent elo:  907.5428721280138
mcts_agent_100 elo:  1117.4810079700449
-----------------------------------------
random_agent wins: 19.0
mcts_agent_1000 wins: 56.99999999999999
ties:

In [None]:
# random_agent
# 858.3045276282141
# minimax_agent_dept_3
# 1681.9156371155366
# minimax_agent_dept_6
# 1503.6793796282027
# minimax_agent_dept_9
# 1537.041191594447
# mcts_agent_10
# 986.7838763502426
# mcts_agent_100
# 963.4735214134787
# mcts_agent_1000
# 868.8018662698848

In [None]:
# Bare expression: the notebook displays the value as cell output.
# NOTE(review): df is not assigned anywhere in the visible cells - confirm
# where it is built.
df

### Policy Network

In [None]:
# def exceute_episode(self):

#         train_examples = []
#         current_player = 1
#         state = self.game.get_init_board()

#         while True:
#             canonical_board = self.game.get_canonical_board(state, current_player)

#             self.mcts = MCTS(self.game, self.model, self.args)
#             root = self.mcts.run(self.model, canonical_board, to_play=1)

#             action_probs = [0 for _ in range(self.game.get_action_size())]
#             for k, v in root.children.items():
#                 action_probs[k] = v.visit_count

#             action_probs = action_probs / np.sum(action_probs)
#             train_examples.append((canonical_board, current_player, action_probs))

#             action = root.select_action(temperature=0)
#             state, current_player = self.game.get_next_state(state, current_player, action)
#             reward = self.game.get_reward_for_player(state, current_player)

#             if reward is not None:
#                 ret = []
#                 for hist_state, hist_current_player, hist_action_probs in train_examples:
#                     # [Board, currentPlayer, actionProbabilities, Reward]
#                     ret.append((hist_state, hist_action_probs, reward * ((-1) ** (hist_current_player != current_player))))

#                 return ret

In [None]:
def format_data(state, action):
    """Build one ``(features, label)`` training pair.

    ``features`` is the board followed by ``current_player`` and the second
    element of ``player_territory``; ``label`` is ``action`` unchanged.
    """
    features = [
        *state['board'],
        state['current_player'],
        state['player_territory'][1],
    ]
    return (features, action)

In [None]:
def exceute_episode(state,  agent_1, agent_2,show=False):
    """Play one game and collect training examples from it.

    The incoming ``state`` is deep-copied first. For every *legal* move the
    pre-move state and the chosen action are recorded with ``format_data``.
    Illegal proposals are discarded and the agent is asked again.

    Args:
        state: initial game state dict.
        agent_1: agent descriptor playing as player 0.
        agent_2: agent descriptor playing as player 1.
        show: when true, print the game after each legal move.

    Returns:
        ``(train_examples_policy, train_examples_value)`` where the policy
        examples are ``(features, action)`` pairs and the value examples are
        ``(features, final_reward_of_the_player_to_move)`` pairs.
    """
    state = copy.deepcopy(state)
    train_examples_policy = []
    reward = [0, 0, 0]

    while True:
        action = generate_action(state, agent_1, agent_2)

        if is_illegal_move(state, action):
            continue

        # Record only moves that are actually played; recording before the
        # legality check used to put illegal actions into the training data.
        train_examples_policy.append(format_data(state, action))

        state, new_reward = play(state, action)
        reward = assign_reward(reward, new_reward)
        if show:
            print_game_play(state, reward, action)

        if end_game(state):
            break

    # Feature index 12 is current_player, which format_data appends right
    # after the board (assumes a 12-pit board - TODO confirm).
    train_examples_value = [
        (hist_state, reward[hist_state[12]])
        for hist_state, _ in train_examples_policy
    ]

    return (train_examples_policy, train_examples_value)

In [None]:
def match_up(origin_state,number_of_games, player_1, player_2 ):
    """Play a series of games between two agents and summarise the results.

    Each game starts from ``origin_state``. After every game both players'
    ``'elo'`` entries are updated in place via ``calculate_elo``, so the
    rating changes persist on the descriptor dicts passed in.

    Args:
        origin_state: starting state handed to ``simulate_game`` every game.
        number_of_games: how many games to play.
        player_1: agent descriptor playing as player 0.
        player_2: agent descriptor playing as player 1.

    Returns:
        Dict with both names, ``'player_1_wins'``, ``'player_2_wins'`` and
        ``'ties'`` as percentages of the games played, and both final Elo
        ratings. With no games played every percentage is 0.0.
    """
    tally = {'player_1_wins': 0, 'player_2_wins': 0, 'ties': 0}

    for _ in range(number_of_games):
        reward, _path = simulate_game(origin_state, player_1, player_2)

        # Map the outcome to the counter to bump and to player 1's Elo score.
        if reward[0] > reward[1]:
            outcome, score = 'player_1_wins', 1
        elif reward[0] < reward[1]:
            outcome, score = 'player_2_wins', 0
        else:
            outcome, score = 'ties', 0.5

        tally[outcome] += 1
        player_1['elo'], player_2['elo'] = calculate_elo(
            player_1['elo'], player_2['elo'], score
        )

    def as_percent(count):
        # Guard the empty series, which used to raise ZeroDivisionError.
        if number_of_games <= 0:
            return 0.0
        return count / number_of_games * 100

    return {
        'player_1_name': player_1['name'],
        'player_2_name': player_2['name'],
        'player_1_wins': as_percent(tally['player_1_wins']),
        'player_2_wins': as_percent(tally['player_2_wins']),
        'ties': as_percent(tally['ties']),
        'player_1_elo': player_1['elo'],
        'player_2_elo': player_2['elo'],
    }

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
# from tensorflow.keras.models import
from tensorflow.keras.layers import Dense

In [None]:
# Build the feature matrix: one row per record, with the 12 pit counts
# followed by current_player and player_territory.
# NOTE(review): `data` is not assigned in the visible cells; presumably a list
# of record dicts shaped like the output of the two-argument format_state -
# confirm.
X = np.array([[d['pit 0'], d['pit 1'], d['pit 2'], d['pit 3'], d['pit 4'], d['pit 5'],
               d['pit 6'], d['pit 7'], d['pit 8'], d['pit 9'], d['pit 10'], d['pit 11'],
               d['current_player'], d['player_territory']] for d in data])

# Labels: the action chosen in each record.
y = np.array([d['action'] for d in data])


# Convert target (y) to categorical (for classification)
num_actions = 12  # Assuming 12 possible actions (0 to 11)
y = tf.keras.utils.to_categorical(y, num_classes=num_actions)

# Build the model
model = Sequential([
    Dense(64, input_dim=X.shape[1], activation='relu'),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(num_actions, activation='softmax')  # Output layer for 12 possible actions
])


# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model (Assume more training data is available)
# For example purposes, we'll use the same sample data multiple times
model.fit(X, y, epochs=10, batch_size=1)

In [None]:
# Convert target (y) to categorical (for classification)
# NOTE(review): the previous cell already one-hot encodes y; running this cell
# afterwards would encode it a second time - confirm only one of them is
# meant to be executed.
num_actions = 12  # Assuming 12 possible actions (0 to 11)
y = tf.keras.utils.to_categorical(y, num_classes=num_actions)

In [None]:
# Build the model
# Relies on X and num_actions left in the notebook namespace by earlier cells;
# the input width is taken from the number of feature columns in X.
model = Sequential([
    Dense(64, input_dim=X.shape[1], activation='relu'),  # First hidden layer
    Dense(32, activation='relu'),  # Second hidden layer
    Dense(num_actions, activation='softmax')  # Output layer for 12 possible actions
])

In [None]:
# Compile the model
# Categorical cross-entropy pairs with the one-hot targets and softmax output.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model (Assume more training data is available)
# For example purposes, we'll use the same sample data multiple times
# Uses the X and y arrays left in the notebook namespace by earlier cells.
model.fit(X, y, epochs=10, batch_size=1)

In [None]:
# Predict action for a new data point
# A batch of one row in the training column order: 12 pit counts, then
# current_player (0) and player_territory (6).
new_data = np.array([[4, 0, 4, 0, 4, 0, 0, 1, 2, 0, 10, 4, 0, 6]])
prediction = model.predict(new_data)
print(prediction)
# Index of the highest-scoring output is taken as the chosen action.
predicted_action = np.argmax(prediction)
print(f"Predicted action: {predicted_action}")