# Reinforcement Learning Agent

In [1]:
!pip install imageio
!pip install PILLOW
!pip install pyglet
!pip install pyvirtualdisplay
!pip install dm-acme



In [1]:
import numpy as np
import tensorflow as tf

In [2]:
import os
import sys
sys.path.insert(0, os.path.join(os.getcwd(), '..'))
from Game.AI.Ai import AI
from Game.Board import GameBoard
from Game.PlayerEnum import PlayerTurn
from Game.AI.ReinforcementAgent import ReinforcementAI
from Core.Index import Index

In [13]:
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense, merge
import numpy
import os

In [14]:
def create_new_model():
    """Build a fresh, untrained feed-forward network.

    The network is a ``Sequential`` stack of five ``Dense`` layers with widths
    64, 32, 16, 8 and 1 and activations relu, relu, sigmoid, relu, sigmoid.
    Every layer uses the 'random_uniform' kernel initializer and zero biases.
    No input shape is declared here and the model is not compiled.

    Returns:
        The assembled ``Sequential`` model.
    """
    # (units, activation) for each layer, listed from input side to output side.
    layer_specs = (
        (64, 'relu'),
        (32, 'relu'),
        (16, 'sigmoid'),
        (8, 'relu'),
        (1, 'sigmoid'),
    )

    network = Sequential()
    for width, activation_name in layer_specs:
        network.add(
            Dense(
                width,
                activation=activation_name,
                kernel_initializer='random_uniform',
                bias_initializer='zeros',
            )
        )
    return network

In [15]:
def compete_models(model1, model2) -> tuple[int, GameBoard]:
    """Play one complete game between two models and report who won.

    ``model1`` is wrapped in a ``ReinforcementAI`` agent that moves when the
    board's current turn is ``PlayerTurn.BLACK``; ``model2`` is wrapped in an
    agent that moves when it is ``PlayerTurn.WHITE``. Moves are placed on a
    fresh ``GameBoard`` until ``get_winner()`` stops returning ``None``.

    Args:
        model1: Model used by the black player's agent.
        model2: Model used by the white player's agent.

    Returns:
        A ``(result, game_board)`` tuple. ``result`` is derived from the two
        values of ``game_board.get_score()`` (unpacked as black, white):
        ``1`` if black's score is higher, ``0`` if white's score is higher,
        and ``-1`` if the scores are equal. ``game_board`` is the board in its
        final state.

    Raises:
        RuntimeError: If the game is still running but the board's current
            turn is neither black nor white.
    """
    # Black player
    agent1 = ReinforcementAI(model=model1)

    # White player
    agent2 = ReinforcementAI(model=model2)

    # Create game items
    game_board = GameBoard()

    print("Game Start")

    # Actual gameplay
    while game_board.get_winner() is None:
        # Exactly one agent moves per iteration; an unknown turn is an error
        # instead of silently replaying the previous move.
        if game_board.current_turn == PlayerTurn.BLACK:
            move = agent1.generate_move(game_board)
        elif game_board.current_turn == PlayerTurn.WHITE:
            move = agent2.generate_move(game_board)
        else:
            raise RuntimeError(
                f"Unexpected turn while game is running: {game_board.current_turn!r}"
            )
        # The agent's move is read as two zero-based coordinates.
        x, y = Index.from_zero_based(move[0]), Index.from_zero_based(move[1])
        game_board.place(x, y)

    # After game is complete
    print("Game Complete")
    black, white = game_board.get_score()
    if black == white:
        result = -1
    else:
        result = int(black > white)
    return result, game_board

In [11]:
%%time
"""Training the agent"""

# Hyper parameters
NO_PLAYERS = 7
GENERATIONS = 20

# Name to save the data
TOP_NAME = "./player_model"

# Number of top-ranked players carried over into the next generation.
# Do not change this
TOP_X = 2

# Create Players
players = [create_new_model() for _ in range(NO_PLAYERS)]
top_player = None
try:
    # Resume from a previously saved best player, if one can be loaded.
    top_player = load_model(TOP_NAME)
    players[0] = top_player
except OSError:
    # Best effort: without a loadable checkpoint, train from fresh models only.
    print(f"No saved model loaded from {TOP_NAME}; starting with fresh players")

for generation in range(GENERATIONS):

    print(f"Training Generation {generation}")

    # Score board to keep track: player index -> number of games won
    score = {n: 0 for n in range(NO_PLAYERS)}

    # Compete players with one another (each pair plays once; the lower index
    # is passed as the first model, which compete_models uses for black).
    for m1 in range(NO_PLAYERS):
        for m2 in range(m1 + 1, NO_PLAYERS):
            print(f"Player {m1} vs Player {m2}")
            winner, board = compete_models(players[m1], players[m2])
            # winner == -1 (equal scores) awards no point to either player.
            if winner == 1:
                score[m1] += 1
            elif winner == 0:
                score[m2] += 1
            print(f"Game Score: {board.get_score()}")
            print(f"Currect Score: {score}")
            print(f"Current Board: {board.board}")
        # NOTE(review): each player plays NO_PLAYERS - 1 games, so with
        # NO_PLAYERS = 7 no score can reach 7 and this early exit never fires.
        # Confirm the intended threshold before changing it.
        if 7 in score.values():
            break

    # Get the players with the highest score: (player index, wins) pairs,
    # most wins first; ties keep their original index order.
    sorted_index = sorted(score.items(), key=lambda item: item[1], reverse=True)
    print(f"Sorted Index: {sorted_index}")
    sorted_players = [players[index] for index, _ in sorted_index[:TOP_X]]

    # Save the file of the top player of THIS generation (the best-ranked
    # model, not whatever happened to sit in slot 0 before ranking).
    top = sorted_players[0]
    top.save(TOP_NAME)

    # TODO: cross-breeding of the survivors is not implemented yet.
    #sorted_players += cross_breed(*sorted_players)

    # Survivors go first, the remaining slots are refilled with fresh models.
    players = sorted_players + [create_new_model() for _ in range(NO_PLAYERS - len(sorted_players))]

# Save the file of the top player (slot 0 holds the last generation's winner)
top = players[0]
top.save(TOP_NAME)

Training Generation 0
Player 0 vs Player 1
Game Start
Game Complete
Game Score: (33, 31)
Currect Score: {0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}
Current Board: [[-1. -1. -1.  1.  1.  1.  1.  1.]
 [-1. -1. -1. -1.  1.  1.  1.  1.]
 [-1.  1. -1.  1. -1.  1.  1.  1.]
 [-1.  1. -1.  1. -1. -1.  1.  1.]
 [-1.  1.  1. -1.  1.  1. -1.  1.]
 [-1.  1. -1.  1. -1.  1. -1.  1.]
 [-1.  1.  1. -1.  1. -1.  1.  1.]
 [-1. -1. -1. -1. -1. -1. -1.  1.]]
Player 0 vs Player 2
Game Start
Game Complete
Game Score: (33, 31)
Currect Score: {0: 2, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}
Current Board: [[-1. -1. -1.  1.  1.  1.  1.  1.]
 [-1. -1. -1. -1.  1.  1.  1.  1.]
 [-1.  1. -1.  1. -1.  1.  1.  1.]
 [-1.  1. -1.  1. -1. -1.  1.  1.]
 [-1.  1.  1. -1.  1.  1. -1.  1.]
 [-1.  1. -1.  1. -1.  1. -1.  1.]
 [-1.  1.  1. -1.  1. -1.  1.  1.]
 [-1. -1. -1. -1. -1. -1. -1.  1.]]
Player 0 vs Player 3
Game Start
Game Complete
Game Score: (41, 0)
Currect Score: {0: 3, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0}
Current Boar

KeyboardInterrupt: 