In [388]:
import copy
import random

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm

In [249]:
class GameState():
    """State and rules of a two-sided finger-counting ("chopsticks") game.

    Each side ("player" and "opponent") has a left and a right hand holding a
    count in 0..4; a hand at 0 is out of play.  Moves are strings:

    * ``"a" + attacker + target`` (e.g. ``"alr"``): add the mover's
      ``attacker`` hand to the other side's ``target`` hand, modulo 5.
    * ``"s" + n`` (e.g. ``"s2"``, ``"s-1"``): move ``n`` counts from the
      mover's left hand to its right hand (negative ``n`` moves right to left).
    * ``"r"``: play a uniformly random legal move.
    """

    all_moves = ['alr', 'arl', 'arr', 'all', 's1', 's2', 's3', 's-1', 's-2', 's-3']
    moves_dict = dict(zip(all_moves, np.arange(len(all_moves))))

    # Number of turns after which the game is declared a draw ("limit"); also
    # used to scale the turn counter in get_state().
    MAX_TURNS = 100

    def __init__(self, playerleft = 1, playerright = 1, oppleft = 1, oppright = 1, turn = 0):
        """Create a position; ``turn`` is 0 when the player moves next, 1 for the opponent."""
        self.player = {"l": playerleft, "r": playerright}
        self.opponent = {"l": oppleft, "r": oppright}
        self.turn = turn # 0 for player, 1 for opponent
        self.turn_number = 0

    def get_state(self):
        """Return the position as a flat list.

        Layout: ``[player l, player r, opponent l, opponent r, turn,
        turn_number / MAX_TURNS]``.
        """
        return [
            self.player["l"],
            self.player["r"],
            self.opponent["l"],
            self.opponent["r"],
            self.turn,
            self.turn_number / self.MAX_TURNS,
        ]

    def __str__(self):
        """Render both sides' hands, showing an empty (0) hand as ``~``."""
        def show(count):
            return "~" if count == 0 else str(count)

        return (
            "Player: " + show(self.player["l"]) + " " + show(self.player["r"])
            + " Opponent: " + show(self.opponent["l"]) + " " + show(self.opponent["r"])
        )

    def hand_order(self):
        """Swap hands in place so that, on each side, left <= right."""
        for side in (self.player, self.opponent):
            if side["l"] > side["r"]:
                side["l"], side["r"] = side["r"], side["l"]

    def _sides(self):
        """Return ``(mover, target)`` hand dicts for the side whose turn it is.

        Raises:
            ValueError: if ``self.turn`` is neither 0 nor 1.
        """
        if self.turn == 0:
            return self.player, self.opponent
        if self.turn == 1:
            return self.opponent, self.player
        raise ValueError("turn must be 0 (player) or 1 (opponent), got %r" % (self.turn,))

    def _advance(self):
        """Hand the move to the other side and count the completed turn."""
        self.turn = 1 - self.turn
        self.turn_number += 1

    def play_turn(self, string):
        """Apply the move described by ``string`` for the side to move.

        Returns True when the move was applied and False when it was rejected
        (the state is then unchanged).  An unrecognised move prefix is
        rejected the same way.
        """
        kind = string[0]
        if kind == "a":
            return self.play_attack_turn(string[1], string[2])
        if kind == "s":
            return self.play_shift_turn(int(string[1:]))
        if kind == "r":
            move = random.choice(self.get_valid_moves())
            print("Move:", move)
            return self.play_turn(move)
        print("Invalid move!")
        return False

    def play_attack_turn(self, turnhand, receivehand):
        """Add the mover's ``turnhand`` to the other side's ``receivehand`` (mod 5).

        Both arguments must be ``"l"`` or ``"r"`` (checked with ``assert``).
        Returns False without changing the state when either hand is empty.
        """
        assert turnhand in ["l", "r"], "Invalid hand"
        assert receivehand in ["l", "r"], "Invalid hand"

        mover, target = self._sides()

        if mover[turnhand] == 0:
            print("Cannot attack with an empty hand!")
            return False

        if target[receivehand] == 0:
            print("Cannot hit an empty hand!")
            return False

        # A hand that reaches exactly 5 wraps to 0, i.e. is knocked out.
        target[receivehand] = (mover[turnhand] + target[receivehand]) % 5

        self._advance()
        return True

    def play_shift_turn(self, torightcount):
        """Move ``torightcount`` counts from the mover's left hand to its right.

        A negative value moves counts from right to left.  The shift is
        rejected (returns False, state unchanged) when either resulting hand
        falls outside 0..4, when nothing is moved, or when the result is just
        the two hands swapped -- the same shifts get_valid_moves() leaves out.
        """
        mover, _ = self._sides()

        new_right = mover["r"] + torightcount
        new_left = mover["l"] - torightcount

        in_range = (0 <= new_right < 5) and (0 <= new_left < 5)
        # A zero shift would pass the turn for free; a mirror shift leaves an
        # equivalent position.  Neither is offered by get_valid_moves().
        changes_nothing = (torightcount == 0) or (
            new_left == mover["r"] and new_right == mover["l"]
        )

        if not in_range or changes_nothing:
            print("Invalid move!")
            return False

        mover["r"] = new_right
        mover["l"] = new_left

        self._advance()
        return True

    def is_game_over(self):
        """Return the outcome, or 0 while the game is still running.

        ``"opponent"`` when both player hands are 0, ``"player"`` when both
        opponent hands are 0, ``"limit"`` once MAX_TURNS turns have been played.
        """
        if self.player["l"] == 0 and self.player["r"] == 0:
            return "opponent"
        if self.opponent["l"] == 0 and self.opponent["r"] == 0:
            return "player"
        if self.turn_number >= self.MAX_TURNS:
            return "limit"
        return 0

    def get_valid_moves(self):
        """List the legal move strings for the side to move.

        Order: left-hand attacks, positive shifts, right-hand attacks,
        negative shifts.  The shift that would merely swap the two hands is
        omitted.
        """
        mover, target = self._sides()

        # Shifting (l - r) counts to the right turns (l, r) into (r, l).
        mirror_shift = mover["l"] - mover["r"]

        vmoves = []
        for hand, other, direction in (("l", "r", 1), ("r", "l", -1)):
            if mover[hand] == 0:
                continue

            for target_hand in ("l", "r"):
                if target[target_hand] != 0:
                    vmoves.append("a" + hand + target_hand)

            for count in range(1, mover[hand] + 1):
                shift = direction * count
                if mover[other] + count < 5 and shift != mirror_shift:
                    vmoves.append("s" + str(shift))

        return vmoves

In [250]:

class Agent():
    """Base interface for game agents; both hooks do nothing by default."""

    def reset(self):
        """Hook called before a game starts; the base class ignores it."""
        return None

    def reward(self, value):
        """Hook receiving a reward signal; the base class discards ``value``."""
        return None
    
class RandomAgent(Agent):
    """Agent that plays a uniformly random legal move and tallies its rewards."""

    def __init__(self):
        # Running sum of every reward handed to reward().
        self.total_reward = 0

    def move(self, state):
        """Return one entry of ``state.get_valid_moves()`` picked by ``random.choice``."""
        legal_moves = state.get_valid_moves()
        return random.choice(legal_moves)

    def reward(self, value):
        """Add ``value`` to ``total_reward`` and return it unchanged."""
        self.total_reward = self.total_reward + value
        return value

class HumanAgent(Agent):
    """Agent whose moves are typed in on standard input.

    Besides move strings, the prompt understands ``p`` (print the position),
    ``g`` (list the valid moves) and ``q`` (quit).
    """

    def __init__(self):
        # Running sum of every reward handed to reward().
        self.total_reward = 0

    def ask_turn(self, gs):
        """Prompt until a valid move or ``q`` is entered.

        Returns the chosen move string, or False when the user quits (``q``)
        or standard input is exhausted.
        """
        # Loop rather than recurse, so repeated bad input cannot exhaust the
        # call stack.
        while True:
            print("Enter your move: ", end = '')
            try:
                res = input().strip()
            except EOFError:
                # No more input available: treat it like an explicit quit.
                return False
            print(res)

            if res == "p":
                print(gs)
                continue
            if res == "q":
                return False

            vmoves = gs.get_valid_moves()
            if res == "g":
                print("Valid Moves:", vmoves)
                continue
            if res not in vmoves:
                print("Invalid move!")
                continue
            return res

    def move(self, state):
        """Ask the user for a move; returns False (after a notice) on quit."""
        res = self.ask_turn(state)
        if res:
            return res
        print("Quitting game!")
        return False

    def reward(self, value):
        """Add ``value`` to ``total_reward`` and return it unchanged."""
        self.total_reward += value
        return value

class SmartAgent(Agent):
    """Rule-based agent: take a knock-out attack if available, otherwise pick
    randomly among the moves that survive two pruning passes."""

    def __init__(self):
        # Running sum of every reward handed to reward().
        self.total_reward = 0

    def move(self, state):
        """Choose a move for whichever side ``state.turn`` says is to move.

        Returns a move string, or None when ``state.turn`` is neither 0 nor 1.
        """
        legal = state.get_valid_moves()
        if state.turn == 0:
            return self._pick(legal, state.player, state.opponent)
        if state.turn == 1:
            return self._pick(legal, state.opponent, state.player)
        return None

    def _pick(self, legal, mine, theirs):
        """Apply the heuristics from the mover's point of view.

        ``mine`` / ``theirs`` are the hand dicts of the side to move and of the
        other side; ``legal`` is the list from ``get_valid_moves()``.
        """
        hands = ("l", "r")

        # 1. Knock-out: one of my hands plus one of theirs is exactly 5, so
        #    attacking wraps their hand to 0.
        for own in hands:
            for hit in hands:
                if mine[own] + theirs[hit] != 5:
                    continue
                spare = "r" if own == "l" else "l"
                # NOTE(review): this exception looks at the other side's hand
                # on the same side as `own` (not `hit`); kept exactly as written.
                if mine[own] + theirs[own] == 5 and mine[spare] == 0:
                    continue
                return "a" + own + hit

        # 2. Drop shifts whose receiving hand would sum to a multiple of 5
        #    with either of the other side's hands.
        def exposed(shift_move):
            amount = int(shift_move[1:])
            if amount > 0:
                landing = mine["r"] + amount
            elif amount < 0:
                landing = mine["l"] - amount
            else:
                return False
            return any((theirs[h] + landing) % 5 == 0 for h in hands)

        safe = [m for m in legal if not (m[0] == "s" and exposed(m))]

        # 3. Drop attacks after which the struck hand would sum to a multiple
        #    of 5 with the attacking hand, or (second rule) with my two hands
        #    combined.
        banned = set()
        for own in hands:
            for hit in hands:
                if (2 * mine[own] + theirs[hit]) % 5 == 0:
                    banned.add("a" + own + hit)
        for hit in hands:
            if (mine["r"] + mine["l"] + theirs[hit]) % 5 == 0:
                banned.add("al" + hit)
                banned.add("ar" + hit)

        preferred = [m for m in safe if m not in banned]

        # Fall back to the shift-pruned list when every remaining move was banned.
        if not preferred:
            return random.choice(safe)
        return random.choice(preferred)

    def reward(self, value):
        """Add ``value`` to ``total_reward`` and return it unchanged."""
        self.total_reward = self.total_reward + value
        return value

In [321]:
def play_single_game(state, playerA, playerB, verbose = False):
    """Play one game on ``state`` with ``playerA`` moving first.

    Both agents are reset, then they alternate ``move(state)`` /
    ``state.play_turn(move)`` until ``state.is_game_over()`` is truthy.  When
    the game ends after an agent's move, that agent is rewarded +1 and the
    other -1; on ``"limit"`` both receive 0.

    Args:
        state: game state object, mutated in place.
        playerA, playerB: agents exposing ``reset()``, ``move(state)`` and
            ``reward(value)``.
        verbose: > 0 prints the position and every move; < 0 prints the last
            entry of ``playerA.q_history`` when the agent has one.

    Returns:
        The final ``state.is_game_over()`` value (also when the state was
        already finished on entry, in which case nobody moves or is
        rewarded), or ``"quit"`` when an agent's ``move`` returned False.
    """
    playerA.reset()
    playerB.reset()
    if verbose > 0:
        print(state)

    # Evaluate once up front so a state that is already finished has a result
    # to return instead of leaving `goc` undefined.
    goc = state.is_game_over()
    seats = ((playerA, "A"), (playerB, "B"))

    while not goc:
        for mover, label in seats:
            move = mover.move(state)
            if move is False:
                # An agent declined to move (HumanAgent returns False on
                # quit): stop without rewarding anyone.
                goc = "quit"
                break

            if verbose > 0:
                print("*" * 20)
                print("Player " + label + " move:", move)
            state.play_turn(move)
            if verbose > 0:
                print(state)

            goc = state.is_game_over()
            if goc:
                if goc == "limit":
                    reward_a = 0
                else:
                    # The side that just moved ended the game in its favour.
                    reward_a = 1 if label == "A" else -1
                playerA.reward(reward_a)
                playerB.reward(-reward_a)
                break

    if verbose < 0:
        # Only some agents keep a q_history, and it may still be empty.
        q_history = getattr(playerA, "q_history", None)
        if q_history:
            print(q_history[-1])
    return goc

In [365]:
def evaluateAgents(A, B, n = 1000, state = None, verbose = False):
    """Play ``n`` games with ``A`` moving first and report outcome rates.

    Args:
        A, B: agents accepted by ``play_single_game``.
        n: number of games to play.
        state: starting position used for every game; it is deep-copied per
            game and never mutated.  Defaults to a fresh ``GameState()``.
        verbose: when true, print a percentage summary.

    Returns:
        ``(A win rate, B win rate, draw rate)`` as fractions of ``n``, or
        ``(0.0, 0.0, 0.0)`` when ``n`` is not positive.  Games that end any
        other way (for example a human quitting) count toward none of them.
    """
    if state is None:
        state = GameState()

    if n <= 0:
        # Nothing to play; avoid dividing by zero below.
        return 0.0, 0.0, 0.0

    # A always moves first, so it controls whichever side the starting
    # position says is to move.
    if getattr(state, "turn", 0) == 0:
        a_side, b_side = "player", "opponent"
    else:
        a_side, b_side = "opponent", "player"

    Awin, Bwin, Draw = 0, 0, 0
    for game_no in tqdm(range(n)):
        # Each game mutates its state, so play on a private copy.
        winner = play_single_game(copy.deepcopy(state), A, B)
        if winner == a_side:
            Awin += 1
        elif winner == b_side:
            Bwin += 1
        elif winner == "limit":
            Draw += 1

    if verbose:
        print("*" * 45)
        aw = round(Awin / n * 100, 2)
        bw = round(Bwin / n * 100, 2)
        dw = round(Draw / n * 100, 2)

        print(f"* P1 Win: {aw}% | P2 Win: {bw}% | Draw: {dw}% *" )
        print("*" * 45)

    return Awin / n, Bwin / n, Draw / n

# Baseline: two random agents, evaluated once in each seat order.
randA = RandomAgent()
randB = RandomAgent()
for first, second in ((randA, randB), (randB, randA)):
    evaluateAgents(first, second, 10000, verbose=True)

# Rule-based agent against a random agent, once in each seat order.
agentA = SmartAgent()
randB = RandomAgent()
for first, second in ((agentA, randB), (randB, agentA)):
    evaluateAgents(first, second, 10000, verbose=True)

# Rule-based agent against another instance of itself.
agentA = SmartAgent()
agentB = SmartAgent()
for first, second in ((agentA, agentB), (agentB, agentA)):
    evaluateAgents(first, second, 10000, verbose=True)

# Interactive game: the human moves first against the rule-based agent.
# NOTE(review): `randB` is bound to a HumanAgent here despite its name.
agentA = SmartAgent()
randB = HumanAgent()
play_single_game(GameState(), randB, agentA, verbose=True)

# Scratch position; not used again within this cell.
gs = GameState(2, 3, 2, 2)

In [349]:
class RLAgent(Agent):
    """Agent that scores moves with a Keras model and learns from rewards.

    The model is a single Dense layer with one output per entry of
    ``GameState.all_moves``, fed the list returned by ``state.get_state()``.
    While ``training`` is true, every call to ``reward`` refits the model on
    the most recent position, with the last move's output replaced by the
    reward value.
    """

    def __init__(self, training = True):
        self.model = tf.keras.Sequential()
        self.model.add(tf.keras.layers.Dense(len(GameState.all_moves)))
        self.model.compile(optimizer='sgd', loss='mean_squared_error')
        self.training = training
        self.last_move = None   # move string returned by the latest move()
        self.gamehistory = []   # get_state() lists, one per move() call
        self.q_history = []     # raw model outputs, parallel to gamehistory

    def predict_q(self, state):
        """Return the model's output for ``state`` as a (1, n_moves) array."""
        return self.model.predict(np.array([state.get_state()]), verbose = 0)

    def fit_q(self, raw_state, q_values):
        """Run one ``model.fit`` call on the given inputs and targets."""
        self.model.fit(raw_state, q_values, verbose = 0)

    def reset(self):
        """Forget the per-game history; the model itself is kept."""
        self.last_move = None
        self.gamehistory = []
        self.q_history = []

    def move(self, state):
        """Return the legal move with the highest predicted value.

        In training mode, the previous move (if any) is first updated towards
        the best value predicted for the current position.
        """
        # always ask the agent to play the same side
        q_values = self.predict_q(state)
        masked_q = q_values.copy()

        # Boolean mask over all_moves; unlike an integer index list it is
        # still a valid index when no move needs masking.
        valid_moves = set(state.get_valid_moves())
        illegal = np.array([name not in valid_moves for name in GameState.all_moves])
        masked_q[:, illegal] = masked_q.min() - 1 # no illegal moves

        move = GameState.all_moves[int(np.argmax(masked_q))]
        value = masked_q.max()

        if self.training and self.last_move is not None:
            self.reward(value)

        self.gamehistory.append(state.get_state())
        # Store the unmasked predictions: reward() reuses them as targets, so
        # only the move actually played is pulled towards a new value.
        self.q_history.append(q_values)
        self.last_move = move
        return move

    def reward(self, value):
        """Fit the last move's predicted value towards ``value``.

        Does nothing when training is disabled or when this agent has not
        moved yet in the current game (there is no position to learn from).
        Returns ``value``, like the other agents' ``reward`` methods.
        """
        if not self.training:
            return value
        if self.last_move is None or not self.q_history:
            # E.g. the game ended before this agent's first move.
            return value

        new_q = self.q_history[-1].copy()
        new_q[:, GameState.moves_dict[self.last_move]] = value
        self.fit_q(np.array([self.gamehistory[-1]]), new_q)
        return value

In [356]:
# Agents used by the training/evaluation cells below: a freshly initialised
# (untrained) RLAgent and a random opponent.
playerA = RLAgent()
playerB = RandomAgent()

In [364]:
# Baseline before any training: with training off, RLAgent.reward returns
# early and the model is not updated. playerA moves first in all 100 games.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

*********************************************
* P1 Win: 47.0% | P2 Win: 53.0% | Draw: 0.0% *
*********************************************


(0.47, 0.53, 0.0)

In [366]:
# Reference point: two throwaway random agents over 100 games.
evaluateAgents(RandomAgent(), RandomAgent(), n = 100, verbose = True)

100%|██████████| 100/100 [00:00<00:00, 8132.12it/s]

*********************************************
* P1 Win: 52.0% | P2 Win: 48.0% | Draw: 0.0% *
*********************************************





(0.52, 0.48, 0.0)

In [367]:
# Training round: with training on, playerA refits its model inside
# move()/reward(). Plays 100 games, each from a fresh GameState.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [02:20<00:00,  1.41s/it]


In [368]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [01:05<00:00,  1.53it/s]

*********************************************
* P1 Win: 58.0% | P2 Win: 42.0% | Draw: 0.0% *
*********************************************





(0.58, 0.42, 0.0)

In [370]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [02:03<00:00,  1.23s/it]


In [371]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [00:55<00:00,  1.81it/s]

*********************************************
* P1 Win: 69.0% | P2 Win: 31.0% | Draw: 0.0% *
*********************************************





(0.69, 0.31, 0.0)

In [372]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [02:19<00:00,  1.39s/it]


In [373]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [01:03<00:00,  1.57it/s]

*********************************************
* P1 Win: 75.0% | P2 Win: 24.0% | Draw: 1.0% *
*********************************************





(0.75, 0.24, 0.01)

In [374]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [01:52<00:00,  1.12s/it]


In [375]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [01:05<00:00,  1.53it/s]

*********************************************
* P1 Win: 76.0% | P2 Win: 23.0% | Draw: 1.0% *
*********************************************





(0.76, 0.23, 0.01)

In [376]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [02:04<00:00,  1.24s/it]


In [377]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [01:06<00:00,  1.51it/s]

*********************************************
* P1 Win: 81.0% | P2 Win: 19.0% | Draw: 0.0% *
*********************************************





(0.81, 0.19, 0.0)

In [378]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [02:01<00:00,  1.21s/it]


In [379]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [00:59<00:00,  1.67it/s]

*********************************************
* P1 Win: 87.0% | P2 Win: 12.0% | Draw: 1.0% *
*********************************************





(0.87, 0.12, 0.01)

In [380]:
# Training round: 100 more games against playerB with learning switched on.
playerA.training = True
for game_no in tqdm(range(100)):
    gs = GameState()
    play_single_game(gs, playerA, playerB)

100%|██████████| 100/100 [01:55<00:00,  1.16s/it]


In [381]:
# Evaluate with learning switched off so the 100 games do not change the model.
playerA.training = False
evaluateAgents(playerA, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [00:49<00:00,  2.02it/s]

*********************************************
* P1 Win: 82.0% | P2 Win: 18.0% | Draw: 0.0% *
*********************************************





(0.82, 0.18, 0.0)

In [383]:
# Interactive game: playerA moves first, the human types moves for the second
# seat. playerA.training keeps whatever value was set last.
Chinni = HumanAgent()
gs = GameState()
play_single_game(gs, playerA, Chinni)

Enter your move: p
Player: 1 1 Opponent: 2 1
Enter your move: arl
Enter your move: p
Player: 2 1 Opponent: 3 1
Enter your move: all
Enter your move: p
Player: ~ 1 Opponent: 4 1
Enter your move: alr


'opponent'

In [386]:
# Save playerA's Keras model under a relative path (RLAgentBestModel.keras).
playerA.model.save("RLAgentBestModel.keras")

In [389]:
# Build a new RLAgent, then swap its freshly initialised model for the one
# loaded from the saved file.
playerRL = RLAgent()
playerRL.model = keras.models.load_model("RLAgentBestModel.keras")

In [390]:
# Evaluate the reloaded agent with learning switched off, moving first
# against playerB for 100 games.
playerRL.training = False
evaluateAgents(playerRL, playerB, n = 100, verbose = True)

100%|██████████| 100/100 [01:00<00:00,  1.66it/s]

*********************************************
* P1 Win: 84.0% | P2 Win: 15.0% | Draw: 1.0% *
*********************************************





(0.84, 0.15, 0.01)