In [1]:
import numpy as np
from numpy.random import choice


class RPSTrainer:
    """Self-play regret-matching trainer for Rock, Paper, Scissors.

    On every iteration both players derive a strategy from their cumulative
    regrets, sample one action each, and then add to every action's regret
    the difference between what that action would have earned against the
    opponent's sampled action and what was actually earned.  The running sum
    of the per-iteration strategies is what ``get_average_strategy`` turns
    into the reported policy.
    """

    def __init__(self):
        self.NUM_ACTIONS = 3
        self.possible_actions = np.arange(self.NUM_ACTIONS)
        # Payoff of the row action against the column action; rows and
        # columns are both ordered Rock, Paper, Scissors.
        self.actionUtility = np.array([
            [0, -1, 1],
            [1, 0, -1],
            [-1, 1, 0],
        ])
        # Cumulative regrets and cumulative strategies, one pair per player.
        self.regret_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_regret_sum = np.zeros(self.NUM_ACTIONS)
        self.strategy_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_strategy_sum = np.zeros(self.NUM_ACTIONS)

    def get_strategy(self, regret_sum):
        """Return the regret-matching strategy for ``regret_sum``.

        Negative regrets are treated as zero and the rest are normalised to
        sum to one.  When no regret is positive the uniform strategy is
        returned.  The input array is not modified.
        """
        positive_part = np.clip(regret_sum, a_min=0, a_max=None)
        total = np.sum(positive_part)
        if not total > 0:
            return np.repeat(1/self.NUM_ACTIONS, self.NUM_ACTIONS)
        positive_part /= total
        return positive_part

    def get_average_strategy(self, strategy_sum):
        """Normalise a cumulative strategy into a list of probabilities.

        Falls back to the uniform distribution when the entries of
        ``strategy_sum`` do not add up to a positive number.
        """
        total = sum(strategy_sum)
        if total > 0:
            return [strategy_sum[a] / total for a in range(self.NUM_ACTIONS)]
        return [1.0 / self.NUM_ACTIONS for _ in range(self.NUM_ACTIONS)]

    def get_action(self, strategy):
        """Sample one action index according to the probabilities in ``strategy``."""
        return choice(self.possible_actions, p=strategy)

    def get_reward(self, my_action, opponent_action):
        """Look up the payoff of ``my_action`` played against ``opponent_action``."""
        return self.actionUtility[my_action, opponent_action]

    def train(self, iterations):
        """Run ``iterations`` rounds of self-play, updating both players' sums."""
        for _ in range(iterations):
            strategy = self.get_strategy(self.regret_sum)
            opp_strategy = self.get_strategy(self.opponent_regret_sum)
            self.strategy_sum += strategy
            self.opponent_strategy_sum += opp_strategy

            # The opponent is sampled before the player, so the random
            # stream is consumed in a fixed order.
            opponent_action = self.get_action(opp_strategy)
            my_action = self.get_action(strategy)

            my_reward = self.get_reward(my_action, opponent_action)
            opp_reward = self.get_reward(opponent_action, my_action)

            # Column ``x`` of the payoff table holds what every action would
            # have earned against ``x``; subtracting the realised reward
            # gives the regret of all actions at once.
            self.regret_sum += self.actionUtility[:, opponent_action] - my_reward
            self.opponent_regret_sum += self.actionUtility[:, my_action] - opp_reward


def main():
    """Train the Rock-Paper-Scissors trainer and print both average policies."""
    rps = RPSTrainer()
    rps.train(100000)
    first_policy = rps.get_average_strategy(rps.strategy_sum)
    second_policy = rps.get_average_strategy(rps.opponent_strategy_sum)
    for line in ('player 1 policy: %s' % first_policy,
                 'player 2 policy: %s' % second_policy):
        print(line)


# Only train when this cell is executed as the main program.
if __name__ == "__main__":
    main()

player 1 policy: [0.3329716911964922, 0.33477133113423474, 0.3322569776692732]
player 2 policy: [0.33262953143817625, 0.3348624514209909, 0.33250801714083283]


In [8]:
import numpy as np
from numpy.random import choice


class FCTrainer:
    """Self-play regret-matching trainer for a two-action (Heads/Tails) game.

    NOTE(review): both rows of the payoff table are identical, so the reward
    never depends on the player's own action.  Every regret computed in
    ``train`` is therefore zero and both strategies stay uniform; confirm
    that this is the intended game.
    """

    def __init__(self):
        self.NUM_ACTIONS = 2
        self.possible_actions = np.arange(self.NUM_ACTIONS)
        # Payoff of the row action against the column action; rows and
        # columns are both ordered Heads, Tails.
        self.actionUtility = np.array([
            [1, -1],
            [1, -1],
        ])
        # Cumulative regrets and cumulative strategies, one pair per player.
        self.regret_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_regret_sum = np.zeros(self.NUM_ACTIONS)
        self.strategy_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_strategy_sum = np.zeros(self.NUM_ACTIONS)

    def get_strategy(self, regret_sum):
        """Return the regret-matching strategy for ``regret_sum``.

        Negative regrets count as zero; the remainder is normalised.  The
        uniform strategy is returned when no regret is positive.  The input
        array is left untouched.
        """
        positive_part = np.clip(regret_sum, a_min=0, a_max=None)
        total = np.sum(positive_part)
        if not total > 0:
            return np.repeat(1/self.NUM_ACTIONS, self.NUM_ACTIONS)
        positive_part /= total
        return positive_part

    def get_average_strategy(self, strategy_sum):
        """Normalise a cumulative strategy into a list of probabilities.

        Returns the uniform distribution when the entries of
        ``strategy_sum`` do not add up to a positive number.
        """
        total = sum(strategy_sum)
        if total > 0:
            return [strategy_sum[a] / total for a in range(self.NUM_ACTIONS)]
        return [1.0 / self.NUM_ACTIONS for _ in range(self.NUM_ACTIONS)]

    def get_action(self, strategy):
        """Sample one action index according to the probabilities in ``strategy``."""
        return choice(self.possible_actions, p=strategy)

    def get_reward(self, my_action, opponent_action):
        """Look up the payoff of ``my_action`` played against ``opponent_action``."""
        return self.actionUtility[my_action, opponent_action]

    def train(self, iterations):
        """Run ``iterations`` rounds of self-play, updating both players' sums."""
        for _ in range(iterations):
            strategy = self.get_strategy(self.regret_sum)
            opp_strategy = self.get_strategy(self.opponent_regret_sum)
            self.strategy_sum += strategy
            self.opponent_strategy_sum += opp_strategy

            # The opponent is sampled before the player, so the random
            # stream is consumed in a fixed order.
            opponent_action = self.get_action(opp_strategy)
            my_action = self.get_action(strategy)

            my_reward = self.get_reward(my_action, opponent_action)
            opp_reward = self.get_reward(opponent_action, my_action)

            # A payoff-table column lists what each action would have earned
            # against that opposing action.
            self.regret_sum += self.actionUtility[:, opponent_action] - my_reward
            self.opponent_regret_sum += self.actionUtility[:, my_action] - opp_reward


def main():
    """Train the Heads/Tails trainer and print both average policies."""
    coin = FCTrainer()
    coin.train(1000)
    first_policy = coin.get_average_strategy(coin.strategy_sum)
    second_policy = coin.get_average_strategy(coin.opponent_strategy_sum)
    for line in ('player 1 policy: %s' % first_policy,
                 'player 2 policy: %s' % second_policy):
        print(line)


# Only train when this cell is executed as the main program.
if __name__ == "__main__":
    main()

player 1 policy: [0.5, 0.5]
player 2 policy: [0.5, 0.5]


In [11]:
class RPSLSTrainer:
    """Self-play regret-matching trainer for Rock, Paper, Scissors, Lizard, Spock.

    Both players repeatedly sample an action from their current
    regret-matching strategy and accumulate, per action, how much better
    that action would have done against the opponent's sampled action.
    """

    def __init__(self):
        self.NUM_ACTIONS = 5
        self.possible_actions = np.arange(self.NUM_ACTIONS)
        # Payoff of the row action against the column action; rows and
        # columns are both ordered Rock, Paper, Scissors, Lizard, Spock.
        self.actionUtility = np.array([
            [0, -1, 1, 1, -1],
            [1, 0, -1, -1, 1],
            [-1, 1, 0, 1, -1],
            [-1, 1, -1, 0, 1],
            [1, -1, 1, -1, 0],
        ])
        # Cumulative regrets and cumulative strategies, one pair per player.
        self.regret_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_regret_sum = np.zeros(self.NUM_ACTIONS)
        self.strategy_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_strategy_sum = np.zeros(self.NUM_ACTIONS)

    def get_strategy(self, regret_sum):
        """Return the regret-matching strategy for ``regret_sum``.

        Negative regrets count as zero; the remainder is normalised.  The
        uniform strategy is returned when no regret is positive.  The input
        array is left untouched.
        """
        positive_part = np.clip(regret_sum, a_min=0, a_max=None)
        total = np.sum(positive_part)
        if not total > 0:
            return np.repeat(1/self.NUM_ACTIONS, self.NUM_ACTIONS)
        positive_part /= total
        return positive_part

    def get_average_strategy(self, strategy_sum):
        """Normalise a cumulative strategy into a list of probabilities.

        Returns the uniform distribution when the entries of
        ``strategy_sum`` do not add up to a positive number.
        """
        total = sum(strategy_sum)
        if total > 0:
            return [strategy_sum[a] / total for a in range(self.NUM_ACTIONS)]
        return [1.0 / self.NUM_ACTIONS for _ in range(self.NUM_ACTIONS)]

    def get_action(self, strategy):
        """Sample one action index according to the probabilities in ``strategy``."""
        return choice(self.possible_actions, p=strategy)

    def get_reward(self, my_action, opponent_action):
        """Look up the payoff of ``my_action`` played against ``opponent_action``."""
        return self.actionUtility[my_action, opponent_action]

    def train(self, iterations):
        """Run ``iterations`` rounds of self-play, updating both players' sums."""
        for _ in range(iterations):
            strategy = self.get_strategy(self.regret_sum)
            opp_strategy = self.get_strategy(self.opponent_regret_sum)
            self.strategy_sum += strategy
            self.opponent_strategy_sum += opp_strategy

            # The opponent is sampled before the player, so the random
            # stream is consumed in a fixed order.
            opponent_action = self.get_action(opp_strategy)
            my_action = self.get_action(strategy)

            my_reward = self.get_reward(my_action, opponent_action)
            opp_reward = self.get_reward(opponent_action, my_action)

            # A payoff-table column lists what each action would have earned
            # against that opposing action.
            self.regret_sum += self.actionUtility[:, opponent_action] - my_reward
            self.opponent_regret_sum += self.actionUtility[:, my_action] - opp_reward


def main():
    """Train the five-action trainer and print both average policies."""
    rpsls = RPSLSTrainer()
    rpsls.train(100000)
    first_policy = rpsls.get_average_strategy(rpsls.strategy_sum)
    second_policy = rpsls.get_average_strategy(rpsls.opponent_strategy_sum)
    for line in ('player 1 policy: %s' % first_policy,
                 'player 2 policy: %s' % second_policy):
        print(line)


# Only train when this cell is executed as the main program.
if __name__ == "__main__":
    main()

player 1 policy: [0.19996059189375384, 0.2010574434610152, 0.19948202741812673, 0.2002652507377983, 0.19923468648930587]
player 2 policy: [0.20383175103564422, 0.2047558728576919, 0.19951046831117386, 0.19459898473769208, 0.19730292305779792]


In [26]:
class RPSLSTrainer:
    """Self-play regret-matching trainer for a ten-action weighted game.

    The payoff of action ``i`` against action ``j`` is
    ``+row_weight[i] * col_factor[j]`` when ``i > j``, the negated product
    when ``i < j`` and ``0`` on the diagonal.

    NOTE(review): this class reuses the name of the five-action
    Rock/Paper/Scissors/Lizard/Spock trainer and replaces it once this cell
    has run; consider giving it its own name.
    """

    def __init__(self):
        # NOTE(review): the names and magnitudes suggest five-card poker
        # hand probabilities (royal flush ... high card) -- confirm.
        rf = 0.00000154
        # NOTE(review): the usually quoted straight-flush probability is
        # about 0.0000139; 0.000039 may be a typo -- left unchanged.
        sfl = 0.000039
        kf = 0.00024
        fh = 0.001441
        fl = 0.001965
        st = 0.003925
        k3 = 0.021128
        p2 = 0.047539
        k2 = 0.422569
        hc = 0.501177  # declared by the original code but not used by the table

        self.NUM_ACTIONS = 10
        self.possible_actions = np.arange(self.NUM_ACTIONS)

        # Rows and columns share one order: action 0 carries weight 1,
        # followed by k2, p2, k3, st, fl, fh, kf, sfl, rf.
        # NOTE(review): presumably high card, pair, two pair, three of a
        # kind, straight, flush, full house, four of a kind, straight flush,
        # royal flush -- confirm.
        row_weight = [1, k2, p2, k3, st, fl, fh, kf, sfl, rf]
        # The last three columns contribute a plain factor of 1, exactly as
        # in the hand-written table this replaces.
        col_factor = [1, k2, p2, k3, st, fl, fh, 1, 1, 1]

        # Building the table from the two vectors fixes the last row of the
        # hand-written version, whose columns 6 and 7 were swapped
        # (``rf`` and ``rf*fh`` instead of ``rf*fh`` and ``rf``).
        self.actionUtility = np.zeros((self.NUM_ACTIONS, self.NUM_ACTIONS))
        for i in range(self.NUM_ACTIONS):
            for j in range(self.NUM_ACTIONS):
                if i == j:
                    continue
                sign = 1 if i > j else -1
                self.actionUtility[i, j] = sign * row_weight[i] * col_factor[j]

        # Cumulative regrets and cumulative strategies, one pair per player.
        self.regret_sum = np.zeros(self.NUM_ACTIONS)
        self.strategy_sum = np.zeros(self.NUM_ACTIONS)

        self.opponent_regret_sum = np.zeros(self.NUM_ACTIONS)
        self.opponent_strategy_sum = np.zeros(self.NUM_ACTIONS)

    def get_strategy(self, regret_sum):
        """Return the regret-matching strategy for ``regret_sum``.

        Negative regrets count as zero; the remainder is normalised.  The
        uniform strategy is returned when no regret is positive.  The input
        array is left untouched.
        """
        new_sum = np.clip(regret_sum, a_min=0, a_max=None)
        normalizing_sum = np.sum(new_sum)
        if normalizing_sum > 0:
            new_sum /= normalizing_sum
        else:
            new_sum = np.repeat(1/self.NUM_ACTIONS, self.NUM_ACTIONS)
        return new_sum

    def get_average_strategy(self, strategy_sum):
        """Normalise a cumulative strategy into a list of probabilities.

        Returns the uniform distribution when the entries of
        ``strategy_sum`` do not add up to a positive number.
        """
        normalizing_sum = sum(strategy_sum)
        if normalizing_sum > 0:
            return [strategy_sum[a] / normalizing_sum
                    for a in range(self.NUM_ACTIONS)]
        return [1.0 / self.NUM_ACTIONS for _ in range(self.NUM_ACTIONS)]

    def get_action(self, strategy):
        """Sample one action index according to the probabilities in ``strategy``."""
        return choice(self.possible_actions, p=strategy)

    def get_reward(self, my_action, opponent_action):
        """Look up the payoff of ``my_action`` played against ``opponent_action``."""
        return self.actionUtility[my_action, opponent_action]

    def train(self, iterations):
        """Run ``iterations`` rounds of self-play, updating both players' sums."""
        for _ in range(iterations):
            strategy = self.get_strategy(self.regret_sum)
            opp_strategy = self.get_strategy(self.opponent_regret_sum)
            self.strategy_sum += strategy
            self.opponent_strategy_sum += opp_strategy

            # The opponent is sampled before the player, so the random
            # stream is consumed in a fixed order.
            opponent_action = self.get_action(opp_strategy)
            my_action = self.get_action(strategy)

            my_reward = self.get_reward(my_action, opponent_action)
            opp_reward = self.get_reward(opponent_action, my_action)

            # A payoff-table column lists what each action would have earned
            # against that opposing action.
            self.regret_sum += self.actionUtility[:, opponent_action] - my_reward
            self.opponent_regret_sum += self.actionUtility[:, my_action] - opp_reward


def main():
    """Train the ten-action trainer briefly and print both average policies."""
    weighted = RPSLSTrainer()
    weighted.train(100)
    first_policy = weighted.get_average_strategy(weighted.strategy_sum)
    second_policy = weighted.get_average_strategy(weighted.opponent_strategy_sum)
    for line in ('player 1 policy: %s' % first_policy,
                 'player 2 policy: %s' % second_policy):
        print(line)


# Only train when this cell is executed as the main program.
if __name__ == "__main__":
    main()

player 1 policy: [0.001, 0.001, 0.001, 0.001, 0.01296925250032232, 0.006986029274360108, 0.0031003332810680927, 0.031906818528839725, 0.4526331945623697, 0.48840437185304003]
player 2 policy: [0.001, 0.001, 0.001, 0.001, 0.0063488264862158516, 0.012568059386965185, 0.03316730038226133, 0.2506172200525107, 0.34515665202913426, 0.3481419416629127]


In [1]:
import numpy as np
from random import shuffle
import time
import sys


class PokerB:
    """Counterfactual-regret trainer for a two-player pass/bet card game.

    Each player privately holds one integer taken from ``self.deck``
    (player 1 reads ``deck[0]``, player 2 reads ``deck[1]``); the larger
    number wins a showdown and equal numbers tie.  A history is a string
    over ``'p'`` (pass) and ``'b'`` (bet); the player to act is determined
    by the parity of its length.
    """

    # Sizes of the ten buckets used by ``randomHand``: bucket ``k`` is drawn
    # with probability ``HAND_COUNTS[k] / sum(HAND_COUNTS)``; the total is
    # 2,598,960.
    # NOTE(review): these look like the standard five-card poker hand
    # frequencies, high card (0) up to royal flush (9) -- confirm.
    HAND_COUNTS = (1302540, 1098240, 123552, 54912, 10200, 5108, 3744, 624, 36, 4)

    def __init__(self):
        self.nodeMap = {}                 # information-set key -> Node
        self.expected_game_value = 0
        self.n_cards = 10
        self.nash_equilibrium = dict()
        self.current_player = 0
        self.deck = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        self.n_actions = 2

    def randomHand(self):
        """Overwrite both players' cards with independently drawn buckets.

        Replaces ``deck[0]`` and ``deck[1]`` with a bucket index in
        ``0..9`` chosen with the weights in ``HAND_COUNTS``.  Because the
        draws are independent, both players may receive the same value.

        The previous version could not run (it called the ``np.random``
        module as a function), skipped the fourth bucket so that value 9
        was unreachable, dropped draws that landed exactly on a boundary,
        and only ever wrote ``deck[1]``.
        NOTE(review): dealing to both players is assumed to be the intent;
        nothing in this class calls the method yet.
        """
        upper_bounds = np.cumsum(self.HAND_COUNTS)
        for seat in range(2):
            draw = np.random.randint(upper_bounds[-1])
            # Number of bucket upper bounds that are <= draw, i.e. the index
            # of the half-open interval [lower, upper) containing the draw.
            self.deck[seat] = np.searchsorted(upper_bounds, draw, side='right')

    def train(self, n_iterations=50000):
        """Run ``n_iterations`` CFR iterations and print the results.

        Every iteration shuffles the deck, traverses the game tree once
        from the empty history and then lets every known node refresh its
        strategy.  The mean root utility and all nodes are finally passed
        to ``display_results``.
        """
        expected_game_value = 0
        for _ in range(n_iterations):
            shuffle(self.deck)
            expected_game_value += self.cfr('', 1, 1)
            for node in self.nodeMap.values():
                node.update_strategy()

        expected_game_value /= n_iterations
        display_results(expected_game_value, self.nodeMap)

    def cfr(self, history, pr_1, pr_2):
        """Recursively evaluate ``history`` and accumulate regrets.

        Args:
            history: sequence of actions played so far.
            pr_1: probability with which player 1's own choices lead here.
            pr_2: the same for player 2.

        Returns:
            The utility of ``history`` for the player whose turn it is
            (callers negate it to obtain their own utility).
        """
        is_player_1 = len(history) % 2 == 0
        player_card = self.deck[0] if is_player_1 else self.deck[1]

        if self.is_terminal(history):
            opponent_card = self.deck[1] if is_player_1 else self.deck[0]
            return self.get_reward(history, player_card, opponent_card)

        node = self.get_node(player_card, history)
        strategy = node.strategy

        # Counterfactual utility per action.
        action_utils = np.zeros(self.n_actions)

        for act in range(self.n_actions):
            next_history = history + node.action_dict[act]
            # The child returns the utility of the *other* player, hence
            # the sign flip.
            if is_player_1:
                action_utils[act] = -1 * self.cfr(next_history, pr_1 * strategy[act], pr_2)
            else:
                action_utils[act] = -1 * self.cfr(next_history, pr_1, pr_2 * strategy[act])

        # Utility of information set.
        util = sum(action_utils * strategy)
        regrets = action_utils - util
        # Regrets are weighted by the opponent's reach probability, the
        # node's own reach by the acting player's.
        if is_player_1:
            node.reach_pr += pr_1
            node.regret_sum += pr_2 * regrets
        else:
            node.reach_pr += pr_2
            node.regret_sum += pr_1 * regrets

        return util

    @staticmethod
    def is_terminal(history):
        """Return True when the last two actions are 'pp', 'bb' or 'bp'."""
        return history[-2:] in ('pp', 'bb', 'bp')

    @staticmethod
    def get_reward(history, player_card, opponent_card):
        """Payoff of a terminal ``history`` for the player whose turn it would be.

        * ends in ``'pp'``: showdown for 1 (``+1`` / ``0`` / ``-1``);
        * ends in ``'bp'``: ``+1`` regardless of the cards;
        * ends in ``'bb'``: showdown for 2 (``+2`` / ``0`` / ``-2``).

        Returns ``None`` for a history that ends in none of these.
        """
        if history[-1] == 'p':
            if history[-2:] != 'pp':
                return 1
            stake = 1
        elif history[-2:] == "bb":
            stake = 2
        else:
            return None

        if player_card > opponent_card:
            return stake
        if player_card == opponent_card:
            return 0
        return -stake

    def get_node(self, card, history):
        """Return the node for ``card`` and ``history``, creating it on first use."""
        key = str(card) + " " + history
        if key not in self.nodeMap:
            self.nodeMap[key] = Node(key, {0: 'p', 1: 'b'})
        return self.nodeMap[key]


class Node:
    """Regret and strategy bookkeeping for one information set.

    Args:
        key: label of the information set (used when printing).
        action_dict: maps an action index to the character appended to the
            history when that action is taken.
        n_actions: number of available actions.
    """

    def __init__(self, key, action_dict, n_actions=2):
        self.key = key
        self.n_actions = n_actions
        self.regret_sum = np.zeros(self.n_actions)
        self.strategy_sum = np.zeros(self.n_actions)
        self.action_dict = action_dict
        # Start from the uniform strategy.
        self.strategy = np.repeat(1/self.n_actions, self.n_actions)
        self.reach_pr = 0        # reach probability gathered this iteration
        self.reach_pr_sum = 0    # reach probability gathered over all iterations

    def update_strategy(self):
        """Fold this iteration into the running sums and refresh the strategy."""
        self.strategy_sum += self.reach_pr * self.strategy
        self.reach_pr_sum += self.reach_pr
        self.strategy = self.get_strategy()
        self.reach_pr = 0

    def get_strategy(self):
        """Return the regret-matching strategy for the current regrets.

        NOTE(review): ``regrets`` aliases ``self.regret_sum``, so negative
        cumulative regrets are reset to zero in the stored array as well,
        not just in the returned strategy.  Kept as is because it changes
        the training dynamics; confirm that it is intended.
        """
        regrets = self.regret_sum
        regrets[regrets < 0] = 0
        normalizing_sum = sum(regrets)
        if normalizing_sum > 0:
            return regrets / normalizing_sum
        return np.repeat(1/self.n_actions, self.n_actions)

    def get_average_strategy(self):
        """Return the reach-weighted average strategy, normalised to sum to 1.

        A node that has never accumulated any reach probability has no
        average yet; the uniform strategy is returned in that case instead
        of dividing by zero.
        """
        uniform = np.repeat(1/self.n_actions, self.n_actions)
        if not self.reach_pr_sum > 0:
            return uniform
        strategy = self.strategy_sum / self.reach_pr_sum
        # Re-normalize
        total = sum(strategy)
        if not total > 0:
            return uniform
        strategy /= total
        return strategy

    def __str__(self):
        strategies = ['{:03.2f}'.format(x)
                      for x in self.get_average_strategy()]
        return '{} {}'.format(self.key.ljust(6), strategies)


def display_results(ev, i_map):
    """Print the expected game value and every node, grouped by player.

    Args:
        ev: expected value for player 1; player 2 is shown as ``-ev``.
        i_map: mapping from information-set key to a printable node.  A key
            is expected to look like ``"<card> <history>"``.

    Nodes are listed in key order.  A node belongs to player 1 when its
    history (the text after the first space of the key) has even length and
    to player 2 otherwise.  Using the history rather than the length of the
    whole key keeps the grouping right when the card label is longer than
    one character.
    """
    def history_length(key):
        # Everything after the first space; empty when there is no space.
        return len(key.partition(" ")[2])

    print('player 1 expected value: {}'.format(ev))
    print('player 2 expected value: {}'.format(-1 * ev))

    sorted_items = sorted(i_map.items(), key=lambda item: item[0])

    print()
    print('player 1 strategies:')
    for key, node in sorted_items:
        if history_length(key) % 2 == 0:
            print(node)
    print()
    print('player 2 strategies:')
    for key, node in sorted_items:
        if history_length(key) % 2 == 1:
            print(node)


# Train for 25,000 iterations, then report the elapsed wall-clock seconds
# and the size of the trainer object as given by sys.getsizeof (which does
# not include the objects the trainer refers to).
if __name__ == "__main__":
    time1 = time.time()
    trainer = PokerB()
    trainer.train(n_iterations=25000)
    elapsed = abs(time1 - time.time())
    print(elapsed)
    trainer_size = sys.getsizeof(trainer)
    print(trainer_size)

player 1 expected value: -0.0668603768627914
player 2 expected value: 0.0668603768627914

player 1 strategies:
0      ['0.50', '0.50']
0 pb   ['1.00', '0.00']
1      ['0.62', '0.38']
1 pb   ['0.92', '0.08']
2      ['0.85', '0.15']
2 pb   ['0.69', '0.31']
3      ['0.95', '0.05']
3 pb   ['0.54', '0.46']
4      ['0.92', '0.08']
4 pb   ['0.39', '0.61']
5      ['0.57', '0.43']
5 pb   ['0.14', '0.86']
6      ['0.25', '0.75']
6 pb   ['0.04', '0.96']
7      ['0.36', '0.64']
7 pb   ['0.02', '0.98']
8      ['0.33', '0.67']
8 pb   ['0.01', '0.99']
9      ['0.37', '0.63']
9 pb   ['0.00', '1.00']

player 2 strategies:
0 b    ['1.00', '0.00']
0 p    ['0.08', '0.92']
1 b    ['0.95', '0.05']
1 p    ['0.55', '0.45']
2 b    ['0.78', '0.22']
2 p    ['0.84', '0.16']
3 b    ['0.67', '0.33']
3 p    ['0.95', '0.05']
4 b    ['0.23', '0.77']
4 p    ['0.93', '0.07']
5 b    ['0.13', '0.87']
5 p    ['0.39', '0.61']
6 b    ['0.03', '0.97']
6 p    ['0.07', '0.93']
7 b    ['0.02', '0.98']
7 p    ['0.04', '0.96']
8 b

In [None]:
import numpy as np
from random import shuffle
import time
import sys


class Kunh:
    """Counterfactual-regret trainer for a two-player pass/bet card game.

    Each player privately holds one integer taken from ``self.deck``
    (player 1 reads ``deck[0]``, player 2 reads ``deck[1]``); the larger
    number wins a showdown and equal numbers tie.  A history is a string
    over ``'p'`` (pass) and ``'b'`` (bet); the player to act is determined
    by the parity of its length.

    ``suitR`` and ``Handchecker`` are the beginning of a five-card hand
    evaluator and are not used by the training code yet.
    """

    # Sizes of the ten buckets used by ``randomHand``: bucket ``k`` is drawn
    # with probability ``HAND_COUNTS[k] / sum(HAND_COUNTS)``; the total is
    # 2,598,960.
    # NOTE(review): these look like the standard five-card poker hand
    # frequencies, high card (0) up to royal flush (9) -- confirm.
    HAND_COUNTS = (1302540, 1098240, 123552, 54912, 10200, 5108, 3744, 624, 36, 4)

    def __init__(self):
        self.nodeMap = {}                 # information-set key -> Node
        self.expected_game_value = 0
        self.n_cards = 10
        self.nash_equilibrium = dict()
        self.current_player = 0
        self.deck = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
        self.n_actions = 2

    def randomHand(self):
        """Overwrite both players' cards with independently drawn buckets.

        Replaces ``deck[0]`` and ``deck[1]`` with a bucket index in
        ``0..9`` chosen with the weights in ``HAND_COUNTS``.  Because the
        draws are independent, both players may receive the same value.

        The previous version could not run (it called the ``np.random``
        module as a function), skipped the fourth bucket so that value 9
        was unreachable, dropped draws that landed exactly on a boundary,
        and only ever wrote ``deck[1]``.
        NOTE(review): dealing to both players is assumed to be the intent;
        nothing in this class calls the method yet.
        """
        upper_bounds = np.cumsum(self.HAND_COUNTS)
        for seat in range(2):
            draw = np.random.randint(upper_bounds[-1])
            # Number of bucket upper bounds that are <= draw, i.e. the index
            # of the half-open interval [lower, upper) containing the draw.
            self.deck[seat] = np.searchsorted(upper_bounds, draw, side='right')

    def train(self, n_iterations=50000):
        """Run ``n_iterations`` CFR iterations and print the results.

        Every iteration shuffles the deck, traverses the game tree once
        from the empty history and then lets every known node refresh its
        strategy.  The mean root utility and all nodes are finally passed
        to ``display_results``.
        """
        expected_game_value = 0
        for _ in range(n_iterations):
            shuffle(self.deck)
            expected_game_value += self.cfr('', 1, 1)
            for node in self.nodeMap.values():
                node.update_strategy()

        expected_game_value /= n_iterations
        display_results(expected_game_value, self.nodeMap)

    def cfr(self, history, pr_1, pr_2):
        """Recursively evaluate ``history`` and accumulate regrets.

        Args:
            history: sequence of actions played so far.
            pr_1: probability with which player 1's own choices lead here.
            pr_2: the same for player 2.

        Returns:
            The utility of ``history`` for the player whose turn it is
            (callers negate it to obtain their own utility).
        """
        is_player_1 = len(history) % 2 == 0
        player_card = self.deck[0] if is_player_1 else self.deck[1]

        if self.is_terminal(history):
            opponent_card = self.deck[1] if is_player_1 else self.deck[0]
            return self.get_reward(history, player_card, opponent_card)

        node = self.get_node(player_card, history)
        strategy = node.strategy

        # Counterfactual utility per action.
        action_utils = np.zeros(self.n_actions)

        for act in range(self.n_actions):
            next_history = history + node.action_dict[act]
            # The child returns the utility of the *other* player, hence
            # the sign flip.
            if is_player_1:
                action_utils[act] = -1 * self.cfr(next_history, pr_1 * strategy[act], pr_2)
            else:
                action_utils[act] = -1 * self.cfr(next_history, pr_1, pr_2 * strategy[act])

        # Utility of information set.
        util = sum(action_utils * strategy)
        regrets = action_utils - util
        # Regrets are weighted by the opponent's reach probability, the
        # node's own reach by the acting player's.
        if is_player_1:
            node.reach_pr += pr_1
            node.regret_sum += pr_2 * regrets
        else:
            node.reach_pr += pr_2
            node.regret_sum += pr_1 * regrets

        return util

    @staticmethod
    def is_terminal(history):
        """Return True when the last two actions are 'pp', 'bb' or 'bp'."""
        return history[-2:] in ('pp', 'bb', 'bp')

    @staticmethod
    def suitR(a):
        """Return the rank of suit ``a``: 4, 3, 2, 1 for s, h, d, c.

        Returns ``None`` for any other value.
        NOTE(review): the draft compared against the undefined names ``s``,
        ``h``, ``d`` and ``c``; one-letter strings are assumed here --
        confirm how suits are encoded.
        """
        return {'s': 4, 'h': 3, 'd': 2, 'c': 1}.get(a)

    @staticmethod
    def Handchecker(a, b):
        """Classify a five-card hand (partial implementation).

        Args:
            a: five cards, each indexable as ``(rank, suit)``.
               NOTE(review): ranks are assumed to be integers with the ace
               as 14 (the draft tested ``rank == 14 - i``) and suits to be
               values accepted by ``suitR`` -- confirm.
            b: not used; kept so the signature stays as drafted.

        Returns:
            A six-element list whose first entry is the category:

            * ``[10, suit, 0, 0, 0, 0]`` -- 14-13-12-11-10 in one suit;
            * ``[9, suit, 0, 0, 0, 0]``  -- five consecutive ranks in one suit;
            * ``[8, 0, 0, 0, odd_rank, suit]`` -- four cards of one rank,
              ``odd_rank`` being the rank of the fifth card.

            ``suit`` is ``suitR`` of the first card after sorting by rank,
            highest first.
            NOTE(review): the draft's choice of the fifth entry was
            ambiguous (it indexed the position of the last rank change);
            the odd card's rank is assumed.  An ace-low straight is not
            recognised, as in the draft.

        Raises:
            NotImplementedError: for every other hand; the draft stopped
            while starting on the three-of-a-kind check.
        """
        # The draft sorted on the suit column, which cannot bring equal or
        # consecutive ranks next to each other; sort on rank instead.
        cards = sorted(a, key=lambda card: card[0], reverse=True)
        ranks = [card[0] for card in cards]
        suits = [card[1] for card in cards]
        first_suit_rank = Kunh.suitR(suits[0])
        single_suit = len(set(suits)) == 1

        if single_suit and ranks == [14, 13, 12, 11, 10]:
            return [10, first_suit_rank, 0, 0, 0, 0]

        descending_run = all(ranks[i] == ranks[i + 1] + 1 for i in range(4))
        if single_suit and descending_run:
            return [9, first_suit_rank, 0, 0, 0, 0]

        # Counting equal neighbours (as the draft did) cannot tell four of a
        # kind from a full house, so count occurrences per rank.
        for rank in set(ranks):
            if ranks.count(rank) == 4:
                odd_rank = next(r for r in ranks if r != rank)
                return [8, 0, 0, 0, odd_rank, first_suit_rank]

        raise NotImplementedError(
            'Handchecker only recognises royal flush, straight flush '
            'and four of a kind')

    @staticmethod
    def get_reward(history, player_card, opponent_card):
        """Payoff of a terminal ``history`` for the player whose turn it would be.

        * ends in ``'pp'``: showdown for 1 (``+1`` / ``0`` / ``-1``);
        * ends in ``'bp'``: ``+1`` regardless of the cards;
        * ends in ``'bb'``: showdown for 2 (``+2`` / ``0`` / ``-2``).

        Returns ``None`` for a history that ends in none of these.
        """
        if history[-1] == 'p':
            if history[-2:] != 'pp':
                return 1
            stake = 1
        elif history[-2:] == "bb":
            stake = 2
        else:
            return None

        if player_card > opponent_card:
            return stake
        if player_card == opponent_card:
            return 0
        return -stake

    def get_node(self, card, history):
        """Return the node for ``card`` and ``history``, creating it on first use."""
        key = str(card) + " " + history
        if key not in self.nodeMap:
            self.nodeMap[key] = Node(key, {0: 'p', 1: 'b'})
        return self.nodeMap[key]


class Node:
    """Regret and strategy bookkeeping for one information set.

    Args:
        key: label of the information set (used when printing).
        action_dict: maps an action index to the character appended to the
            history when that action is taken.
        n_actions: number of available actions.
    """

    def __init__(self, key, action_dict, n_actions=2):
        self.key = key
        self.n_actions = n_actions
        self.regret_sum = np.zeros(self.n_actions)
        self.strategy_sum = np.zeros(self.n_actions)
        self.action_dict = action_dict
        # Start from the uniform strategy.
        self.strategy = np.repeat(1/self.n_actions, self.n_actions)
        self.reach_pr = 0        # reach probability gathered this iteration
        self.reach_pr_sum = 0    # reach probability gathered over all iterations

    def update_strategy(self):
        """Fold this iteration into the running sums and refresh the strategy."""
        self.strategy_sum += self.reach_pr * self.strategy
        self.reach_pr_sum += self.reach_pr
        self.strategy = self.get_strategy()
        self.reach_pr = 0

    def get_strategy(self):
        """Return the regret-matching strategy for the current regrets.

        NOTE(review): ``regrets`` aliases ``self.regret_sum``, so negative
        cumulative regrets are reset to zero in the stored array as well,
        not just in the returned strategy.  Kept as is because it changes
        the training dynamics; confirm that it is intended.
        """
        regrets = self.regret_sum
        regrets[regrets < 0] = 0
        normalizing_sum = sum(regrets)
        if normalizing_sum > 0:
            return regrets / normalizing_sum
        return np.repeat(1/self.n_actions, self.n_actions)

    def get_average_strategy(self):
        """Return the reach-weighted average strategy, normalised to sum to 1.

        A node that has never accumulated any reach probability has no
        average yet; the uniform strategy is returned in that case instead
        of dividing by zero.
        """
        uniform = np.repeat(1/self.n_actions, self.n_actions)
        if not self.reach_pr_sum > 0:
            return uniform
        strategy = self.strategy_sum / self.reach_pr_sum
        # Re-normalize
        total = sum(strategy)
        if not total > 0:
            return uniform
        strategy /= total
        return strategy

    def __str__(self):
        strategies = ['{:03.2f}'.format(x)
                      for x in self.get_average_strategy()]
        return '{} {}'.format(self.key.ljust(6), strategies)


def display_results(ev, i_map):
    """Print the expected game value and every node, grouped by player.

    Args:
        ev: expected value for player 1; player 2 is shown as ``-ev``.
        i_map: mapping from information-set key to a printable node.  A key
            is expected to look like ``"<card> <history>"``.

    Nodes are listed in key order.  A node belongs to player 1 when its
    history (the text after the first space of the key) has even length and
    to player 2 otherwise.  Using the history rather than the length of the
    whole key keeps the grouping right when the card label is longer than
    one character.
    """
    def history_length(key):
        # Everything after the first space; empty when there is no space.
        return len(key.partition(" ")[2])

    print('player 1 expected value: {}'.format(ev))
    print('player 2 expected value: {}'.format(-1 * ev))

    sorted_items = sorted(i_map.items(), key=lambda item: item[0])

    print()
    print('player 1 strategies:')
    for key, node in sorted_items:
        if history_length(key) % 2 == 0:
            print(node)
    print()
    print('player 2 strategies:')
    for key, node in sorted_items:
        if history_length(key) % 2 == 1:
            print(node)


# Train for 25,000 iterations, then report the elapsed wall-clock seconds
# and the size of the trainer object as given by sys.getsizeof (which does
# not include the objects the trainer refers to).
if __name__ == "__main__":
    time1 = time.time()
    trainer = Kunh()
    trainer.train(n_iterations=25000)
    elapsed = abs(time1 - time.time())
    print(elapsed)
    trainer_size = sys.getsizeof(trainer)
    print(trainer_size)