In [1]:
import re


from time import time
from random import shuffle
from typing import List
from collections import OrderedDict
from dataclasses import dataclass

In [2]:
# ver se o shuffle é bom

# Node and CFR definition

In [68]:
class Node:
    
    def __init__(self, info_set_id, possible_actions):
        self.info_set = info_set_id
        self.regret_sum = [0] * NUM_ACTIONS
        self.strategy = [0] * NUM_ACTIONS
        self.strategy_sum = [0] * NUM_ACTIONS
        self.possible_actions = possible_actions
        self.n_visits = 0
    
    
    def get_strategy(self, realization_weight):
        # compute strategy through regret-matching
        
        normalizing_sum = 0
        
        for a in self.possible_actions:
            self.strategy[a] = self.regret_sum[a] if self.regret_sum[a] > 0 else 0
            normalizing_sum += self.strategy[a]
    
        for a in self.possible_actions:
            if normalizing_sum > 0:
                self.strategy[a] /= normalizing_sum
            else:
                self.strategy[a] = 1/len(self.possible_actions)
                
            self.strategy_sum[a] += realization_weight*self.strategy[a]
            
        return self.strategy
    
    
    def get_average_strategy(self):
        avg_strategy = [0] * NUM_ACTIONS
        normalizing_sum = sum(self.strategy_sum)
            
        for a in self.possible_actions:
            if normalizing_sum > 0:
                avg_strategy[a] = self.strategy_sum[a]/normalizing_sum
            else:
                avg_strategy[a] = 1/len(self.possible_actions)
                
        return avg_strategy
    
    
    def __str__(self):
        # o Infomation Set é definido pela carta na mao do jogador e a sequencia de acoes
        return f'(V: {self.n_visits}) {self.info_set}: {self.get_average_strategy()}'

In [79]:
def get_string(action, set_name):
    string_action = {
        0: 'c',
        1: 'p',
        2: 'b'
    }
    
    if action < 3:
        return string_action[action]
    
    cards = set_name.split('#')[0]
    played_card = cards[0]
    return played_card


def get_infoset_name(history, cards):
    card_name = {
        10: 'Z',
        5: 'N',
        0: 'R'
    }
    
    player = len(history) % 2
    player_actions = history[player::2]

    
    players_cards = cards[player:4:2]
    players_cards = [card_name[card] for card in players_cards]
    players_cards = ''.join(players_cards)
    
    n_cards = len(re.findall(r'[ZNR]', player_actions))
    
    if n_cards < 2:
        return f"{players_cards[n_cards:]}#{history}"
    else:
        return f"#{history}"
    
def ChanceSampling_CFR(cards, history, p0, p1):
    
    n_plays = len(history)
    
    player = n_plays % 2
    opponent = 1 - player
    
    if game.is_terminal(history, cards):
        return game.payoff_value
    
    # get infoset node or create new
    info_set = get_infoset_name(history, cards)
    node = node_map.get(info_set)
    
    if node is None:
        node = Node(info_set, game.possible_actions(history, cards))
        node_map[info_set] = node
        
    node.n_visits += 1
        
    # recursively call CFR with +action +history for each action  
    player_probability = (p0 if player == 0 else p1)
    strategy = node.get_strategy(player_probability)
    
    util = [0] * NUM_ACTIONS
    node_util = 0
    
    possible_actions = node.possible_actions
    for action in possible_actions:
        next_history = history + get_string(action, info_set)
        
        util[action] = -ChanceSampling_CFR(cards, next_history, p0*strategy[action], p1)\
                       if player == 0 else \
                       -ChanceSampling_CFR(cards, next_history, p0, p1*strategy[action])
        
        node_util += strategy[action]*util[action]
        
    # accumulate counterfactual regret for each action
    for action in possible_actions:
        regret = util[action] - node_util
        node.regret_sum[action] += player_probability * regret
        
    return node_util

# Simplified Truco

* Quatro ações principais: `c, p, b, D` (call, pass, bet e draw card)
* `D` é jogar uma carta válida e será representada pela string que define a carta
* O baralho será composto por 5 cartas: 1 zap (`Z`), 2 cartas médias (`N`) e 2 cartas fracas (`R`)
* A força será : `Z` $>$ `N` $>$ `R`
* A força será representada pelos valores inteiros $10$, $5$ e $0$

In [65]:
class SimpleTruco():

    def __init__(self):
        self.card_name = {
            10: 'Z',
            5: 'N',
            0: 'R'
        }
        
        self.payoff_value = 0
        self.regex_pattern = re.compile("(((b(bb)?c)|(bb))?[ZNR]){4}")
        
    
    def is_terminal(self, history, cards):
        if history == '':
            return False
        
        player = len(history) % 2
        
        terminal_state = False
        player_ran = (history[-1] == 'p')
        times_raised = history.count('b')
         
        all_cards_played = self.regex_pattern.search(history)
        
        if player_ran:
            terminal_state = True
            self.payoff_value = 3*max(1, (times_raised-1))
        elif all_cards_played:
            terminal_state = True
            stake_points =  max(1, 3*times_raised)
            
            winner_player = self.get_winner_player(cards)
            
            if winner_player is None:
                reward = 0
            elif winner_player == player:
                reward = 1
            else:
                reward = -1
                
            self.payoff_value = stake_points*reward
            
        return terminal_state
    
    
    def get_winner_player(self, cards):
        player0_cards = cards[:3:2]
        player1_cards = cards[1:3:2]
        
        
        player0_max = max(player0_cards)
        player1_max = max(player1_cards)
        
        if player0_max > player1_max:
            winner = 0
        elif player0_max > player1_max:
            winner = 1
        else:
            winner = None
        
        return winner
    
    
    def possible_actions(self, history, cards):   
        '''
        call -> 0
        pass -> 1
        bet  -> 2
        play -> 3
        '''
        
        actions = []
        
        player = len(history) % 2
        player_cards = cards[player:4:2]
        player_actions = history[player::2]
        
        next_card = len(re.findall(r'[ZNR]', player_actions)) % 2
        
        raise_sequence = re.search(r'((?<=[pcZNR])(b+)$)|^b+$', history)
        times_raised = history.count('b')
        
        if raise_sequence:
            raise_sequence = raise_sequence.group()
            actions.append(1)
            
            if times_raised < 2:
                actions.append(2)
            
            if (len(raise_sequence) % 2):
                actions.append(0)
            else:
                card_string = self.card_name[player_cards[next_card]]
                actions.append(3)
        else:
            card_string = self.card_name[player_cards[next_card]]
            actions.append(3)
            
            player_lastbet = player_actions.rfind('b')
            
            if player_lastbet == -1:
                actions.append(2)
        
        return actions
    
# identificar se nao foram dois bets seguidos

In [80]:
# GLOBAIS
string_action = {
    0: 'c',
    1: 'p',
    2: 'b',
    3: 'D'
}
      
NUM_ACTIONS = 4
node_map = OrderedDict()
game = SimpleTruco()

def train(iterations): 
    cards = [10, 5, 5, 0, 0] 
    util = 0
    
    start_time = time()
    
    for i in range(iterations):
        shuffle(cards)
        util += ChanceSampling_CFR(cards, "", 1, 1)
        
    end_time = time()
    
    print(f'Elapsed time: {end_time - start_time:.2f}s')
    print(f'Avg game value: {util/iterations}')
    
    for node in node_map.values():
        print(node)

In [83]:
train(1.000.000)

Elapsed time: 211.28s
Avg game value: 1.962803981364245
(V: 220429) RN#: [0, 0, 0.9999977316959202, 2.2683040797717178e-06]
(V: 73221) RN#R: [0, 0, 0.9999833971586202, 1.6602841379745414e-05]
(V: 73377) N#RR: [0, 0, 0.5, 0.5]
(V: 36591) N#RRN: [0, 0, 0.20564678960829186, 0.7943532103917081]
(V: 73377) #RRNb: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0]
(V: 36591) N#RRNbb: [0, 0.5, 0, 0.5]
(V: 36591) N#RRNbc: [0, 0, 0, 1.0]
(V: 73221) N#RRb: [0.13709785973886124, 0.28359722035267193, 0.5793049199084669, 0]
(V: 73377) N#RRbb: [0, 0.5, 0, 0.5]
(V: 36591) N#RRbbN: [0, 0, 0, 1.0]
(V: 73377) N#RRbc: [0, 0, 0, 1.0]
(V: 36591) N#RRbcN: [0, 0, 0.5, 0.5]
(V: 73377) #RRbcNb: [0.5, 0.5, 0, 0]
(V: 36591) N#RRbcNbc: [0, 0, 0, 1.0]
(V: 220429) N#Rb: [0.3333333333333333, 0.3333333333333333, 0.3333333333333333, 0]
(V: 73221) RN#Rbb: [0, 0.9999965856222836, 0, 3.414377716491985e-06]
(V: 73377) N#RbbR: [0, 0, 0, 1.0]
(V: 36591) N#RbbRN: [0, 0, 0, 1.0]
(V: 73221) RN#Rbc: [0, 0, 0, 1.0]


## Total de estados

* Considerando um jogador com cartas na mão, existem os possíveis cenários:
    
    1. D (adversário jogou uma carta)
    2. b (adversário pediu 'truco')
    3. bc (jogador atual pediu 'truco' e o adversário aceitou)
    4. Dbc (adversário jogou uma carta, jogador pediu truco e adversário aceitou)
    5. bcD (adversário pediu truco, jogador aceitou e adversário jogou a carta)
    
* Considerando que o baralho possui 5 cartas, 3 tipos diferentes, o total de combinações possíveis para a mão de um jogador é:

$$\frac{P(5,2)}{2! \cdot 2!} = \frac{5\cdot 4}{2 \cdot 2} = 5$$

In [13]:
regex_pattern = "((bc)?[ZNR]){4}(bc)?"
x = re.search(regex_pattern, 'bcZNNbcR')

In [14]:
x.group()

'bcZNNbcR'

In [32]:
game_testing = SimpleTruco()
cards = [10, 5, 5, 0, 0]
shuffle(cards)
history = ''


while not game_testing.is_terminal(history, cards):
    acoes_possiveis = game_testing.possible_actions(history, cards)
    acao = '-'
    
    while acao not in acoes_possiveis:
        print(f'Escolha acao: {acoes_possiveis}')
        acao = int(input('-> '))
    
    history += acao
    print(f'Mesa: {history}')
    
print(game_testing.get_winner_player(cards))

Escolha acao: [3, 2]
-> 3


TypeError: can only concatenate str (not "int") to str