## CFR

In [1]:
import numpy as np

In [2]:
class Node:
    TERMINALES = ['pp', 'bp', 'bb', 'pbb', 'pbp']
    
    def __init__(self, history, cards) -> None:
        self.history = history
        self.player = len(history) % 2
        self.cards = cards
        self.actions = ['p', 'b']
        self.is_terminal = history in self.TERMINALES
        self.payments = 0 if not self.is_terminal else self._payments()
        self.strategy = np.repeat(1/len(self.actions), len(self.actions))
        self.regrets = np.zeros(2)  # Dos acciones posibles: pasar y apostar
        self.strategy_sum = np.zeros(2)
        
    def _payments(self) -> float:
        # Ajuste de la lógica de pagos según las reglas del juego
        payment = 4 if self.history.count('b') == 2 else 2
        # Lógica para determinar el ganador
        if self.history in ['pp', 'bb', 'pbb']:
            return payment if self.cards[self.player] > self.cards[(self.player+1)%2] else -payment
        return payment
    
    def get_current_strategy(self):
        regrets_positivos = [r if r >= 0 else 0 for r in self.regrets ]  # Solo consideramos regrets positivos
        total_positivo = np.sum(regrets_positivos)
        if total_positivo > 0:
            strategy = regrets_positivos / total_positivo
        else:
            strategy = np.ones(len(self.actions)) / len(self.actions)
        return strategy

In [3]:
infoset_nodes = {}


def get_node(history, cards):
    global infoset_nodes
    key = f"{cards[len(history) % 2]} {history}"
    if key not in infoset_nodes:
        infoset_nodes[key] = Node(history, cards)
    return infoset_nodes[key]

def cfr(history, update_p, cards, p1, p2):
    node = get_node(history, cards)
    if node.is_terminal:
        return node.payments
    strategy = node.get_current_strategy()
    actions_utils = np.zeros(len(node.actions))
    node_util = 0
    
    for i, action in enumerate(node.actions):
        new_history = history + action
        if node.player == 0:
            actions_utils[i] = -cfr(new_history, update_p, cards, p1 * strategy[i], p2)
        else:
            actions_utils[i] = -cfr(new_history, update_p,  cards, p1, p2 * strategy[i])
        node_util += strategy[i] * actions_utils[i]
    
    if update_p == node.player:
        for i, action in enumerate(node.actions):
            regret = actions_utils[i] - node_util
            node.regrets[i] += (p2 if node.player == 0 else p1) * regret
            node.strategy_sum[i] += (p1 if node.player == 0 else p2)*strategy[i]
    return node_util

In [4]:
utilidades = []
HANDS = ['JQ', 'JK', 'QJ', 'KJ', 'QK', 'KQ']
iteraciones = 10_000

for p in [0,1]:
    for _ in range(iteraciones):
        for cards in HANDS:
            utilidades.append(cfr('',p, cards, 1/6, 1/6))

# Imprimir resultados y estrategias de los nodos no terminales
for key, node in infoset_nodes.items():
    if not node.is_terminal:
        print(f"{key}: Estrategia acumulada: {node.strategy_sum}, Estrategia actual: {node.get_current_strategy()}\n")

J : Estrategia acumulada: [8.33333333e-02 3.33325000e+03], Estrategia actual: [0. 1.]

Q p: Estrategia acumulada: [1666.66666667 1666.66666667], Estrategia actual: [0.5 0.5]

J pb: Estrategia acumulada: [0.04166667 0.04166667], Estrategia actual: [1. 0.]

Q b: Estrategia acumulada: [8.33333333e-02 3.33325000e+03], Estrategia actual: [0. 1.]

K p: Estrategia acumulada: [1666.66666667 1666.66666667], Estrategia actual: [0.5 0.5]

K b: Estrategia acumulada: [8.33333333e-02 3.33325000e+03], Estrategia actual: [0. 1.]

Q : Estrategia acumulada: [8.33333333e-02 3.33325000e+03], Estrategia actual: [0. 1.]

J p: Estrategia acumulada: [1666.66666667 1666.66666667], Estrategia actual: [0.5 0.5]

Q pb: Estrategia acumulada: [0.04166667 0.04166667], Estrategia actual: [0. 1.]

J b: Estrategia acumulada: [3.33325000e+03 8.33333333e-02], Estrategia actual: [1. 0.]

K : Estrategia acumulada: [8.33333333e-02 3.33325000e+03], Estrategia actual: [0. 1.]

K pb: Estrategia acumulada: [0.04166667 0.0416666