# Imports

In [None]:
from collections import Counter
from dataclasses import dataclass
import numpy as np
import random

# Card and Deck Environment

In [None]:
@dataclass
# Card class consists of rank and suit
class Card:
  rank: str
  suit: str

  # Represent card as one char for rank and one char for suit
  def __repr__(self):
    return self.rank + self.suit

  # Hash for each state representation
  def __hash__(self):
    return hash((self.rank, self.suit))

# Deck class consists of cards and card functions
class Deck:
  def __init__(self):
    ranks = "23456789TJQKA" # Numbers, Ten, Jack, Queen, King, Ace
    suits = "SHDC" # Spades, Hearts, Diamonds, Clubs

    # Generate one card of each rank/suit pair
    self.cards = [Card(rank, suit) for suit in suits for rank in ranks]
    self.shuffle()

    # If number of cards isn't 52, something is wrong
    assert len(self.cards) == 52

  # Shuffle deck of cards
  def shuffle(self):
    random.shuffle(self.cards)

  # Draw random card without replacement
  def draw_card(self):
    if len(self.cards) == 0:
      raise ValueError("This shouldn't really happen. Maybe activate print_mode to figure out why.")

    return self.cards.pop()

# Print out 5 cards
deck = Deck()
print(deck.cards[:5])

[2S, 7H, KD, JH, 4H]


# Agent (Player) State

In [None]:
# Player class consists of several player properties
class Player:
  def __init__(self, player_name, bet = 20, starting_chips = 1000):
    self.player_name = player_name
    self.hand = []
    self.bet = bet  # Default bet amount is 20 chips
    self.chips = starting_chips # Default starting chips is 1000 chips
    self.active = True  # Player playing current game is True

  # Draw starting hand (two cards)
  def draw_hand(self, deck):
    self.hand = [deck.draw_card() for i in range(2)]

  # Show player hand
  def show_hand(self):
    return self.hand

# Q-Learning Agent

In [None]:
# QLearning class consists of similar setup/params as the RL HW
class QLearning:
  def __init__(self, alpha, gamma, n_actions, epsilon = 1.0, decay = 0.999, min_epsilon = 0.01):
    self.alpha = alpha
    self.gamma = gamma
    self.n_actions = n_actions
    self.epsilon = epsilon
    self.epsilon_decay = decay # Decay close to 1 becaure we're running thousands of simulations
    self.min_epsilon = min_epsilon # Want a small value for epsilon
    self.q_table = {}

  def choose_action(self, state):
    if np.random.rand() < self.epsilon:
      return np.random.randint(self.n_actions) # Explores new action
    else:
      return np.argmax(self.q_table.get(state, np.zeros(self.n_actions))) # Exploits optimal action

  def learn(self, state, action, reward, next_state):
    # Update q-table based on new info obtained
    current_q = self.q_table.get(state, np.zeros(self.n_actions))[action]
    highest_future_q = np.max(self.q_table.get(next_state, np.zeros(self.n_actions)))
    new_q = (1 - self.alpha) * current_q + self.alpha * (reward + self.gamma * highest_future_q)  # Bellman equation

    self.q_table.setdefault(state, np.zeros(self.n_actions))[action] = new_q
    self.epsilon = max(self.min_epsilon, self.epsilon * self.epsilon_decay)

# Poker Game Setup

In [None]:
# PokerGame class consists of several game properties
class PokerGame:
  def __init__(self, players, print_mode=False):
    self.players = [Player(player_name) for player_name in players]
    self.deck = Deck()
    self.community_cards = [] # Face up cards
    self.pot = 0
    self.starting_chips = 1000
    self.current_bet = 20

    # Default agent parameters
    self.agent = QLearning(alpha = 0.1, gamma = 0.9, n_actions = 4)

    # Displaying output text mode (toggle)
    self.print_mode = print_mode

  # Start new game (resets ALL deck and chip components)
  def start_new_game(self):
    self.deck = Deck()
    self.community_cards = []
    self.pot = 0

    if self.print_mode:
      print("Starting new game")

    # All players are reinitialized and reset to default state
    for player in self.players:
      player.hand = []
      player.chips = self.starting_chips
      player.draw_hand(self.deck)

      if self.print_mode:
        print(f"{player.player_name}'s Hand: {player.show_hand()}")

  # Update chip amount in pot
  def update_pot(self, amount):
    self.pot += amount

  # Update game/player properties based on player's move
  def player_action(self, player, action):
    if action == 0: # Player folds
      player.active = False
    elif action == 2: # Player checks
      self.update_pot(player.bet)
    elif action == 3: # Player raises
      raise_amount = self.current_bet
      self.current_bet = raise_amount
      self.update_pot(player.bet)

  # Choose winner at end of game based on player's hand
  def determine_winner(self):
    active_players = [p for p in self.players if p.active]
    if not active_players:
      return None

    best_hand = max(active_players, key = lambda p: self.evaluate_hand(p.hand + self.community_cards))
    best_hand.chips += self.pot
    return best_hand

# Test player's starting hand
game = PokerGame(["Andrie", "Dhruv", "Lawrence", "Vishnu", "Matthew"], print_mode = True)
game.start_new_game()

Starting new game
Andrie's Hand: [KD, KS]
Dhruv's Hand: [AD, KC]
Lawrence's Hand: [AC, 7D]
Vishnu's Hand: [7C, QC]
Matthew's Hand: [4D, 3C]


# Poker Game Setup (Deprecated Version)

In [None]:
class PokerGame(PokerGame):
  # NOTE: This implementation is deprecated because there are too many states
  #       in poker that it would be impossible to map them all

  '''def get_current_state(self):
    # Defaults to a single player vs AI
    # Different implementation necessary for several people
    player = self.players[0]

    # Current state is tuple of values
    state = (tuple(player.show_hand()),
             tuple(self.community_cards),
             player.chips / self.starting_chips,
             self.pot / player.chips)

    return state'''

  # Initialize poker hand rankings
  def extract_features(self, player, stage):
    hand_cards = player.hand + self.community_cards
    hand_rank_tuple = self.evaluate_hand(hand_cards)

    hand_strength = {
        "High Card": 1,
        "One Pair": 2,
        "Two Pair": 3,
        "Three of a Kind": 4,
        "Straight": 5,
        "Flush": 6,
        "Full House": 7,
        "Four of a Kind": 8,
        "Straight Flush": 9,
        "Royal Flush": 10
    }[hand_rank_tuple[0]]

    '''pot_odds = None

    # Prevents division by 0 errors
    if player.chips:
      pot_odds = self.pot / player.chips
    else:
      pot_odds = 1

    relative_chips = player.chips / self.starting_chips
    return tuple([hand_strength, pot_odds, relative_chips])'''

    active_players = len([player for player in self.players if player.active])
    return tuple([hand_strength, active_players, stage])

  # New representation method abstracts some detail so RL agent can learn feasibly
  def get_current_state(self, player, stage):
    return self.extract_features(player, stage)

  # Deal community cards
  def deal_community_cards(self, number):
    for i in range(number):
      self.community_cards.append(self.deck.draw_card())

    # Debugging purposes
    if self.print_mode:
      print(f"Comm Cards: {[str(card) for card in self.community_cards]}")

  # Simulate one full game of poker
  def simulate_game(self):
    self.start_new_game()
    stages = [("Pre flop", 0), ("Flop", 3), ("Turn", 1), ("River", 1)]

    # Loop through each stage in game
    for stage, num_cards in stages:
      if num_cards > 0:
        self.deal_community_cards(num_cards)

      for player in self.players:
        # These commands don't matter much in this function since it only simulates one game
        state = self.get_current_state(player, stage)
        action = self.agent.choose_action(state)
        reward = self.evaluate_action(action, player)

        next = self.get_current_state(player, stage)
        self.agent.learn(state, action, reward, next)
        self.player_action(player, action)

        action_maps = ["Fold", "Check", "Call", "Raise"][action]
        if self.print_mode:
          print(f"{player.name} decided to {action_maps}")
          print(f"Pot after betting: {self.pot}")
          print(f"Q values for current state: {self.agent.q_table.get(state, 'No actions')}")

        # Player folds
        if action == 0:
          break

    winner = self.determine_winner()
    if winner and self.print_mode:
      print(f"Winner is {winner.name} with {winner.chips} chips")

    self.pot = 0

  # Compare strengths of a player's hand compared to another player's hand
  def compare_hands(self, hand_1, hand_2):
    eval_1 = self.evaluate_hand(hand_1)
    eval_2 = self.evaluate_hand(hand_2)

    # Integer representation is fine for now
    hand_strength_mappings = {
        "High Card": 1,
        "One Pair": 2,
        "Two Pair": 3,
        "Three of a Kind": 4,
        "Straight": 5,
        "Flush": 6,
        "Full House": 7,
        "Four of a Kind": 8,
        "Straight Flush": 9,
        "Royal Flush": 10
    }

    if hand_strength_mappings[eval_1[0]] > hand_strength_mappings[eval_2[0]]:
      return 1
    elif hand_strength_mappings[eval_1[0]] < hand_strength_mappings[eval_2[0]]:
      return -1
    else:
      if eval_1[1] > eval_2[1]:
        return 1
      elif eval_1[1] < eval_2[1]:
        return -1
      else:
        return 0 # Tie game

# Poker Class Evaluation Functions

In [None]:
class PokerGame(PokerGame):
  # Simulates many poker games and returns overall win rate
  def monte_carlo_eval(self, player, num_simulations = 1000):
    wins = 0
    original_deck = self.deck.cards.copy()

    # Might need to lower this if you want to run extensive operations
    for i in range(num_simulations):  # Lower number of simulations if running extensive operations
      self.deck.cards = [card for card in original_deck if card not in player.hand and card not in self.community_cards]
      random.shuffle(self.deck.cards)

      #  Opponent draws cards
      opponent_hand = [self.deck.draw_card() for i in range(2)]
      missing = 5 - len(self.community_cards)

      simulated_comm_cards = self.community_cards + [self.deck.draw_card() for i in range(missing)]

      if self.compare_hands(player.hand + simulated_comm_cards, opponent_hand + simulated_comm_cards) > 0:
        wins += 1

    win_rate = wins / num_simulations

    return win_rate

  # Evaluate based on many different factors
  # Averages both the monte carlo win rate and the static hand strength
  def evaluate_action(self, action, player, base_reward = 10):
    # Probabilistic strength of the cards
    monte_carlo_strength = self.monte_carlo_eval(player, num_simulations=500)

    # This part evaluates the static strength of the hand
    hand = player.hand + self.community_cards
    hand_rank = self.evaluate_hand(hand)[0]

    hand_strength_mappings = {
        "High Card": 0.1,
        "One Pair": 0.2,
        "Two Pair": 0.3,
        "Three of a Kind": 0.4,
        "Straight": 0.5,
        "Flush": 0.6,
        "Full House": 0.7,
        "Four of a Kind": 0.8,
        "Straight Flush": 0.9,
        "Royal Flush": 1.0,
    }
    hand_strength = hand_strength_mappings[hand_rank]

    # Add the mapping with the monte carlo strength
    # Monte carlo strength is just the win rate
    combined_strength = (monte_carlo_strength + hand_strength) / 2

    pot_odds = self.pot / (self.pot + player.chips)

    # Fold
    if action == 0:
      if combined_strength > 0.5 or pot_odds < combined_strength:
        return base_reward * -0.5 # Penalize for folding with strong hand
      else:
        return base_reward * 0.1 # Small Reward

    # Call
    elif action == 1:
      if combined_strength > pot_odds:
        return base_reward * combined_strength # Reward based on strength
      else:
        return -base_reward * combined_strength # Penalize for calling with weak hand

    # Raise
    elif action == 2:
      if combined_strength > 0.7:
        return base_reward * 2 * combined_strength # raise with strong hand
      else:
        return -base_reward * (1 - combined_strength) # penalty for bluffing with weak hand

    else:
      return 0

  def evaluate_hand(self, cards):
    ranks = "23456789TJQKA" # The number at the front of card

    rank_counter = Counter(card.rank for card in cards)
    suit_counter = Counter(card.suit for card in cards)

    rank_vals = {
        rank : index for index, rank in enumerate(ranks, start=2)
    }

    sorted_ranks = sorted((rank_vals[rank], rank) for rank in rank_counter)
    sorted_ranks.reverse() # desc


    # Check for straight
    is_flush = max(suit_counter.values()) >= 5
    rank_seq = [rank_vals[rank] for rank in sorted(ranks, key=lambda rank: rank_vals[rank])]
    straight_high = None

    for i in range(len(rank_seq) - 4):
      if rank_seq[i] - rank_seq[i + 4] == 4:
        straight_high = rank_seq[i] # Hand has a straight
        break

    # I apologize for such inelegant code but hey, it works :D
    if 14 in rank_seq and 2 in rank_seq and 3 in rank_seq and 4 in rank_seq and 5 in rank_seq:
      straight_high = 5 # A2345


    counts = sorted(rank_counter.values(), reverse=True)

    # hand_strength_mappings = {
    #     "High Card": 0.1,
    #     "One Pair": 0.2,
    #     "Two Pair": 0.3,
    #     "Three of a Kind": 0.4,
    #     "Straight": 0.5,
    #     "Flush": 0.6,
    #     "Full House": 0.7,
    #     "Four of a Kind": 0.8,
    #     "Straight Flush": 0.9,
    #     "Royal Flush": 1.0,
    # }
    # if straight_high and is is_flush:

    if straight_high and is_flush:
      hand_type = "Straight Flush"
    elif counts[0] == 4:
      hand_type = "Four of a Kind"
    elif counts[0] == 3 and counts[1] == 2:
      hand_type = "Full House"
    elif is_flush:
      hand_type = "Flush"
    elif straight_high:
      hand_type = "Straight"
    elif counts[0] == 3:
      hand_type = "Three of a Kind"
    elif counts[0] == 2 and counts[1] == 2:
      hand_type = "Two Pair"
    elif counts[0] == 2:
      hand_type = "One Pair"
    elif counts[0] == 2:
      hand_type = "Straight Flush"
    else:
      hand_type = "High Card"

    return (hand_type, straight_high if straight_high else sorted_ranks[0][0])

# Run Several Simulations and Run Reinforcement Learning over many Iterations

In [None]:
class PokerGame(PokerGame):
  def simulate_several_games(self, num_games):
    for game in range(num_games):
      self.start_new_game()

      stages = [("Pre flop", 0), ("Flop", 3), ("Turn", 1), ("River", 1)]

      for stage, num_cards in stages:
        if num_cards > 0:
          # print(num_cards, len(self.community_cards))
          self.deal_community_cards(num_cards)

        for player in self.players:
          state = self.get_current_state(player, stage)
          action = self.agent.choose_action(state)

          reward = self.evaluate_action(action, player)

          next = self.get_current_state(player, stage)
          self.agent.learn(state, action, reward, next)

          # Uncomment this for the betting functionality
          # self.player_action(player, action)

          # action_maps = ["Fold", "Check", "Call", "Raise"][action]

      # Print game updates and q table every 100 games
      if game % 100 == 0:
        print(f"Game {game}: Epsilon = {self.agent.epsilon}")
        # print(f"State: {state or 'No actions yet'}")
        # print(f"Q Values for State: {self.agent.q_table.get(state, 'No actions yet')}")

        # Print out the current Q Table
        for key, value in self.agent.q_table.items():
          print(f"State: {key}, Q-values: {value}")

    # After running all he iterations, show the entire table
    print("Final Q Table")
    for key, value in self.agent.q_table.items():
      print(f"State: {key}, Q-values: {value}")


# Simulate games

In [None]:
# game = PokerGame(["Andrie", "Dhruv", "Lawrence", "Vishnu", "Matthew"], print_mode=False)
game = PokerGame(["Andrie", "Dhruv"], print_mode=False)
# game.start_new_game()

num_games = 1000
game.simulate_several_games(num_games)

Game 0: Epsilon = 0.9920279440699441
State: (5, 2, 'Pre flop'), Q-values: [-0.5    0.    -0.732  0.   ]
State: (5, 2, 'Flop'), Q-values: [-0.5   0.   -0.75  0.  ]
State: (5, 2, 'Turn'), Q-values: [-0.4775  0.25    0.      0.    ]
State: (5, 2, 'River'), Q-values: [-0.5   0.   -0.75  0.  ]
Game 100: Epsilon = 0.4455685064764183
State: (5, 2, 'Pre flop'), Q-values: [ 4.65863967 16.97608564  2.16433055  9.6071688 ]
State: (5, 2, 'Flop'), Q-values: [ 3.09654555 15.76030294  1.67967788  8.93457475]
State: (5, 2, 'Turn'), Q-values: [ 6.70968315 16.77453877  2.98589728  8.73935167]
State: (5, 2, 'River'), Q-values: [ 3.84908347 14.03456245 -0.32653633  7.87479731]
State: (7, 2, 'Turn'), Q-values: [0.      0.596   0.      0.05364]
State: (7, 2, 'River'), Q-values: [0.       0.948935 1.2215   0.109935]
State: (9, 2, 'Turn'), Q-values: [0.    0.    1.828 0.   ]
State: (9, 2, 'River'), Q-values: [-0.19155826  0.944       3.4271304   0.        ]
State: (7, 2, 'Flop'), Q-values: [0. 0. 0. 0.]
Game 