# Set Environment

In [2]:
import gym
from gym import spaces
from gym.utils import seeding
import random

# Full deck with distinct face cards
CARDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, '10', 'J', 'Q', 'K'] * 4

def card_value(card):
    return 10 if card in ['10', 'J', 'Q', 'K'] else card

def draw_card(deck):
    return deck.pop()

def draw_hand(deck):
    return [draw_card(deck), draw_card(deck)]

def usable_ace(hand):
    return 1 in hand and sum(card_value(c) for c in hand) + 10 <= 21

def sum_hand(hand):
    total = sum(card_value(c) for c in hand)
    return total + 10 if usable_ace(hand) else total

def is_bust(hand):
    return sum_hand(hand) > 21

def score(hand):
    return 0 if is_bust(hand) else sum_hand(hand)

def is_natural(hand):
    return set(hand) == {1, '10'} or set(hand) == {1, 'J'} or set(hand) == {1, 'Q'} or set(hand) == {1, 'K'}

def can_double_down(hand, actionstaken):
    return len(hand) == 2 and actionstaken == 0

class BlackjackEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(self, numdecks=4, natural=True):
        super().__init__()
        self.action_space = spaces.Discrete(4)  # 0: Stick, 1: Hit, 2: Double Down, 3: Split
        self.observation_space = spaces.Tuple((
            spaces.Tuple((spaces.Discrete(32), spaces.Discrete(32))),  # Player hand (2 cards)
            spaces.Discrete(11),  # Dealer's showing card
            spaces.Discrete(2),   # Usable ace
            spaces.Discrete(2)    # Can double down
        ))

        self.natural = natural
        self.numdecks = numdecks
        self.decks = CARDS * self.numdecks
        random.shuffle(self.decks)
        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        random.seed(seed)
        return [seed]

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        if seed is not None:
            self.seed(seed)

        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        self.dealer = draw_hand(self.decks)
        first_hand = draw_hand(self.decks)
        self.hands = [first_hand]
        self.current_hand = 0
        self.actionstaken = 0
        self.hand_results = []
        return self._get_obs()

    def step(self, action):
        assert self.action_space.contains(action), f"Invalid action: {action}"
        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        done = False
        reward = 0
        hand = self.hands[self.current_hand]

        if action == 0:  # Stick
            self._finalize_current_hand()

        elif action == 1:  # Hit
            hand.append(draw_card(self.decks))
            if is_bust(hand):
                self.hand_results.append(-1)
                self._advance_hand()

        elif action == 2:  # Double Down
            if not can_double_down(hand, self.actionstaken):
                raise ValueError("Invalid double down attempt.")
            hand.append(draw_card(self.decks))
            if is_bust(hand):
                self.hand_results.append(-2)
            else:
                self._finalize_current_hand(double=True)

        elif action == 3:  # Split
            if len(hand) != 2 or hand[0] != hand[1]:
                raise ValueError("Invalid split attempt.")
            card = hand[0]
            self.hands[self.current_hand] = [card, draw_card(self.decks)]
            self.hands.insert(self.current_hand + 1, [card, draw_card(self.decks)])

        self.actionstaken += 1

        if self.current_hand >= len(self.hands):
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.decks))

            if len(self.hand_results) < len(self.hands):
                self._finalize_current_hand()

            reward = sum(self.hand_results)
            done = True
        
        return self._get_obs(), reward, done, {}

    def _finalize_current_hand(self, double=False):
        hand = self.hands[self.current_hand]
        player_score = score(hand)
        dealer_score = score(self.dealer)
        result = float(player_score > dealer_score) - float(player_score < dealer_score)
        if is_natural(hand) and result == 1 and self.natural:
            result = 1.5
        self.hand_results.append(result * (2 if double else 1))
        self._advance_hand()

    def _advance_hand(self):
        self.current_hand += 1
        self.actionstaken = 0

    def _get_obs(self):
        if self.current_hand >= len(self.hands):
            return ((0, 0), self.dealer[0], 0, 0)

        hand = self.hands[self.current_hand]
        padded = hand[:2] + [0] * (2 - len(hand))
        return (
            tuple(card_value(c) if c != 0 else 0 for c in padded[:2]),
            card_value(self.dealer[0]),
            int(usable_ace(hand)),
            int(can_double_down(hand, self.actionstaken))
        )

    def _deck_is_out(self):
        return len(self.decks) < self.numdecks * len(CARDS) * 0.1

# Set the Mixed Model

In [3]:
import random
import pandas as pd

# === Load Strategy Tables ===
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10, ['S']*10, ['S']*10, ['S']*10, ['S']*10]

strategy_basic = pd.DataFrame(
    index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=basic_data
)

ace_data = [['H']*2 + ['S']*3 + ['H']*5,
            ['H']*3 + ['D']*2 + ['H']*5, ['H']*3 + ['D']*2 + ['H']*5,
            ['H']*2 + ['D']*3 + ['H']*5, ['H']*2 + ['D']*3 + ['H']*5,
            ['H'] + ['D']*4 + ['H']*5, ['S'] + ['D']*4 + ['S']*2 + ['H']*3,
            ['S']*10, ['S']*10, ['S']*10]

strategy_ace = pd.DataFrame(
    index=[12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=ace_data
)

pair_data = [['P']*5 + ['H']*5, ['P']*5 + ['H']*5, ['H']*3 + ['P']*2 + ['H']*5,
             ['D']*8 + ['H']*2, ['P']*5 + ['H']*5, ['P']*6 + ['H']*4,
             ['P']*10, ['P']*5 + ['S'] + ['P']*2 + ['S']*2, ['S']*10, ['P']*10]

strategy_pair = pd.DataFrame(
    index=[4,6,8,10,12,14,16,18,20,22],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=pair_data
)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Check for pair
    if len(player_hand) == 2 and values[0] == values[1]:
        pair_total = values[0] * 2
        if pair_total in strategy_pair.index:
            action = strategy_pair.loc[pair_total, dealer_val]
            return convert_action(action)

    # Check for usable ace
    if usable_ace(player_hand) and total in strategy_ace.index:
        action = strategy_ace.loc[total, dealer_val]
        return convert_action(action)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    """
    Converts strategy action string to BlackjackEnv-compatible action ID.
    """
    action_map = {
        'S': 0,  # Stick
        'H': 1,  # Hit
        'D': 2,  # Double
        'P': 3   # Split
    }
    return action_map.get(action_str, None)

## Evaluation 1

In [7]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    player_natural = 0
    dealer_natural = 0
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0
    

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1
            
            # Determine valid actions
            valid_actions = [0, 1]  # Stick, Hit always valid
            
            if can_double_down(current_hand, env.actionstaken):
                valid_actions.append(2)  # Double down valid

            # Check if split is valid (same rank and 2 cards)
            if len(current_hand) == 2 and card_value(current_hand[0]) == card_value(current_hand[1]):
                valid_actions.append(3)  # Split valid
            
            # Get recommended action from strategy
            recommended_action = get_action_from_strategy(current_hand, dealer_card)
            
            # Use the recommended action only if it's valid, otherwise fall back to hit
            if recommended_action in valid_actions:
                action = recommended_action
            else:
                # If double down was recommended but not valid, hit instead
                if recommended_action == 2:
                    action = 1  # Hit
                # If split was recommended but not valid, hit instead
                elif recommended_action == 3:
                    action = 1  # Hit
                else:
                    action = 1  # Default to Hit if invalid
            
            obs, reward, done, _ = env.step(action)

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4),
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_mixed_1 = pd.DataFrame(results)
df_mixed_1

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Player Natural,Dealer Natural
0,1,10000,6069,674,3257,3378.0,60.69,32.57,6.74,0.3378,494,852
1,2,10000,6154,679,3167,3631.0,61.54,31.67,6.79,0.3631,480,769
2,3,10000,6151,711,3138,3644.0,61.51,31.38,7.11,0.3644,445,779
3,4,10000,6240,669,3091,3812.0,62.4,30.91,6.69,0.3812,516,737
4,5,10000,6101,751,3148,3632.0,61.01,31.48,7.51,0.3632,474,759
5,6,10000,6051,705,3244,3437.0,60.51,32.44,7.05,0.3437,485,763


## Evaluation 2

In [6]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    
    money = 100
    wins = 0
    losses = 0
    draws = 0
    player_natural = 0
    dealer_natural = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1
            
            # Determine valid actions
            valid_actions = [0, 1]  # Stick, Hit always valid
            
            if can_double_down(current_hand, env.actionstaken):
                valid_actions.append(2)  # Double down valid

            # Check if split is valid (same rank and 2 cards)
            if len(current_hand) == 2 and card_value(current_hand[0]) == card_value(current_hand[1]):
                valid_actions.append(3)  # Split valid
            
            # Get recommended action from strategy
            recommended_action = get_action_from_strategy(current_hand, dealer_card)
            
            # Use the recommended action only if it's valid, otherwise fall back to hit
            if recommended_action in valid_actions:
                action = recommended_action
            else:
                # If double down was recommended but not valid, hit instead
                if recommended_action == 2:
                    action = 1  # Hit
                # If split was recommended but not valid, hit instead
                elif recommended_action == 3:
                    action = 1  # Hit
                else:
                    action = 1  # Default to Hit if invalid
            
            obs, reward, done, _ = env.step(action)
            
            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_mixed_2 = pd.DataFrame(results)
df_mixed_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money,Player Natural,Dealer Natural
0,1,10000,6069,674,3257,3378.0,60.69,32.57,6.74,0.3378,2912,494,852
1,2,10000,6154,679,3167,3631.0,61.54,31.67,6.79,0.3631,3087,480,769
2,3,10000,6151,711,3138,3644.0,61.51,31.38,7.11,0.3644,3113,445,779
3,4,10000,6240,669,3091,3812.0,62.4,30.91,6.69,0.3812,3249,516,737
4,5,10000,6101,751,3148,3632.0,61.01,31.48,7.51,0.3632,3053,474,759
5,6,10000,6051,705,3244,3437.0,60.51,32.44,7.05,0.3437,2907,485,763


# Set the Simple Model

In [8]:
# Reset Basic
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10,['S']*10, ['S']*10, ['S']*10,['S']*10]
strategy_basic = pd.DataFrame(index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
                        columns=[2,3,4,5,6,7,8,9,10,'A'], data=basic_data)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    """
    Converts strategy action string to BlackjackEnv-compatible action ID.
    """
    action_map = {
        'S': 0,  # Stick
        'H': 1,  # Hit
        'D': 2,  # Double
        'P': 3   # Split
    }
    return action_map.get(action_str, None)

## Evaluation 1

In [16]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0
    dealer_natural = 0
    player_natural = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            
            # Determine valid actions
            valid_actions = [0, 1]  # Stick, Hit always valid
            
            if can_double_down(current_hand, env.actionstaken):
                valid_actions.append(2)  # Double down valid

            # Check if split is valid (same rank and 2 cards)
            if len(current_hand) == 2 and card_value(current_hand[0]) == card_value(current_hand[1]):
                valid_actions.append(3)  # Split valid
            
            # Get recommended action from strategy
            recommended_action = get_action_from_strategy(current_hand, dealer_card)
            
            # Use the recommended action only if it's valid, otherwise fall back to hit
            if recommended_action in valid_actions:
                action = recommended_action
            else:
                # If double down was recommended but not valid, hit instead
                if recommended_action == 2:
                    action = 1  # Hit
                # If split was recommended but not valid, hit instead
                elif recommended_action == 3:
                    action = 1  # Hit
                else:
                    action = 1  # Default to Hit if invalid
            
            obs, reward, done, _ = env.step(action)

            if done:
                if is_natural(current_hand):
                    player_natural += 1
                if is_natural(env.dealer):
                    dealer_natural += 1
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4),
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_simple_1 = pd.DataFrame(results)
df_simple_1

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Player Natural,Dealer Natural
0,1,10000,6196,685,3119,3517.0,61.96,31.19,6.85,0.3517,481,478
1,2,10000,6153,683,3164,3466.0,61.53,31.64,6.83,0.3466,459,449
2,3,10000,6063,673,3264,3233.0,60.63,32.64,6.73,0.3233,470,484
3,4,10000,6098,703,3199,3327.0,60.98,31.99,7.03,0.3327,508,487
4,5,10000,6135,711,3154,3408.0,61.35,31.54,7.11,0.3408,487,458
5,6,10000,6025,716,3259,3200.0,60.25,32.59,7.16,0.32,447,479


## Evaluation 2

In [15]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    
    money = 100
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0
    player_natural = 0
    dealer_natural = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            
            # Determine valid actions
            valid_actions = [0, 1]  # Stick, Hit always valid
            
            if can_double_down(current_hand, env.actionstaken):
                valid_actions.append(2)  # Double down valid

            # Check if split is valid (same rank and 2 cards)
            if len(current_hand) == 2 and card_value(current_hand[0]) == card_value(current_hand[1]):
                valid_actions.append(3)  # Split valid
            
            # Get recommended action from strategy
            recommended_action = get_action_from_strategy(current_hand, dealer_card)
            
            # Use the recommended action only if it's valid, otherwise fall back to hit
            if recommended_action in valid_actions:
                action = recommended_action
            else:
                # If double down was recommended but not valid, hit instead
                if recommended_action == 2:
                    action = 1  # Hit
                # If split was recommended but not valid, hit instead
                elif recommended_action == 3:
                    action = 1  # Hit
                else:
                    action = 1  # Default to Hit if invalid
            
            obs, reward, done, _ = env.step(action)
            
            if done:
                if is_natural(current_hand):
                    player_natural += 1
                if is_natural(env.dealer):
                    dealer_natural += 1
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_simple_2 = pd.DataFrame(results)
df_simple_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money,Player Natural,Dealer Natural
0,1,10000,6196,685,3119,3516.0,61.96,31.19,6.85,0.3516,3177,481,478
1,2,10000,6153,683,3164,3466.0,61.53,31.64,6.83,0.3466,3089,459,449
2,3,10000,6063,673,3264,3233.0,60.63,32.64,6.73,0.3233,2899,470,484
3,4,10000,6098,703,3199,3327.0,60.98,31.99,7.03,0.3327,2999,508,487
4,5,10000,6135,711,3154,3408.0,61.35,31.54,7.11,0.3408,3081,487,458
5,6,10000,6025,716,3259,3200.0,60.25,32.59,7.16,0.32,2866,447,479
