# Set Environment (No Split, No Double Down)

In [98]:
import gym
from gym import spaces
from gym.utils import seeding
import random

# Full deck with distinct face cards
CARDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, '10', 'J', 'Q', 'K'] * 4

def card_value(card):
    return 10 if card in ['10', 'J', 'Q', 'K'] else card

def draw_card(deck):
    return deck.pop()

def draw_hand(deck):
    return [draw_card(deck), draw_card(deck)]

def usable_ace(hand):
    return 1 in hand and sum(card_value(c) for c in hand) + 10 <= 21

def sum_hand(hand):
    total = sum(card_value(c) for c in hand)
    return total + 10 if usable_ace(hand) else total

def is_bust(hand):
    return sum_hand(hand) > 21

def score(hand):
    return 0 if is_bust(hand) else sum_hand(hand)

def is_natural(hand):
    return set(hand) == {1, '10'} or set(hand) == {1, 'J'} or set(hand) == {1, 'Q'} or set(hand) == {1, 'K'}

class BlackjackEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(self, numdecks=4, natural=True):
        super().__init__()
        self.action_space = spaces.Discrete(2)  # 0: Stick, 1: Hit
        self.observation_space = spaces.Tuple((
            spaces.Tuple((spaces.Discrete(32), spaces.Discrete(32))),  # Player hand (2 cards)
            spaces.Discrete(11),  # Dealer's showing card
            spaces.Discrete(2)    # Usable ace
        ))

        self.natural = natural
        self.numdecks = numdecks
        self.decks = CARDS * self.numdecks
        random.shuffle(self.decks)
        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        random.seed(seed)
        return [seed]

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        if seed is not None:
            self.seed(seed)

        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        self.dealer = draw_hand(self.decks)
        first_hand = draw_hand(self.decks)
        self.hands = [first_hand]
        self.current_hand = 0
        self.actionstaken = 0
        self.hand_results = []
        return self._get_obs()

    def step(self, action):
        assert self.action_space.contains(action), f"Invalid action: {action}"
        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        done = False
        reward = 0
        hand = self.hands[self.current_hand]

        if action == 0:  # Stick
            self._finalize_current_hand()

        elif action == 1:  # Hit
            hand.append(draw_card(self.decks))
            if is_bust(hand):
                self.hand_results.append(-1)
                self._advance_hand()

        self.actionstaken += 1

        if self.current_hand >= len(self.hands):
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.decks))

            if len(self.hand_results) < len(self.hands):
                self._finalize_current_hand()

            reward = sum(self.hand_results)
            done = True

        return self._get_obs(), reward, done, {}

    def _finalize_current_hand(self):
        hand = self.hands[self.current_hand]
        player_score = score(hand)
        dealer_score = score(self.dealer)
        result = float(player_score > dealer_score) - float(player_score < dealer_score)
        if is_natural(hand) and result == 1 and self.natural:
            result = 1.5
        self.hand_results.append(result)
        self._advance_hand()

    def _advance_hand(self):
        self.current_hand += 1
        self.actionstaken = 0

    def _get_obs(self):
        if self.current_hand >= len(self.hands):
            return ((0, 0), card_value(self.dealer[0]), 0)

        hand = self.hands[self.current_hand]
        padded = hand[:2] + [0] * (2 - len(hand))
        return (
            tuple(card_value(c) if c != 0 else 0 for c in padded[:2]),
            card_value(self.dealer[0]),
            int(usable_ace(hand))
        )

    def _deck_is_out(self):
        return len(self.decks) < self.numdecks * len(CARDS) * 0.1

# Set the Mixed Model

In [99]:
import random
import pandas as pd

# === Load Strategy Tables ===
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10, ['S']*10, ['S']*10, ['S']*10, ['S']*10]

strategy_basic = pd.DataFrame(
    index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=basic_data
)

ace_data = [['H']*2 + ['S']*3 + ['H']*5,
            ['H']*3 + ['D']*2 + ['H']*5, ['H']*3 + ['D']*2 + ['H']*5,
            ['H']*2 + ['D']*3 + ['H']*5, ['H']*2 + ['D']*3 + ['H']*5,
            ['H'] + ['D']*4 + ['H']*5, ['S'] + ['D']*4 + ['S']*2 + ['H']*3,
            ['S']*10, ['S']*10, ['S']*10]

strategy_ace = pd.DataFrame(
    index=[12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=ace_data
)

pair_data = [['P']*5 + ['H']*5, ['P']*5 + ['H']*5, ['H']*3 + ['P']*2 + ['H']*5,
             ['D']*8 + ['H']*2, ['P']*5 + ['H']*5, ['P']*6 + ['H']*4,
             ['P']*10, ['P']*5 + ['S'] + ['P']*2 + ['S']*2, ['S']*10, ['P']*10]

strategy_pair = pd.DataFrame(
    index=[4,6,8,10,12,14,16,18,20,22],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=pair_data
)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Check for pair
    if len(player_hand) == 2 and values[0] == values[1]:
        pair_total = values[0] * 2
        if pair_total in strategy_pair.index:
            action = strategy_pair.loc[pair_total, dealer_val]
            return convert_action(action)

    # Check for usable ace
    if usable_ace(player_hand) and total in strategy_ace.index:
        action = strategy_ace.loc[total, dealer_val]
        return convert_action(action)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    if action_str == 'S':
        return 0  # Stick
    elif action_str == 'H':
        return 1  # Hit
    elif action_str == 'D':
        return 1  # Treat Double as Hit (env doesn't support Double)
    elif action_str == 'P':
        return 1  # Treat Split as Hit (env doesn't support Split)
    else:
        return 1

## Evaluation 1

In [100]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=True)
    
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(num_games):
        obs = env.reset()  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4)
    })

# Convert to DataFrame
df_mixed_1 = pd.DataFrame(results)
df_mixed_1

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward
0,1,10000,6096,651,3253,3072.5,60.96,32.53,6.51,0.3073
1,2,10000,6071,676,3253,3049.5,60.71,32.53,6.76,0.3049
2,3,10000,6055,694,3251,3036.5,60.55,32.51,6.94,0.3036
3,4,10000,6047,744,3209,3064.5,60.47,32.09,7.44,0.3065
4,5,10000,6156,712,3132,3266.5,61.56,31.32,7.12,0.3266
5,6,10000,6162,666,3172,3231.5,61.62,31.72,6.66,0.3231


## Evaluation 2

In [101]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=True)
    
    money = 100
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset()  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)
            
            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
    })

# Convert to DataFrame
df_mixed_2 = pd.DataFrame(results)
df_mixed_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money
0,1,10000,6236,660,3104,3363.0,62.36,31.04,6.6,0.3363,3232
1,2,10000,6125,656,3219,3145.5,61.25,32.19,6.56,0.3145,3006
2,3,10000,6122,721,3157,3188.5,61.22,31.57,7.21,0.3189,3065
3,4,10000,5996,734,3270,2948.0,59.96,32.7,7.34,0.2948,2826
4,5,10000,6085,684,3231,3089.5,60.85,32.31,6.84,0.309,2954
5,6,10000,6057,747,3196,3094.0,60.57,31.96,7.47,0.3094,2961


# Set the Simple Model

In [84]:
# Reset Basic
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10,['S']*10, ['S']*10, ['S']*10,['S']*10]
strategy_basic = pd.DataFrame(index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
                        columns=[2,3,4,5,6,7,8,9,10,'A'], data=basic_data)

# Will stick with standard bet of 50 everytime
strategy_basic.replace('D', 'H', inplace=True)
strategy_ace.replace('D', 'H', inplace=True)
strategy_pair.replace(['D', 'P'], 'H', inplace=True)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    if action_str == 'S':
        return 0  # Stick
    elif action_str == 'H':
        return 1  # Hit
    elif action_str == 'D':
        return 1  # Treat Double as Hit (env doesn't support Double)
    elif action_str == 'P':
        return 1  # Treat Split as Hit (env doesn't support Split)
    else:
        return 1

In [None]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=True)
    
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(num_games):
        obs = env.reset()  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4)
    })

# Convert to DataFrame
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward
0,1,10000,6160,695,3145,3248.0,61.6,31.45,6.95,0.3248
1,2,10000,6103,741,3156,3165.5,61.03,31.56,7.41,0.3165
2,3,10000,6116,687,3197,3140.0,61.16,31.97,6.87,0.314
3,4,10000,6086,704,3210,3091.0,60.86,32.1,7.04,0.3091
4,5,10000,6162,687,3151,3250.0,61.62,31.51,6.87,0.325
5,6,10000,6204,696,3100,3328.5,62.04,31.0,6.96,0.3328


In [None]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=True)
    
    money = 100
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset()  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
    })

# Convert to DataFrame
df_simple_2 = pd.DataFrame(results)
df_simple_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money
0,1,10000,6182,687,3131,3291.0,61.82,31.31,6.87,0.3291,3151
1,2,10000,6104,737,3159,3164.0,61.04,31.59,7.37,0.3164,3045
2,3,10000,6093,692,3215,3093.5,60.93,32.15,6.92,0.3094,2978
3,4,10000,6102,703,3195,3123.0,61.02,31.95,7.03,0.3123,3007
4,5,10000,6156,687,3157,3230.5,61.56,31.57,6.87,0.3231,3099
5,6,10000,6215,699,3086,3356.0,62.15,30.86,6.99,0.3356,3229
