# Set Environment (No Split, No Double Down)

In [2]:
import gym
from gym import spaces
from gym.utils import seeding
import random

# Full deck with distinct face cards
CARDS = [1, 2, 3, 4, 5, 6, 7, 8, 9, '10', 'J', 'Q', 'K'] * 4

def card_value(card):
    return 10 if card in ['10', 'J', 'Q', 'K'] else card

def draw_card(deck):
    return deck.pop()

def draw_hand(deck):
    return [draw_card(deck), draw_card(deck)]

def usable_ace(hand):
    return 1 in hand and sum(card_value(c) for c in hand) + 10 <= 21

def sum_hand(hand):
    total = sum(card_value(c) for c in hand)
    return total + 10 if usable_ace(hand) else total

def is_bust(hand):
    return sum_hand(hand) > 21

def score(hand):
    return 0 if is_bust(hand) else sum_hand(hand)

def is_natural(hand):
    return set(hand) == {1, '10'} or set(hand) == {1, 'J'} or set(hand) == {1, 'Q'} or set(hand) == {1, 'K'}

class BlackjackEnv(gym.Env):
    metadata = {"render.modes": ["human"]}

    def __init__(self, numdecks=4, natural=True):
        super().__init__()
        self.action_space = spaces.Discrete(2)  # 0: Stick, 1: Hit
        self.observation_space = spaces.Tuple((
            spaces.Tuple((spaces.Discrete(32), spaces.Discrete(32))),  # Player hand (2 cards)
            spaces.Discrete(11),  # Dealer's showing card
            spaces.Discrete(2)    # Usable ace
        ))

        self.natural = natural
        self.numdecks = numdecks
        self.decks = CARDS * self.numdecks
        random.shuffle(self.decks)
        self.seed()

    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        random.seed(seed)
        return [seed]

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        if seed is not None:
            self.seed(seed)

        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        self.dealer = draw_hand(self.decks)
        first_hand = draw_hand(self.decks)
        self.hands = [first_hand]
        self.current_hand = 0
        self.actionstaken = 0
        self.hand_results = []
        return self._get_obs()

    def step(self, action):
        assert self.action_space.contains(action), f"Invalid action: {action}"
        if self._deck_is_out():
            self.decks = CARDS * self.numdecks
            random.shuffle(self.decks)

        done = False
        reward = 0
        hand = self.hands[self.current_hand]

        if action == 0:  # Stick
            self._finalize_current_hand()

        elif action == 1:  # Hit
            hand.append(draw_card(self.decks))
            if is_bust(hand):
                self.hand_results.append(-1)
                self._advance_hand()

        self.actionstaken += 1

        if self.current_hand >= len(self.hands):
            while sum_hand(self.dealer) < 17:
                self.dealer.append(draw_card(self.decks))

            if len(self.hand_results) < len(self.hands):
                self._finalize_current_hand()

            reward = sum(self.hand_results)
            done = True

        return self._get_obs(), reward, done, {}

    def _finalize_current_hand(self):
        hand = self.hands[self.current_hand]
        player_score = score(hand)
        dealer_score = score(self.dealer)
        result = float(player_score > dealer_score) - float(player_score < dealer_score)
        if is_natural(hand) and result == 1 and self.natural:
            result = 1.5
        self.hand_results.append(result)
        self._advance_hand()

    def _advance_hand(self):
        self.current_hand += 1
        self.actionstaken = 0

    def _get_obs(self):
        if self.current_hand >= len(self.hands):
            return ((0, 0), card_value(self.dealer[0]), 0)

        hand = self.hands[self.current_hand]
        padded = hand[:2] + [0] * (2 - len(hand))
        return (
            tuple(card_value(c) if c != 0 else 0 for c in padded[:2]),
            card_value(self.dealer[0]),
            int(usable_ace(hand))
        )

    def _deck_is_out(self):
        return len(self.decks) < self.numdecks * len(CARDS) * 0.1

# Set the Mixed Model

In [14]:
import random
import pandas as pd

# === Load Strategy Tables ===
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10, ['S']*10, ['S']*10, ['S']*10, ['S']*10]

strategy_basic = pd.DataFrame(
    index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=basic_data
)

ace_data = [['H']*2 + ['S']*3 + ['H']*5,
            ['H']*3 + ['D']*2 + ['H']*5, ['H']*3 + ['D']*2 + ['H']*5,
            ['H']*2 + ['D']*3 + ['H']*5, ['H']*2 + ['D']*3 + ['H']*5,
            ['H'] + ['D']*4 + ['H']*5, ['S'] + ['D']*4 + ['S']*2 + ['H']*3,
            ['S']*10, ['S']*10, ['S']*10]

strategy_ace = pd.DataFrame(
    index=[12,13,14,15,16,17,18,19,20,21],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=ace_data
)

pair_data = [['P']*5 + ['H']*5, ['P']*5 + ['H']*5, ['H']*3 + ['P']*2 + ['H']*5,
             ['D']*8 + ['H']*2, ['P']*5 + ['H']*5, ['P']*6 + ['H']*4,
             ['P']*10, ['P']*5 + ['S'] + ['P']*2 + ['S']*2, ['S']*10, ['P']*10]

strategy_pair = pd.DataFrame(
    index=[4,6,8,10,12,14,16,18,20,22],
    columns=[2,3,4,5,6,7,8,9,10,'A'],
    data=pair_data
)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Check for pair
    if len(player_hand) == 2 and values[0] == values[1]:
        pair_total = values[0] * 2
        if pair_total in strategy_pair.index:
            action = strategy_pair.loc[pair_total, dealer_val]
            return convert_action(action)

    # Check for usable ace
    if usable_ace(player_hand) and total in strategy_ace.index:
        action = strategy_ace.loc[total, dealer_val]
        return convert_action(action)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    if action_str == 'S':
        return 0  # Stick
    elif action_str == 'H':
        return 1  # Hit
    elif action_str == 'D':
        return 1  # Treat Double as Hit (env doesn't support Double)
    elif action_str == 'P':
        return 1  # Treat Split as Hit (env doesn't support Split)
    else:
        return 1

## Evaluation 1

In [16]:

import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    player_natural = 0
    dealer_natural = 0
    player_blackjack = 0
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)
            
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4),
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_mixed_1 = pd.DataFrame(results)
df_mixed_1

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Player Natural,Dealer Natural
0,1,10000,6089,684,3227,2862.0,60.89,32.27,6.84,0.2862,460,799
1,2,10000,6095,671,3234,2861.0,60.95,32.34,6.71,0.2861,492,758
2,3,10000,6009,711,3280,2729.0,60.09,32.8,7.11,0.2729,484,800
3,4,10000,6061,703,3236,2825.0,60.61,32.36,7.03,0.2825,463,764
4,5,10000,6112,715,3173,2939.0,61.12,31.73,7.15,0.2939,470,783
5,6,10000,6099,673,3228,2871.0,60.99,32.28,6.73,0.2871,503,828


## Evaluation 2

In [15]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    player_natural = 0
    dealer_natural = 0
    money = 100
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1
            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_mixed_2 = pd.DataFrame(results)
df_mixed_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money,Player Natural,Dealer Natural
0,1,10000,6089,684,3227,2862.0,60.89,32.27,6.84,0.2862,2962,460,799
1,2,10000,6095,671,3234,2861.0,60.95,32.34,6.71,0.2861,2961,492,758
2,3,10000,6009,711,3280,2729.0,60.09,32.8,7.11,0.2729,2829,484,800
3,4,10000,6061,703,3236,2825.0,60.61,32.36,7.03,0.2825,2925,463,764
4,5,10000,6112,715,3173,2939.0,61.12,31.73,7.15,0.2939,3039,470,783
5,6,10000,6099,673,3228,2871.0,60.99,32.28,6.73,0.2871,2971,503,828


# Set the Simple Model

In [5]:
import pandas as pd

# Reset Basic
basic_data = [['H']*10, ['H']*10, ['H']*10, ['H']*10, ['H']*10,
              ['H'] + ['D']*4 + ['H']*5,
              ['D']*8 + ['H']*2, ['D']*10, ['H']*2 + ['S']*3 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*5 + ['H']*5, ['S']*5 + ['H']*5,
              ['S']*10,['S']*10, ['S']*10, ['S']*10,['S']*10]
strategy_basic = pd.DataFrame(index=[4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21],
                        columns=[2,3,4,5,6,7,8,9,10,'A'], data=basic_data)

# Will stick with standard bet of 50 everytime
strategy_basic.replace('D', 'H', inplace=True)

# === Strategy-based Action Selection ===
def get_action_from_strategy(player_hand, dealer_card):
    value = card_value(dealer_card)
    dealer_val = 'A' if dealer_card in ['A', 1] else value
    dealer_val = 'A' if dealer_val == 11 else dealer_val

    values = [card_value(c) for c in player_hand]
    total = sum_hand(player_hand)

    # Default basic strategy
    if total in strategy_basic.index:
        action = strategy_basic.loc[total, dealer_val]
        return convert_action(action)

    return 1  # Default to Hit if no match

def convert_action(action_str):
    if action_str == 'S':
        return 0  # Stick
    elif action_str == 'H':
        return 1  # Hit
    elif action_str == 'D':
        return 1  # Treat Double as Hit (env doesn't support Double)
    elif action_str == 'P':
        return 1  # Treat Split as Hit (env doesn't support Split)
    else:
        return 1

strategy_basic

Unnamed: 0,2,3,4,5,6,7,8,9,10,A
4,H,H,H,H,H,H,H,H,H,H
5,H,H,H,H,H,H,H,H,H,H
6,H,H,H,H,H,H,H,H,H,H
7,H,H,H,H,H,H,H,H,H,H
8,H,H,H,H,H,H,H,H,H,H
9,H,H,H,H,H,H,H,H,H,H
10,H,H,H,H,H,H,H,H,H,H
11,H,H,H,H,H,H,H,H,H,H
12,H,H,S,S,S,H,H,H,H,H
13,S,S,S,S,S,H,H,H,H,H


## Evaluation 1

In [11]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    
    wins = 0
    losses = 0
    draws = 0
    player_natural = 0
    dealer_natural = 0
    total_reward = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1
            
            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": num_games,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / num_games) * 100, 4),
        "Loss Rate (%)": round((losses / num_games) * 100, 4),
        "Draw Rate (%)": round((draws / num_games) * 100, 4),
        "Average Reward": round(total_reward / num_games, 4),
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_results = pd.DataFrame(results)
df_results

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Player Natural,Dealer Natural
0,1,10000,6182,693,3125,3057.0,61.82,31.25,6.93,0.3057,502,794
1,2,10000,6136,734,3130,3006.0,61.36,31.3,7.34,0.3006,495,752
2,3,10000,6064,744,3192,2872.0,60.64,31.92,7.44,0.2872,478,806
3,4,10000,6090,686,3224,2866.0,60.9,32.24,6.86,0.2866,466,772
4,5,10000,6124,682,3194,2930.0,61.24,31.94,6.82,0.293,487,767
5,6,10000,6070,768,3162,2908.0,60.7,31.62,7.68,0.2908,494,789


## Evaluation 2

In [9]:
import pandas as pd
import random

# Prepare result storage
results = []

# === Simulation ===
num_games = 10_000
num_decks = 6

for num_deck in range(1, num_decks + 1):
    env = BlackjackEnv(numdecks=num_deck, natural=False)
    
    money = 100
    wins = 0
    losses = 0
    draws = 0
    total_reward = 0
    player_natural = 0
    dealer_natural = 0

    for game in range(1, num_games+1):
        obs = env.reset(seed=game)  # Slightly different seed per game
        done = False
        money -= 1

        while not done:
            current_hand = env.hands[env.current_hand]
            dealer_card = env.dealer[0]
            action = get_action_from_strategy(current_hand, dealer_card)
            obs, reward, done, _ = env.step(action)
            
            if is_natural(current_hand):
                player_natural += 1
            if is_natural(env.dealer):
                dealer_natural += 1

            if done:
                total_reward += reward
                if reward > 0:
                    wins += 1
                    money += 2
                elif reward < 0:
                    losses += 1
                else:
                    draws += 1
                    money += 1
        
        if money <= 0:
            break

    # Store results
    results.append({
        "Decks": num_deck,
        "Games": game,
        "Wins": wins,
        "Draws": draws,
        "Losses": losses,
        "Total Reward": total_reward,
        "Win Rate (%)": round((wins / game) * 100, 4),
        "Loss Rate (%)": round((losses / game) * 100, 4),
        "Draw Rate (%)": round((draws / game) * 100, 4),
        "Average Reward": round(total_reward / game, 4),
        "Final Money": money,
        "Player Natural": player_natural,
        "Dealer Natural": dealer_natural,
    })

# Convert to DataFrame
df_simple_2 = pd.DataFrame(results)
df_simple_2

Unnamed: 0,Decks,Games,Wins,Draws,Losses,Total Reward,Win Rate (%),Loss Rate (%),Draw Rate (%),Average Reward,Final Money,Player Natural,Dealer Natural
0,1,10000,6182,693,3125,3057.0,61.82,31.25,6.93,0.3057,3157,502,794
1,2,10000,6136,734,3130,3006.0,61.36,31.3,7.34,0.3006,3106,495,752
2,3,10000,6064,744,3192,2872.0,60.64,31.92,7.44,0.2872,2972,478,806
3,4,10000,6090,686,3224,2866.0,60.9,32.24,6.86,0.2866,2966,466,772
4,5,10000,6124,682,3194,2930.0,61.24,31.94,6.82,0.293,3030,487,767
5,6,10000,6070,768,3162,2908.0,60.7,31.62,7.68,0.2908,3008,494,789
