# Aim
Get familiarized with the Blackjack Environment

In [20]:
import numpy as np
import random

class SimpleBlackjackEnv:
    def __init__(self):
        self.action_space = [0, 1]  # 0: Stick, 1: Hit
        self.reset()  # Initialize the game state

    def reset(self):
        # Initialize the player's score, dealer's score, and usable ace flag
        self.player_score = random.randint(12, 21)  # Simulate a player's score between 12 and 21
        self.dealer_score = random.randint(1, 11)  # Dealer shows one card (between 1 and 11)
        self.usable_ace = random.choice([False, True])  # Randomly decide if player has a usable ace
        return (self.player_score, self.dealer_score, self.usable_ace)

    def step(self, action):
        if action == 1:  # Hit
            # Simulate drawing a card (1 to 11) and update the player's score
            new_card = random.randint(1, 11)
            self.player_score += new_card
            # Check if player busts
            if self.player_score > 21:
                return (self.player_score, self.dealer_score, self.usable_ace), -1, True, {}
        
        # If the player sticks or doesn't bust
        if self.player_score >= 21:
            return (self.player_score, self.dealer_score, self.usable_ace), 1, True, {}
        
        return (self.player_score, self.dealer_score, self.usable_ace), 0, False, {}

def print_observation(observation):
    """
    Print the observation in an interpretable format.
    """
    score, dealer_score, usable_ace = observation
    print(f'Player Score: {score} (Usable Ace: {usable_ace}), Dealer Score: {dealer_score}')

def strategy(observation):
    """
    Decide the action to take based on the current observation.
    """
    score, dealer_score, usable_ace = observation
    return 0 if score >= 20 else 1  # Stick if score >= 20, otherwise hit

# Initialize the Blackjack environment
env = SimpleBlackjackEnv()

# Define the number of episodes and iterations
n_episodes = 20
n_iter = 100

# Variables to track performance
total_reward = 0
wins = 0
losses = 0

# Run multiple episodes
for episode in range(n_episodes):
    observation = env.reset()  # Reset the environment for a new episode
    
    print(f"Episode {episode + 1}:\n")
    
    for it in range(n_iter):
        print_observation(observation)
        action = strategy(observation)
        print("Taking action: {}".format(["Stick", "Hit"][action]))
        
        # Step through the environment
        observation, reward, done, _ = env.step(action)
        
        total_reward += reward  # Accumulate the total reward
        
        # Check for termination
        if done:
            print_observation(observation)
            print('Game ended. Reward = {} \n'.format(float(reward)))
            # Track wins and losses based on reward
            if reward > 0:
                wins += 1
            else:
                losses += 1
            break  # Exit the loop if the game is over

# Print final statistics after all episodes
print(f"Total Reward: {total_reward}")
print(f"Wins: {wins}, Losses: {losses}, Win Rate: {wins / n_episodes * 100:.2f}%")


Episode 1:

Player Score: 17 (Usable Ace: True), Dealer Score: 3
Taking action: Hit
Player Score: 22 (Usable Ace: True), Dealer Score: 3
Game ended. Reward = -1.0 

Episode 2:

Player Score: 17 (Usable Ace: True), Dealer Score: 10
Taking action: Hit
Player Score: 19 (Usable Ace: True), Dealer Score: 10
Taking action: Hit
Player Score: 27 (Usable Ace: True), Dealer Score: 10
Game ended. Reward = -1.0 

Episode 3:

Player Score: 14 (Usable Ace: True), Dealer Score: 9
Taking action: Hit
Player Score: 16 (Usable Ace: True), Dealer Score: 9
Taking action: Hit
Player Score: 24 (Usable Ace: True), Dealer Score: 9
Game ended. Reward = -1.0 

Episode 4:

Player Score: 14 (Usable Ace: True), Dealer Score: 2
Taking action: Hit
Player Score: 15 (Usable Ace: True), Dealer Score: 2
Taking action: Hit
Player Score: 25 (Usable Ace: True), Dealer Score: 2
Game ended. Reward = -1.0 

Episode 5:

Player Score: 20 (Usable Ace: True), Dealer Score: 4
Taking action: Stick
Player Score: 20 (Usable Ace: True)

In [9]:
import numpy as np
import sys
if "../" not in sys.path:
    sys.path.append("../") 

from lib.envs.blackjack import BlackjackEnv

In [10]:
# Get the environment
env = BlackjackEnv()

In [11]:
# Number of episodes to run
n_episodes = 20

# Number of iterations to run
n_iter = 100

In [12]:
def print_observation(observation):
    """
    Print the observation in an interpretable format.
    """
    score, dealer_score, usable_ace = observation
    print('Player Score: {} (Usable ace: {}), Dealer Score: {}'.format(score, usable_ace, dealer_score))

def strategy(observation):
    """
    Decide the action to take for the given observation.
    """
    score, dealer_score, usable_ace = observation
    
    # Action 1: Hit (show the next card)
    # Action 0: Stick (stop showing cards)
    return 0 if score >= 20 else 1

In [13]:
for i in range(n_episodes):
    observation = env.reset()  # Ensure this returns a valid observation
    
    # Check if observation is valid
    if observation is None:
        print("Reset returned None, skipping episode.")
        continue
    
    for it in range(n_iter):
        print_observation(observation)
        action = strategy(observation)
        print("Taking action {}".format(["Stick", "Hit"][action]))
        observation, reward, done, _ = env.step(action)
        
        # Termination
        if done:
            print_observation(observation)
            print('Game ended. Reward = {} \n'.format(float(reward)))
            break

Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
Reset returned None, skipping episode.
