In [1]:
import numpy as np
import sys
if "../../" not in sys.path:
  sys.path.append("../../") 
from lib.envs.blackjack import BlackjackEnv
from tqdm.notebook import tqdm, trange
import matplotlib.pyplot as plt
from matplotlib import cm

In [24]:
%matplotlib widget

In [32]:
class Model():
    """Monte Carlo policy evaluation for a fixed blackjack strategy.

    Estimates the state-value function ``V`` of the policy implemented by
    :meth:`strategy` by averaging the returns sampled from episodes played
    on ``self.env``.

    A stored state is the list ``[dealer_showing, player_sum, usable_ace]``.
    Its components index the axes of ``V`` and ``NV`` in that order, each
    offset by the lower bound of its range in ``set_of_states``.
    """

    # Axis order of ``V``/``NV``; identical to the layout of a stored state.
    _AXES = ("dealer_showing", "player_sum", "usable_ace")

    def __init__(self, env=None, gamma=1):
        """Set up the environment and the zero-initialised tables.

        Args:
            env: Object providing ``reset()`` and ``step(action)``. When
                omitted, a new ``BlackjackEnv`` is created.
            gamma: Discount factor applied to later rewards when the return
                is accumulated. The default of 1 means no discounting.
        """
        self.env = BlackjackEnv() if env is None else env
        # Set of states: inclusive (low, high) bounds of every component.
        self.set_of_states = {
            "dealer_showing": (1, 10),
            "player_sum": (12, 21),
            "usable_ace": (0, 1),
        }
        shape = tuple(
            high - low + 1
            for low, high in (self.set_of_states[axis] for axis in self._AXES)
        )
        # Value function estimate, one cell per tracked state.
        self.V = np.zeros(shape)

        self.gamma = gamma

        # Number of returns averaged into each cell of V.
        self.NV = np.zeros(self.V.shape)

    def strategy(self, observation):
        """Return the action the fixed policy takes for ``observation``.

        Args:
            observation: ``(score, dealer_score, usable_ace)``.

        Returns:
            0 (stick) when the player's score is 20 or more, 1 (hit) otherwise.
        """
        score, _dealer_score, _usable_ace = observation
        # Stick (action 0) if the score is >= 20, hit (action 1) otherwise
        return 0 if score >= 20 else 1

    def print_observation(self, observation):
        """Print the player's score, usable-ace flag and the dealer's score."""
        score, dealer_score, usable_ace = observation
        print("Player Score: {} (Usable Ace: {}), Dealer Score: {}".format(
              score, usable_ace, dealer_score))

    def _state_index(self, state):
        """Translate a stored state into an index into ``V``/``NV``.

        Returns:
            A tuple of offsets, or ``None`` when any component lies outside
            its range in ``set_of_states``.
        """
        index = []
        for axis, value in zip(self._AXES, state):
            low, high = self.set_of_states[axis]
            if not low <= value <= high:
                return None
            index.append(value - low)
        return tuple(index)

    def aproximateV(self, episodes):
        """Update ``V`` in place from ``episodes`` sampled episodes.

        Every non-terminal state visited in an episode has the return that
        followed it folded into its running average (every-visit Monte Carlo
        with an incremental mean); ``NV`` counts the returns per state.

        Args:
            episodes: Number of episodes to generate.
        """
        for _ in tqdm(range(episodes)):
            states, rewards, _actions = self.generate_episode()
            # The last recorded state is the one observed after the final
            # step; no action is taken from it, so it is not evaluated.
            visited = states[:-1]
            G = 0
            # Walk backwards so G is always the return that followed step t.
            for t in reversed(range(len(visited))):
                G = self.gamma * G + rewards[t]

                idx = self._state_index(visited[t])
                if idx is None:
                    # A state outside the table (e.g. a player sum below the
                    # lower bound) would produce a negative offset, which
                    # NumPy wraps around onto a different state's cell.
                    continue

                self.NV[idx] += 1
                # Incremental mean: V <- V + (G - V) / N
                self.V[idx] += (G - self.V[idx]) / self.NV[idx]

    def generate_episode(self, max_steps=100):
        """Play one episode on ``self.env`` following :meth:`strategy`.

        Args:
            max_steps: Upper bound on the number of actions taken.

        Returns:
            ``(states, rewards, actions)``. ``states`` holds the initial
            state plus the state observed after every action, so it is one
            element longer than ``rewards`` and ``actions``.
        """
        observation = self.env.reset()

        states, rewards, actions = [], [], []
        states.append(self._to_state(observation))

        for _ in range(max_steps):
            action = self.strategy(observation)
            actions.append(action)
            observation, reward, done, _info = self.env.step(action)
            rewards.append(reward)
            states.append(self._to_state(observation))

            if done:
                break
        return states, rewards, actions

    @staticmethod
    def _to_state(observation):
        """Reorder ``(score, dealer_score, usable_ace)`` into a stored state."""
        return [observation[1], observation[0], int(observation[2])]
         
    

In [33]:
# Larger run: value estimate built from 100,000 sampled episodes.
m = Model()
m.aproximateV(episodes=100_000)

# Smaller run: 10,000 episodes, trained on its own fresh Model instance.
m1 = Model()
m1.aproximateV(episodes=10_000)

  0%|          | 0/100000 [00:00<?, ?it/s]

  0%|          | 0/10000 [00:00<?, ?it/s]

In [29]:
# Surface plots of the estimated state values: one row per trained model,
# one column per usable-ace setting.
fig = plt.figure(figsize=(10, 10))

# Make data. The axis values come from the model's own state ranges so the
# grid always has the same shape as a slice of V.
player_low, player_high = m.set_of_states["player_sum"]
dealer_low, dealer_high = m.set_of_states["dealer_showing"]
X = np.arange(player_low, player_high + 1)
Y = np.arange(dealer_low, dealer_high + 1)
# Rows of the grid follow Y (dealer card) and columns follow X (player sum),
# matching V's (dealer_showing, player_sum) axis order.
X, Y = np.meshgrid(X, Y)

# (model, run label, colormap) per row; the labels mirror the episode counts
# passed to aproximateV when m and m1 were trained.
panels = [
    (m, "100,000 episodes", cm.plasma),
    (m1, "10,000 episodes", cm.coolwarm),
]
# Index along V's last axis -> panel label.
ace_labels = ("no usable ace", "usable ace")

for panel_row, (estimator, run_label, colormap) in enumerate(panels):
    for ace, ace_label in enumerate(ace_labels):
        ax = fig.add_subplot(2, 2, 2 * panel_row + ace + 1, projection='3d')
        ax.plot_surface(X, Y, estimator.V[:, :, ace], linewidth=0,
                        antialiased=False, cmap=colormap)
        ax.set_title("{} ({})".format(run_label, ace_label))
        ax.set_xlabel("Player sum")
        ax.set_ylabel("Dealer showing")
        ax.set_zlabel("Estimated value")

plt.show()

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …