# Blackjack
Planner __init__ expects a reward and transition matrix P, which is a nested dictionary 
[OpenAI Gym](https://www.gymlibrary.ml/) style discrete environment where 
P[state][action] is a list of tuples (probability, next state, reward, terminal).

The gym blackjack environment does not include this matrix, so we'll use the BlackjackWrapper class to create a wrapped gym environment, which modifies the observation space and includes P.  

In [1]:
!pip install bettermdptools



In [2]:
import gymnasium as gym
from bettermdptools.utils.blackjack_wrapper import BlackjackWrapper
from bettermdptools.utils.test_env import TestEnv
from bettermdptools.algorithms.planner import Planner
from bettermdptools.algorithms.rl import RL
import numpy as np

In [3]:
base_env = gym.make('Blackjack-v1', render_mode=None)
blackjack = BlackjackWrapper(base_env)

# run VI
V, V_track, pi, pi_track, timings = Planner(blackjack.P).value_iteration()

#test policy
test_scores = TestEnv.test_env(env=blackjack, n_iters=100, render=False, pi=pi, user_input=False)
print(np.mean(test_scores))

# Q-learning
Q, V, pi, Q_track, pi_track = RL(blackjack).q_learning()

#test policy
test_scores = TestEnv.test_env(env=blackjack, n_iters=100, render=False, pi=pi, user_input=False)
print(np.mean(test_scores))

  if not isinstance(terminated, (bool, np.bool8)):


Total Q updates: 37272
runtime = 0.02 seconds
0.72


                                                       

runtime = 0.93 seconds
0.735




In [7]:
blackjack._P.items()

dict_items([(0, {0: [(0.6463918636046386, -1, -1.0, True), (0.35360813639536137, -1, 1.0, True)], 1: [(0.07692307692307693, 20, 0.0, False), (0.07692307692307693, 30, 0.0, False), (0.07692307692307693, 40, 0.0, False), (0.07692307692307693, 50, 0.0, False), (0.07692307692307693, 60, 0.0, False), (0.07692307692307693, 70, 0.0, False), (0.07692307692307693, 80, 0.0, False), (0.07692307692307693, 90, 0.0, False), (0.3076923076923077, 100, 0.0, False), (0.07692307692307693, 210, 0.0, False)]}), (1, {0: [(0.6261251146178569, -1, -1.0, True), (0.37387488538214325, -1, 1.0, True)], 1: [(0.07692307692307693, 21, 0.0, False), (0.07692307692307693, 31, 0.0, False), (0.07692307692307693, 41, 0.0, False), (0.07692307692307693, 51, 0.0, False), (0.07692307692307693, 61, 0.0, False), (0.07692307692307693, 71, 0.0, False), (0.07692307692307693, 81, 0.0, False), (0.07692307692307693, 91, 0.0, False), (0.3076923076923077, 101, 0.0, False), (0.07692307692307693, 211, 0.0, False)]}), (2, {0: [(0.60553155