# Testing the gym-2048 0.2.6 environment

In [51]:
import numpy as np
import time
from IPython.display import clear_output
import matplotlib.pyplot as plt
from tqdm import tqdm

In [52]:
from envs import game2048_env

In [53]:
id_action_dict = {0 : "UP",
                  1 : "RIGHT",
                  2 : "DOWN",
                  3 : "LEFT"}

In [73]:
env = game2048_env.Game2048Env()
env.seed()

env.reset()
#env.render()

n_plays = 10000

reward_array = []
highest_array = []

for _ in tqdm(range(n_plays)):
    done = False
    moves = 0

    reward_sum = 0
    while not done:
        #clear_output(wait=True)

        action = env.np_random.choice(range(4), 1).item()
        next_state, reward, done, info = env.step(action)
        moves += 1
        reward_sum += reward

        #print(info)

        #print(f'Next Action: {id_action_dict[action]} \nReward: {reward} \nReward Sum: {reward_sum}')
        #rgb = env.render()

        #plt.show()

        #time.sleep(0.1)
        
        if done:
            reward_array.append(reward_sum)
            highest_array.append(info['highest'])
            env.reset()

std_reward = np.std(reward_array)
mean_reward = np.mean(reward_array)

std_highest = np.std(highest_array)
mean_highest = np.mean(highest_array)

print(f"Mean rewards across {n_plays} plays : {mean_reward} +/- {1.96 * std_reward / np.sqrt(n_plays)}")
print(f"Mean highest across {n_plays} plays : {mean_highest} +/- {1.96 * std_highest / np.sqrt(n_plays)}")

100%|████████████████████████████████████| 10000/10000 [01:33<00:00, 107.16it/s]

Mean rewards across 10000 plays : 1095.9148 +/- 10.572980993945992
Mean highest across 10000 plays : 107.5904 +/- 1.0968311706033043





In [33]:
np.rollaxis(next_state, 2).shape

(16, 4, 4)

In [76]:
next_state

array([[[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],

       [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]])

# Testing gym-2048

https://github.com/mllobet/gym-2048

**ATTENTION** : the gym version to use is **0.17.0**

In [1]:
import numpy as np
import time
from IPython.display import clear_output

In [2]:
import gym_2048

In [3]:
import gym

In [4]:
gym.__version__

'0.17.0'

In [6]:
env = gym.make('game-2048-v0')
#env.seed()

state = env.reset()

done = False
moves = 0
while not done:
    clear_output(wait=True)
    
    valid_moves = np.where(env.moves_available())[0]
    
    action = np.random.choice(valid_moves, 1).item()
    next_state, reward, done, info = env.step(action)
    moves += 1
    
    #print(f'Next Action: {id_action_dict[action]} \nReward: {reward}')
    env.render('human')
    print(info)
    
    time.sleep(.5)

print('\nTotal Moves: {}'.format(moves))

 0  0  4  0
 0  2 16  0
 2  0  4  4
 4  0  2 16
Score: 124
Move: 2
{'score': 124, 'won': False}


KeyboardInterrupt: 

In [7]:
next_state

[[0, 0, 4, 0], [0, 2, 16, 0], [2, 0, 4, 4], [4, 0, 2, 16]]