# Tic Tac Toe - Kaggle Environment

In [10]:
from kaggle_environments import make

# Create the Tic Tac Toe environment, then report its name/version and the
# agents it lists by default.
env = make("tictactoe")

identity = (env.name, env.version)
print(*identity)

bundled_agents = tuple(env.agents)
print("Default Agents: ", *bundled_agents)

tictactoe 1.0.0
Default Agents:  random reaction


## TL;DR

In [2]:
def agent(observation):
    """Pick a move using a fixed preference order over the nine cells.

    The board is read from ``observation.board`` (a flat sequence indexed
    0-8). Cells are tried centre first, then the four corners, then the four
    edges; the first one whose value is 0 (empty) is returned.

    Raises:
        IndexError: if none of the nine cells is empty.
    """
    cells = observation.board
    free = 0
    # Centre, corners, edges.
    preferred = (4, 0, 2, 6, 8, 1, 3, 5, 7)

    # Collect every open cell in preference order, then take the best one.
    open_cells = []
    for cell in preferred:
        if cells[cell] == free:
            open_cells.append(cell)
    return open_cells[0]


# Fresh environment for the demo game.
# NOTE(review): debug=True presumably surfaces agent errors/logging; confirm
# against the kaggle_environments documentation.
env = make("tictactoe", debug=True)

# Play one game: the `agent` function defined above against the built-in
# "random" agent. The opponent is random, so the result may differ per run.
env.run([agent, "random"])
# Display the game in the notebook.
# NOTE(review): mode="ipython" is presumably the inline notebook renderer; confirm.
env.render(mode="ipython")

## Specification

In [3]:
import json

# Pretty-print the observation and action schemas from the environment
# specification, one after the other, as indented JSON with sorted keys.
for label, field in (("Observation:", "observation"), ("Action:", "action")):
    schema = getattr(env.specification, field)
    print(label, json.dumps(schema, indent=4, sort_keys=True))

Observation: {
    "board": {
        "default": [
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0,
            0
        ],
        "description": "Serialized 3x3 grid. 0 = Empty, 1 = X, 2 = O",
        "maxItems": 9,
        "minItems": 9,
        "type": "array"
    },
    "mark": {
        "description": "Mark for the agent to use",
        "enum": [
            1,
            2
        ]
    }
}
Action: {
    "default": 0,
    "description": "Position to place a mark on the board.",
    "maximum": 8,
    "minimum": 0,
    "type": "integer"
}


## Training using Gym

In [9]:
import gym

from kaggle_environments import make


class TicTacToe(gym.Env):
    """Minimal gym.Env wrapper around the Kaggle "tictactoe" environment.

    ``step`` and ``reset`` delegate to a trainer obtained from the underlying
    environment; ``render`` delegates to the environment itself.
    """

    def __init__(self):
        """Create the underlying environment, its trainer, and the gym spaces."""
        self.env = make("tictactoe", debug=True)
        # NOTE(review): presumably `None` marks the seat driven through
        # step()/reset() and "random" is the built-in opponent; confirm
        # against the kaggle_environments documentation.
        self.trainer = self.env.train([None, "random"])

        # Define required gym fields (examples):
        # Nine discrete actions, one per board position.
        self.action_space = gym.spaces.Discrete(9)
        # NOTE(review): looks like a placeholder. The observations used by the
        # driver loop expose a `.board` list, which Discrete(27) does not
        # obviously describe; confirm before relying on this space.
        self.observation_space = gym.spaces.Discrete(27)

    def step(self, action):
        """Forward ``action`` to the trainer and return its result unchanged.

        The driver code in this notebook unpacks the result as
        ``(obs, reward, done, info)``.
        """
        return self.trainer.step(action)

    def reset(self):
        """Reset via the trainer and return whatever it returns.

        The driver code in this notebook treats the result as the initial
        observation.
        """
        return self.trainer.reset()

    def render(self, **kwargs):
        """Render the underlying environment, passing keyword arguments through."""
        return self.env.render(**kwargs)


# Play one episode through the gym wrapper using a trivial policy.
# Note: this rebinds `env`, which earlier cells bound to the raw Kaggle
# environment returned by make().
env = TicTacToe()

obs = env.reset()
done = False
while not done:
    # Policy: take the lowest-numbered cell that is still empty (value 0).
    empty_cells = [idx for idx, mark in enumerate(obs.board) if mark == 0]
    action = empty_cells[0]
    obs, reward, done, info = env.step(action)
env.render()

 X | X | O 
---+---+---
 X | O |   
---+---+---
 O |   |   
