# Zoning Game AlphaZero Demo

## Individually construct bits and pieces

In [1]:
from nsai_experiments.general_az_1p.game import Game
from nsai_experiments.general_az_1p.policy_value_net import PolicyValueNet
from nsai_experiments.general_az_1p.agent import Agent

from nsai_experiments.general_az_1p.zoning_game.zoning_game_az_impl import ZoningGameGame
from nsai_experiments.general_az_1p.zoning_game.zoning_game_az_impl import ZoningGamePolicyValueNet

### The `Game`

In [2]:
# Instantiate the zoning game and check that it is an instance of the generic
# `Game` base class used by the AlphaZero code.
mygame = ZoningGameGame()
assert isinstance(mygame, Game)
# Reset with a fixed seed so this demo is repeatable
# (presumably the seed determines the initial grid and tile queue printed below -- confirm in `reset_wrapper`).
mygame.reset_wrapper(seed=47)
# `render()` is used here as returning a readable, file-like object; `.read()` pulls out the text view.
# NOTE(review): the type-ignore suggests `render()` is annotated as a union (e.g. may be None) -- confirm.
print(mygame.render().read())  # type: ignore[union-attr]

Tile grid:
[[0 0 0 5 1 0]
 [0 4 0 0 0 0]
 [0 3 0 3 2 4]
 [0 0 0 0 0 0]
 [2 0 0 0 0 0]
 [0 0 0 0 3 1]]
Tile queue (leftmost next): [1 4 2 1 5 2 3 3 2 3 1 1 1 4 2 2 1 5 5 2 1 5 3 2 5 1 0 0 0 0 0 0 0 0 0 0]
where 0 = EMPTY, 1 = RESIDENTIAL, 2 = COMMERCIAL, 3 = INDUSTRIAL, 4 = DOWNTOWN, 5 = PARK.
After 0 moves, current grid score is 3; terminated = False, truncated = False.



### The `PolicyValueNet`

In [3]:
import torch
from nsai_experiments.zoning_game.notebook_utils import get_zg_data
from nsai_experiments.zoning_game.zg_policy import create_policy_indiv_greedy

# Seed torch's global RNG so the shuffle and the split below are repeatable.
# The order of the RNG-consuming calls (`randperm`, then `random_split`) matters for reproducibility.
torch.manual_seed(47)
# Dataset configuration: number of games, on-disk data directory, and the
# fractions of the dataset held out for validation and testing.
n_games = 20_000
savedir = "../../zoning_game/zg_data"
valid_frac = 0.15
test_frac = 0.15

# Fetch the dataset for the individual-greedy policy. The recorded output shows it being
# loaded from a cache under `savedir`; presumably it is generated when no cache exists -- confirm.
states_tensor, values_tensor, moves_tensor = get_zg_data(create_policy_indiv_greedy, n_games = n_games, savedir = savedir)
# Shuffle all three tensors with one shared permutation so rows stay aligned.
indices = torch.randperm(len(values_tensor))
# NOTE: the dataset stores (states, moves, values), which is a different order from the
# (states, values, moves) tuple returned by `get_zg_data` above.
full_dataset_3 = torch.utils.data.TensorDataset(states_tensor[indices], moves_tensor[indices], values_tensor[indices])

# Validation and test sizes are truncated to integers; the training set takes the remainder,
# so the three sizes always sum to the full dataset length.
valid_size_3 = int(valid_frac * len(full_dataset_3))
test_size_3 = int(test_frac * len(full_dataset_3))
train_size_3 = len(full_dataset_3) - valid_size_3 - test_size_3
# NOTE(review): `random_split` assigns samples at random as well, so the explicit `randperm`
# shuffle above looks redundant; removing it would change which samples land in each split.
train_dataset_3, valid_dataset_3, test_dataset_3 = torch.utils.data.random_split(full_dataset_3, [train_size_3, valid_size_3, test_size_3])
print("Done loading, shuffling, splitting data")

Loading data from disk: ../../zoning_game/zg_data/create_policy_indiv_greedy__20000
Done loading, shuffling, splitting data


In [4]:
# Build the policy/value network with its training configured for 3 epochs, and check that
# it is an instance of the generic `PolicyValueNet` base class.
mynet = ZoningGamePolicyValueNet(training_params={"epochs": 3})
assert isinstance(mynet, PolicyValueNet)
# Training on the greedy-policy dataset is currently disabled; uncomment the next line to
# train before predicting.
# mynet.train(train_dataset_3, needs_reshape=False)
# Display the network's prediction for the game's current observation. The recorded output is a
# 36-entry float32 array plus a scalar -- presumably (per-move probabilities, value estimate); confirm.
# NOTE(review): with training skipped the array is close to uniform, so the network is presumably
# untrained at this point -- confirm the constructor does not load saved weights.
mynet.predict(mygame.obs)

(array([0.02878359, 0.02750564, 0.02725394, 0.02644118, 0.02703556,
        0.02739549, 0.02875161, 0.02773597, 0.02912001, 0.02731342,
        0.02837674, 0.02837252, 0.02701781, 0.02933569, 0.02749422,
        0.02898956, 0.02915286, 0.025719  , 0.02784868, 0.02766767,
        0.02893063, 0.02690739, 0.02791698, 0.0266863 , 0.02847183,
        0.0276081 , 0.02711248, 0.02722818, 0.02664321, 0.02836256,
        0.02602571, 0.02749578, 0.02793976, 0.02626782, 0.02930332,
        0.02978883], dtype=float32),
 0.02528020739555359)

### The `Agent` and `MCTS`

In [5]:
import logging

# Limit the root logger to WARNING and above. `logging.WARNING` is the documented level name;
# the previously used `logging.WARN` is an undocumented legacy alias for the same value.
# NOTE(review): the "Taking move ..." lines still appear in the recorded output at this level,
# so they are presumably printed or logged at WARNING or higher -- confirm.
# The trailing TODO is from the original author; its intent is not recorded.
logging.getLogger().setLevel(logging.WARNING)  # TODO
# Pair the game with the network in an agent and play one full self-play game.
myagent = Agent(mygame, mynet)
train_examples = myagent.play_single_game()
# Number of training examples collected (26 in the recorded run, matching the 26 moves logged).
print(len(train_examples))

Taking move 2 with probability 0.04
Taking move 14 with probability 0.04
Taking move 1 with probability 0.05
Taking move 30 with probability 0.04
Taking move 23 with probability 0.04
Taking move 33 with probability 0.05
Taking move 27 with probability 0.05
Taking move 29 with probability 0.06
Taking move 22 with probability 0.05
Taking move 8 with probability 0.06
Taking move 5 with probability 0.06
Taking move 9 with probability 0.06
Taking move 32 with probability 0.07
Taking move 0 with probability 0.08
Taking move 6 with probability 0.08
Taking move 11 with probability 0.09
Taking move 21 with probability 0.08
Taking move 28 with probability 0.11
Taking move 25 with probability 0.13
Taking move 12 with probability 0.13
Taking move 18 with probability 0.62
Taking move 31 with probability 0.96
Taking move 20 with probability 1.00
Taking move 26 with probability 1.00
Taking move 19 with probability 1.00
Taking move 10 with probability 1.00
26
