# Imports and settings

In [1]:
%load_ext autoreload
%autoreload 1

import numpy as np
import os
from environments import HexGame
from networks import CNN, ANN
from mcts import MCTS
from agents import MCTSAgent, ANNAgent
from memory import Memory
from misc import LiteModel
import matplotlib.pyplot as plt
import ray

# Set the notebook name in the environment before wandb is imported and logged in.
os.environ["WANDB_NOTEBOOK_NAME"] = "testing.ipynb"
import wandb
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mda9elkh[0m (use `wandb login --relogin` to force relogin)


True

In [2]:
def preprocessing(filename="/Users/daniel/Documents/AIProg/Assignments/Assignment 2/cases/r_1500_mcts/train_samples"):
    """Load saved training samples and expand every state value into two bits.

    Args:
        filename: Path prefix of the sample files; ``_states.txt`` and
            ``_dists.txt`` are appended to it. The default is the path that
            used to be hard-coded, so ``preprocessing()`` behaves as before.

    Returns:
        A ``(states, dists)`` tuple. ``states`` is a float32 array of shape
        ``(n_samples, 2 * n_values)`` holding the two-bit encoding of each
        value; ``dists`` is the float32 array read from the dists file.
    """
    # ndmin=2 keeps a file with a single sample as one row instead of a 1-D array.
    states = np.loadtxt(filename + '_states.txt', dtype=np.int32, ndmin=2)
    dists = np.loadtxt(filename + '_dists.txt', dtype=np.float32, ndmin=2)

    # Encoding, unchanged from the string-formatting version: a value of 1
    # becomes the bit pair (0, 1); every other value becomes (1, 0).
    # NOTE(review): a 0 in the input is encoded exactly like a 2 — confirm
    # that is intended.
    is_one = states == 1
    encoded = np.empty((states.shape[0], 2 * states.shape[1]), dtype=np.float32)
    encoded[:, 0::2] = ~is_one  # high bit of each pair
    encoded[:, 1::2] = is_one   # low bit of each pair

    return encoded, dists

# Load the encoded states and their target distributions into the notebook namespace.
states, dists = preprocessing()

In [None]:
# Build the self-play stack on a HexGame of size 4: network, rollout agent, MCTS, acting agent, replay memory.
env = HexGame(size=4)
# Network input/output sizes are taken from the environment's state and move encodings.
ann = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))
annagent = ANNAgent(network=ann)
# epsilon starts at 1 and is multiplied by `decay` after every game in the loop further down this cell.
mcts = MCTS(environment=env, rollout_policy_agent=annagent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
agent = MCTSAgent(environment=env, mcts=mcts)
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)

## PLOTTING
# NOTE(review): these lists and axes are created but not used anywhere else in this cell.
train_accuracies = []
train_loss = []

fig = plt.figure(figsize=(12, 5))
gs = fig.add_gridspec(1, 2)
ax1 = fig.add_subplot(gs[0, 0])
ax1.set_title("Accuracy")
ax2 = fig.add_subplot(gs[0, 1])
ax2.set_title("Loss")
## /PLOTTING

# Per-game multiplier for epsilon, chosen so that decay**1000 == 0.05.
decay = 0.05**(1/1000)
# Number of completed training steps; read by save_model() to build the checkpoint suffix.
i=0

def save_model():
    """Save the network, tagging the file with the board size and the current step count.

    Reads the notebook-level ``ann``, ``env`` and ``i`` at call time.
    """
    checkpoint_tag = f"S{env.size}_B{i}"
    ann.save_model(suffix=checkpoint_tag)

# Checkpoint the untrained network first (i is still 0 here).
save_model()

wandb.init(project="hex", config={"mcts": mcts.config, "ann": ann.config})

# Self-play training loop. There is no exit condition: it runs until the cell is interrupted.
while True:
    env.reset()
    # Play one full game with the MCTS agent, registering every position in replay memory.
    while not env.is_game_over:
        best_move, distribution = agent.get_move(greedy=False)
        memory.register("player", env.current_player)
        memory.register("action", best_move)
        memory.register("state", env.ann_state)
        memory.register("distribution", distribution.flatten().tolist())
        env.play(best_move)

    memory.register_result(env.result)

    # NOTE(review): indices 2, 3 and 4 presumably select the state, distribution and
    # result columns in registration order — confirm against Memory.all().
    samples = memory.all()
    result = ann.train_on_batch(samples[2], samples[3], samples[4])
    i += 1

    wandb.log({"accuracy": result["accuracy"], "loss": result["loss"], "epsilon": mcts.epsilon})
    # Decay exploration once per finished game.
    mcts.epsilon *= decay


    # Checkpoint every 20 training steps.
    if i % 20 == 0:
        save_model()


In [2]:
# Rebuilds the same environment / network / MCTS stack as the training cell, without starting training.
env = HexGame(size=4)
ann = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))
# NOTE(review): no environment is passed here; a later cell assigns annagent.environment before use.
annagent = ANNAgent(network=ann)
mcts = MCTS(environment=env, rollout_policy_agent=annagent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
agent = MCTSAgent(environment=env, mcts=mcts)
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)

2022-04-03 23:26:07.061644: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from tensorflow.keras import Sequential 

In [4]:
# Clone the Keras model: rebuild the architecture from its config, then copy the weights across.
model = Sequential.from_config(ann.model.get_config())
model.set_weights(ann.model.get_weights())

In [5]:
from tensorflow.keras.optimizers import Optimizer


In [6]:
# Agent backed by the cloned model, bound to the same environment.
agent1 = ANNAgent(environment=env, network=ANN(model=model))

In [7]:
# Greedy move and distribution from the cloned-model agent.
agent1.get_move(greedy=True)

((0, 1),
 array([0.06404223, 0.06711077, 0.06206467, 0.0631833 , 0.06142657,
        0.06087947, 0.05806388, 0.06501137, 0.06426103, 0.06147993,
        0.06528415, 0.06341712, 0.05925861, 0.06046248, 0.06593056,
        0.05812385], dtype=float32))

In [8]:
# annagent was built without an environment, so attach one before asking it for a move.
annagent.environment = env
annagent.get_move(greedy=True)

((0, 1),
 array([0.06404223, 0.06711077, 0.06206467, 0.0631833 , 0.06142657,
        0.06087947, 0.05806388, 0.06501137, 0.06426103, 0.06147993,
        0.06528415, 0.06341712, 0.05925861, 0.06046248, 0.06593056,
        0.05812385], dtype=float32))

In [10]:
# Convert the cloned Keras model with LiteModel and query an agent backed by the converted model,
# so its output can be compared with the Keras-backed agents.
lmodel = LiteModel.from_keras_model(model)
agent2 = ANNAgent(environment=env, network=ANN(model=lmodel))
agent2.get_move(greedy=True)

INFO:tensorflow:Assets written to: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpevupc_2p/assets


INFO:tensorflow:Assets written to: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpevupc_2p/assets
2022-04-03 23:26:56.521302: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:363] Ignored output_format.
2022-04-03 23:26:56.521321: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:366] Ignored drop_control_dependency.
2022-04-03 23:26:56.521433: I tensorflow/cc/saved_model/reader.cc:43] Reading SavedModel from: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpevupc_2p
2022-04-03 23:26:56.522769: I tensorflow/cc/saved_model/reader.cc:107] Reading meta graph with tags { serve }
2022-04-03 23:26:56.522794: I tensorflow/cc/saved_model/reader.cc:148] Reading SavedModel debug info (if present) from: /var/folders/6t/6j6cdw090wn8tw74l39_zy_00000gp/T/tmpevupc_2p
2022-04-03 23:26:56.528201: I tensorflow/cc/saved_model/loader.cc:210] Restoring SavedModel bundle.
2022-04-03 23:26:56.549386: I tensorflow/cc/saved_model/loader.cc:194] Running initializatio

((0, 1),
 array([0.06404223, 0.06711077, 0.06206467, 0.0631833 , 0.06142657,
        0.06087947, 0.05806388, 0.06501137, 0.06426103, 0.06147994,
        0.06528415, 0.06341712, 0.05925861, 0.06046248, 0.06593056,
        0.05812385], dtype=float32))

AttributeError: 'numpy.ndarray' object has no attribute 'numpy'

In [None]:

@ray.remote
class ANNTrainer:
    """Ray actor that builds its own ANN and trains it on samples from a replay memory."""

    def __init__(self, memory):
        """Keep the replay memory and build the network to train.

        This was spelled ``__initialize__``, which Python never runs as a
        constructor, so ``ANNTrainer.remote(memory)`` had nowhere to pass
        ``memory``.

        Args:
            memory: Replay memory object exposing ``all()``.
                NOTE(review): arguments to an actor are serialized, so this is
                presumably a copy of the driver's object — confirm that is acceptable.
        """
        self.memory = memory
        # Layer sizes are read from the notebook-level ``env``.
        self.ann = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))

    def train(self):
        """Run one training step on everything currently in memory.

        The previous version had no ``self`` parameter and referenced the
        undefined names ``ann`` and ``samples``.

        Returns:
            Whatever ``ANN.train_on_batch`` returns.
        """
        samples = self.memory.all()
        # Same column indices as the single-process training loop in this notebook.
        return self.ann.train_on_batch(samples[2], samples[3], samples[4])


@ray.remote
class ANETPredictor:
    """Placeholder Ray actor; no behaviour implemented yet."""

    def __init__(self, storage):
        # Was named ``__initialize__``, which is never invoked on construction.
        # ``storage`` is accepted but not used yet.
        pass


@ray.remote
class MCTSWorker:
    """Ray actor that plays self-play games with MCTS and registers the samples in memory.

    NOTE(review): a later cell defines another class with this same name,
    which replaces this one in the notebook namespace once that cell has run.
    """

    def __init__(self, memory, anet_config, storage=None):
        """Build a private environment, model and MCTS agent.

        This was spelled ``__initialize__``, which Python never runs as a
        constructor.

        Args:
            memory: Replay memory exposing ``register`` and ``register_result``.
                NOTE(review): actor arguments are serialized, so samples
                registered here are presumably not visible to the driver — confirm.
            anet_config: Keras ``Sequential`` config used to rebuild the model.
            storage: Optional source of fresh weights: a mapping with a
                ``"weights"`` entry, or a Ray object reference to one. When
                omitted, the model keeps its initial weights.
        """
        self.environment = HexGame(size=4)
        self.model = Sequential.from_config(anet_config)
        self.memory = memory
        self.storage = storage
        # The search must run on this worker's own environment, not the notebook-level one.
        rollout_agent = ANNAgent(environment=self.environment, network=ANN(model=self.model))
        self.mcts = MCTS(environment=self.environment, rollout_policy_agent=rollout_agent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
        self.agent = MCTSAgent(environment=self.environment, mcts=self.mcts)

    def simulate(self):
        """Play one full game, registering every position and the final result."""
        self.set_weights()
        self.environment.reset()
        # The loop previously tested the global ``env`` while playing on
        # ``self.environment``, and used the global ``agent`` and ``memory``.
        while not self.environment.is_game_over:
            best_move, distribution = self.agent.get_move(greedy=False)

            self.memory.register("player", self.environment.current_player)
            self.memory.register("action", best_move)
            self.memory.register("state", self.environment.ann_state)
            self.memory.register("distribution", distribution.flatten().tolist())

            self.environment.play(best_move)

        self.memory.register_result(self.environment.result)

    def set_weights(self):
        """Load the latest weights from ``storage`` into the model, if a storage was given."""
        if self.storage is None:
            return
        snapshot = self.storage
        # A reference nested in another argument arrives unresolved; resolve it here.
        if isinstance(snapshot, ray.ObjectRef):
            snapshot = ray.get(snapshot)
        self.model.set_weights(snapshot["weights"])


# Memory
memory = Memory(sample_size=1.0, queue_size=10000, verbose=False)
ray_memory = ray.put(memory)

# Model config handed to the workers; this name was previously used without being defined.
anet_config = ann.model.get_config()

# Workers
trainer = ANNTrainer.remote(memory)
mcts_workers = [MCTSWorker.remote(memory, anet_config) for _ in range(2)]

# Testing a complete solution

In [2]:
import ray
ray.init()

import numpy as np
from environments import HexGame
from networks import ANN
from mcts import MCTS
from agents import MCTSAgent, ANNAgent
from misc import LiteModel

class GameHistory:
    """Record of one game: a per-move log plus the final result."""

    def __init__(self):
        # The result stays 0 until register_result() is called.
        self.result = 0
        self.players = []
        self.states = []
        self.distributions = []
        self.moves = []

    def register_result(self, result):
        """Store the outcome of the game."""
        self.result = result

    def register_move(self, player, move, state, distribution):
        """Append one move's data to the four parallel per-move lists."""
        entries = (
            (self.players, player),
            (self.states, state),
            (self.moves, move),
            (self.distributions, distribution),
        )
        for log, entry in entries:
            log.append(entry)

    def stack(self):
        """Return one ``[player, move, state, distribution, result]`` row per registered state."""
        return [
            [self.players[k], self.moves[k], self.states[k], self.distributions[k], self.result]
            for k in range(len(self.states))
        ]

@ray.remote
class Storage:
    """Ray actor wrapping a dict that serves as a key/value store."""

    def __init__(self):
        # "terminate" exists from the start so it can be read before anything is written.
        self.data = dict(terminate=False)

    def get_info(self, key):
        """Return the value stored under ``key``; a missing key raises ``KeyError``."""
        value = self.data[key]
        return value

    def set_info(self, key, value):
        """Store ``value`` under ``key``, replacing any previous value."""
        self.data.update({key: value})

    def all(self):
        """Return the whole underlying dict."""
        return self.data

@ray.remote
class Buffer:
    """Ray actor holding finished game histories, keyed by insertion order."""

    def __init__(self):
        self.buffer = {}
        self.num_games = 0
        self.num_samples = 0

    def store(self, game_history): # Game history
        """Add one finished game and update the game and sample counters."""
        self.buffer[self.num_games] = game_history
        self.num_games += 1
        self.num_samples += len(game_history.states)

    def get_batch(self, sample_size):
        """Return ``sample_size`` game histories drawn at random, with replacement.

        The previous call, ``random.choice(keys, sample_size)``, could not run:
        ``random`` was never imported and the stdlib ``random.choice`` takes no
        size argument.

        Returns:
            A list of stored game histories, or an empty list while nothing
            has been stored yet.
        """
        import random  # local import: this cell never imports the module

        keys = list(self.buffer.keys())
        if not keys:
            return []
        return [self.buffer[key] for key in random.choices(keys, k=sample_size)]

    def length(self):
        """Return the total number of stored samples (states), not games."""
        return self.num_samples


@ray.remote
class Trainer:
    """Ray actor that trains the network on buffered games and publishes its weights."""

    def __init__(self, network):
        self.network = network
        self.initialized = False

    def loop(self, storage, buffer):
        """Train until the storage's ``"terminate"`` flag is set.

        Fixes compared with the previous version:
        * actor methods are invoked through ``.remote``; calling
          ``storage.set_info(...)`` directly raises in Ray;
        * new weights are published under ``"nn_weights"``, the key the
          workers read, instead of ``"weights"``;
        * the exit test no longer reads ``"num_played_games"``, a key this
          notebook never writes; it now matches the worker loop's condition;
        * samples from all drawn games are flattened into one batch instead
          of stacking per-game lists of different lengths;
        * an empty buffer is waited on instead of being trained on.

        Args:
            storage: Handle to the ``Storage`` actor.
            buffer: Handle to the ``Buffer`` actor.
        """
        import time  # local import: this cell never imports the module

        if not self.initialized:
            self.initialize_ann(storage)

        while not ray.get(storage.get_info.remote("terminate")):
            # 256 is the number of games drawn, not the number of samples.
            histories = ray.get(buffer.get_batch.remote(256))
            if not histories:
                # Nothing has been played yet; back off briefly instead of spinning.
                time.sleep(0.1)
                continue

            states = np.array([state for history in histories for state in history.states])
            # Distributions are flattened per sample, as the single-process loop in this notebook does.
            distributions = np.array([
                np.asarray(distribution).flatten()
                for history in histories
                for distribution in history.distributions
            ])

            self.network.train_on_batch(states, distributions)

            weights = self.network.model.get_weights()
            ray.get(storage.set_info.remote("nn_weights", weights))

    def initialize_ann(self, storage):
        """Publish the initial weights and model config to storage."""
        weights = self.network.model.get_weights()
        config = self.network.model.get_config()
        # Wait for both writes to finish before reporting the trainer as initialized.
        ray.get([
            storage.set_info.remote("nn_weights", weights),
            storage.set_info.remote("nn_config", config),
        ])
        self.initialized = True

    def store(self):
        """Placeholder for saving the model; not implemented."""
        pass
        # Save model
        

@ray.remote
class MCTSWorker:
    """Ray actor that plays self-play games with MCTS and stores them in the buffer."""

    def __init__(self, model):
        """Build a private environment, a converted model and the MCTS agent.

        Args:
            model: Keras model, converted here with ``LiteModel.from_keras_model``.
        """
        self.initialized = False
        self.environment = HexGame(size=4)
        self.model = LiteModel.from_keras_model(model)
        self.network = ANN(model=self.model)
        self.ann_agent = ANNAgent(environment=self.environment, network=self.network)
        # Search and act on this worker's own environment and search tree. The
        # notebook-level ``env`` and ``mcts`` were used here before, so the
        # search ran on a different board than the one being played.
        self.mcts = MCTS(environment=self.environment, rollout_policy_agent=self.ann_agent, use_time_budget=False, rollouts=1500, c=1.4, epsilon=1)
        self.agent = MCTSAgent(environment=self.environment, mcts=self.mcts)

    def update_weights(self, storage):
        """Rebuild a Keras model from the stored config and weights and load it into the converted model.

        NOTE(review): this raises if the trainer has not yet written
        ``"nn_weights"`` and ``"nn_config"`` — confirm the start-up order.
        """
        weights = ray.get(storage.get_info.remote("nn_weights"))
        config = ray.get(storage.get_info.remote("nn_config"))

        seq_model = Sequential.from_config(config)
        seq_model.set_weights(weights)

        self.model.update_keras_model(seq_model)

    def loop(self, storage, buffer):
        """Play games until the storage's ``"terminate"`` flag is set.

        Each game refreshes the weights, plays to the end, and sends a
        ``GameHistory`` to the buffer.
        """
        while not ray.get(storage.get_info.remote("terminate")):
            self.update_weights(storage)
            self.environment.reset()

            gh = GameHistory()

            # Was ``self.enviroment`` (typo), which raised AttributeError on the first game.
            while not self.environment.is_game_over:
                move, distribution = self.agent.get_move(greedy=False)

                # Store move information in buffer
                # NOTE(review): ``flat_state`` is stored, while the network's input size
                # comes from ``ann_state`` — confirm the two encodings match.
                gh.register_move(self.environment.current_player, move, self.environment.flat_state, distribution)

                self.environment.play(move)

            # Record the outcome; without this every history kept its default result of 0.
            gh.register_result(self.environment.result)
            buffer.store.remote(gh)

In [12]:
#### STARTING HERE ####
import time  # used by the monitoring loop below; not imported anywhere else in the notebook

network = ANN.build(learning_rate=0.001, input_size=len(env.ann_state), output_size=len(env.legal_binary_moves), activation="relu", optimizer="adam", hidden_size=(100, 100))

buffer = Buffer.remote()
storage = Storage.remote()
trainer = Trainer.remote(network)
workers = [MCTSWorker.remote(network.model) for _ in range(2)]


# Run loops
trainer.loop.remote(storage, buffer)
# Start the workers created above. The loop previously iterated over
# `mcts_workers`, a name from an earlier draft cell.
for worker in workers:
    worker.loop.remote(storage, buffer)

# Print the number of buffered samples twice a second until interrupted,
# then raise the terminate flag so the actor loops can exit.
try:
    while True:
        print(ray.get(buffer.length.remote()))
        time.sleep(0.5)
except KeyboardInterrupt:
    ray.get(storage.set_info.remote("terminate", True))